From: Alexander Nyberg <alexn@dsv.su.se>

Introduces CONFIG_PAGE_OWNER, which keeps track of the call chain under which
each page was allocated.  Includes a user-space helper in
Documentation/page_owner.c to sort the enormous amount of output this may
produce (thanks tridge).

Information available through /proc/page_owner
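
For example, to collect and sort the data (the ./sort invocation follows the
example at the top of Documentation/page_owner.c; building it with a plain
gcc invocation is an assumption):

	gcc -o sort Documentation/page_owner.c
	cat /proc/page_owner > page_owner.txt
	./sort page_owner.txt sorted_page_owner.txt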

x86_64 introduces some stack noise in certain call chains, so for exact
output x86 with CONFIG_FRAME_POINTER is suggested.  Tested on x86, x86 with
CONFIG_FRAME_POINTER, and x86_64.
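
A minimal .config fragment for the suggested configuration might look like
this (a sketch: DEBUG_KERNEL comes from the Kconfig dependency below,
FRAME_POINTER from the suggestion above):

	CONFIG_DEBUG_KERNEL=y
	CONFIG_FRAME_POINTER=y
	CONFIG_PAGE_OWNER=y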

Output looks like:

4819 times:
Page allocated via order 0, mask 0x50
[0xc012b7b9] find_lock_page+25
[0xc012b8c8] find_or_create_page+152
[0xc0147d74] grow_dev_page+36
[0xc0148164] __find_get_block+84
[0xc0147ebc] __getblk_slow+124
[0xc0148164] __find_get_block+84
[0xc01481e7] __getblk+55
[0xc0185d14] do_readahead+100

From: Dave Hansen <haveblue@us.ibm.com>

	make page_owner handle non-contiguous page ranges

Signed-off-by: Alexander Nyberg <alexn@dsv.su.se>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---

 Documentation/page_owner.c |  140 +++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/proc_misc.c        |   68 +++++++++++++++++++++
 include/linux/mm.h         |    5 +
 lib/Kconfig.debug          |   10 +++
 mm/page_alloc.c            |   57 ++++++++++++++++++
 5 files changed, 280 insertions(+)

diff -puN /dev/null Documentation/page_owner.c
--- /dev/null	Thu Apr 11 07:25:15 2002
+++ 25-akpm/Documentation/page_owner.c	Tue Jun 28 17:37:23 2005
@@ -0,0 +1,140 @@
+/*
+ * User-space helper to sort the output of /proc/page_owner
+ *
+ * Example use:
+ * cat /proc/page_owner > page_owner.txt
+ * ./sort page_owner.txt sorted_page_owner.txt
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+struct block_list {
+	char *txt;
+	int len;
+	int num;
+};
+
+
+static struct block_list *list;
+static int list_size;
+static int max_size;
+
+struct block_list *block_head;
+
+int read_block(char *buf, FILE *fin)
+{
+	int ret = 0;
+	int hit = 0;
+	char *curr = buf;
+
+	for (;;) {
+		int c = getc(fin);	/* getc() returns an int; EOF is not a valid char */
+		if (c == EOF) return -1;
+		*curr = c;
+		ret++;
+		if (*curr == '\n' && hit == 1)
+			return ret - 1;
+		else if (*curr == '\n')
+			hit = 1;
+		else
+			hit = 0;
+		curr++;
+	}
+}
+
+static int compare_txt(struct block_list *l1, struct block_list *l2)
+{
+	return strcmp(l1->txt, l2->txt);
+}
+
+static int compare_num(struct block_list *l1, struct block_list *l2)
+{
+	return l2->num - l1->num;
+}
+
+static void add_list(char *buf, int len)
+{
+	if (list_size != 0 &&
+	    len == list[list_size-1].len &&
+	    memcmp(buf, list[list_size-1].txt, len) == 0) {
+		list[list_size-1].num++;
+		return;
+	}
+	if (list_size == max_size) {
+		printf("max_size too small??\n");
+		exit(1);
+	}
+	list[list_size].txt = malloc(len+1);
+	list[list_size].len = len;
+	list[list_size].num = 1;
+	memcpy(list[list_size].txt, buf, len);
+	list[list_size].txt[len] = 0;
+	list_size++;
+	if (list_size % 1000 == 0) {
+		printf("loaded %d\r", list_size);
+		fflush(stdout);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	FILE *fin, *fout;
+	char buf[1024];
+	int ret, i, count;
+	struct block_list *list2;
+	struct stat st;
+
+	if (argc != 3) {
+		printf("Usage: ./sort <input> <output>\n");
+		exit(2);
+	}
+	fin = fopen(argv[1], "r");
+	fout = fopen(argv[2], "w");
+	if (!fin || !fout) { perror("fopen"); exit(2); }
+
+	fstat(fileno(fin), &st);
+	max_size = st.st_size / 100; /* hack ... */
+
+	list = malloc(max_size * sizeof(*list));
+
+	for (;;) {
+		ret = read_block(buf, fin);
+		if (ret < 0)
+			break;
+
+		buf[ret] = '\0';
+		add_list(buf, ret);
+	}
+
+	printf("loaded %d\n", list_size);
+
+	printf("sorting ....\n");
+
+	qsort(list, list_size, sizeof(list[0]), compare_txt);
+
+	list2 = malloc(sizeof(*list) * list_size);
+
+	printf("culling\n");
+
+	for (i = count = 0; i < list_size; i++) {
+		if (count == 0 ||
+		    strcmp(list2[count-1].txt, list[i].txt) != 0) {
+			list2[count++] = list[i];
+		} else {
+			list2[count-1].num += list[i].num;
+		}
+	}
+
+	qsort(list2, count, sizeof(list[0]), compare_num);
+
+	for (i = 0; i < count; i++) {
+		fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
+	}
+	return 0;
+}
diff -puN fs/proc/proc_misc.c~page-owner-tracking-leak-detector fs/proc/proc_misc.c
--- 25/fs/proc/proc_misc.c~page-owner-tracking-leak-detector	Tue Jun 28 17:37:23 2005
+++ 25-akpm/fs/proc/proc_misc.c	Tue Jun 28 17:39:32 2005
@@ -553,6 +553,67 @@ static struct file_operations proc_sysrq
 };
 #endif
 
+#ifdef CONFIG_PAGE_OWNER
+#include <linux/bootmem.h>
+#include <linux/kallsyms.h>
+static ssize_t
+read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	unsigned long start_pfn = min_low_pfn;
+	static unsigned long pfn;
+	struct page *page;
+	char *kbuf, *modname;
+	const char *symname;
+	int ret = 0, next_idx = 1;
+	char namebuf[128];
+	unsigned long offset = 0, symsize;
+	int i;
+
+	pfn = start_pfn + *ppos;
+	page = pfn_to_page(pfn);
+	for (; pfn < max_pfn; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		if (page->order >= 0)
+			break;
+		next_idx++;
+	}
+
+	if (!pfn_valid(pfn))
+		return 0;
+
+	*ppos += next_idx;
+
+	kbuf = kmalloc(count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	ret = snprintf(kbuf, count, "Page allocated via order %d, mask 0x%x\n",
+			page->order, page->gfp_mask);
+
+	for (i = 0; i < 8; i++) {
+		if (!page->trace[i])
+			break;
+		symname = kallsyms_lookup(page->trace[i], &symsize, &offset, &modname, namebuf);
+		ret += snprintf(kbuf + ret, count - ret, "[0x%lx] %s+%lu\n",
+			page->trace[i], namebuf, offset);
+	}
+
+	ret += snprintf(kbuf + ret, count - ret, "\n");
+
+	if (copy_to_user(buf, kbuf, ret))
+		ret = -EFAULT;
+
+	kfree(kbuf);
+	return ret;
+}
+
+static struct file_operations proc_page_owner_operations = {
+	.read		= read_page_owner,
+};
+#endif
+
 struct proc_dir_entry *proc_root_kcore;
 
 void create_seq_entry(char *name, mode_t mode, struct file_operations *f)
@@ -637,4 +698,11 @@ void __init proc_misc_init(void)
 			entry->proc_fops = &ppc_htab_operations;
 	}
 #endif
+#ifdef CONFIG_PAGE_OWNER
+	entry = create_proc_entry("page_owner", S_IWUSR | S_IRUGO, NULL);
+	if (entry) {
+		entry->proc_fops = &proc_page_owner_operations;
+		entry->size = 1024;
+	}
+#endif
 }
diff -puN include/linux/mm.h~page-owner-tracking-leak-detector include/linux/mm.h
--- 25/include/linux/mm.h~page-owner-tracking-leak-detector	Tue Jun 28 17:37:23 2005
+++ 25-akpm/include/linux/mm.h	Tue Jun 28 17:39:32 2005
@@ -257,6 +257,11 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
+#ifdef CONFIG_PAGE_OWNER
+	int order;
+	unsigned int gfp_mask;
+	unsigned long trace[8];
+#endif
 };
 
 /*
diff -puN lib/Kconfig.debug~page-owner-tracking-leak-detector lib/Kconfig.debug
--- 25/lib/Kconfig.debug~page-owner-tracking-leak-detector	Tue Jun 28 17:37:23 2005
+++ 25-akpm/lib/Kconfig.debug	Tue Jun 28 17:37:23 2005
@@ -139,6 +139,16 @@ config DEBUG_IOREMAP
 	  automatically, but we'd like to make it more efficient by not
 	  having to do that.
 
+config PAGE_OWNER
+	bool "Track page owner"
+	depends on DEBUG_KERNEL && X86
+	help
+	  This keeps track of the call chain that allocated each page and may
+	  help to find bare alloc_page(s) leaks.  Eats a fair amount of memory.
+	  See Documentation/page_owner.c for user-space helper.
+
+	  If unsure, say N.
+
 config DEBUG_FS
 	bool "Debug Filesystem"
 	depends on DEBUG_KERNEL
diff -puN mm/page_alloc.c~page-owner-tracking-leak-detector mm/page_alloc.c
--- 25/mm/page_alloc.c~page-owner-tracking-leak-detector	Tue Jun 28 17:37:23 2005
+++ 25-akpm/mm/page_alloc.c	Tue Jun 28 17:39:32 2005
@@ -769,6 +769,43 @@ should_reclaim_zone(struct zone *z, unsi
 	return 1;
 }
 
+#ifdef CONFIG_PAGE_OWNER
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+{
+	return	p > (void *)tinfo &&
+		p < (void *)tinfo + THREAD_SIZE - 3;
+}
+
+static inline void __stack_trace(struct page *page, unsigned long *stack, unsigned long bp)
+{
+	int i = 0;
+	unsigned long addr;
+	struct thread_info *tinfo = (struct thread_info *)
+		((unsigned long)stack & (~(THREAD_SIZE - 1)));
+
+	memset(page->trace, 0, sizeof(long) * 8);
+
+#ifdef	CONFIG_FRAME_POINTER
+	while (valid_stack_ptr(tinfo, (void *)bp)) {
+		addr = *(unsigned long *)(bp + sizeof(long));	/* saved return address */
+		page->trace[i] = addr;
+		if (++i >= 8)
+			break;
+		bp = *(unsigned long *)bp;
+	}
+#else
+	while (valid_stack_ptr(tinfo, stack)) {
+		addr = *stack++;	/* no frame pointers: scan raw stack words */
+		if (__kernel_text_address(addr)) {
+			page->trace[i] = addr;
+			if (++i >= 8)
+				break;
+		}
+	}
+#endif
+}
+#endif /* CONFIG_PAGE_OWNER */
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -969,6 +1006,20 @@ nopage:
 	}
 	return NULL;
 got_pg:
+
+#ifdef CONFIG_PAGE_OWNER
+	{
+	unsigned long address, bp;
+#ifdef CONFIG_X86_64
+	asm ("movq %%rbp, %0" : "=r" (bp) : );
+#else
+	asm ("movl %%ebp, %0" : "=r" (bp) : );
+#endif
+	page->order = (int) order;
+	page->gfp_mask = gfp_mask;
+	__stack_trace(page, &address, bp);	/* &address approximates the current stack pointer */
+	}
+#endif /* CONFIG_PAGE_OWNER */
 	zone_statistics(zonelist, z);
 	return page;
 }
@@ -1022,6 +1073,9 @@ fastcall void __free_pages(struct page *
 			free_hot_page(page);
 		else
 			__free_pages_ok(page, order);
+#ifdef CONFIG_PAGE_OWNER
+		page->order = -1;
+#endif
 	}
 }
 
@@ -1669,6 +1723,9 @@ void __init memmap_init_zone(unsigned lo
 		if (!is_highmem_idx(zone))
 			set_page_address(page, __va(pfn << PAGE_SHIFT));
 #endif
+#ifdef CONFIG_PAGE_OWNER
+		page->order = -1;
+#endif
 	}
 }
 
_