<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
From: Zachary Amsden &lt;zach@vmware.com&gt;

Any architecture that has hardware-updated A/D bits that require
synchronization against other processors during PTE operations can benefit
from doing non-atomic PTE updates during address space destruction.
Originally done on i386, now ported to x86_64.

Doing a read/write pair instead of an xchg() operation saves the implicit
lock, which turns out to be a big win on 32-bit (especially with PAE).

Signed-off-by: Zachary Amsden &lt;zach@vmware.com&gt;
Signed-off-by: Andrew Morton &lt;akpm@osdl.org&gt;
---

 include/asm-x86_64/pgtable.h |   14 ++++++++++++++
 1 files changed, 14 insertions(+)

diff -puN include/asm-x86_64/pgtable.h~x86_64-ptep-clear-optimization include/asm-x86_64/pgtable.h
--- devel/include/asm-x86_64/pgtable.h~x86_64-ptep-clear-optimization	2005-08-07 10:40:50.000000000 -0700
+++ devel-akpm/include/asm-x86_64/pgtable.h	2005-08-07 10:40:50.000000000 -0700
@@ -104,6 +104,19 @@ extern inline void pgd_clear (pgd_t * pg
 ((unsigned long) __va(pud_val(pud) &amp; PHYSICAL_PAGE_MASK))
 
 #define ptep_get_and_clear(mm,addr,xp)	__pte(xchg(&amp;(xp)-&gt;pte, 0))
+
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
+{
+	pte_t pte;	/* previous PTE value, returned to the caller */
+	if (full) {	/* non-zero 'full': non-atomic path, a plain read followed by a plain write */
+		pte = *ptep;
+		*ptep = __pte(0);	/* plain store instead of xchg(), avoiding its implicit lock */
+	} else {	/* otherwise fall back to the xchg()-based ptep_get_and_clear() */
+		pte = ptep_get_and_clear(mm, addr, ptep);
+	}
+	return pte;
+}
+
 #define pte_same(a, b)		((a).pte == (b).pte)
 
 #define PMD_SIZE	(1UL &lt;&lt; PMD_SHIFT)
@@ -433,6 +446,7 @@ extern int kern_addr_valid(unsigned long
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 #define __HAVE_ARCH_PTE_SAME
 #include &lt;asm-generic/pgtable.h&gt;
_
</pre></body></html>