From 72d56b560928665b0d0e6da14983d027ea2ca180 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Aug 2011 15:15:08 +0200 Subject: [PATCH 1/4] x86, amd: Avoid cache aliasing penalties on AMD family 15h BugLink: http://bugs.launchpad.net/bugs/862583 This patch provides performance tuning for the "Bulldozer" CPU. With its shared instruction cache there is a chance of generating an excessive number of cache cross-invalidates when running specific workloads on the cores of a compute module. This excessive amount of cross-invalidations can be observed if cache lines backed by shared physical memory alias in bits [14:12] of their virtual addresses, as those bits are used for the index generation. This patch addresses the issue by clearing all the bits in the [14:12] slice of the file mapping's virtual address at generation time, thus forcing those bits the same for all mappings of a single shared library across processes and, in doing so, avoids instruction cache aliases. It also adds the command line option "align_va_addr=(32|64|on|off)" with which virtual address alignment can be enabled for 32-bit or 64-bit x86 individually, or both, or be completely disabled. This change leaves virtual region address allocation on other families and/or vendors unaffected. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1312550110-24160-2-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin (cherry picked from git://tesla.tglx.de/git/linux-2.6-tip x86/cpu commit dfb09f9b7ab03fd367740e541a5caf830ed56726) Signed-off-by: Tim Gardner --- Documentation/kernel-parameters.txt | 13 ++++++ arch/x86/include/asm/elf.h | 31 +++++++++++++ arch/x86/kernel/cpu/amd.c | 13 ++++++ arch/x86/kernel/sys_x86_64.c | 81 +++++++++++++++++++++++++++++++++- arch/x86/mm/mmap.c | 15 ------ arch/x86/vdso/vma.c | 9 ++++ 6 files changed, 144 insertions(+), 18 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 670694f..3df412b 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -302,6 +302,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted. behaviour to be specified. Bit 0 enables warnings, bit 1 enables fixups, and bit 2 sends a segfault. + align_va_addr= [X86-64] + Align virtual addresses by clearing slice [14:12] when + allocating a VMA at process creation time. This option + gives you up to 3% performance improvement on AMD F15h + machines (where it is enabled by default) for a + CPU-intensive style benchmark, and it can vary highly in + a microbenchmark depending on workload and compiler. + + 1: only for 32-bit processes + 2: only for 64-bit processes + on: enable for both 32- and 64-bit processes + off: disable for both 32- and 64-bit processes + amd_iommu= [HW,X86-84] Pass parameters to the AMD IOMMU driver in the system. Possible values are: diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index f2ad216..5f962df 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -4,6 +4,7 @@ /* * ELF register definitions.. */ +#include #include #include @@ -320,4 +321,34 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk +/* + * True on X86_32 or when emulating IA32 on X86_64 + */ +static inline int mmap_is_ia32(void) +{ +#ifdef CONFIG_X86_32 + return 1; +#endif +#ifdef CONFIG_IA32_EMULATION + if (test_thread_flag(TIF_IA32)) + return 1; +#endif + return 0; +} + +/* The first two values are special, do not change. See align_addr() */ +enum align_flags { + ALIGN_VA_32 = BIT(0), + ALIGN_VA_64 = BIT(1), + ALIGN_VDSO = BIT(2), + ALIGN_TOPDOWN = BIT(3), +}; + +struct va_alignment { + int flags; + unsigned long mask; +} ____cacheline_aligned; + +extern struct va_alignment va_align; +extern unsigned long align_addr(unsigned long, struct file *, enum align_flags); #endif /* _ASM_X86_ELF_H */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b13ed39..b0234bc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -458,6 +458,19 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) "with P0 frequency!\n"); } } + + if (c->x86 == 0x15) { + unsigned long upperbit; + u32 cpuid, assoc; + + cpuid = cpuid_edx(0x80000005); + assoc = cpuid >> 16 & 0xff; + upperbit = ((cpuid >> 24) << 10) / assoc; + + va_align.mask = (upperbit - 1) & PAGE_MASK; + va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; + + } } static void __cpuinit init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index ff14a50..aaa8d09 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -18,6 +18,72 @@ #include #include +struct __read_mostly va_alignment va_align = { + .flags = -1, +}; + +/* + * Align a virtual address to avoid aliasing in the I$ on AMD F15h. + * + * @flags denotes the allocation direction - bottomup or topdown - + * or vDSO; see call sites below. + */ +unsigned long align_addr(unsigned long addr, struct file *filp, + enum align_flags flags) +{ + unsigned long tmp_addr; + + /* handle 32- and 64-bit case with a single conditional */ + if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32()))) + return addr; + + if (!(current->flags & PF_RANDOMIZE)) + return addr; + + if (!((flags & ALIGN_VDSO) || filp)) + return addr; + + tmp_addr = addr; + + /* + * We need an address which is <= than the original + * one only when in topdown direction. + */ + if (!(flags & ALIGN_TOPDOWN)) + tmp_addr += va_align.mask; + + tmp_addr &= ~va_align.mask; + + return tmp_addr; +} + +static int __init control_va_addr_alignment(char *str) +{ + /* guard against enabling this on other CPU families */ + if (va_align.flags < 0) + return 1; + + if (*str == 0) + return 1; + + if (*str == '=') + str++; + + if (!strcmp(str, "32")) + va_align.flags = ALIGN_VA_32; + else if (!strcmp(str, "64")) + va_align.flags = ALIGN_VA_64; + else if (!strcmp(str, "off")) + va_align.flags = 0; + else if (!strcmp(str, "on")) + va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; + else + return 0; + + return 1; +} +__setup("align_va_addr", control_va_addr_alignment); + SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, off) @@ -92,6 +158,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, start_addr = addr; full_search: + + addr = align_addr(addr, filp, 0); + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { /* At this point: (!vma || addr < vma->vm_end). */ if (end - len < addr) { @@ -117,6 +186,7 @@ full_search: mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; + addr = align_addr(addr, filp, 0); } } @@ -161,10 +231,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* make sure it can fit in the remaining address space */ if (addr > len) { - vma = find_vma(mm, addr-len); - if (!vma || addr <= vma->vm_start) + unsigned long tmp_addr = align_addr(addr - len, filp, + ALIGN_TOPDOWN); + + vma = find_vma(mm, tmp_addr); + if (!vma || tmp_addr + len <= vma->vm_start) /* remember the address as a hint for next time */ - return mm->free_area_cache = addr-len; + return mm->free_area_cache = tmp_addr; } if (mm->mmap_base < len) @@ -173,6 +246,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, addr = mm->mmap_base-len; do { + addr = align_addr(addr, filp, ALIGN_TOPDOWN); + /* * Lookup failure means no vma is above this address, * else if new region fits below vma->vm_start, diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index ff3a3bf..019ae78 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -51,21 +51,6 @@ static unsigned int stack_maxrandom_size(void) #define MIN_GAP (128*1024*1024UL + stack_maxrandom_size()) #define MAX_GAP (TASK_SIZE/6*5) -/* - * True on X86_32 or when emulating IA32 on X86_64 - */ -static int mmap_is_ia32(void) -{ -#ifdef CONFIG_X86_32 - return 1; -#endif -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - return 1; -#endif - return 0; -} - static int mmap_is_legacy(void) { if (current->personality & ADDR_COMPAT_LAYOUT) diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 7abd2be..caa42ce 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -69,6 +69,15 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) addr = start + (offset << PAGE_SHIFT); if (addr >= end) addr = end; + + /* + * page-align it here so that get_unmapped_area doesn't + * align it wrongfully again to the next page. addr can come in 4K + * unaligned here as a result of stack start randomization. + */ + addr = PAGE_ALIGN(addr); + addr = align_addr(addr, NULL, ALIGN_VDSO); + return addr; } -- 1.7.0.4 From b0dea8e9825ec01cbaf4c698df244a02e3a62105 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Aug 2011 20:01:16 +0200 Subject: [PATCH 2/4] x86: Add a BSP cpu_dev helper BugLink: http://bugs.launchpad.net/bugs/862583 Add a function ptr to struct cpu_dev which is destined to be run only once on the BSP during boot. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20110805180116.GB26217@aftab Signed-off-by: H. Peter Anvin (cherry picked from git://tesla.tglx.de/git/linux-2.6-tip x86/cpu commit a110b5ec7371592eac856ac5c22dc7b518952d44) Signed-off-by: Tim Gardner --- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/cpu.h | 1 + 2 files changed, 4 insertions(+), 0 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5403a40..5b4e484 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -681,6 +681,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) filter_cpuid_features(c, false); setup_smep(c); + + if (this_cpu->c_bsp_init) + this_cpu->c_bsp_init(c); } void __init early_cpu_init(void) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e765633..1b22dcc 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -18,6 +18,7 @@ struct cpu_dev { struct cpu_model_info c_models[4]; void (*c_early_init)(struct cpuinfo_x86 *); + void (*c_bsp_init)(struct cpuinfo_x86 *); void (*c_init)(struct cpuinfo_x86 *); void (*c_identify)(struct cpuinfo_x86 *); unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); -- 1.7.0.4 From e18b99f58323558c281fc960c0f0dfb3d468ae59 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Aug 2011 20:04:09 +0200 Subject: [PATCH 3/4] x86, amd: Move BSP code to cpu_dev helper BugLink: http://bugs.launchpad.net/bugs/862583 Move code which is run once on the BSP during boot into the cpu_dev helper. [ hpa: removed bogus cpu_has -> static_cpu_has conversion ] Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20110805180409.GC26217@aftab Signed-off-by: H. Peter Anvin (cherry picked from git://tesla.tglx.de/git/linux-2.6-tip x86/cpu commit 8fa8b035085e7320c15875c1f6b03b290ca2dd66) Signed-off-by: Tim Gardner --- arch/x86/kernel/cpu/amd.c | 59 ++++++++++++++++++++++----------------------- 1 files changed, 29 insertions(+), 30 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b0234bc..b6e3e87 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -410,6 +410,34 @@ static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c) #endif } +static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c) +{ + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { + + if (c->x86 > 0x10 || + (c->x86 == 0x10 && c->x86_model >= 0x2)) { + u64 val; + + rdmsrl(MSR_K7_HWCR, val); + if (!(val & BIT(24))) + printk(KERN_WARNING FW_BUG "TSC doesn't count " + "with P0 frequency!\n"); + } + } + + if (c->x86 == 0x15) { + unsigned long upperbit; + u32 cpuid, assoc; + + cpuid = cpuid_edx(0x80000005); + assoc = cpuid >> 16 & 0xff; + upperbit = ((cpuid >> 24) << 10) / assoc; + + va_align.mask = (upperbit - 1) & PAGE_MASK; + va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; + } +} + static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) { early_init_amd_mc(c); @@ -441,36 +469,6 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); } #endif - - /* We need to do the following only once */ - if (c != &boot_cpu_data) - return; - - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { - - if (c->x86 > 0x10 || - (c->x86 == 0x10 && c->x86_model >= 0x2)) { - u64 val; - - rdmsrl(MSR_K7_HWCR, val); - if (!(val & BIT(24))) - printk(KERN_WARNING FW_BUG "TSC doesn't count " - "with P0 frequency!\n"); - } - } - - if (c->x86 == 0x15) { - unsigned long upperbit; - u32 cpuid, assoc; - - cpuid = cpuid_edx(0x80000005); - assoc = cpuid >> 16 & 0xff; - upperbit = ((cpuid >> 24) << 10) / assoc; - - va_align.mask = (upperbit - 1) & PAGE_MASK; - va_align.flags = ALIGN_VA_32 | ALIGN_VA_64; - - } } static void __cpuinit init_amd(struct cpuinfo_x86 *c) @@ -692,6 +690,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = { .c_size_cache = amd_size_cache, #endif .c_early_init = early_init_amd, + .c_bsp_init = bsp_init_amd, .c_init = init_amd, .c_x86_vendor = X86_VENDOR_AMD, }; -- 1.7.0.4 From ced0a60251915b657103239c88fee60117988ddf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 6 Aug 2011 14:31:38 +0200 Subject: [PATCH 4/4] x86-32, amd: Move va_align definition to unbreak 32-bit build BugLink: http://bugs.launchpad.net/bugs/862583 hpa reported that dfb09f9b7ab03fd367740e541a5caf830ed56726 breaks 32-bit builds with the following error message: /home/hpa/kernel/linux-tip.cpu/arch/x86/kernel/cpu/amd.c:437: undefined reference to `va_align' /home/hpa/kernel/linux-tip.cpu/arch/x86/kernel/cpu/amd.c:436: undefined reference to `va_align' This is due to the fact that va_align is a global in a 64-bit only compilation unit. Move it to mmap.c where it is visible to both subarches. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1312633899-1131-1-git-send-email-bp@amd64.org Signed-off-by: H. Peter Anvin (cherry picked from git://tesla.tglx.de/git/linux-2.6-tip x86/cpu commit 9387f774d61b01ab71bade85e6d0bfab0b3419bd) Signed-off-by: Tim Gardner --- arch/x86/kernel/sys_x86_64.c | 4 ---- arch/x86/mm/mmap.c | 5 ++++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index aaa8d09..fe7d2da 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -18,10 +18,6 @@ #include #include -struct __read_mostly va_alignment va_align = { - .flags = -1, -}; - /* * Align a virtual address to avoid aliasing in the I$ on AMD F15h. * diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 019ae78..ea640c0 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -31,6 +31,10 @@ #include #include +struct __read_mostly va_alignment va_align = { + .flags = -1, +}; + static unsigned int stack_maxrandom_size(void) { unsigned int max = 0; @@ -42,7 +46,6 @@ static unsigned int stack_maxrandom_size(void) return max; } - /* * Top of mmap area (just below the process stack). * -- 1.7.0.4