Update support for the UV kernel to accommodate Intel BIOS changes in NVDIMM alignment, which caused UV BIOS to align the memory boundaries on different blocks than the previous UV standard of 2GB.
--
Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so UV BIOS can accommodate Intel BIOS changes for NVDIMM's, which can align these new PMEM modules at other than 2GB boundaries.
A "set order" type of function was used to insure that the memory block size will be a power of two value without requiring a validity check on the size value passed in. 64GB was chosen as the upper limit for memory block size values to accommodate upcoming 4PB systems which have 6 more bits of physical address space (46 becoming 52).
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com --- arch/x86/mm/init_64.c | 20 ++++++++++++++++---- include/linux/memory.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-)
--- linux.orig/arch/x86/mm/init_64.c +++ linux/arch/x86/mm/init_64.c @@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr) /* Amount of ram needed to start using large blocks */ #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30)
+/* Adjustable memory block size */ +static unsigned long set_memory_block_size; +int __init set_memory_block_size_order(unsigned int order) +{ + unsigned long size = 1UL << order; + + if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE) + return -EINVAL; + + set_memory_block_size = size; + return 0; +} + static unsigned long probe_memory_block_size(void) { unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; unsigned long bz;
- /* If this is UV system, always set 2G block size */ - if (is_uv_system()) { - bz = MAX_BLOCK_SIZE; + /* If memory block size has been set, then use it */ + bz = set_memory_block_size; + if (bz) goto done; - }
/* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { --- linux.orig/include/linux/memory.h +++ linux/include/linux/memory.h @@ -38,6 +38,7 @@ struct memory_block {
int arch_get_memory_phys_device(unsigned long start_pfn); unsigned long memory_block_size_bytes(void); +int set_memory_block_size_order(unsigned int order);
/* These states are exposed to userspace as text strings in sysfs */ #define MEM_ONLINE (1<<0) /* exposed to userspace */
--
On Thu, May 10, 2018 at 06:18:33PM -0500, mike.travis@hpe.com wrote:
Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so UV BIOS can accommodate Intel BIOS changes for NVDIMM's, which can align these new PMEM modules at other than 2GB boundaries.
A "set order" type of function was used to insure that the memory block size will be a power of two value without requiring a validity check on the size value passed in. 64GB was chosen as the upper limit for memory block size values to accommodate upcoming 4PB systems which have 6 more bits of physical address space (46 becoming 52).
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com
arch/x86/mm/init_64.c | 20 ++++++++++++++++---- include/linux/memory.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-)
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
Add a call to the new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This accommodates changes in the Intel BIOS, and therefore UV BIOS, which now can align boundaries different than the previous UV standard of 2GB. It also flags any UV Mem boundaries that cause a change in the mem block size boundary.
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 49 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-)
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c +++ linux/arch/x86/kernel/apic/x2apic_uv_x.c @@ -26,6 +26,7 @@ #include <linux/delay.h> #include <linux/crash_dump.h> #include <linux/reboot.h> +#include <linux/memory.h>
#include <asm/uv/uv_mmrs.h> #include <asm/uv/uv_hub.h> @@ -392,6 +393,40 @@ extern int uv_hub_info_version(void) } EXPORT_SYMBOL(uv_hub_info_version);
+/* Default UV memory block size is 2GB */ +static unsigned long mem_block_size = (2UL << 30); + +static __init int adj_blksize(u32 lgre) +{ + unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT; + unsigned long size; + + for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1) + if (IS_ALIGNED(base, size)) + break; + + if (size >= mem_block_size) + return 0; + + mem_block_size = size; + return 1; +} + +static __init void set_block_size(void) +{ + unsigned int order = ffs(mem_block_size); + + if (order) { + /* adjust for ffs return of 1..64 */ + set_memory_block_size_order(order - 1); + pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size); + } else { + /* bad or zero value, default to 1UL << 31 (2GB) */ + pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size); + set_memory_block_size_order(31); + } +} + /* Build GAM range lookup table: */ static __init void build_uv_gr_table(void) { @@ -1180,23 +1215,30 @@ static void __init decode_gam_rng_tbl(un << UV_GAM_RANGE_SHFT); int order = 0; char suffix[] = " KMGTPE"; + int flag = ' ';
while (size > 9999 && order < sizeof(suffix)) { size /= 1024; order++; }
+ /* adjust max block size to current range start */ + if (gre->type == 1 || gre->type == 2) + if (adj_blksize(lgre)) + flag = '*'; + if (!index) { pr_info("UV: GAM Range Table...\n"); - pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); + pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, - size, suffix[order], + flag, size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode);
+ /* update to next range start */ lgre = gre->limit; if (sock_min > gre->sockid) sock_min = gre->sockid; @@ -1427,6 +1469,7 @@ static void __init uv_system_init_hub(vo
build_socket_tables(); build_uv_gr_table(); + set_block_size(); uv_init_hub_info(&hub_info); uv_possible_blades = num_possible_nodes(); if (!_node_to_pnode)
--
On Thu, May 10, 2018 at 06:18:34PM -0500, mike.travis@hpe.com wrote:
Add a call to the new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This accommodates changes in the Intel BIOS, and therefore UV BIOS, which now can align boundaries different than the previous UV standard of 2GB. It also flags any UV Mem boundaries that cause a change in the mem block size boundary.
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com
arch/x86/kernel/apic/x2apic_uv_x.c | 49 ++++++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-)
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
Add a kernel parameter that allows setting UV memory block size. This is to provide an adjustment for new forms of PMEM and other DIMM memory that might require alignment restrictions other than scanning the global address table for the required minimum alignment. The value set will be further adjusted by both the GAM range table scan as well as restrictions imposed by set_memory_block_size_order().
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com --- arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
--- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c +++ linux/arch/x86/kernel/apic/x2apic_uv_x.c @@ -396,6 +396,17 @@ EXPORT_SYMBOL(uv_hub_info_version); /* Default UV memory block size is 2GB */ static unsigned long mem_block_size = (2UL << 30);
+/* Kernel parameter to specify UV mem block size */ +static int parse_mem_block_size(char *ptr) +{ + unsigned long size = memparse(ptr, NULL); + + /* Size will be rounded down by set_block_size() below */ + mem_block_size = size; + return 0; +} +early_param("uv_memblksize", parse_mem_block_size); + static __init int adj_blksize(u32 lgre) { unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT;
--
On Thu, May 10, 2018 at 06:18:35PM -0500, mike.travis@hpe.com wrote:
Add a kernel parameter that allows setting UV memory block size. This is to provide an adjustment for new forms of PMEM and other DIMM memory that might require alignment restrictions other than scanning the global address table for the required minimum alignment. The value set will be further adjusted by both the GAM range table scan as well as restrictions imposed by set_memory_block_size_order().
Signed-off-by: Mike Travis mike.travis@hpe.com Reviewed-by: Andrew Banman andrew.banman@hpe.com
arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++++++++ 1 file changed, 11 insertions(+)
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
On Thu, May 10, 2018 at 06:18:32PM -0500, mike.travis@hpe.com wrote:
Update support for the UV kernel to accommodate Intel BIOS changes in NVDIMM alignment, which caused UV BIOS to align the memory boundaries on different blocks than the previous UV standard of 2GB.
--
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
On Thu 10-05-18 18:18:32, mike.travis@hpe.com wrote:
Update support for the UV kernel to accommodate Intel BIOS changes in NVDIMM alignment, which caused UV BIOS to align the memory boundaries on different blocks than the previous UV standard of 2GB.
Please elaborate (much) more. What is the actual problem and how is the patchset addressing it.
[re-including everyone for any other comments about these patches...]
On 5/10/2018 11:48 PM, Michal Hocko wrote:
On Thu 10-05-18 18:18:32, mike.travis@hpe.com wrote:
Update support for the UV kernel to accommodate Intel BIOS changes in NVDIMM alignment, which caused UV BIOS to align the memory boundaries on different blocks than the previous UV standard of 2GB.
Please elaborate (much) more. What is the actual problem and how is the patchset addressing it.
On 5/15/2018 1:55 AM, Michal Hocko wrote:
On Fri 11-05-18 08:08:14, Mike Travis wrote: [...]
If you think I need this more detailed explanation in the patch
descriptions
themselves, I'll add it.
Yes we definitely want this information along with a high level description of how this got fixed. Incomplete memblocks need to be handled gracefully (especially when blocks are 2GB in size).
Thanks!
Hi Michal,
I will add more info but this patch does not address anything about incomplete memblocks. They have existed in 2GB mem block size form since 2009 (v2.6) with the first UV1 system release. I am not changing any of that handling.
The fixing part was to adapt to what Intel BIOS was using as a different boundary to align the PMEM NVDIMMs. This is explained in both patch 1 and patch 2:
"Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so UV BIOS can accommodate Intel BIOS changes for NVDIMM's, which can align these new PMEM modules at other than 2GB boundaries."
"Add a call to the new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This accommodates changes in the Intel BIOS, and therefore UV BIOS, which now can align boundaries different than the previous UV standard of 2GB. It also flags any UV Mem boundaries that cause a change in the mem block size boundary."
I can't explain why the Intel BIOS changed the boundaries, it probably has something to do with accommodating other areas of the NVDIMMs physical address space. This caused a boundary alignment to be less than 2GB which UV had been using. From one of the UV BIOS engineers I found out that Intel really only guarantees a 64MB boundary (which is actually less than the current Linux minimum of 128MB.) Our own UVHUB requirements dictated that 2GB was an acceptable boundary up until now.
So let me know how much more info is needed on how I need to explain _this_ change.
Thanks, Mike
On Tue 15-05-18 09:08:10, Mike Travis wrote: [...]
Hi Michal,
I will add more info but this patch does not address anything about incomplete memblocks. They have existed in 2GB mem block size form since 2009 (v2.6) with the first UV1 system release. I am not changing any of that handling.
The fixing part was to adapt to what Intel BIOS was using as a different boundary to align the PMEM NVDIMMs. This is explained in both patch 1 and patch 2:
"Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so UV BIOS can accommodate Intel BIOS changes for NVDIMM's, which can align these new PMEM modules at other than 2GB boundaries."
"Add a call to the new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This accommodates changes in the Intel BIOS, and therefore UV BIOS, which now can align boundaries different than the previous UV standard of 2GB. It also flags any UV Mem boundaries that cause a change in the mem block size boundary."
I can't explain why the Intel BIOS changed the boundaries, it probably has something to do with accommodating other areas of the NVDIMMs physical address space. This caused a boundary alignment to be less than 2GB which UV had been using. From one of the UV BIOS engineers I found out that Intel really only guarantees a 64MB boundary (which is actually less than the current Linux minimum of 128MB.) Our own UVHUB requirements dictated that 2GB was an acceptable boundary up until now.
So let me know how much more info is needed on how I need to explain _this_ change.
Thanks for the additional information. It helps to some extend but still feel rather magic. It doesn't explain why this is UV specific and require an UV specific fix. Why cannot we fix the generic hotplug code instead and potentially allow other 128 unaigned memory ranges instead?
linux-stable-mirror@lists.linaro.org