Hi Mike,
This commit is making my intel box not boot in mm-unstable :>) I bisected it to this commit.
It seems to be having an unhandled kernel page fault on exec, so I guess not actually making it exec somehow?
It is a pretty standard intel machine (arch linux config, slightly scaled down via make localmodconfig to make it easier to quickly rebuild kernels). Can give more details if needed.
Here is the (vaguely annotated-ish) splat:
:: running early hook [udev] Starting systemd-udevd version 257.1-1-arch :: running hook [udev] :: Triggering uevents... [ 4.231005] usb 1-11: new high-speed USB device number 3 using xhci_hcd [ 4.261646] kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
Guessing the execmem didn't make it exec somehow?
[ 4.269136] BUG: unable to handle page fault for address: ffffffffc0641010 [ 4.276009] #PF: supervisor instruction fetch in kernel mode [ 4.281666] #PF: error_code(0x0011) - permissions violation [ 4.287241] PGD 3027067 P4D 3027067 PUD 3029067 PMD 12a29e063 PTE 8000000129441163 [ 4.294818] Oops: Oops: 0011 [#1] PREEMPT SMP NOPTI [ 4.299695] CPU: 31 UID: 0 PID: 331 Comm: (udev-worker) Tainted: G W 6.13.0-rc4-1-custom #1 5815a74228bcc318a2a967d947d4ca4aa132a5ed [ 4.312992] Tainted: [W]=WARN [ 4.315961] Hardware name: Gigabyte Technology Co., Ltd. Z790 AORUS ELITE X WIFI7/Z790 AORUS ELITE X WIFI7, BIOS F7 09/27/2024 [ 4.327349] RIP: 0010:crc32c_intel_mod_init+0x0/0xff0 [crc32c_intel]
My addr2line suggests:
/home/lorenzo/kerndev/kernels/mm/./arch/x86/include/asm/processor.h:43
DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
Which seems... odd :)
[ 4.333702] Code: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 <f3> 0f 1e fa 0f 1f 44 00 00 48 c7 c7 00 e0 8a c0 e8 1b aa a8 c0 48 [ 4.352466] RSP: 0018:ffffc90004223a58 EFLAGS: 00010246 [ 4.357693] RAX: 0000000000000000 RBX: ffffffffc0641010 RCX: 0000000000000000 [ 4.364827] RDX: 0000000000000000 RSI: ffffffffc0641010 RDI: ffffc90004223a40 [ 4.371963] RBP: 0000000000000000 R08: 0000000000000020 R09: ffff88811adeeb00 [ 4.379097] R10: ffffc90004223ad0 R11: 0000000000000000 R12: ffff8881278c3600 [ 4.385530] usb 1-11: New USB device found, idVendor=05e3, idProduct=0608, bcdDevice=60.90 [ 4.386230] R13: ffffc90004223a60 R14: ffff88811ae3ac00 R15: ffff88811f251b80 [ 4.394497] usb 1-11: New USB device strings: Mfr=0, Product=1, SerialNumber=0 [ 4.401628] FS: 00007f92de124880(0000) GS:ffff88afffd80000(0000) knlGS:0000000000000000 [ 4.401630] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 4.408850] usb 1-11: Product: USB2.0 Hub [ 4.416938] CR2: ffffffffc0641010 CR3: 00000001279f6006 CR4: 0000000000f70ef0 [ 4.416939] PKRU: 55555554 [ 4.423588] hub 1-11:1.0: USB hub found [ 4.426696] Call Trace: [ 4.426697] <TASK> [ 4.426699] ? __die_body.cold+0x19/0x27 [ 4.434112] hub 1-11:1.0: 4 ports detected [ 4.436547] ? __pfx_crc32c_intel_mod_init+0x10/0x10 [crc32c_intel 0c38f5da3bf5b6f9ab94fb009bb43215a1542e3a] [ 4.462776] ? page_fault_oops+0x15a/0x2d0 [ 4.466874] ? disable_mmiotrace.cold+0x6a/0xaa [ 4.471404] ? __pfx_crc32c_intel_mod_init+0x10/0x10 [crc32c_intel 0c38f5da3bf5b6f9ab94fb009bb43215a1542e3a] [ 4.481229] ? exc_page_fault+0x18a/0x190 [ 4.485240] ? asm_exc_page_fault+0x26/0x30 [ 4.489422] ? __pfx_crc32c_intel_mod_init+0x10/0x10 [crc32c_intel 0c38f5da3bf5b6f9ab94fb009bb43215a1542e3a] [ 4.499248] ? __pfx_crc32c_intel_mod_init+0x10/0x10 [crc32c_intel 0c38f5da3bf5b6f9ab94fb009bb43215a1542e3a] [ 4.509073] ? __pfx_crc32c_intel_mod_init+0x10/0x10 [crc32c_intel 0c38f5da3bf5b6f9ab94fb009bb43215a1542e3a] [ 4.518900] do_one_initcall+0x58/0x310 [ 4.522736] do_init_module+0x82/0x270 [ 4.526483] init_module_from_file+0x86/0xc0 [ 4.530756] idempotent_init_module+0x115/0x310 [ 4.535286] __x64_sys_finit_module+0x65/0xc0 [ 4.539642] do_syscall_64+0x82/0x190 [ 4.543305] ? do_syscall_64+0x8e/0x190 [ 4.547142] ? filemap_map_pages+0x51f/0x670 [ 4.551412] ? do_filp_open+0xd8/0x190 [ 4.555163] ? __pfx_page_put_link+0x10/0x10 [ 4.559433] ? do_sys_openat2+0x9c/0xe0 [ 4.563268] ? syscall_exit_to_user_mode+0x37/0x1c0 [ 4.568147] ? syscall_exit_to_user_mode+0x37/0x1c0 [ 4.573024] ? do_syscall_64+0x8e/0x190 [ 4.576861] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 4.581003] usb 1-12: new high-speed USB device number 4 using xhci_hcd [ 4.581914] RIP: 0033:0x7f92de91b1fd [ 4.592102] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d e3 fa 0c 00 f7 d8 64 89 01 48 [ 4.610867] RSP: 002b:00007ffcf33fa0b8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.618434] RAX: ffffffffffffffda RBX: 0000555fbf68a870 RCX: 00007f92de91b1fd [ 4.625570] RDX: 0000000000000000 RSI: 00007f92de11e05d RDI: 0000000000000014 [ 4.632704] RBP: 00007ffcf33fa170 R08: 0000000000000002 R09: 00007ffcf33fa120 [ 4.639839] R10: 0000000000000007 R11: 0000000000000246 R12: 00007f92de11e05d [ 4.646973] R13: 0000000000020000 R14: 0000555fbf682a70 R15: 0000555fbf682fd0 [ 4.654109] </TASK> [ 4.656296] Modules linked in: crc32c_intel(+) spi_intel_pci drm_display_helper spi_intel nvme_auth cec video wmi
If I revert _just this_ commit it breaks the boot by not being able to find the disk (!) so guess you need to revert everything after it too which kinda makes sense :)
On Fri, Dec 27, 2024 at 09:28:23AM +0200, Mike Rapoport wrote:
From: "Mike Rapoport (Microsoft)" rppt@kernel.org
Instead of using writable copy for module text sections, temporarily remap the memory allocated from execmem's ROX cache as writable and restore its ROX permissions after the module is formed.
This will allow removing nasty games with writable copy in alternatives patching on x86.
Signed-off-by: Mike Rapoport (Microsoft) rppt@kernel.org
include/linux/module.h | 7 +--- include/linux/moduleloader.h | 4 --- kernel/module/main.c | 67 ++++++------------------------------ kernel/module/strict_rwx.c | 9 ++--- 4 files changed, 17 insertions(+), 70 deletions(-)
diff --git a/include/linux/module.h b/include/linux/module.h index bd8cf93d32c8..e9fc9d1fa476 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -368,7 +368,6 @@ enum mod_mem_type {
struct module_memory { void *base;
- void *rw_copy; bool is_rox; unsigned int size;
@@ -775,13 +774,9 @@ static inline bool is_livepatch_module(struct module *mod)
void set_module_sig_enforced(void);
-void *__module_writable_address(struct module *mod, void *loc);
static inline void *module_writable_address(struct module *mod, void *loc) {
- if (!IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) || !mod)
return loc;
- return __module_writable_address(mod, loc);
- return loc;
}
#else /* !CONFIG_MODULES... */ diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h index 1f5507ba5a12..e395461d59e5 100644 --- a/include/linux/moduleloader.h +++ b/include/linux/moduleloader.h @@ -108,10 +108,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod);
-int module_post_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *mod);
#ifdef CONFIG_MODULES void flush_module_init_free_work(void); #else diff --git a/kernel/module/main.c b/kernel/module/main.c index ad8ef20c120f..ee6b46e753a0 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1221,18 +1221,6 @@ void __weak module_arch_freeing_init(struct module *mod) { }
-void *__module_writable_address(struct module *mod, void *loc) -{
- for_class_mod_mem_type(type, text) {
struct module_memory *mem = &mod->mem[type];
if (loc >= mem->base && loc < mem->base + mem->size)
return loc + (mem->rw_copy - mem->base);
- }
- return loc;
-}
static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); @@ -1250,21 +1238,15 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (!ptr) return -ENOMEM;
- mod->mem[type].base = ptr;
- if (execmem_is_rox(execmem_type)) {
ptr = vzalloc(size);
int err = execmem_make_temp_rw(ptr, size);
if (!ptr) {
execmem_free(mod->mem[type].base);
if (err) {
}execmem_free(ptr); return -ENOMEM;
mod->mem[type].rw_copy = ptr;
mod->mem[type].is_rox = true;
} else {
mod->mem[type].rw_copy = mod->mem[type].base;
memset(mod->mem[type].base, 0, size);
}
/*
@@ -1280,6 +1262,9 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) */ kmemleak_not_leak(ptr);
- memset(ptr, 0, size);
- mod->mem[type].base = ptr;
- return 0;
}
@@ -1287,8 +1272,8 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) { struct module_memory *mem = &mod->mem[type];
- if (mem->is_rox)
vfree(mem->rw_copy);
if (mod->state == MODULE_STATE_UNFORMED && mem->is_rox)
execmem_restore_rox(mem->base, mem->size);
execmem_free(mem->base);
} @@ -2561,7 +2546,6 @@ static int move_module(struct module *mod, struct load_info *info) for_each_mod_mem_type(type) { if (!mod->mem[type].size) { mod->mem[type].base = NULL;
}mod->mem[type].rw_copy = NULL; continue;
@@ -2578,7 +2562,6 @@ static int move_module(struct module *mod, struct load_info *info) void *dest; Elf_Shdr *shdr = &info->sechdrs[i]; const char *sname;
unsigned long addr;
if (!(shdr->sh_flags & SHF_ALLOC)) continue;
@@ -2599,14 +2582,12 @@ static int move_module(struct module *mod, struct load_info *info) ret = PTR_ERR(dest); goto out_err; }
addr = (unsigned long)dest; codetag_section_found = true;
} else { enum mod_mem_type type = shdr->sh_entsize >> SH_ENTSIZE_TYPE_SHIFT; unsigned long offset = shdr->sh_entsize & SH_ENTSIZE_OFFSET_MASK;
addr = (unsigned long)mod->mem[type].base + offset;
dest = mod->mem[type].rw_copy + offset;
dest = mod->mem[type].base + offset;
}
if (shdr->sh_type != SHT_NOBITS) {
@@ -2629,7 +2610,7 @@ static int move_module(struct module *mod, struct load_info *info) * users of info can keep taking advantage and using the newly * minted official memory area. */
shdr->sh_addr = addr;
pr_debug("\t0x%lx 0x%.8lx %s\n", (long)shdr->sh_addr, (long)shdr->sh_size, info->secstrings + shdr->sh_name); }shdr->sh_addr = (unsigned long)dest;
@@ -2782,17 +2763,8 @@ int __weak module_finalize(const Elf_Ehdr *hdr, return 0; }
-int __weak module_post_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
-{
- return 0;
-}
static int post_relocation(struct module *mod, const struct load_info *info) {
- int ret;
- /* Sort exception table now relocations are done. */ sort_extable(mod->extable, mod->extable + mod->num_exentries);
@@ -2804,24 +2776,7 @@ static int post_relocation(struct module *mod, const struct load_info *info) add_kallsyms(mod, info);
/* Arch-specific module finalizing. */
- ret = module_finalize(info->hdr, info->sechdrs, mod);
- if (ret)
return ret;
- for_each_mod_mem_type(type) {
struct module_memory *mem = &mod->mem[type];
if (mem->is_rox) {
if (!execmem_update_copy(mem->base, mem->rw_copy,
mem->size))
return -ENOMEM;
vfree(mem->rw_copy);
mem->rw_copy = NULL;
}
- }
- return module_post_finalize(info->hdr, info->sechdrs, mod);
- return module_finalize(info->hdr, info->sechdrs, mod);
}
/* Call module constructors. */ diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c index 239e5013359d..ce47b6346f27 100644 --- a/kernel/module/strict_rwx.c +++ b/kernel/module/strict_rwx.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/vmalloc.h> #include <linux/set_memory.h> +#include <linux/execmem.h> #include "internal.h"
static int module_set_memory(const struct module *mod, enum mod_mem_type type, @@ -32,12 +33,12 @@ static int module_set_memory(const struct module *mod, enum mod_mem_type type, int module_enable_text_rox(const struct module *mod) { for_class_mod_mem_type(type, text) {
int ret;const struct module_memory *mem = &mod->mem[type];
if (mod->mem[type].is_rox)
continue;
if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
if (mem->is_rox)
ret = execmem_restore_rox(mem->base, mem->size);
else ret = module_set_memory(mod, type, set_memory_x);else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) ret = module_set_memory(mod, type, set_memory_rox);
-- 2.45.2