From: Jeff Xu jeffxu@chromium.org
Add vm.mfd_noexec. When the value is 1 (enabled), the memfd_create syscall will create a non-executable memfd.
The default value is 0 (disabled). An admin can change the setting from 0 => 1; however, changing it back from 1 => 0 is not allowed without a reboot.
Signed-off-by: Jeff Xu jeffxu@chromium.org --- include/linux/mm.h | 4 ++++ kernel/sysctl.c | 9 +++++++++ mm/memfd.c | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 7898e29bcfb5..1c66cf4aca11 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -203,6 +203,10 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes;
+extern int sysctl_mfd_noexec_scope; +extern int mfd_noexec_dointvec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); + int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, loff_t *); int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b233714a1c78..54510da007ff 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2362,6 +2362,15 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = mmap_min_addr_handler, }, + { + .procname = "mfd_noexec", + .data = &sysctl_mfd_noexec_scope, + .maxlen = sizeof(sysctl_mfd_noexec_scope), + .mode = 0644, + .proc_handler = mfd_noexec_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif #ifdef CONFIG_NUMA { diff --git a/mm/memfd.c b/mm/memfd.c index b841514eb0fd..c6ccb8481ed2 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -20,6 +20,11 @@ #include <linux/memfd.h> #include <uapi/linux/memfd.h>
+#define MFD_NOEXEC_SCOPE_DISABLED 0 +#define MFD_NOEXEC_SCOPE_ENABLED 1 + +int sysctl_mfd_noexec_scope __read_mostly = MFD_NOEXEC_SCOPE_DISABLED; + /* * We need a tag: a new tag would expand every xa_node by 8 bytes, * so reuse a tag which we firmly believe is never set or cleared on tmpfs @@ -275,6 +280,10 @@ SYSCALL_DEFINE2(memfd_create, char *name; long len;
+ if (sysctl_mfd_noexec_scope == MFD_NOEXEC_SCOPE_ENABLED) { + flags |= MFD_NOEXEC; + } + if (!(flags & MFD_HUGETLB)) { if (flags & ~(unsigned int)MFD_ALL_FLAGS) return -EINVAL; @@ -351,3 +360,21 @@ SYSCALL_DEFINE2(memfd_create, kfree(name); return error; } + +#ifdef CONFIG_SYSCTL +int mfd_noexec_dointvec_minmax(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table table_copy; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* Lock the max value if it ever gets set. */ + table_copy = *table; + if (*(int *)table_copy.data == *(int *)table_copy.extra2) + table_copy.extra1 = table_copy.extra2; + + return proc_dointvec_minmax(&table_copy, write, buffer, lenp, ppos); +} +#endif