As pointed out by Michael Ellerman, the ptrace ABI on powerpc does not allow or require the return code to be set on syscall entry when skipping the syscall. It will always return ENOSYS and the return code must be set on syscall exit.
This code does that, behaving more similarly to strace. It still sets the return code on entry, which is overridden on powerpc, and it will always repeat the same on exit. Also, on powerpc, the errno is not inverted, and depends on ccr.so being set.
This has been tested on powerpc and amd64.
Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Kees Cook <keescook@google.com> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> --- tools/testing/selftests/seccomp/seccomp_bpf.c | 81 ++++++++++++------- 1 file changed, 53 insertions(+), 28 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..0ddc0846e9c0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1837,15 +1837,24 @@ void change_syscall(struct __test_metadata *_metadata, #endif
/* If syscall is skipped, change return value. */ - if (syscall == -1) + if (syscall == -1) { #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); - #elif defined(__xtensa__) regs.SYSCALL_RET(regs) = result; +#elif defined(__powerpc__) + /* Error is signaled by CR0 SO bit and error code is positive. */ + if (result < 0) { + regs.SYSCALL_RET = -result; + regs.ccr |= 0x10000000; + } else { + regs.SYSCALL_RET = result; + regs.ccr &= ~0x10000000; + } #else regs.SYSCALL_RET = result; #endif + }
#ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs); @@ -1897,12 +1906,44 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
}
+FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; +}; + +FIXTURE_VARIANT(TRACE_syscall) { + /* + * All of the SECCOMP_RET_TRACE behaviors can be tested with either + * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. + * This indicates if we should use SECCOMP_RET_TRACE (false), or + * ptrace (true). + */ + bool use_ptrace; + + /* + * Some archs (like ppc) only support changing the return code during + * syscall exit when ptrace is used. As the syscall number might not + * be available anymore during syscall exit, it needs to be saved + * during syscall enter. + */ + int syscall_nr; +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { + .use_ptrace = true, +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { + .use_ptrace = false, +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret, nr; unsigned long msg; static bool entry; + FIXTURE_VARIANT(TRACE_syscall) * variant = args;
/* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1916,10 +1957,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry) + if (!entry && !variant) return;
- nr = get_syscall(_metadata, tracee); + if (entry) + nr = get_syscall(_metadata, tracee); + else if (variant) + nr = variant->syscall_nr; + if (variant) + variant->syscall_nr = nr;
if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid, 0); @@ -1929,29 +1975,6 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, change_syscall(_metadata, tracee, -1, -ESRCH); }
-FIXTURE(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; -}; - -FIXTURE_VARIANT(TRACE_syscall) { - /* - * All of the SECCOMP_RET_TRACE behaviors can be tested with either - * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. - * This indicates if we should use SECCOMP_RET_TRACE (false), or - * ptrace (true). - */ - bool use_ptrace; -}; - -FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { - .use_ptrace = true, -}; - -FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { - .use_ptrace = false, -}; - FIXTURE_SETUP(TRACE_syscall) { struct sock_filter filter[] = { @@ -1992,7 +2015,9 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp, - NULL, variant->use_ptrace); + variant->use_ptrace ? (void *) variant + : NULL, + variant->use_ptrace);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret);
On 9/11/20 12:10 PM, Thadeu Lima de Souza Cascardo wrote:
As pointed out by Michael Ellerman, the ptrace ABI on powerpc does not allow or require the return code to be set on syscall entry when skipping the syscall. It will always return ENOSYS and the return code must be set on syscall exit.
This code does that, behaving more similarly to strace. It still sets the return code on entry, which is overridden on powerpc, and it will always repeat the same on exit. Also, on powerpc, the errno is not inverted, and depends on ccr.so being set.
This has been tested on powerpc and amd64.
Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Kees Cook <keescook@google.com> Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
tools/testing/selftests/seccomp/seccomp_bpf.c | 81 ++++++++++++------- 1 file changed, 53 insertions(+), 28 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..0ddc0846e9c0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1837,15 +1837,24 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* If syscall is skipped, change return value. */
- if (syscall == -1)
- if (syscall == -1) { #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture");
- #elif defined(__xtensa__) regs.SYSCALL_RET(regs) = result;
+#elif defined(__powerpc__)
/* Error is signaled by CR0 SO bit and error code is positive. */
if (result < 0) {
regs.SYSCALL_RET = -result;
regs.ccr |= 0x10000000;
} else {
regs.SYSCALL_RET = result;
regs.ccr &= ~0x10000000;
#else regs.SYSCALL_RET = result; #endif}
- }
#ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs); @@ -1897,12 +1906,44 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, } +FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
+};
+FIXTURE_VARIANT(TRACE_syscall) {
- /*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
* SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
* This indicates if we should use SECCOMP_RET_TRACE (false), or
* ptrace (true).
*/
- bool use_ptrace;
- /*
* Some archs (like ppc) only support changing the return code during
* syscall exit when ptrace is used. As the syscall number might not
* be available anymore during syscall exit, it needs to be saved
* during syscall enter.
*/
- int syscall_nr;
+};
+FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
- .use_ptrace = true,
+};
+FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
- .use_ptrace = false,
+};
- void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret, nr; unsigned long msg; static bool entry;
- FIXTURE_VARIANT(TRACE_syscall) * variant = args;
/* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1916,10 +1957,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- if (!entry && !variant) return;
- nr = get_syscall(_metadata, tracee);
- if (entry)
nr = get_syscall(_metadata, tracee);
- else if (variant)
nr = variant->syscall_nr;
- if (variant)
variant->syscall_nr = nr;
if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid, 0); @@ -1929,29 +1975,6 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, change_syscall(_metadata, tracee, -1, -ESRCH); } -FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
-};
-FIXTURE_VARIANT(TRACE_syscall) {
- /*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
* SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
* This indicates if we should use SECCOMP_RET_TRACE (false), or
* ptrace (true).
*/
- bool use_ptrace;
-};
-FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
- .use_ptrace = true,
-};
-FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
- .use_ptrace = false,
-};
- FIXTURE_SETUP(TRACE_syscall) { struct sock_filter filter[] = {
@@ -1992,7 +2015,9 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp,
NULL, variant->use_ptrace);
variant->use_ptrace ? (void *) variant
: NULL,
variant->use_ptrace);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret);
Hi Kees,
If you want to take this through your tree:
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
thanks, -- Shuah
On Fri, Sep 11, 2020 at 03:10:12PM -0300, Thadeu Lima de Souza Cascardo wrote:
As pointed out by Michael Ellerman, the ptrace ABI on powerpc does not allow or require the return code to be set on syscall entry when skipping the syscall. It will always return ENOSYS and the return code must be set on syscall exit.
This code does that, behaving more similarly to strace. It still sets the return code on entry, which is overridden on powerpc, and it will always repeat the same on exit. Also, on powerpc, the errno is not inverted, and depends on ccr.so being set.
This has been tested on powerpc and amd64.
This looks like two fixes in one, so this should be split. :)
tools/testing/selftests/seccomp/seccomp_bpf.c | 81 ++++++++++++------- 1 file changed, 53 insertions(+), 28 deletions(-)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..0ddc0846e9c0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1837,15 +1837,24 @@ void change_syscall(struct __test_metadata *_metadata, #endif /* If syscall is skipped, change return value. */
- if (syscall == -1)
- if (syscall == -1) {
#ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture");
#elif defined(__xtensa__) regs.SYSCALL_RET(regs) = result; +#elif defined(__powerpc__)
/* Error is signaled by CR0 SO bit and error code is positive. */
if (result < 0) {
regs.SYSCALL_RET = -result;
regs.ccr |= 0x10000000;
} else {
regs.SYSCALL_RET = result;
regs.ccr &= ~0x10000000;
}
#else regs.SYSCALL_RET = result; #endif
- }
I'll send a series soon that will include this bit, since I don't want to collect these kinds of arch-specific things in the functions. (And the xtensa one went in without my review!)
+FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
+};
+FIXTURE_VARIANT(TRACE_syscall) {
- /*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
* SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
* This indicates if we should use SECCOMP_RET_TRACE (false), or
* ptrace (true).
*/
- bool use_ptrace;
- /*
* Some archs (like ppc) only support changing the return code during
* syscall exit when ptrace is used. As the syscall number might not
* be available anymore during syscall exit, it needs to be saved
* during syscall enter.
*/
- int syscall_nr;
This should be part of the fixture struct, not the variant.
+};
+FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
- .use_ptrace = true,
+};
+FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
- .use_ptrace = false,
+};
i.e. if a member isn't initialized in FIXTURE_VARIANT_ADD, it shouldn't be defined in FIXTURE_VARIANT. :)
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret, nr; unsigned long msg; static bool entry;
- FIXTURE_VARIANT(TRACE_syscall) * variant = args;
/* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1916,10 +1957,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- if (!entry && !variant) return;
- nr = get_syscall(_metadata, tracee);
- if (entry)
nr = get_syscall(_metadata, tracee);
- else if (variant)
nr = variant->syscall_nr;
- if (variant)
variant->syscall_nr = nr;
So, to be clear this is _only_ an issue for the ptrace side of things, yes? i.e. seccomp's setting of the return value will correctly stick?
Kees Cook keescook@chromium.org writes:
On Fri, Sep 11, 2020 at 03:10:12PM -0300, Thadeu Lima de Souza Cascardo wrote:
...
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..0ddc0846e9c0 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1916,10 +1957,15 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- if (!entry && !variant) return;
- nr = get_syscall(_metadata, tracee);
- if (entry)
nr = get_syscall(_metadata, tracee);
- else if (variant)
nr = variant->syscall_nr;
- if (variant)
variant->syscall_nr = nr;
So, to be clear this is _only_ an issue for the ptrace side of things, yes? i.e. seccomp's setting of the return value will correctly stick?
Yes. There's a comment which (hopefully) explains the difference here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch...
Which says:
static int do_seccomp(struct pt_regs *regs) { if (!test_thread_flag(TIF_SECCOMP)) return 0;
/* * The ABI we present to seccomp tracers is that r3 contains * the syscall return value and orig_gpr3 contains the first * syscall parameter. This is different to the ptrace ABI where * both r3 and orig_gpr3 contain the first syscall parameter. */ regs->gpr[3] = -ENOSYS;
cheers
linux-kselftest-mirror@lists.linaro.org