- Linux-kselftest-mirror - lists.linaro.org

[PATCH bpf 1/3] bpf, x32: Fix bug with ALU64 {LSH, RSH, ARSH} BPF_X shift by 0

by Luke Nelson

The current x32 BPF JIT for shift operations is not correct when the shift amount in a register is 0. The expected behavior is a no-op, whereas the current implementation changes bits in the destination register. The following example demonstrates the bug. The expected result of this program is 1, but the current JITed code returns 2. r0 = 1 r1 = 1 r2 = 0 r1 <<= r2 if r1 == 1 goto end r0 = 2 end: exit The bug is caused by an incorrect assumption by the JIT that a shift by 32 clear the register. On x32 however, shifts use the lower 5 bits of the source, making a shift by 32 equivalent to a shift by 0. This patch fixes the bug using double-precision shifts, which also simplifies the code. Fixes: 03f5781be2c7 ("bpf, x86_32: add eBPF JIT compiler for ia32") Co-developed-by: Xi Wang <xi.wang(a)gmail.com> Signed-off-by: Xi Wang <xi.wang(a)gmail.com> Signed-off-by: Luke Nelson <luke.r.nels(a)gmail.com> --- arch/x86/net/bpf_jit_comp32.c | 221 ++++------------------------------ 1 file changed, 23 insertions(+), 198 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index b29e82f190c7..f34ef513f4f9 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -724,9 +724,6 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -745,79 +742,23 @@ static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* shl dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_hi)); - /* mov ebx,dreg_lo */ - EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* shld dreg_hi,dreg_lo,cl */ + EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo)); /* shl dreg_lo,cl */ EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shr ebx,cl */ - EMIT2(0xD3, add_1reg(0xE8, IA32_EBX)); - /* or dreg_hi,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; + /* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */ - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shl dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE0, dreg_lo)); /* mov dreg_hi,dreg_lo */ EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo)); - /* xor dreg_lo,dreg_lo */ EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), @@ -836,9 +777,6 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -857,78 +795,22 @@ static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[], /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); - /* ashr dreg_hi,cl */ + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); + /* sar dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; + /* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */ - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (5 bytes) when < 32 */ + EMIT2(IA32_JB, 5); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* ashr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xF8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - /* ashr dreg_hi,imm8 */ - EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* ashr dreg_hi,imm8 */ + /* sar dreg_hi,31 */ EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31); - /* mov dreg_lo,dreg_hi */ - EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ @@ -948,9 +830,6 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, { u8 *prog = *pprog; int cnt = 0; - static int jmp_label1 = -1; - static int jmp_label2 = -1; - static int jmp_label3 = -1; u8 dreg_lo = dstk ? IA32_EAX : dst_lo; u8 dreg_hi = dstk ? IA32_EDX : dst_hi; @@ -969,77 +848,23 @@ static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk, /* mov ecx,src_lo */ EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX)); - /* cmp ecx,32 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); - /* Jumps when >= 32 */ - if (is_imm8(jmp_label(jmp_label1, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label1, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6)); - - /* < 32 */ - /* lshr dreg_lo,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_lo)); - /* mov ebx,dreg_hi */ - EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX)); + /* shrd dreg_lo,dreg_hi,cl */ + EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi)); /* shr dreg_hi,cl */ EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); - /* IA32_ECX = -IA32_ECX + 32 */ - /* neg ecx */ - EMIT2(0xF7, add_1reg(0xD8, IA32_ECX)); - /* add ecx,32 */ - EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32); - - /* shl ebx,cl */ - EMIT2(0xD3, add_1reg(0xE0, IA32_EBX)); - /* or dreg_lo,ebx */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX)); + /* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */ - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 32 */ - if (jmp_label1 == -1) - jmp_label1 = cnt; - /* cmp ecx,64 */ - EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64); - /* Jumps when >= 64 */ - if (is_imm8(jmp_label(jmp_label2, 2))) - EMIT2(IA32_JAE, jmp_label(jmp_label2, 2)); - else - EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6)); + /* cmp ecx,32 */ + EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32); + /* skip the next two instructions (4 bytes) when < 32 */ + EMIT2(IA32_JB, 4); - /* >= 32 && < 64 */ - /* sub ecx,32 */ - EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32); - /* shr dreg_hi,cl */ - EMIT2(0xD3, add_1reg(0xE8, dreg_hi)); /* mov dreg_lo,dreg_hi */ EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi)); /* xor dreg_hi,dreg_hi */ EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - /* goto out; */ - if (is_imm8(jmp_label(jmp_label3, 2))) - EMIT2(0xEB, jmp_label(jmp_label3, 2)); - else - EMIT1_off32(0xE9, jmp_label(jmp_label3, 5)); - - /* >= 64 */ - if (jmp_label2 == -1) - jmp_label2 = cnt; - /* xor dreg_lo,dreg_lo */ - EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo)); - /* xor dreg_hi,dreg_hi */ - EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi)); - - if (jmp_label3 == -1) - jmp_label3 = cnt; - if (dstk) { /* mov dword ptr [ebp+off],dreg_lo */ EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo), -- 2.20.1

6 years

2
3
0 0

[PATCH v5 0/2] clocksource/drivers: Create new Hyper-V clocksource driver

by Michael Kelley

This patch series moves Hyper-V clock/timer code to a separate Hyper-V clocksource driver. Previously, Hyper-V clock/timer code and data structures were mixed in with other Hyper-V code in the ISA independent drivers/hv code as well as in ISA dependent code. The new Hyper-V clocksource driver is ISA agnostic, with a just few dependencies on ISA specific functions. The patch series does not change any behavior or functionality -- it only reorganizes the existing code and fixes up the linkages. A few places outside of Hyper-V code are fixed up to use the new #include file structure. This restructuring is in response to Marc Zyngier's review comments on supporting Hyper-V running on ARM64, and is a good idea in general. It increases the amount of code shared between the x86 and ARM64 architectures, and reduces the size of the new code for supporting Hyper-V on ARM64. A new version of the Hyper-V on ARM64 patches will follow once this clocksource restructuring is accepted. The code is diff'ed against the upstream tip tree: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso Changes in v5: * Revised commit summaries [Thomas Gleixner] * Removed call to clockevents_unbind_device() [Thomas Gleixner] * Restructured hv_init_clocksource() [Thomas Gleixner] * Various other small code cleanups [Thomas Gleixner] Changes in v4: * Revised commit messages * Rebased to upstream tip tree Changes in v3: * Removed boolean argument to hv_init_clocksource(). Always call sched_clock_register, which is needed on ARM64 but a no-op on x86. * Removed separate cpuhp setup in hv_stimer_alloc() and instead directly call hv_stimer_init() and hv_stimer_cleanup() from corresponding VMbus functions. This more closely matches original code and avoids clocksource stop/restart problems on ARM64 when VMbus code denies CPU offlining request. Changes in v2: * Revised commit short descriptions so the distinction between the first and second patches is clearer [GregKH] * Renamed new clocksource driver files and functions to use existing "timer" and "stimer" names instead of introducing "syntimer". [Vitaly Kuznetsov] * Introduced CONFIG_HYPER_TIMER to fix build problem when CONFIG_HYPERV=m [Vitaly Kuznetsov] * Added "Suggested-by: Marc Zyngier" Michael Kelley (2): clocksource/drivers: Make Hyper-V clocksource ISA agnostic clocksource/drivers: Continue making Hyper-V clocksource ISA agnostic MAINTAINERS | 2 + arch/x86/entry/vdso/vma.c | 2 +- arch/x86/hyperv/hv_init.c | 91 +-------- arch/x86/include/asm/hyperv-tlfs.h | 6 + arch/x86/include/asm/mshyperv.h | 81 +------- arch/x86/include/asm/vdso/gettimeofday.h | 2 +- arch/x86/kernel/cpu/mshyperv.c | 4 +- arch/x86/kvm/x86.c | 1 + drivers/clocksource/Makefile | 1 + drivers/clocksource/hyperv_timer.c | 339 +++++++++++++++++++++++++++++++ drivers/hv/Kconfig | 3 + drivers/hv/hv.c | 156 +------------- drivers/hv/hv_util.c | 1 + drivers/hv/hyperv_vmbus.h | 3 - drivers/hv/vmbus_drv.c | 42 ++-- include/clocksource/hyperv_timer.h | 105 ++++++++++ 16 files changed, 503 insertions(+), 336 deletions(-) create mode 100644 drivers/clocksource/hyperv_timer.c create mode 100644 include/clocksource/hyperv_timer.h -- 1.8.3.1

6 years

2
4
0 0

[PATCH AUTOSEL 5.1 09/39] selftests/powerpc: Add test of fork with mapping above 512TB

by Sasha Levin

From: Michael Ellerman <mpe(a)ellerman.id.au> [ Upstream commit 16391bfc862342f285195013b73c1394fab28b97 ] This tests that when a process with a mapping above 512TB forks we correctly separate the parent and child address spaces. This exercises the bug in the context id handling fixed in the previous commit. Signed-off-by: Michael Ellerman <mpe(a)ellerman.id.au> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/powerpc/mm/.gitignore | 3 +- tools/testing/selftests/powerpc/mm/Makefile | 4 +- .../powerpc/mm/large_vm_fork_separation.c | 87 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index ba919308fe30..d503b8764a8e 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -3,4 +3,5 @@ subpage_prot tempfile prot_sao segv_errors -wild_bctr \ No newline at end of file +wild_bctr +large_vm_fork_separation \ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 43d68420e363..f1fbc15800c4 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,8 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ + large_vm_fork_separation TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -13,6 +14,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 +$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c new file mode 100644 index 000000000000..2363a7f3ab0d --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/large_vm_fork_separation.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that allocating memory beyond the memory limit and then forking is +// handled correctly, ie. the child is able to access the mappings beyond the +// memory limit and the child's writes are not visible to the parent. + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB +#endif + + +static int test(void) +{ + int p2c[2], c2p[2], rc, status, c, *p; + unsigned long page_size; + pid_t pid; + + page_size = sysconf(_SC_PAGESIZE); + SKIP_IF(page_size != 65536); + + // Create a mapping at 512TB to allocate an extended_id + p = mmap((void *)(512ul << 40), page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + if (p == MAP_FAILED) { + perror("mmap"); + printf("Error: couldn't mmap(), confirm kernel has 4TB support?\n"); + return 1; + } + + printf("parent writing %p = 1\n", p); + *p = 1; + + FAIL_IF(pipe(p2c) == -1 || pipe(c2p) == -1); + + pid = fork(); + if (pid == 0) { + FAIL_IF(read(p2c[0], &c, 1) != 1); + + pid = getpid(); + printf("child writing %p = %d\n", p, pid); + *p = pid; + + FAIL_IF(write(c2p[1], &c, 1) != 1); + FAIL_IF(read(p2c[0], &c, 1) != 1); + exit(0); + } + + c = 0; + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(read(c2p[0], &c, 1) != 1); + + // Prevent compiler optimisation + barrier(); + + rc = 0; + printf("parent reading %p = %d\n", p, *p); + if (*p != 1) { + printf("Error: BUG! parent saw child's write! *p = %d\n", *p); + rc = 1; + } + + FAIL_IF(write(p2c[1], &c, 1) != 1); + FAIL_IF(waitpid(pid, &status, 0) == -1); + FAIL_IF(!WIFEXITED(status) || WEXITSTATUS(status)); + + if (rc == 0) + printf("success: test completed OK\n"); + + return rc; +} + +int main(void) +{ + return test_harness(test, "large_vm_fork_separation"); +} -- 2.20.1

6 years

1
0
0 0

[PATCHv2] selftests/net: skip psock_tpacket test if KALLSYMS was not enabled

by Po-Hsu Lin

The psock_tpacket test will need to access /proc/kallsyms, this would require the kernel config CONFIG_KALLSYMS to be enabled first. Apart from adding CONFIG_KALLSYMS to the net/config file here, check the file existence to determine if we can run this test will be helpful to avoid a false-positive test result when testing it directly with the following commad against a kernel that have CONFIG_KALLSYMS disabled: make -C tools/testing/selftests TARGETS=net run_tests Signed-off-by: Po-Hsu Lin <po-hsu.lin(a)canonical.com> --- tools/testing/selftests/net/config | 1 + tools/testing/selftests/net/run_afpackettests | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 4740404..3dea2cb 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -25,3 +25,4 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_KALLSYMS=y diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index ea5938e..8b42e8b 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -21,12 +21,16 @@ fi echo "--------------------" echo "running psock_tpacket test" echo "--------------------" -./in_netns.sh ./psock_tpacket -if [ $? -ne 0 ]; then - echo "[FAIL]" - ret=1 +if [ -f /proc/kallsyms ]; then + ./in_netns.sh ./psock_tpacket + if [ $? -ne 0 ]; then + echo "[FAIL]" + ret=1 + else + echo "[PASS]" + fi else - echo "[PASS]" + echo "[SKIP] CONFIG_KALLSYMS not enabled" fi echo "--------------------" -- 2.7.4

6 years

3
2
0 0

[PATCH][next] selftests/x86: fix spelling mistake "FAILT" -> "FAIL"

by Colin King

From: Colin Ian King <colin.king(a)canonical.com> There is an spelling mistake in an a test error message. Fix it. Signed-off-by: Colin Ian King <colin.king(a)canonical.com> --- tools/testing/selftests/x86/test_vsyscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 4602326b8f5b..a4f4d4cf22c3 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -451,7 +451,7 @@ static int test_vsys_x(void) printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", segv_err); } else { - printf("[FAILT]\tExecution failed with the wrong error: #PF(0x%lx)\n", + printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n", segv_err); return 1; } -- 2.20.1

6 years

5
8
0 0

[PATCH] selftests/kselftest/runner.sh: Add 30 second timeout per test

by Anders Roxell

Commit a745f7af3cbd ("selftests/harness: Add 30 second timeout per test") solves that binary tests doesn't hang forever. However, scripts can still hang forever, this adds an timeout to each test script run. This assumes that an individual test doesn't take longer than 30 seconds. Signed-off-by: Anders Roxell <anders.roxell(a)linaro.org> --- tools/testing/selftests/kselftest/runner.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 00c9020bdda8..cff7d2d83648 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -5,6 +5,7 @@ export skip_rc=4 export logfile=/dev/stdout export per_test_logging= +export TEST_TIMEOUT_DEFAULT=30 # There isn't a shell-agnostic way to find the path of a sourced file, # so we must rely on BASE_DIR being set to find other tools. @@ -24,6 +25,14 @@ tap_prefix() fi } +tap_timeout() +{ + if [ -x /usr/bin/timeout ] && [ -x "$BASENAME_TEST" ] \ + && file $BASENAME_TEST |grep -q "shell script"; then + echo -n "timeout $TEST_TIMEOUT_DEFAULT" + fi +} + run_one() { DIR="$1" @@ -44,7 +53,7 @@ run_one() echo "not ok $test_num $TEST_HDR_MSG" else cd `dirname $TEST` > /dev/null - (((((./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || -- 2.11.0

6 years

2
1
0 0

[RFC 1/3] rcu: Expedite the rcu quiescent state reporting if help needed

by Joel Fernandes (Google)

The t->rcu_read_unlock_special union's need_qs bit can be set by the scheduler tick (in rcu_flavor_sched_clock_irq) to indicate that help is needed from the rcu_read_unlock path. When this help arrives however, we can do better to speed up the quiescent state reporting which if rcu_read_unlock_special::need_qs is set might be quite urgent. Make use of this information in deciding when to do heavy-weight softirq raising where possible. Signed-off-by: Joel Fernandes (Google) <joel(a)joelfernandes.org> --- kernel/rcu/tree_plugin.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c588ef98efd3..bff6410fac06 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -622,7 +622,8 @@ static void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.exp_hint = false; exp = (t->rcu_blocked_node && t->rcu_blocked_node->exp_tasks) || (rdp->grpmask & rnp->expmask) || - tick_nohz_full_cpu(rdp->cpu); + tick_nohz_full_cpu(rdp->cpu) || + t->rcu_read_unlock_special.b.need_qs; // Need to defer quiescent state until everything is enabled. if (irqs_were_disabled && use_softirq && (in_interrupt() || -- 2.22.0.410.gd8fdbe21b5-goog

6 years

5
11
0 0

[RFC] arm64: Detecting tagged addresses

by Andrew Murray

Hello, The proposed introduction of a relaxed ARM64 ABI [1] will allow tagged memory addresses to be passed through the user-kernel syscall ABI boundary. Tagged memory addresses are those which contain a non-zero top byte (the hardware has always ignored this top byte due to TCR_EL1.TBI0) and may be useful for features such as HWASan. To permit this relaxation a proposed patchset [2] strips the top byte (tag) from user provided memory addresses prior to use in kernel functions which require untagged addresses (for example comparasion/arithmetic of addresses). The author of this patchset relied on a variety of techniques [2] (such as grep, BUG_ON, sparse etc) to identify as many instances of possible where tags need to be stipped. To support this effort and to catch future regressions (e.g. in new syscalls or ioctls), I've devised an additional approach for detecting the use of tagged addresses in functions that do not want them. This approach makes use of Smatch [3] and is outlined in this RFC. Due to the ability of Smatch to do flow analysis I believe we can annotate the kernel in fewer places than a similar approach in sparse. I'm keen for feedback on the likely usefulness of this approach. We first add some new annotations that are exclusively consumed by Smatch: --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -19,6 +19,7 @@ # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) # define __rcu __attribute__((noderef, address_space(4))) +# define __untagged __attribute__((address_space(5))) # define __private __attribute__((noderef)) extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); @@ -45,6 +46,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); ... The purpose of this annotation is to indicate in function prototypes that a given argument must not be a user tagged memory address. (The address space number isn't significant here and could be replaced with any other annotation that we get Smatch to understand). An example of how we use this annotation is as follows: --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2224,7 +2224,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, EXPORT_SYMBOL(get_unmapped_area); /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ -struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr) +struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long __untagged addr) { struct rb_node *rb_node; struct vm_area_struct *vma; Thus our intent here is to flag up whenever addr is tagged. Specifically modifications I've made to Smatch to test this will flag an issue where all the following conditions are met: 1. the parameter is used in the function 2. the data in the parameter has originated or been derived from userspace (this relies on existing Smatch functionality to detect where data has come from) 3. the data's top byte is non-zero (via flow analysis to determine the range of values that it may be given the known call-tree). Due to the use of smatch and its flow-analysis we don't need to propogate the __untagged annotation up the call chain to the callers and their callers - thus allowing us to only annotate functions that actually does something with the address and it's only necessary if the function has the potential to receive user data. I.e. we only need to tag find_vma to catch an issue with mmap_region (because it calls count_vma_pages_range which calls find_vma_intersection which calls find_vma). Due to condition 3 above, the use of the existing untagged_addr macro (or anything that does something similar) will prevent smatch from producing a warning. Using a vanilla (v5.2-rc2) kernel, and a single find_vma annotation, smatch will produce the following warnings: mm/mmap.c:2227 find_vma() warn: Variable addr looks like a tagged address - it is not allowed here mm/mmap.c:2227 find_vma() warn: Variable addr looks like a tagged address - it is not allowed here mm/mmap.c:2227 find_vma() warn: Variable addr looks like a tagged address - it is not allowed here ... The warning is printed for each call site that calls find_vma with a tagged address from userspace. After 6 runs of smatch, 24 warnings are produced. The warnings are helpful in detecting issues, but not useful in providing enough information to debug the issue and find the offending functions higher up the call stack that call find_vma. Smatch is good at determining the ranges of values that can be passed to a function, but it doesn't keep track of how it determined those ranges - this makes it difficult to determine the offending function. However even this level of limited functionality is helpful - as once the kernel is initially sanitized of tagged addresses, then the use of smatch here can spot regressions and offending code identified via git aiaiai/bisect. Smatch builds a database which includes a table of functions, who they are called by and with what range of parameters. Smatch also provides a bunch of perl and python scripts which can be used to extract helpful information for example to produce a call tree for a given function. I've adapted these scripts such that for a given function (e.g. find_vma) it will show you the call tree where callers pass user data and where that data is tagged addresses. The output looks something like this: find_vma() - 0-u64max (1) kvm_arch_prepare_memory_region() - 0-u64max (1) __kvm_set_memory_region() - 0-u64max (1) kvm_set_memory_region() - 0-u64max (1) kvm_vm_ioctl_set_memory_region() - 0-u64max (1) hugetlb_get_unmapped_area() - 0-u64max (1) shm_get_unmapped_area() - 0-u64max (1) shm_get_unmapped_area() - 32785,40977,98321,106513,2097151-u64max[c] (1) ... In summary the following are found, note this currently unhelpfuly includes functions inbetween find_vma and the leaf functions: $ cat find_vma_tree_orig | sed -e 's/^[ \t]*//' | cut -d ' ' -f 1 | sort | uniq call_mmap() check_and_migrate_cma_pages() compat_ipv6_getsockopt() compat_sock_common_getsockopt() compat_tcp_getsockopt() count_vma_pages_range() __do_compat_sys_get_mempolicy() do_get_mempolicy() do_ioctl() do_mincore() do_mlock() do_mmap() do_mmap_pgoff() ... As you can see, this gives a good point in the right direction for hunting down callers of find_vma with tagged addresses. This can be further improved - the problem here is that for a given function, e.g. find_vma we look for callers where *any* of the parameters passed to find_vma are tagged addresses from userspace - i.e. not *just* the annotated parameter. This is also true for find_vma's callers' callers'. This results in the call tree having false positives. It *is* possible to track parameters (e.g. find_vma arg 1 comes from arg 3 of do_pages_stat_array etc), but this is limited as if functions modify the data then the tracking is stopped (however this can be fixed). After apply the patchset ([PATCH v16 00/16] arm64: untag user pointers passed to the kernel)[2] which untags user addresses, smatch indicates 11 issues. The call tree is reduced. After grep'ing the call tree output, there are some valid instances where untagging is needed, e.g: gntdev_ioctl_get_offset_for_vaddr() kvm_vm_ioctl_set_memory_region() privcmd_ioctl_mmap_batch() privcmd_ioctl_mmap_resource() __se_sys_brk() __se_sys_mremap() __se_sys_munmap() __se_sys_remap_file_pages() __se_sys_shmat() __se_sys_shmdt() __vm_munmap() ... An example of a false positve is do_mlock. We untag the address and pass that to apply_vma_lock_flags - however we also pass a length - because the length came from userspace and could have the top bits set - it's flagged. However with improved parameter tracking we can remove this false positive and similar. Prior to smatch I attempted a similar approach with sparse - however it seemed necessary to propogate the __untagged annotation in every function up the call tree, and resulted in adding the __untagged annotation to functions that would never get near user provided data. This leads to a littering of __untagged all over the kernel which doesn't seem appealing. Smatch is more capable, however it almost certainly won't pick up 100% of issues due to the difficulity of making flow analysis understand everything a compiler can. Is it likely to be acceptable to use the __untagged annotation in user-path functions that require untagged addresses across the kernel? Thanks, Andrew Murray [1] https://lkml.org/lkml/2019/6/13/534 [2] https://patchwork.kernel.org/cover/10989517/ [3] http://smatch.sourceforge.net/

6 years

3
3
0 0

[PATCH v6 00/19] Unify vDSOs across more architectures

by Vincenzo Frascino

vDSO (virtual dynamic shared object) is a mechanism that the Linux kernel provides as an alternative to system calls to reduce where possible the costs in terms of cycles. This is possible because certain syscalls like gettimeofday() do not write any data and return one or more values that are stored in the kernel, which makes relatively safe calling them directly as a library function. Even if the mechanism is pretty much standard, every architecture in the last few years ended up implementing their own vDSO library in the architectural code. The purpose of this patch-set is to identify the commonalities in between the architectures and try to consolidate the common code paths, starting with gettimeofday(). This implementation contains the following design choices: * Every architecture defines the arch specific code in an header in "asm/vdso/". * The generic implementation includes the arch specific one and lives in "lib/vdso". * The arch specific code for gettimeofday lives in "<arch path>/vdso/gettimeofday.c" and includes the generic code only. * The generic implementation of update_vsyscall and update_vsyscall_tz lives in kernel/vdso and provide the bindings that can be implemented by each architecture. * Each architecture provides its implementation of the bindings in "asm/vdso/vsyscall.h". * This approach allows to consolidate the common code in a single place with the benefit of avoiding code duplication. This implementation contains the portings to the common library for: arm64, compat mode for arm64, arm, mips, x86_64, x32, compat mode for x86_64 and i386. The mips porting has been tested on qemu for mips32el. A configuration to repeat the tests can be found at [4]. The x86_64 porting has been tested on an Intel Xeon 5120T based machine running Ubuntu 18.04 and using the Ubuntu provided defconfig. The i386 porting has been tested on qemu using the i386_defconfig configuration. Last but not least from this porting arm64, compat arm64, arm and mips gain the support for: * CLOCK_BOOTTIME that can be useful in certain scenarios since it keeps track of the time during sleep as well. * CLOCK_TAI that is like CLOCK_REALTIME, but uses the International Atomic Time (TAI) reference instead of UTC to avoid jumping on leap second updates. for both clock_gettime and clock_getres. The porting has been validated using the vdsotest test-suite [1] extended to cover all the clock ids [2]. A new test has been added to the linux kselftest in order to validate the newly added library. The porting has been benchmarked and the performance results are provided as part of this cover letter. To simplify the testing, a copy of the patchset on top of a recent linux tree can be found at [3] and [4]. [1] https://github.com/nathanlynch/vdsotest [2] https://github.com/fvincenzo/vdsotest [3] git://linux-arm.org/linux-vf.git vdso/v6 [4] git://linux-arm.org/linux-vf.git vdso-mips/v6 Changes: -------- v6: - Rebased on 5.2-rc2. - Added performance numbers. - Removed vdso_types.h. - Unified update_vsyscall and update_vsyscall_tz. - Reworked the kselftest included in this patchset. - Addressed review comments. v5: - Rebased on 5.0-rc7. - Added x86_64, compat mode for x86_64 and i386 portings. - Extended vDSO kselftest. - Addressed review comments. v4: - Rebased on 5.0-rc2. - Addressed review comments. - Disabled compat vdso on arm64 when the kernel is compiled with clang. v3: - Ported the latest fixes and optimizations done on the x86 architecture to the generic library. - Addressed review comments. - Improved the documentation of the interfaces. - Changed the HAVE_ARCH_TIMER config option to a more generic HAVE_HW_COUNTER. v2: - Added -ffixed-x18 to arm64 - Repleced occurrences of timeval and timespec - Modified datapage.h to be compliant with y2038 on all the architectures - Removed __u_vdso type Cc: Catalin Marinas <catalin.marinas(a)arm.com> Cc: Will Deacon <will.deacon(a)arm.com> Cc: Arnd Bergmann <arnd(a)arndb.de> Cc: Russell King <linux(a)armlinux.org.uk> Cc: Ralf Baechle <ralf(a)linux-mips.org> Cc: Paul Burton <paul.burton(a)mips.com> Cc: Daniel Lezcano <daniel.lezcano(a)linaro.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Mark Salyzyn <salyzyn(a)android.com> Cc: Peter Collingbourne <pcc(a)google.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Dmitry Safonov <0x7f454c46(a)gmail.com> Cc: Rasmus Villemoes <linux(a)rasmusvillemoes.dk> Cc: Huw Davies <huw(a)codeweavers.com> Signed-off-by: Vincenzo Frascino <vincenzo.frascino(a)arm.com> Performance Numbers: Linux 5.2.0-rc2 - Xeon Gold 5120T ====================================================== Unified vDSO: ------------- clock-gettime-monotonic: syscall: 342 nsec/call clock-gettime-monotonic: libc: 25 nsec/call clock-gettime-monotonic: vdso: 24 nsec/call clock-getres-monotonic: syscall: 296 nsec/call clock-getres-monotonic: libc: 296 nsec/call clock-getres-monotonic: vdso: 3 nsec/call clock-gettime-monotonic-coarse: syscall: 294 nsec/call clock-gettime-monotonic-coarse: libc: 5 nsec/call clock-gettime-monotonic-coarse: vdso: 5 nsec/call clock-getres-monotonic-coarse: syscall: 295 nsec/call clock-getres-monotonic-coarse: libc: 292 nsec/call clock-getres-monotonic-coarse: vdso: 5 nsec/call clock-gettime-monotonic-raw: syscall: 343 nsec/call clock-gettime-monotonic-raw: libc: 25 nsec/call clock-gettime-monotonic-raw: vdso: 23 nsec/call clock-getres-monotonic-raw: syscall: 290 nsec/call clock-getres-monotonic-raw: libc: 290 nsec/call clock-getres-monotonic-raw: vdso: 4 nsec/call clock-gettime-tai: syscall: 332 nsec/call clock-gettime-tai: libc: 24 nsec/call clock-gettime-tai: vdso: 23 nsec/call clock-getres-tai: syscall: 288 nsec/call clock-getres-tai: libc: 288 nsec/call clock-getres-tai: vdso: 3 nsec/call clock-gettime-boottime: syscall: 342 nsec/call clock-gettime-boottime: libc: 24 nsec/call clock-gettime-boottime: vdso: 23 nsec/call clock-getres-boottime: syscall: 284 nsec/call clock-getres-boottime: libc: 291 nsec/call clock-getres-boottime: vdso: 3 nsec/call clock-gettime-realtime: syscall: 337 nsec/call clock-gettime-realtime: libc: 24 nsec/call clock-gettime-realtime: vdso: 23 nsec/call clock-getres-realtime: syscall: 287 nsec/call clock-getres-realtime: libc: 284 nsec/call clock-getres-realtime: vdso: 3 nsec/call clock-gettime-realtime-coarse: syscall: 307 nsec/call clock-gettime-realtime-coarse: libc: 4 nsec/call clock-gettime-realtime-coarse: vdso: 4 nsec/call clock-getres-realtime-coarse: syscall: 294 nsec/call clock-getres-realtime-coarse: libc: 291 nsec/call clock-getres-realtime-coarse: vdso: 4 nsec/call getcpu: syscall: 246 nsec/call getcpu: libc: 14 nsec/call getcpu: vdso: 11 nsec/call gettimeofday: syscall: 293 nsec/call gettimeofday: libc: 26 nsec/call gettimeofday: vdso: 25 nsec/call Stock Kernel: ------------- clock-gettime-monotonic: syscall: 338 nsec/call clock-gettime-monotonic: libc: 24 nsec/call clock-gettime-monotonic: vdso: 23 nsec/call clock-getres-monotonic: syscall: 291 nsec/call clock-getres-monotonic: libc: 304 nsec/call clock-getres-monotonic: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-monotonic-coarse: syscall: 297 nsec/call clock-gettime-monotonic-coarse: libc: 5 nsec/call clock-gettime-monotonic-coarse: vdso: 4 nsec/call clock-getres-monotonic-coarse: syscall: 281 nsec/call clock-getres-monotonic-coarse: libc: 286 nsec/call clock-getres-monotonic-coarse: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-monotonic-raw: syscall: 336 nsec/call clock-gettime-monotonic-raw: libc: 340 nsec/call clock-gettime-monotonic-raw: vdso: 346 nsec/call clock-getres-monotonic-raw: syscall: 297 nsec/call clock-getres-monotonic-raw: libc: 301 nsec/call clock-getres-monotonic-raw: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-tai: syscall: 351 nsec/call clock-gettime-tai: libc: 24 nsec/call clock-gettime-tai: vdso: 23 nsec/call clock-getres-tai: syscall: 298 nsec/call clock-getres-tai: libc: 290 nsec/call clock-getres-tai: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-boottime: syscall: 342 nsec/call clock-gettime-boottime: libc: 347 nsec/call clock-gettime-boottime: vdso: 355 nsec/call clock-getres-boottime: syscall: 296 nsec/call clock-getres-boottime: libc: 295 nsec/call clock-getres-boottime: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-realtime: syscall: 346 nsec/call clock-gettime-realtime: libc: 24 nsec/call clock-gettime-realtime: vdso: 22 nsec/call clock-getres-realtime: syscall: 295 nsec/call clock-getres-realtime: libc: 291 nsec/call clock-getres-realtime: vdso: not tested Note: vDSO version of clock_getres not found clock-gettime-realtime-coarse: syscall: 292 nsec/call clock-gettime-realtime-coarse: libc: 5 nsec/call clock-gettime-realtime-coarse: vdso: 4 nsec/call clock-getres-realtime-coarse: syscall: 300 nsec/call clock-getres-realtime-coarse: libc: 301 nsec/call clock-getres-realtime-coarse: vdso: not tested Note: vDSO version of clock_getres not found getcpu: syscall: 252 nsec/call getcpu: libc: 14 nsec/call getcpu: vdso: 11 nsec/call gettimeofday: syscall: 293 nsec/call gettimeofday: libc: 24 nsec/call gettimeofday: vdso: 25 nsec/call Peter Collingbourne (1): arm64: Build vDSO with -ffixed-x18 Vincenzo Frascino (18): kernel: Standardize vdso_datapage kernel: Define gettimeofday vdso common code kernel: Unify update_vsyscall implementation arm64: Substitute gettimeofday with C implementation arm64: compat: Add missing syscall numbers arm64: compat: Expose signal related structures arm64: compat: Generate asm offsets for signals lib: vdso: Add compat support arm64: compat: Add vDSO arm64: Refactor vDSO code arm64: compat: vDSO setup for compat layer arm64: elf: vDSO code page discovery arm64: compat: Get sigreturn trampolines from vDSO arm64: Add vDSO compat support arm: Add support for generic vDSO mips: Add support for generic vDSO x86: Add support for generic vDSO kselftest: Extend vDSO selftest arch/arm/Kconfig | 3 + arch/arm/include/asm/vdso/gettimeofday.h | 96 +++++ arch/arm/include/asm/vdso/vsyscall.h | 71 ++++ arch/arm/include/asm/vdso_datapage.h | 29 +- arch/arm/kernel/vdso.c | 87 +---- arch/arm/vdso/Makefile | 13 +- arch/arm/vdso/note.c | 15 + arch/arm/vdso/vdso.lds.S | 2 + arch/arm/vdso/vgettimeofday.c | 268 +------------ arch/arm64/Kconfig | 3 + arch/arm64/Makefile | 23 +- arch/arm64/include/asm/elf.h | 14 + arch/arm64/include/asm/signal32.h | 46 +++ arch/arm64/include/asm/unistd.h | 5 + arch/arm64/include/asm/vdso.h | 3 + arch/arm64/include/asm/vdso/compat_barrier.h | 51 +++ .../include/asm/vdso/compat_gettimeofday.h | 108 ++++++ arch/arm64/include/asm/vdso/gettimeofday.h | 84 +++++ arch/arm64/include/asm/vdso/vsyscall.h | 53 +++ arch/arm64/include/asm/vdso_datapage.h | 48 --- arch/arm64/kernel/Makefile | 6 +- arch/arm64/kernel/asm-offsets.c | 39 +- arch/arm64/kernel/signal32.c | 72 ++-- arch/arm64/kernel/vdso.c | 356 ++++++++++++------ arch/arm64/kernel/vdso/Makefile | 34 +- arch/arm64/kernel/vdso/gettimeofday.S | 334 ---------------- arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++ arch/arm64/kernel/vdso32/.gitignore | 2 + arch/arm64/kernel/vdso32/Makefile | 184 +++++++++ arch/arm64/kernel/vdso32/note.c | 15 + arch/arm64/kernel/vdso32/sigreturn.S | 62 +++ arch/arm64/kernel/vdso32/vdso.S | 19 + arch/arm64/kernel/vdso32/vdso.lds.S | 82 ++++ arch/arm64/kernel/vdso32/vgettimeofday.c | 59 +++ arch/mips/Kconfig | 2 + arch/mips/include/asm/vdso.h | 78 +--- arch/mips/include/asm/vdso/gettimeofday.h | 175 +++++++++ arch/mips/{ => include/asm}/vdso/vdso.h | 6 +- arch/mips/include/asm/vdso/vsyscall.h | 43 +++ arch/mips/kernel/vdso.c | 37 +- arch/mips/vdso/Makefile | 25 +- arch/mips/vdso/elf.S | 2 +- arch/mips/vdso/gettimeofday.c | 273 -------------- arch/mips/vdso/sigreturn.S | 2 +- arch/mips/vdso/vdso.lds.S | 4 + arch/mips/vdso/vgettimeofday.c | 57 +++ arch/x86/Kconfig | 3 + arch/x86/entry/vdso/Makefile | 9 + arch/x86/entry/vdso/vclock_gettime.c | 251 +++--------- arch/x86/entry/vdso/vdso.lds.S | 2 + arch/x86/entry/vdso/vdso32/vdso32.lds.S | 2 + arch/x86/entry/vdso/vdsox32.lds.S | 1 + arch/x86/entry/vsyscall/Makefile | 2 - arch/x86/entry/vsyscall/vsyscall_gtod.c | 83 ---- arch/x86/include/asm/mshyperv-tsc.h | 76 ++++ arch/x86/include/asm/mshyperv.h | 70 +--- arch/x86/include/asm/pvclock.h | 2 +- arch/x86/include/asm/vdso/gettimeofday.h | 203 ++++++++++ arch/x86/include/asm/vdso/vsyscall.h | 44 +++ arch/x86/include/asm/vgtod.h | 75 +--- arch/x86/include/asm/vvar.h | 7 +- arch/x86/kernel/pvclock.c | 1 + include/asm-generic/vdso/vsyscall.h | 56 +++ include/linux/hrtimer.h | 15 +- include/linux/hrtimer_defs.h | 25 ++ include/linux/timekeeper_internal.h | 9 + include/vdso/datapage.h | 91 +++++ include/vdso/helpers.h | 56 +++ include/vdso/vsyscall.h | 11 + kernel/Makefile | 1 + kernel/vdso/Makefile | 2 + kernel/vdso/vsyscall.c | 139 +++++++ lib/Kconfig | 5 + lib/vdso/Kconfig | 36 ++ lib/vdso/Makefile | 22 ++ lib/vdso/gettimeofday.c | 229 +++++++++++ tools/testing/selftests/vDSO/Makefile | 2 + tools/testing/selftests/vDSO/vdso_full_test.c | 261 +++++++++++++ 78 files changed, 3042 insertions(+), 1767 deletions(-) create mode 100644 arch/arm/include/asm/vdso/gettimeofday.h create mode 100644 arch/arm/include/asm/vdso/vsyscall.h create mode 100644 arch/arm/vdso/note.c create mode 100644 arch/arm64/include/asm/vdso/compat_barrier.h create mode 100644 arch/arm64/include/asm/vdso/compat_gettimeofday.h create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h delete mode 100644 arch/arm64/include/asm/vdso_datapage.h delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c create mode 100644 arch/arm64/kernel/vdso32/.gitignore create mode 100644 arch/arm64/kernel/vdso32/Makefile create mode 100644 arch/arm64/kernel/vdso32/note.c create mode 100644 arch/arm64/kernel/vdso32/sigreturn.S create mode 100644 arch/arm64/kernel/vdso32/vdso.S create mode 100644 arch/arm64/kernel/vdso32/vdso.lds.S create mode 100644 arch/arm64/kernel/vdso32/vgettimeofday.c create mode 100644 arch/mips/include/asm/vdso/gettimeofday.h rename arch/mips/{ => include/asm}/vdso/vdso.h (90%) create mode 100644 arch/mips/include/asm/vdso/vsyscall.h delete mode 100644 arch/mips/vdso/gettimeofday.c create mode 100644 arch/mips/vdso/vgettimeofday.c delete mode 100644 arch/x86/entry/vsyscall/vsyscall_gtod.c create mode 100644 arch/x86/include/asm/mshyperv-tsc.h create mode 100644 arch/x86/include/asm/vdso/gettimeofday.h create mode 100644 arch/x86/include/asm/vdso/vsyscall.h create mode 100644 include/asm-generic/vdso/vsyscall.h create mode 100644 include/linux/hrtimer_defs.h create mode 100644 include/vdso/datapage.h create mode 100644 include/vdso/helpers.h create mode 100644 include/vdso/vsyscall.h create mode 100644 kernel/vdso/Makefile create mode 100644 kernel/vdso/vsyscall.c create mode 100644 lib/vdso/Kconfig create mode 100644 lib/vdso/Makefile create mode 100644 lib/vdso/gettimeofday.c create mode 100644 tools/testing/selftests/vDSO/vdso_full_test.c -- 2.21.0

6 years

11
68
0 0

[PATCH v4 0/2] Drivers: hv: Move Hyper-V clock/timer code to separate clocksource driver

by Michael Kelley

This patch series moves Hyper-V clock/timer code to a separate Hyper-V clocksource driver. Previously, Hyper-V clock/timer code and data structures were mixed in with other Hyper-V code in the ISA independent drivers/hv code as well as in ISA dependent code. The new Hyper-V clocksource driver is ISA independent, with a just few dependencies on ISA specific functions. The patch series does not change any behavior or functionality -- it only reorganizes the existing code and fixes up the linkages. A few places outside of Hyper-V code are fixed up to use the new #include file structure. This restructuring is in response to Marc Zyngier's review comments on supporting Hyper-V running on ARM64, and is a good idea in general. It increases the amount of code shared between the x86 and ARM64 architectures, and reduces the size of the new code for supporting Hyper-V on ARM64. A new version of the Hyper-V on ARM64 patches will follow once this clocksource restructuring is accepted. The code is diff'ed against the upstream tip tree: git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso Changes in v4: * Revised commit messages * Rebased to upstream tip tree Changes in v3: * Removed boolean argument to hv_init_clocksource(). Always call sched_clock_register, which is needed on ARM64 but a no-op on x86. * Removed separate cpuhp setup in hv_stimer_alloc() and instead directly call hv_stimer_init() and hv_stimer_cleanup() from corresponding VMbus functions. This more closely matches original code and avoids clocksource stop/restart problems on ARM64 when VMbus code denies CPU offlining request. Changes in v2: * Revised commit short descriptions so the distinction between the first and second patches is clearer [GregKH] * Renamed new clocksource driver files and functions to use existing "timer" and "stimer" names instead of introducing "syntimer". [Vitaly Kuznetsov] * Introduced CONFIG_HYPER_TIMER to fix build problem when CONFIG_HYPERV=m [Vitaly Kuznetsov] * Added "Suggested-by: Marc Zyngier" Michael Kelley (2): Drivers: hv: Create Hyper-V clocksource driver from existing clockevents code Drivers: hv: Move Hyper-V clocksource code to new clocksource driver MAINTAINERS | 2 + arch/x86/entry/vdso/vma.c | 2 +- arch/x86/hyperv/hv_init.c | 91 +-------- arch/x86/include/asm/hyperv-tlfs.h | 6 + arch/x86/include/asm/mshyperv.h | 81 ++------ arch/x86/include/asm/vdso/gettimeofday.h | 2 +- arch/x86/kernel/cpu/mshyperv.c | 2 + arch/x86/kvm/x86.c | 1 + drivers/clocksource/Makefile | 1 + drivers/clocksource/hyperv_timer.c | 321 +++++++++++++++++++++++++++++++ drivers/hv/Kconfig | 3 + drivers/hv/hv.c | 156 +-------------- drivers/hv/hyperv_vmbus.h | 3 - drivers/hv/vmbus_drv.c | 42 ++-- include/clocksource/hyperv_timer.h | 105 ++++++++++ 15 files changed, 483 insertions(+), 335 deletions(-) create mode 100644 drivers/clocksource/hyperv_timer.c create mode 100644 include/clocksource/hyperv_timer.h -- 1.8.3.1

6 years

1
2
0 0

[PATCH 19/39] docs: blockdev: add it to the admin-guide

by Mauro Carvalho Chehab

The blockdev book basically contains user-faced documentation. Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung(a)kernel.org> --- .../blockdev/drbd/DRBD-8.3-data-packets.svg | 0 .../blockdev/drbd/DRBD-data-packets.svg | 0 .../blockdev/drbd/conn-states-8.dot | 0 .../blockdev/drbd/data-structure-v9.rst | 0 .../blockdev/drbd/disk-states-8.dot | 0 .../drbd/drbd-connection-state-overview.dot | 0 .../blockdev/drbd/figures.rst | 0 .../{ => admin-guide}/blockdev/drbd/index.rst | 0 .../blockdev/drbd/node-states-8.dot | 1 - .../{ => admin-guide}/blockdev/floppy.rst | 0 .../{ => admin-guide}/blockdev/index.rst | 2 -- .../{ => admin-guide}/blockdev/nbd.rst | 0 .../{ => admin-guide}/blockdev/paride.rst | 0 .../{ => admin-guide}/blockdev/ramdisk.rst | 0 .../{ => admin-guide}/blockdev/zram.rst | 0 Documentation/admin-guide/index.rst | 1 + .../admin-guide/kernel-parameters.txt | 18 +++++++++--------- MAINTAINERS | 10 +++++----- drivers/block/Kconfig | 8 ++++---- drivers/block/floppy.c | 2 +- drivers/block/zram/Kconfig | 6 +++--- tools/testing/selftests/zram/README | 2 +- 22 files changed, 24 insertions(+), 26 deletions(-) rename Documentation/{ => admin-guide}/blockdev/drbd/DRBD-8.3-data-packets.svg (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/DRBD-data-packets.svg (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/conn-states-8.dot (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/data-structure-v9.rst (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/disk-states-8.dot (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/drbd-connection-state-overview.dot (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/figures.rst (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/index.rst (100%) rename Documentation/{ => admin-guide}/blockdev/drbd/node-states-8.dot (99%) rename Documentation/{ => admin-guide}/blockdev/floppy.rst (100%) rename Documentation/{ => admin-guide}/blockdev/index.rst (94%) rename Documentation/{ => admin-guide}/blockdev/nbd.rst (100%) rename Documentation/{ => admin-guide}/blockdev/paride.rst (100%) rename Documentation/{ => admin-guide}/blockdev/ramdisk.rst (100%) rename Documentation/{ => admin-guide}/blockdev/zram.rst (100%) diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg similarity index 100% rename from Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg rename to Documentation/admin-guide/blockdev/drbd/DRBD-8.3-data-packets.svg diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg similarity index 100% rename from Documentation/blockdev/drbd/DRBD-data-packets.svg rename to Documentation/admin-guide/blockdev/drbd/DRBD-data-packets.svg diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/admin-guide/blockdev/drbd/conn-states-8.dot similarity index 100% rename from Documentation/blockdev/drbd/conn-states-8.dot rename to Documentation/admin-guide/blockdev/drbd/conn-states-8.dot diff --git a/Documentation/blockdev/drbd/data-structure-v9.rst b/Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst similarity index 100% rename from Documentation/blockdev/drbd/data-structure-v9.rst rename to Documentation/admin-guide/blockdev/drbd/data-structure-v9.rst diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/admin-guide/blockdev/drbd/disk-states-8.dot similarity index 100% rename from Documentation/blockdev/drbd/disk-states-8.dot rename to Documentation/admin-guide/blockdev/drbd/disk-states-8.dot diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot similarity index 100% rename from Documentation/blockdev/drbd/drbd-connection-state-overview.dot rename to Documentation/admin-guide/blockdev/drbd/drbd-connection-state-overview.dot diff --git a/Documentation/blockdev/drbd/figures.rst b/Documentation/admin-guide/blockdev/drbd/figures.rst similarity index 100% rename from Documentation/blockdev/drbd/figures.rst rename to Documentation/admin-guide/blockdev/drbd/figures.rst diff --git a/Documentation/blockdev/drbd/index.rst b/Documentation/admin-guide/blockdev/drbd/index.rst similarity index 100% rename from Documentation/blockdev/drbd/index.rst rename to Documentation/admin-guide/blockdev/drbd/index.rst diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot similarity index 99% rename from Documentation/blockdev/drbd/node-states-8.dot rename to Documentation/admin-guide/blockdev/drbd/node-states-8.dot index 4a2b00c23547..bfa54e1f8016 100644 --- a/Documentation/blockdev/drbd/node-states-8.dot +++ b/Documentation/admin-guide/blockdev/drbd/node-states-8.dot @@ -11,4 +11,3 @@ digraph peer_states { Unknown -> Primary [ label = "connected" ] Unknown -> Secondary [ label = "connected" ] } - diff --git a/Documentation/blockdev/floppy.rst b/Documentation/admin-guide/blockdev/floppy.rst similarity index 100% rename from Documentation/blockdev/floppy.rst rename to Documentation/admin-guide/blockdev/floppy.rst diff --git a/Documentation/blockdev/index.rst b/Documentation/admin-guide/blockdev/index.rst similarity index 94% rename from Documentation/blockdev/index.rst rename to Documentation/admin-guide/blockdev/index.rst index a9af6ed8b4aa..20a738d9d047 100644 --- a/Documentation/blockdev/index.rst +++ b/Documentation/admin-guide/blockdev/index.rst @@ -1,5 +1,3 @@ -:orphan: - =========================== The Linux RapidIO Subsystem =========================== diff --git a/Documentation/blockdev/nbd.rst b/Documentation/admin-guide/blockdev/nbd.rst similarity index 100% rename from Documentation/blockdev/nbd.rst rename to Documentation/admin-guide/blockdev/nbd.rst diff --git a/Documentation/blockdev/paride.rst b/Documentation/admin-guide/blockdev/paride.rst similarity index 100% rename from Documentation/blockdev/paride.rst rename to Documentation/admin-guide/blockdev/paride.rst diff --git a/Documentation/blockdev/ramdisk.rst b/Documentation/admin-guide/blockdev/ramdisk.rst similarity index 100% rename from Documentation/blockdev/ramdisk.rst rename to Documentation/admin-guide/blockdev/ramdisk.rst diff --git a/Documentation/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst similarity index 100% rename from Documentation/blockdev/zram.rst rename to Documentation/admin-guide/blockdev/zram.rst diff --git a/Documentation/admin-guide/index.rst b/Documentation/admin-guide/index.rst index 65e821a03aca..c073af461cdf 100644 --- a/Documentation/admin-guide/index.rst +++ b/Documentation/admin-guide/index.rst @@ -73,6 +73,7 @@ configure specific aspects of kernel behavior to your liking. java ras bcache + blockdev/index ext4 pm/index thunderbolt diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9b535c0e22f3..49ad034c4675 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1249,7 +1249,7 @@ See also Documentation/fault-injection/. floppy= [HW] - See Documentation/blockdev/floppy.rst. + See Documentation/admin-guide/blockdev/floppy.rst. force_pal_cache_flush [IA-64] Avoid check_sal_cache_flush which may hang on @@ -2247,7 +2247,7 @@ memblock=debug [KNL] Enable memblock debug messages. load_ramdisk= [RAM] List of ramdisks to load from floppy - See Documentation/blockdev/ramdisk.rst. + See Documentation/admin-guide/blockdev/ramdisk.rst. lockd.nlm_grace_period=P [NFS] Assign grace period. Format: <integer> @@ -3294,7 +3294,7 @@ pcd. [PARIDE] See header of drivers/block/paride/pcd.c. - See also Documentation/blockdev/paride.rst. + See also Documentation/admin-guide/blockdev/paride.rst. pci=option[,option...] [PCI] various PCI subsystem options. @@ -3538,7 +3538,7 @@ needed on a platform with proper driver support. pd. [PARIDE] - See Documentation/blockdev/paride.rst. + See Documentation/admin-guide/blockdev/paride.rst. pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at boot time. @@ -3553,10 +3553,10 @@ and performance comparison. pf. [PARIDE] - See Documentation/blockdev/paride.rst. + See Documentation/admin-guide/blockdev/paride.rst. pg. [PARIDE] - See Documentation/blockdev/paride.rst. + See Documentation/admin-guide/blockdev/paride.rst. pirq= [SMP,APIC] Manual mp-table setup See Documentation/x86/i386/IO-APIC.rst. @@ -3668,7 +3668,7 @@ prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk before loading. - See Documentation/blockdev/ramdisk.rst. + See Documentation/admin-guide/blockdev/ramdisk.rst. psi= [KNL] Enable or disable pressure stall information tracking. @@ -3690,7 +3690,7 @@ pstore.backend= Specify the name of the pstore backend to use pt. [PARIDE] - See Documentation/blockdev/paride.rst. + See Documentation/admin-guide/blockdev/paride.rst. pti= [X86_64] Control Page Table Isolation of user and kernel address spaces. Disabling this feature @@ -3719,7 +3719,7 @@ See Documentation/admin-guide/md.rst. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes - See Documentation/blockdev/ramdisk.rst. + See Documentation/admin-guide/blockdev/ramdisk.rst. random.trust_cpu={on,off} [KNL] Enable or disable trusting the use of the diff --git a/MAINTAINERS b/MAINTAINERS index 4c622a19ab7d..3f0f654d1166 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4974,7 +4974,7 @@ T: git git://git.linbit.com/drbd-8.4.git S: Supported F: drivers/block/drbd/ F: lib/lru_cache.c -F: Documentation/blockdev/drbd/ +F: Documentation/admin-guide/blockdev/ DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS M: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> @@ -11024,7 +11024,7 @@ M: Josef Bacik <josef(a)toxicpanda.com> S: Maintained L: linux-block(a)vger.kernel.org L: nbd(a)other.debian.org -F: Documentation/blockdev/nbd.rst +F: Documentation/admin-guide/blockdev/nbd.rst F: drivers/block/nbd.c F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h @@ -12028,7 +12028,7 @@ PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES M: Tim Waugh <tim(a)cyberelk.net> L: linux-parport(a)lists.infradead.org (subscribers-only) S: Maintained -F: Documentation/blockdev/paride.rst +F: Documentation/admin-guide/blockdev/paride.rst F: drivers/block/paride/ PARISC ARCHITECTURE @@ -13310,7 +13310,7 @@ F: drivers/net/wireless/ralink/rt2x00/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe <axboe(a)kernel.dk> S: Maintained -F: Documentation/blockdev/ramdisk.rst +F: Documentation/admin-guide/blockdev/ramdisk.rst F: drivers/block/brd.c RANCHU VIRTUAL BOARD FOR MIPS @@ -17672,7 +17672,7 @@ R: Sergey Senozhatsky <sergey.senozhatsky.work(a)gmail.com> L: linux-kernel(a)vger.kernel.org S: Maintained F: drivers/block/zram/ -F: Documentation/blockdev/zram.rst +F: Documentation/admin-guide/blockdev/zram.rst ZS DECSTATION Z85C30 SERIAL DRIVER M: "Maciej W. Rozycki" <macro(a)linux-mips.org> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index c43690b973d8..1bb8ec575352 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -31,7 +31,7 @@ config BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, say Y. Information about this driver, especially important for IBM Thinkpad users, is contained in - <file:Documentation/blockdev/floppy.rst>. + <file:Documentation/admin-guide/blockdev/floppy.rst>. That file also contains the location of the Floppy driver FAQ as well as location of the fdutils package used to configure additional parameters of the driver at run time. @@ -96,7 +96,7 @@ config PARIDE your computer's parallel port. Most of them are actually IDE devices using a parallel port IDE adapter. This option enables the PARIDE subsystem which contains drivers for many of these external drives. - Read <file:Documentation/blockdev/paride.rst> for more information. + Read <file:Documentation/admin-guide/blockdev/paride.rst> for more information. If you have said Y to the "Parallel-port support" configuration option, you may share a single port between your printer and other @@ -261,7 +261,7 @@ config BLK_DEV_NBD userland (making server and client physically the same computer, communicating using the loopback network device). - Read <file:Documentation/blockdev/nbd.rst> for more information, + Read <file:Documentation/admin-guide/blockdev/nbd.rst> for more information, especially about where to find the server code, which runs in user space and does not need special kernel support. @@ -303,7 +303,7 @@ config BLK_DEV_RAM during the initial install of Linux. Note that the kernel command line option "ramdisk=XX" is now obsolete. - For details, read <file:Documentation/blockdev/ramdisk.rst>. + For details, read <file:Documentation/admin-guide/blockdev/ramdisk.rst>. To compile this driver as a module, choose M here: the module will be called brd. An alias "rd" has been defined diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5c99e52f9dc1..f652c1ac3ae9 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4424,7 +4424,7 @@ static int __init floppy_setup(char *str) pr_cont("\n"); } else DPRINT("botched floppy option\n"); - DPRINT("Read Documentation/blockdev/floppy.rst\n"); + DPRINT("Read Documentation/admin-guide/blockdev/floppy.rst\n"); return 0; } diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index e06b99d54816..fe7a4b7d30cf 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -12,7 +12,7 @@ config ZRAM It has several use cases, for example: /tmp storage, use as swap disks and maybe many more. - See Documentation/blockdev/zram.rst for more information. + See Documentation/admin-guide/blockdev/zram.rst for more information. config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" @@ -26,7 +26,7 @@ config ZRAM_WRITEBACK With /sys/block/zramX/{idle,writeback}, application could ask idle page's writeback to the backing device to save in memory. - See Documentation/blockdev/zram.rst for more information. + See Documentation/admin-guide/blockdev/zram.rst for more information. config ZRAM_MEMORY_TRACKING bool "Track zRam block status" @@ -36,4 +36,4 @@ config ZRAM_MEMORY_TRACKING of zRAM. Admin could see the information via /sys/kernel/debug/zram/zramX/block_state. - See Documentation/blockdev/zram.rst for more information. + See Documentation/admin-guide/blockdev/zram.rst for more information. diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 5fa378391d3b..110b34834a6f 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -37,4 +37,4 @@ Commands required for testing: - mkfs/ mkfs.ext4 For more information please refer: -kernel-source-tree/Documentation/blockdev/zram.rst +kernel-source-tree/Documentation/admin-guide/blockdev/zram.rst -- 2.21.0

6 years

1
0
0 0

[PATCH 37/43] docs: blockdev: convert to ReST

by Mauro Carvalho Chehab

Rename the blockdev documentation files to ReST, add an index for them and adjust in order to produce a nice html output via the Sphinx build system. The drbd sub-directory contains some graphs and data flows. Add those too to the documentation. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung(a)kernel.org> --- .../admin-guide/kernel-parameters.txt | 18 +- ...structure-v9.txt => data-structure-v9.rst} | 6 +- Documentation/blockdev/drbd/figures.rst | 28 +++ .../blockdev/drbd/{README.txt => index.rst} | 15 +- .../blockdev/{floppy.txt => floppy.rst} | 88 ++++---- Documentation/blockdev/index.rst | 16 ++ Documentation/blockdev/{nbd.txt => nbd.rst} | 2 +- .../blockdev/{paride.txt => paride.rst} | 194 +++++++++-------- .../blockdev/{ramdisk.txt => ramdisk.rst} | 55 ++--- Documentation/blockdev/{zram.txt => zram.rst} | 195 ++++++++++++------ MAINTAINERS | 8 +- drivers/block/Kconfig | 8 +- drivers/block/floppy.c | 2 +- drivers/block/zram/Kconfig | 6 +- tools/testing/selftests/zram/README | 2 +- 15 files changed, 398 insertions(+), 245 deletions(-) rename Documentation/blockdev/drbd/{data-structure-v9.txt => data-structure-v9.rst} (94%) create mode 100644 Documentation/blockdev/drbd/figures.rst rename Documentation/blockdev/drbd/{README.txt => index.rst} (55%) rename Documentation/blockdev/{floppy.txt => floppy.rst} (81%) create mode 100644 Documentation/blockdev/index.rst rename Documentation/blockdev/{nbd.txt => nbd.rst} (96%) rename Documentation/blockdev/{paride.txt => paride.rst} (81%) rename Documentation/blockdev/{ramdisk.txt => ramdisk.rst} (84%) rename Documentation/blockdev/{zram.txt => zram.rst} (76%) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 92d335837c52..7ed94527a4a0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1249,7 +1249,7 @@ See also Documentation/fault-injection/. floppy= [HW] - See Documentation/blockdev/floppy.txt. + See Documentation/blockdev/floppy.rst. force_pal_cache_flush [IA-64] Avoid check_sal_cache_flush which may hang on @@ -2247,7 +2247,7 @@ memblock=debug [KNL] Enable memblock debug messages. load_ramdisk= [RAM] List of ramdisks to load from floppy - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. lockd.nlm_grace_period=P [NFS] Assign grace period. Format: <integer> @@ -3294,7 +3294,7 @@ pcd. [PARIDE] See header of drivers/block/paride/pcd.c. - See also Documentation/blockdev/paride.txt. + See also Documentation/blockdev/paride.rst. pci=option[,option...] [PCI] various PCI subsystem options. @@ -3538,7 +3538,7 @@ needed on a platform with proper driver support. pd. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at boot time. @@ -3553,10 +3553,10 @@ and performance comparison. pf. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pg. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pirq= [SMP,APIC] Manual mp-table setup See Documentation/x86/i386/IO-APIC.rst. @@ -3668,7 +3668,7 @@ prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk before loading. - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. psi= [KNL] Enable or disable pressure stall information tracking. @@ -3690,7 +3690,7 @@ pstore.backend= Specify the name of the pstore backend to use pt. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pti= [X86_64] Control Page Table Isolation of user and kernel address spaces. Disabling this feature @@ -3719,7 +3719,7 @@ See Documentation/admin-guide/md.rst. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. random.trust_cpu={on,off} [KNL] Enable or disable trusting the use of the diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/blockdev/drbd/data-structure-v9.rst similarity index 94% rename from Documentation/blockdev/drbd/data-structure-v9.txt rename to Documentation/blockdev/drbd/data-structure-v9.rst index 1e52a0e32624..66036b901644 100644 --- a/Documentation/blockdev/drbd/data-structure-v9.txt +++ b/Documentation/blockdev/drbd/data-structure-v9.rst @@ -1,3 +1,7 @@ +================================ +kernel data structure for DRBD-9 +================================ + This describes the in kernel data structure for DRBD-9. Starting with Linux v3.14 we are reorganizing DRBD to use this data structure. @@ -10,7 +14,7 @@ device is represented by a block device locally. The DRBD objects are interconnected to form a matrix as depicted below; a drbd_peer_device object sits at each intersection between a drbd_device and a -drbd_connection: +drbd_connection:: /--------------+---------------+.....+---------------\ | resource | device | | device | diff --git a/Documentation/blockdev/drbd/figures.rst b/Documentation/blockdev/drbd/figures.rst new file mode 100644 index 000000000000..3e3fd4b8a478 --- /dev/null +++ b/Documentation/blockdev/drbd/figures.rst @@ -0,0 +1,28 @@ +.. The here included files are intended to help understand the implementation + +Data flows that Relate some functions, and write packets +======================================================== + +.. kernel-figure:: DRBD-8.3-data-packets.svg + :alt: DRBD-8.3-data-packets.svg + :align: center + +.. kernel-figure:: DRBD-data-packets.svg + :alt: DRBD-data-packets.svg + :align: center + + +Sub graphs of DRBD's state transitions +====================================== + +.. kernel-figure:: conn-states-8.dot + :alt: conn-states-8.dot + :align: center + +.. kernel-figure:: disk-states-8.dot + :alt: disk-states-8.dot + :align: center + +.. kernel-figure:: node-states-8.dot + :alt: node-states-8.dot + :align: center diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/index.rst similarity index 55% rename from Documentation/blockdev/drbd/README.txt rename to Documentation/blockdev/drbd/index.rst index 627b0a1bf35e..68ecd5c113e9 100644 --- a/Documentation/blockdev/drbd/README.txt +++ b/Documentation/blockdev/drbd/index.rst @@ -1,4 +1,9 @@ +========================================== +Distributed Replicated Block Device - DRBD +========================================== + Description +=========== DRBD is a shared-nothing, synchronously replicated block device. It is designed to serve as a building block for high availability @@ -7,10 +12,8 @@ Description Please visit http://www.drbd.org to find out more. -The here included files are intended to help understand the implementation +.. toctree:: + :maxdepth: 1 -DRBD-8.3-data-packets.svg, DRBD-data-packets.svg - relates some functions, and write packets. - -conn-states-8.dot, disk-states-8.dot, node-states-8.dot - The sub graphs of DRBD's state transitions + data-structure-v9 + figures diff --git a/Documentation/blockdev/floppy.txt b/Documentation/blockdev/floppy.rst similarity index 81% rename from Documentation/blockdev/floppy.txt rename to Documentation/blockdev/floppy.rst index e2240f5ab64d..4a8f31cf4139 100644 --- a/Documentation/blockdev/floppy.txt +++ b/Documentation/blockdev/floppy.rst @@ -1,35 +1,37 @@ -This file describes the floppy driver. +============= +Floppy Driver +============= FAQ list: ========= - A FAQ list may be found in the fdutils package (see below), and also +A FAQ list may be found in the fdutils package (see below), and also at <http://fdutils.linux.lu/faq.html>. LILO configuration options (Thinkpad users, read this) ====================================================== - The floppy driver is configured using the 'floppy=' option in +The floppy driver is configured using the 'floppy=' option in lilo. This option can be typed at the boot prompt, or entered in the lilo configuration file. - Example: If your kernel is called linux-2.6.9, type the following line -at the lilo boot prompt (if you have a thinkpad): +Example: If your kernel is called linux-2.6.9, type the following line +at the lilo boot prompt (if you have a thinkpad):: linux-2.6.9 floppy=thinkpad You may also enter the following line in /etc/lilo.conf, in the description -of linux-2.6.9: +of linux-2.6.9:: append = "floppy=thinkpad" - Several floppy related options may be given, example: +Several floppy related options may be given, example:: linux-2.6.9 floppy=daring floppy=two_fdc append = "floppy=daring floppy=two_fdc" - If you give options both in the lilo config file and on the boot +If you give options both in the lilo config file and on the boot prompt, the option strings of both places are concatenated, the boot prompt options coming last. That's why there are also options to restore the default behavior. @@ -38,21 +40,23 @@ restore the default behavior. Module configuration options ============================ - If you use the floppy driver as a module, use the following syntax: -modprobe floppy floppy="<options>" +If you use the floppy driver as a module, use the following syntax:: -Example: - modprobe floppy floppy="omnibook messages" + modprobe floppy floppy="<options>" - If you need certain options enabled every time you load the floppy driver, -you can put: +Example:: - options floppy floppy="omnibook messages" + modprobe floppy floppy="omnibook messages" + +If you need certain options enabled every time you load the floppy driver, +you can put:: + + options floppy floppy="omnibook messages" in a configuration file in /etc/modprobe.d/. - The floppy driver related options are: +The floppy driver related options are: floppy=asus_pci Sets the bit mask to allow only units 0 and 1. (default) @@ -70,8 +74,7 @@ in a configuration file in /etc/modprobe.d/. Tells the floppy driver that you have only one floppy controller. (default) - floppy=two_fdc - floppy=<address>,two_fdc + floppy=two_fdc / floppy=<address>,two_fdc Tells the floppy driver that you have two floppy controllers. The second floppy controller is assumed to be at <address>. This option is not needed if the second controller is at address @@ -84,8 +87,7 @@ in a configuration file in /etc/modprobe.d/. floppy=0,thinkpad Tells the floppy driver that you don't have a Thinkpad. - floppy=omnibook - floppy=nodma + floppy=omnibook / floppy=nodma Tells the floppy driver not to use Dma for data transfers. This is needed on HP Omnibooks, which don't have a workable DMA channel for the floppy driver. This option is also useful @@ -144,14 +146,16 @@ in a configuration file in /etc/modprobe.d/. described in the physical CMOS), or if your BIOS uses non-standard CMOS types. The CMOS types are: - 0 - Use the value of the physical CMOS - 1 - 5 1/4 DD - 2 - 5 1/4 HD - 3 - 3 1/2 DD - 4 - 3 1/2 HD - 5 - 3 1/2 ED - 6 - 3 1/2 ED - 16 - unknown or not installed + == ================================== + 0 Use the value of the physical CMOS + 1 5 1/4 DD + 2 5 1/4 HD + 3 3 1/2 DD + 4 3 1/2 HD + 5 3 1/2 ED + 6 3 1/2 ED + 16 unknown or not installed + == ================================== (Note: there are two valid types for ED drives. This is because 5 was initially chosen to represent floppy *tapes*, and 6 for ED drives. @@ -162,8 +166,7 @@ in a configuration file in /etc/modprobe.d/. Print a warning message when an unexpected interrupt is received. (default) - floppy=no_unexpected_interrupts - floppy=L40SX + floppy=no_unexpected_interrupts / floppy=L40SX Don't print a message when an unexpected interrupt is received. This is needed on IBM L40SX laptops in certain video modes. (There seems to be an interaction between video and floppy. The unexpected @@ -199,47 +202,54 @@ in a configuration file in /etc/modprobe.d/. Sets the floppy DMA channel to <nr> instead of 2. floppy=slow - Use PS/2 stepping rate: - " PS/2 floppies have much slower step rates than regular floppies. + Use PS/2 stepping rate:: + + PS/2 floppies have much slower step rates than regular floppies. It's been recommended that take about 1/4 of the default speed - in some more extreme cases." + in some more extreme cases. Supporting utilities and additional documentation: ================================================== - Additional parameters of the floppy driver can be configured at +Additional parameters of the floppy driver can be configured at runtime. Utilities which do this can be found in the fdutils package. This package also contains a new version of mtools which allows to access high capacity disks (up to 1992K on a high density 3 1/2 disk!). It also contains additional documentation about the floppy driver. The latest version can be found at fdutils homepage: + http://fdutils.linux.lu The fdutils releases can be found at: + http://fdutils.linux.lu/download.html + http://www.tux.org/pub/knaff/fdutils/ + ftp://metalab.unc.edu/pub/Linux/utils/disk-management/ Reporting problems about the floppy driver ========================================== - If you have a question or a bug report about the floppy driver, mail +If you have a question or a bug report about the floppy driver, mail me at Alain.Knaff(a)poboxes.com . If you post to Usenet, preferably use comp.os.linux.hardware. As the volume in these groups is rather high, be sure to include the word "floppy" (or "FLOPPY") in the subject line. If the reported problem happens when mounting floppy disks, be sure to mention also the type of the filesystem in the subject line. - Be sure to read the FAQ before mailing/posting any bug reports! +Be sure to read the FAQ before mailing/posting any bug reports! - Alain +Alain Changelog ========= -10-30-2004 : Cleanup, updating, add reference to module configuration. +10-30-2004 : + Cleanup, updating, add reference to module configuration. James Nelson <james4765(a)gmail.com> -6-3-2000 : Original Document +6-3-2000 : + Original Document diff --git a/Documentation/blockdev/index.rst b/Documentation/blockdev/index.rst new file mode 100644 index 000000000000..a9af6ed8b4aa --- /dev/null +++ b/Documentation/blockdev/index.rst @@ -0,0 +1,16 @@ +:orphan: + +=========================== +The Linux RapidIO Subsystem +=========================== + +.. toctree:: + :maxdepth: 1 + + floppy + nbd + paride + ramdisk + zram + + drbd/index diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.rst similarity index 96% rename from Documentation/blockdev/nbd.txt rename to Documentation/blockdev/nbd.rst index db242ea2bce8..d78dfe559dcf 100644 --- a/Documentation/blockdev/nbd.txt +++ b/Documentation/blockdev/nbd.rst @@ -1,3 +1,4 @@ +================================== Network Block Device (TCP version) ================================== @@ -28,4 +29,3 @@ max_part nbds_max Number of block devices that should be initialized (default: 16). - diff --git a/Documentation/blockdev/paride.txt b/Documentation/blockdev/paride.rst similarity index 81% rename from Documentation/blockdev/paride.txt rename to Documentation/blockdev/paride.rst index ee6717e3771d..87b4278bf314 100644 --- a/Documentation/blockdev/paride.txt +++ b/Documentation/blockdev/paride.rst @@ -1,15 +1,17 @@ - - Linux and parallel port IDE devices +=================================== +Linux and parallel port IDE devices +=================================== PARIDE v1.03 (c) 1997-8 Grant Guenther <grant(a)torque.net> 1. Introduction +=============== Owing to the simplicity and near universality of the parallel port interface to personal computers, many external devices such as portable hard-disk, CD-ROM, LS-120 and tape drives use the parallel port to connect to their host computer. While some devices (notably scanners) use ad-hoc methods -to pass commands and data through the parallel port interface, most +to pass commands and data through the parallel port interface, most external devices are actually identical to an internal model, but with a parallel-port adapter chip added in. Some of the original parallel port adapters were little more than mechanisms for multiplexing a SCSI bus. @@ -28,47 +30,50 @@ were to open up a parallel port CD-ROM drive, for instance, one would find a standard ATAPI CD-ROM drive, a power supply, and a single adapter that interconnected a standard PC parallel port cable and a standard IDE cable. It is usually possible to exchange the CD-ROM device with -any other device using the IDE interface. +any other device using the IDE interface. The document describes the support in Linux for parallel port IDE devices. It does not cover parallel port SCSI devices, "ditto" tape -drives or scanners. Many different devices are supported by the +drives or scanners. Many different devices are supported by the parallel port IDE subsystem, including: - MicroSolutions backpack CD-ROM - MicroSolutions backpack PD/CD - MicroSolutions backpack hard-drives - MicroSolutions backpack 8000t tape drive - SyQuest EZ-135, EZ-230 & SparQ drives - Avatar Shark - Imation Superdisk LS-120 - Maxell Superdisk LS-120 - FreeCom Power CD - Hewlett-Packard 5GB and 8GB tape drives - Hewlett-Packard 7100 and 7200 CD-RW drives + - MicroSolutions backpack CD-ROM + - MicroSolutions backpack PD/CD + - MicroSolutions backpack hard-drives + - MicroSolutions backpack 8000t tape drive + - SyQuest EZ-135, EZ-230 & SparQ drives + - Avatar Shark + - Imation Superdisk LS-120 + - Maxell Superdisk LS-120 + - FreeCom Power CD + - Hewlett-Packard 5GB and 8GB tape drives + - Hewlett-Packard 7100 and 7200 CD-RW drives as well as most of the clone and no-name products on the market. To support such a wide range of devices, PARIDE, the parallel port IDE subsystem, is actually structured in three parts. There is a base paride module which provides a registry and some common methods for -accessing the parallel ports. The second component is a set of -high-level drivers for each of the different types of supported devices: +accessing the parallel ports. The second component is a set of +high-level drivers for each of the different types of supported devices: + === ============= pd IDE disk pcd ATAPI CD-ROM pf ATAPI disk pt ATAPI tape pg ATAPI generic + === ============= (Currently, the pg driver is only used with CD-R drives). The high-level drivers function according to the relevant standards. The third component of PARIDE is a set of low-level protocol drivers for each of the parallel port IDE adapter chips. Thanks to the interest -and encouragement of Linux users from many parts of the world, +and encouragement of Linux users from many parts of the world, support is available for almost all known adapter protocols: + ==== ====================================== ==== aten ATEN EH-100 (HK) bpck Microsolutions backpack (US) comm DataStor (old-type) "commuter" adapter (TW) @@ -83,9 +88,11 @@ support is available for almost all known adapter protocols: ktti KT Technology PHd adapter (SG) on20 OnSpec 90c20 (US) on26 OnSpec 90c26 (US) + ==== ====================================== ==== 2. Using the PARIDE subsystem +============================= While configuring the Linux kernel, you may choose either to build the PARIDE drivers into your kernel, or to build them as modules. @@ -94,10 +101,10 @@ In either case, you will need to select "Parallel port IDE device support" as well as at least one of the high-level drivers and at least one of the parallel port communication protocols. If you do not know what kind of parallel port adapter is used in your drive, you could -begin by checking the file names and any text files on your DOS +begin by checking the file names and any text files on your DOS installation floppy. Alternatively, you can look at the markings on the adapter chip itself. That's usually sufficient to identify the -correct device. +correct device. You can actually select all the protocol modules, and allow the PARIDE subsystem to try them all for you. @@ -105,8 +112,9 @@ subsystem to try them all for you. For the "brand-name" products listed above, here are the protocol and high-level drivers that you would use: + ================ ============ ====== ======== Manufacturer Model Driver Protocol - + ================ ============ ====== ======== MicroSolutions CD-ROM pcd bpck MicroSolutions PD drive pf bpck MicroSolutions hard-drive pd bpck @@ -119,8 +127,10 @@ and high-level drivers that you would use: Hewlett-Packard 5GB Tape pt epat Hewlett-Packard 7200e (CD) pcd epat Hewlett-Packard 7200e (CD-R) pg epat + ================ ============ ====== ======== 2.1 Configuring built-in drivers +--------------------------------- We recommend that you get to know how the drivers work and how to configure them as loadable modules, before attempting to compile a @@ -143,7 +153,7 @@ protocol identification number and, for some devices, the drive's chain ID. While your system is booting, a number of messages are displayed on the console. Like all such messages, they can be reviewed with the 'dmesg' command. Among those messages will be -some lines like: +some lines like:: paride: bpck registered as protocol 0 paride: epat registered as protocol 1 @@ -158,10 +168,10 @@ the last two digits of the drive's serial number (but read MicroSolutions' documentation about this). As an example, let's assume that you have a MicroSolutions PD/CD drive -with unit ID number 36 connected to the parallel port at 0x378, a SyQuest -EZ-135 connected to the chained port on the PD/CD drive and also an -Imation Superdisk connected to port 0x278. You could give the following -options on your boot command: +with unit ID number 36 connected to the parallel port at 0x378, a SyQuest +EZ-135 connected to the chained port on the PD/CD drive and also an +Imation Superdisk connected to port 0x278. You could give the following +options on your boot command:: pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36 @@ -169,24 +179,27 @@ In the last option, pf.drive1 configures device /dev/pf1, the 0x378 is the parallel port base address, the 0 is the protocol registration number and 36 is the chain ID. -Please note: while PARIDE will work both with and without the +Please note: while PARIDE will work both with and without the PARPORT parallel port sharing system that is included by the "Parallel port support" option, PARPORT must be included and enabled if you want to use chains of devices on the same parallel port. 2.2 Loading and configuring PARIDE as modules +---------------------------------------------- It is much faster and simpler to get to understand the PARIDE drivers -if you use them as loadable kernel modules. +if you use them as loadable kernel modules. -Note 1: using these drivers with the "kerneld" automatic module loading -system is not recommended for beginners, and is not documented here. +Note 1: + using these drivers with the "kerneld" automatic module loading + system is not recommended for beginners, and is not documented here. -Note 2: if you build PARPORT support as a loadable module, PARIDE must -also be built as loadable modules, and PARPORT must be loaded before the -PARIDE modules. +Note 2: + if you build PARPORT support as a loadable module, PARIDE must + also be built as loadable modules, and PARPORT must be loaded before + the PARIDE modules. -To use PARIDE, you must begin by +To use PARIDE, you must begin by:: insmod paride @@ -195,8 +208,8 @@ among other tasks. Then, load as many of the protocol modules as you think you might need. As you load each module, it will register the protocols that it supports, -and print a log message to your kernel log file and your console. For -example: +and print a log message to your kernel log file and your console. For +example:: # insmod epat paride: epat registered as protocol 0 @@ -205,22 +218,22 @@ example: paride: k971 registered as protocol 2 Finally, you can load high-level drivers for each kind of device that -you have connected. By default, each driver will autoprobe for a single +you have connected. By default, each driver will autoprobe for a single device, but you can support up to four similar devices by giving their individual co-ordinates when you load the driver. For example, if you had two no-name CD-ROM drives both using the KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc -you could give the following command: +you could give the following command:: # insmod pcd drive0=0x378,1 drive1=0x3bc,1 For most adapters, giving a port address and protocol number is sufficient, -but check the source files in linux/drivers/block/paride for more +but check the source files in linux/drivers/block/paride for more information. (Hopefully someone will write some man pages one day !). As another example, here's what happens when PARPORT is installed, and -a SyQuest EZ-135 is attached to port 0x378: +a SyQuest EZ-135 is attached to port 0x378:: # insmod paride paride: version 1.0 installed @@ -237,46 +250,47 @@ Note that the last line is the output from the generic partition table scanner - in this case it reports that it has found a disk with one partition. 2.3 Using a PARIDE device +-------------------------- Once the drivers have been loaded, you can access PARIDE devices in the same way as their traditional counterparts. You will probably need to create the device "special files". Here is a simple script that you can -cut to a file and execute: +cut to a file and execute:: -#!/bin/bash -# -# mkd -- a script to create the device special files for the PARIDE subsystem -# -function mkdev { - mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1 -} -# -function pd { - D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) ) - mkdev pd$D b 45 $[ $1 * 16 ] - for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - do mkdev pd$D$P b 45 $[ $1 * 16 + $P ] - done -} -# -cd /dev -# -for u in 0 1 2 3 ; do pd $u ; done -for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done -for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done -for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done -for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done -for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done -# -# end of mkd + #!/bin/bash + # + # mkd -- a script to create the device special files for the PARIDE subsystem + # + function mkdev { + mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1 + } + # + function pd { + D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) ) + mkdev pd$D b 45 $[ $1 * 16 ] + for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + do mkdev pd$D$P b 45 $[ $1 * 16 + $P ] + done + } + # + cd /dev + # + for u in 0 1 2 3 ; do pd $u ; done + for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done + for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done + for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done + for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done + for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done + # + # end of mkd With the device files and drivers in place, you can access PARIDE devices -like any other Linux device. For example, to mount a CD-ROM in pcd0, use: +like any other Linux device. For example, to mount a CD-ROM in pcd0, use:: mount /dev/pcd0 /cdrom If you have a fresh Avatar Shark cartridge, and the drive is pda, you -might do something like: +might do something like:: fdisk /dev/pda -- make a new partition table with partition 1 of type 83 @@ -289,41 +303,46 @@ might do something like: Devices like the Imation superdisk work in the same way, except that they do not have a partition table. For example to make a 120MB -floppy that you could share with a DOS system: +floppy that you could share with a DOS system:: mkdosfs /dev/pf0 mount /dev/pf0 /mnt 2.4 The pf driver +------------------ The pf driver is intended for use with parallel port ATAPI disk devices. The most common devices in this category are PD drives and LS-120 drives. Traditionally, media for these devices are not partitioned. Consequently, the pf driver does not support partitioned -media. This may be changed in a future version of the driver. +media. This may be changed in a future version of the driver. 2.5 Using the pt driver +------------------------ The pt driver for parallel port ATAPI tape drives is a minimal driver. -It does not yet support many of the standard tape ioctl operations. +It does not yet support many of the standard tape ioctl operations. For best performance, a block size of 32KB should be used. You will probably want to set the parallel port delay to 0, if you can. 2.6 Using the pg driver +------------------------ The pg driver can be used in conjunction with the cdrecord program to create CD-ROMs. Please get cdrecord version 1.6.1 or later -from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media -your parallel port should ideally be set to EPP mode, and the "port delay" -should be set to 0. With those settings it is possible to record at 2x +from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media +your parallel port should ideally be set to EPP mode, and the "port delay" +should be set to 0. With those settings it is possible to record at 2x speed without any buffer underruns. If you cannot get the driver to work in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only. 3. Troubleshooting +================== 3.1 Use EPP mode if you can +---------------------------- The most common problems that people report with the PARIDE drivers concern the parallel port CMOS settings. At this time, none of the @@ -332,6 +351,7 @@ If you are able to do so, please set your parallel port into EPP mode using your CMOS setup procedure. 3.2 Check the port delay +------------------------- Some parallel ports cannot reliably transfer data at full speed. To offset the errors, the PARIDE protocol modules introduce a "port @@ -347,23 +367,25 @@ read the comments at the beginning of the driver source files in linux/drivers/block/paride. 3.3 Some drives need a printer reset +------------------------------------- There appear to be a number of "noname" external drives on the market that do not always power up correctly. We have noticed this with some drives based on OnSpec and older Freecom adapters. In these rare cases, the adapter can often be reinitialised by issuing a "printer reset" on -the parallel port. As the reset operation is potentially disruptive in -multiple device environments, the PARIDE drivers will not do it -automatically. You can however, force a printer reset by doing: +the parallel port. As the reset operation is potentially disruptive in +multiple device environments, the PARIDE drivers will not do it +automatically. You can however, force a printer reset by doing:: insmod lp reset=1 rmmod lp If you have one of these marginal cases, you should probably build your paride drivers as modules, and arrange to do the printer reset -before loading the PARIDE drivers. +before loading the PARIDE drivers. 3.4 Use the verbose option and dmesg if you need help +------------------------------------------------------ While a lot of testing has gone into these drivers to make them work as smoothly as possible, problems will arise. If you do have problems, @@ -373,7 +395,7 @@ clues, then please make sure that only one drive is hooked to your system, and that either (a) PARPORT is enabled or (b) no other device driver is using your parallel port (check in /proc/ioports). Then, load the appropriate drivers (you can load several protocol modules if you want) -as in: +as in:: # insmod paride # insmod epat @@ -394,12 +416,14 @@ by e-mail to grant(a)torque.net, or join the linux-parport mailing list and post your report there. 3.5 For more information or help +--------------------------------- You can join the linux-parport mailing list by sending a mail message -to +to: + linux-parport-request(a)torque.net -with the single word +with the single word:: subscribe @@ -412,6 +436,4 @@ have in your mail headers, when sending mail to the list server. You might also find some useful information on the linux-parport web pages (although they are not always up to date) at - http://web.archive.org/web/*/http://www.torque.net/parport/ - - + http://web.archive.org/web/%2E/http://www.torque.net/parport/ diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.rst similarity index 84% rename from Documentation/blockdev/ramdisk.txt rename to Documentation/blockdev/ramdisk.rst index 501e12e0323e..b7c2268f8dec 100644 --- a/Documentation/blockdev/ramdisk.txt +++ b/Documentation/blockdev/ramdisk.rst @@ -1,7 +1,8 @@ +========================================== Using the RAM disk block device with Linux ------------------------------------------- +========================================== -Contents: +.. Contents: 1) Overview 2) Kernel Command Line Parameters @@ -42,7 +43,7 @@ rescue floppy disk. 2a) Kernel Command Line Parameters ramdisk_size=N - ============== + Size of the ramdisk. This parameter tells the RAM disk driver to set up RAM disks of N k size. The default is 4096 (4 MB). @@ -50,16 +51,13 @@ default is 4096 (4 MB). 2b) Module parameters rd_nr - ===== - /dev/ramX devices created. + /dev/ramX devices created. max_part - ======== - Maximum partition number. + Maximum partition number. rd_size - ======= - See ramdisk_size. + See ramdisk_size. 3) Using "rdev -r" ------------------ @@ -71,11 +69,11 @@ to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit prompt/wait sequence is to be given before trying to read the RAM disk. Since the RAM disk dynamically grows as data is being written into it, a size field is not required. Bits 11 to 13 are not currently used and may as well be zero. -These numbers are no magical secrets, as seen below: +These numbers are no magical secrets, as seen below:: -./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF -./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 -./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 + ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF + ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 + ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 Consider a typical two floppy disk setup, where you will have the kernel on disk one, and have already put a RAM disk image onto disk #2. @@ -92,20 +90,23 @@ sequence so that you have a chance to switch floppy disks. The command line equivalent is: "prompt_ramdisk=1" Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. -So to create disk one of the set, you would do: +So to create disk one of the set, you would do:: /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0 /usr/src/linux# rdev /dev/fd0 /dev/fd0 /usr/src/linux# rdev -r /dev/fd0 49152 -If you make a boot disk that has LILO, then for the above, you would use: +If you make a boot disk that has LILO, then for the above, you would use:: + append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1" -Since the default start = 0 and the default prompt = 1, you could use: + +Since the default start = 0 and the default prompt = 1, you could use:: + append = "load_ramdisk=1" 4) An Example of Creating a Compressed RAM Disk ----------------------------------------------- +----------------------------------------------- To create a RAM disk image, you will need a spare block device to construct it on. This can be the RAM disk device itself, or an @@ -120,11 +121,11 @@ a) Decide on the RAM disk size that you want. Say 2 MB for this example. Create it by writing to the RAM disk device. (This step is not currently required, but may be in the future.) It is wise to zero out the area (esp. for disks) so that maximal compression is achieved for - the unused blocks of the image that you are about to create. + the unused blocks of the image that you are about to create:: dd if=/dev/zero of=/dev/ram0 bs=1k count=2048 -b) Make a filesystem on it. Say ext2fs for this example. +b) Make a filesystem on it. Say ext2fs for this example:: mke2fs -vm0 /dev/ram0 2048 @@ -133,11 +134,11 @@ c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...) d) Compress the contents of the RAM disk. The level of compression will be approximately 50% of the space used by the files. Unused - space on the RAM disk will compress to almost nothing. + space on the RAM disk will compress to almost nothing:: dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz -e) Put the kernel onto the floppy +e) Put the kernel onto the floppy:: dd if=zImage of=/dev/fd0 bs=1k @@ -146,13 +147,13 @@ f) Put the RAM disk image onto the floppy, after the kernel. Use an offset (possibly larger) kernel onto the same floppy later without overlapping the RAM disk image. An offset of 400 kB for kernels about 350 kB in size would be reasonable. Make sure offset+size of ram_image.gz is - not larger than the total space on your floppy (usually 1440 kB). + not larger than the total space on your floppy (usually 1440 kB):: dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400 g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc. For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would - have 2^15 + 2^14 + 400 = 49552. + have 2^15 + 2^14 + 400 = 49552:: rdev /dev/fd0 /dev/fd0 rdev -r /dev/fd0 49552 @@ -160,15 +161,17 @@ g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc. That is it. You now have your boot/root compressed RAM disk floppy. Some users may wish to combine steps (d) and (f) by using a pipe. --------------------------------------------------------------------------- + Paul Gortmaker 12/95 Changelog: ---------- -10-22-04 : Updated to reflect changes in command line options, remove +10-22-04 : + Updated to reflect changes in command line options, remove obsolete references, general cleanup. James Nelson (james4765(a)gmail.com) -12-95 : Original Document +12-95 : + Original Document diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.rst similarity index 76% rename from Documentation/blockdev/zram.txt rename to Documentation/blockdev/zram.rst index 4df0ce271085..2111231c9c0f 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.rst @@ -1,7 +1,9 @@ +======================================== zram: Compressed RAM based block devices ----------------------------------------- +======================================== -* Introduction +Introduction +============ The zram module creates RAM based block devices named /dev/zram<id> (<id> = 0, 1, ...). Pages written to these disks are compressed and stored @@ -12,9 +14,11 @@ use as swap disks, various caches under /var and maybe many more :) Statistics for individual zram devices are exported through sysfs nodes at /sys/block/zram<id>/ -* Usage +Usage +===== There are several ways to configure and manage zram device(-s): + a) using zram and zram_control sysfs attributes b) using zramctl utility, provided by util-linux (util-linux(a)vger.kernel.org). @@ -22,7 +26,7 @@ In this document we will describe only 'manual' zram configuration steps, IOW, zram and zram_control sysfs attributes. In order to get a better idea about zramctl please consult util-linux -documentation, zramctl man-page or `zramctl --help'. Please be informed +documentation, zramctl man-page or `zramctl --help`. Please be informed that zram maintainers do not develop/maintain util-linux or zramctl, should you have any questions please contact util-linux(a)vger.kernel.org @@ -30,19 +34,23 @@ Following shows a typical sequence of steps for using zram. WARNING ======= + For the sake of simplicity we skip error checking parts in most of the examples below. However, it is your sole responsibility to handle errors. zram sysfs attributes always return negative values in case of errors. The list of possible return codes: --EBUSY -- an attempt to modify an attribute that cannot be changed once -the device has been initialised. Please reset device first; --ENOMEM -- zram was not able to allocate enough memory to fulfil your -needs; --EINVAL -- invalid input has been provided. + +======== ============================================================= +-EBUSY an attempt to modify an attribute that cannot be changed once + the device has been initialised. Please reset device first; +-ENOMEM zram was not able to allocate enough memory to fulfil your + needs; +-EINVAL invalid input has been provided. +======== ============================================================= If you use 'echo', the returned value that is changed by 'echo' utility, -and, in general case, something like: +and, in general case, something like:: echo 3 > /sys/block/zram0/max_comp_streams if [ $? -ne 0 ]; @@ -51,7 +59,11 @@ and, in general case, something like: should suffice. -1) Load Module: +1) Load Module +============== + +:: + modprobe zram num_devices=4 This creates 4 devices: /dev/zram{0,1,2,3} @@ -59,6 +71,8 @@ num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. 2) Set max number of compression streams +======================================== + Regardless the value passed to this attribute, ZRAM will always allocate multiple compression streams - one per online CPUs - thus allowing several concurrent compression operations. The number of @@ -66,16 +80,20 @@ allocated compression streams goes down when some of the CPUs become offline. There is no single-compression-stream mode anymore, unless you are running a UP system or has only 1 CPU online. -To find out how many streams are currently available: +To find out how many streams are currently available:: + cat /sys/block/zram0/max_comp_streams 3) Select compression algorithm +=============================== + Using comp_algorithm device attribute one can see available and currently selected (shown in square brackets) compression algorithms, change selected compression algorithm (once the device is initialised there is no way to change compression algorithm). -Examples: +Examples:: + #show supported compression algorithms cat /sys/block/zram0/comp_algorithm lzo [lz4] @@ -83,20 +101,23 @@ Examples: #select lzo compression algorithm echo lzo > /sys/block/zram0/comp_algorithm -For the time being, the `comp_algorithm' content does not necessarily +For the time being, the `comp_algorithm` content does not necessarily show every compression algorithm supported by the kernel. We keep this list primarily to simplify device configuration and one can configure a new device with a compression algorithm that is not listed in -`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API +`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API and, if some of the algorithms were built as modules, it's impossible to list all of them using, for instance, /proc/crypto or any other method. This, however, has an advantage of permitting the usage of custom crypto compression modules (implementing S/W or H/W compression). 4) Set Disksize +=============== + Set disk size by writing the value to sysfs node 'disksize'. The value can be either in bytes or you can use mem suffixes. -Examples: +Examples:: + # Initialize /dev/zram0 with 50MB disksize echo $((50*1024*1024)) > /sys/block/zram0/disksize @@ -111,10 +132,13 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. 5) Set memory limit: Optional +============================= + Set memory limit by writing the value to sysfs node 'mem_limit'. The value can be either in bytes or you can use mem suffixes. In addition, you could change the value in runtime. -Examples: +Examples:: + # limit /dev/zram0 with 50MB memory echo $((50*1024*1024)) > /sys/block/zram0/mem_limit @@ -126,7 +150,11 @@ Examples: # To disable memory limit echo 0 > /sys/block/zram0/mem_limit -6) Activate: +6) Activate +=========== + +:: + mkswap /dev/zram0 swapon /dev/zram0 @@ -134,6 +162,7 @@ Examples: mount /dev/zram1 /tmp 7) Add/remove zram devices +========================== zram provides a control interface, which enables dynamic (on-demand) device addition and removal. @@ -142,37 +171,44 @@ In order to add a new /dev/zramX device, perform read operation on hot_add attribute. This will return either new device's device id (meaning that you can use /dev/zram<id>) or error code. -Example: +Example:: + cat /sys/class/zram-control/hot_add 1 To remove the existing /dev/zramX device (where X is a device id) -execute +execute:: + echo X > /sys/class/zram-control/hot_remove -8) Stats: +8) Stats +======== + Per-device statistics are exported as various nodes under /sys/block/zram<id>/ A brief description of exported device attributes. For more details please read Documentation/ABI/testing/sysfs-block-zram. +====================== ====== =============================================== Name access description ----- ------ ----------- +====================== ====== =============================================== disksize RW show and set the device's disk size initstate RO shows the initialization state of the device reset WO trigger device reset -mem_used_max WO reset the `mem_used_max' counter (see later) -mem_limit WO specifies the maximum amount of memory ZRAM can use - to store the compressed data -writeback_limit WO specifies the maximum amount of write IO zram can - write out to backing device as 4KB unit +mem_used_max WO reset the `mem_used_max` counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can + use to store the compressed data +writeback_limit WO specifies the maximum amount of write IO zram + can write out to backing device as 4KB unit writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress operations +max_comp_streams RW the number of possible concurrent compress + operations comp_algorithm RW show and change the compression algorithm compact WO trigger memory compaction debug_stat RO this file is used for zram debugging purposes backing_dev RW set up backend storage for zram to write out idle WO mark allocated slot as idle +====================== ====== =============================================== User space is advised to use the following files to read the device statistics. @@ -188,23 +224,31 @@ The stat file represents device's I/O statistics not accounted by block layer and, thus, not available in zram<id>/stat file. It consists of a single line of text and contains the following stats separated by whitespace: - failed_reads the number of failed reads - failed_writes the number of failed writes - invalid_io the number of non-page-size-aligned I/O requests + + ============= ============================================================= + failed_reads The number of failed reads + failed_writes The number of failed writes + invalid_io The number of non-page-size-aligned I/O requests notify_free Depending on device usage scenario it may account + a) the number of pages freed because of swap slot free - notifications or b) the number of pages freed because of - REQ_OP_DISCARD requests sent by bio. The former ones are - sent to a swap block device when a swap slot is freed, - which implies that this disk is being used as a swap disk. + notifications + b) the number of pages freed because of + REQ_OP_DISCARD requests sent by bio. The former ones are + sent to a swap block device when a swap slot is freed, + which implies that this disk is being used as a swap disk. + The latter ones are sent by filesystem mounted with discard option, whenever some data blocks are getting discarded. + ============= ============================================================= File /sys/block/zram<id>/mm_stat The stat file represents device's mm statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ================ ============================================================= orig_data_size uncompressed size of data stored in this disk. This excludes same-element-filled pages (same_pages) since no memory is allocated for them. @@ -223,58 +267,71 @@ line of text and contains the following stats separated by whitespace: No memory is allocated for such pages. pages_compacted the number of pages freed during compaction huge_pages the number of incompressible pages + ================ ============================================================= File /sys/block/zram<id>/bd_stat The stat file represents device's backing device statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ============== ============================================================= bd_count size of data written in backing device. Unit: 4K bytes bd_reads the number of reads from backing device Unit: 4K bytes bd_writes the number of writes to backing device Unit: 4K bytes + ============== ============================================================= + +9) Deactivate +============= + +:: -9) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -10) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset +10) Reset +========= + + Write any positive value to 'reset' sysfs node:: + + echo 1 > /sys/block/zram0/reset + echo 1 > /sys/block/zram1/reset This frees all the memory allocated for the given device and resets the disksize to zero. You must set the disksize again before reusing the device. -* Optional Feature +Optional Feature +================ -= writeback +writeback +--------- With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page to backing storage rather than keeping it in memory. -To use the feature, admin should set up backing device via +To use the feature, admin should set up backing device via:: - "echo /dev/sda5 > /sys/block/zramX/backing_dev" + echo /dev/sda5 > /sys/block/zramX/backing_dev before disksize setting. It supports only partition at this moment. -If admin want to use incompressible page writeback, they could do via +If admin want to use incompressible page writeback, they could do via:: - "echo huge > /sys/block/zramX/write" + echo huge > /sys/block/zramX/write To use idle page writeback, first, user need to declare zram pages -as idle. +as idle:: - "echo all > /sys/block/zramX/idle" + echo all > /sys/block/zramX/idle From now on, any pages on zram are idle pages. The idle mark will be removed until someone request access of the block. IOW, unless there is access request, those pages are still idle pages. -Admin can request writeback of those idle pages at right timing via +Admin can request writeback of those idle pages at right timing via:: - "echo idle > /sys/block/zramX/writeback" + echo idle > /sys/block/zramX/writeback With the command, zram writeback idle pages from memory to the storage. @@ -285,7 +342,7 @@ to guarantee storage health for entire product life. To overcome the concern, zram supports "writeback_limit" feature. The "writeback_limit_enable"'s default value is 0 so that it doesn't limit any writeback. IOW, if admin want to apply writeback budget, he should -enable writeback_limit_enable via +enable writeback_limit_enable via:: $ echo 1 > /sys/block/zramX/writeback_limit_enable @@ -296,7 +353,7 @@ until admin set the budget via /sys/block/zramX/writeback_limit. assigned via /sys/block/zramX/writeback_limit is meaninless.) If admin want to limit writeback as per-day 400M, he could do it -like below. +like below:: $ MB_SHIFT=20 $ 4K_SHIFT=12 @@ -305,16 +362,16 @@ like below. $ echo 1 > /sys/block/zram0/writeback_limit_enable If admin want to allow further write again once the bugdet is exausted, -he could do it like below +he could do it like below:: $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit -If admin want to see remaining writeback budget since he set, +If admin want to see remaining writeback budget since he set:: $ cat /sys/block/zramX/writeback_limit -If admin want to disable writeback limit, he could do +If admin want to disable writeback limit, he could do:: $ echo 0 > /sys/block/zramX/writeback_limit_enable @@ -326,25 +383,35 @@ budget in next setting is user's job. If admin want to measure writeback count in a certain period, he could know it via /sys/block/zram0/bd_stat's 3rd column. -= memory tracking +memory tracking +=============== With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the zram block. It could be useful to catch cold or incompressible pages of the process with*pagemap. + If you enable the feature, you could see block state via -/sys/kernel/debug/zram/zram0/block_state". The output is as follows, +/sys/kernel/debug/zram/zram0/block_state". The output is as follows:: 300 75.033841 .wh. 301 63.806904 s... 302 63.806919 ..hi -First column is zram's block index. -Second column is access time since the system was booted -Third column is state of the block. -(s: same page -w: written page to backing store -h: huge page -i: idle page) +First column + zram's block index. +Second column + access time since the system was booted +Third column + state of the block: + + s: + same page + w: + written page to backing store + h: + huge page + i: + idle page First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/MAINTAINERS b/MAINTAINERS index 85648a944446..808f65e06ad8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11024,7 +11024,7 @@ M: Josef Bacik <josef(a)toxicpanda.com> S: Maintained L: linux-block(a)vger.kernel.org L: nbd(a)other.debian.org -F: Documentation/blockdev/nbd.txt +F: Documentation/blockdev/nbd.rst F: drivers/block/nbd.c F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h @@ -12028,7 +12028,7 @@ PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES M: Tim Waugh <tim(a)cyberelk.net> L: linux-parport(a)lists.infradead.org (subscribers-only) S: Maintained -F: Documentation/blockdev/paride.txt +F: Documentation/blockdev/paride.rst F: drivers/block/paride/ PARISC ARCHITECTURE @@ -13310,7 +13310,7 @@ F: drivers/net/wireless/ralink/rt2x00/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe <axboe(a)kernel.dk> S: Maintained -F: Documentation/blockdev/ramdisk.txt +F: Documentation/blockdev/ramdisk.rst F: drivers/block/brd.c RANCHU VIRTUAL BOARD FOR MIPS @@ -17672,7 +17672,7 @@ R: Sergey Senozhatsky <sergey.senozhatsky.work(a)gmail.com> L: linux-kernel(a)vger.kernel.org S: Maintained F: drivers/block/zram/ -F: Documentation/blockdev/zram.txt +F: Documentation/blockdev/zram.rst ZS DECSTATION Z85C30 SERIAL DRIVER M: "Maciej W. Rozycki" <macro(a)linux-mips.org> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 96ec7e0fc1ea..c43690b973d8 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -31,7 +31,7 @@ config BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, say Y. Information about this driver, especially important for IBM Thinkpad users, is contained in - <file:Documentation/blockdev/floppy.txt>. + <file:Documentation/blockdev/floppy.rst>. That file also contains the location of the Floppy driver FAQ as well as location of the fdutils package used to configure additional parameters of the driver at run time. @@ -96,7 +96,7 @@ config PARIDE your computer's parallel port. Most of them are actually IDE devices using a parallel port IDE adapter. This option enables the PARIDE subsystem which contains drivers for many of these external drives. - Read <file:Documentation/blockdev/paride.txt> for more information. + Read <file:Documentation/blockdev/paride.rst> for more information. If you have said Y to the "Parallel-port support" configuration option, you may share a single port between your printer and other @@ -261,7 +261,7 @@ config BLK_DEV_NBD userland (making server and client physically the same computer, communicating using the loopback network device). - Read <file:Documentation/blockdev/nbd.txt> for more information, + Read <file:Documentation/blockdev/nbd.rst> for more information, especially about where to find the server code, which runs in user space and does not need special kernel support. @@ -303,7 +303,7 @@ config BLK_DEV_RAM during the initial install of Linux. Note that the kernel command line option "ramdisk=XX" is now obsolete. - For details, read <file:Documentation/blockdev/ramdisk.txt>. + For details, read <file:Documentation/blockdev/ramdisk.rst>. To compile this driver as a module, choose M here: the module will be called brd. An alias "rd" has been defined diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b933a7eea52b..5c99e52f9dc1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4424,7 +4424,7 @@ static int __init floppy_setup(char *str) pr_cont("\n"); } else DPRINT("botched floppy option\n"); - DPRINT("Read Documentation/blockdev/floppy.txt\n"); + DPRINT("Read Documentation/blockdev/floppy.rst\n"); return 0; } diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 1ffc64770643..e06b99d54816 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -12,7 +12,7 @@ config ZRAM It has several use cases, for example: /tmp storage, use as swap disks and maybe many more. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" @@ -26,7 +26,7 @@ config ZRAM_WRITEBACK With /sys/block/zramX/{idle,writeback}, application could ask idle page's writeback to the backing device to save in memory. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. config ZRAM_MEMORY_TRACKING bool "Track zRam block status" @@ -36,4 +36,4 @@ config ZRAM_MEMORY_TRACKING of zRAM. Admin could see the information via /sys/kernel/debug/zram/zramX/block_state. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 7972cc512408..5fa378391d3b 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -37,4 +37,4 @@ Commands required for testing: - mkfs/ mkfs.ext4 For more information please refer: -kernel-source-tree/Documentation/blockdev/zram.txt +kernel-source-tree/Documentation/blockdev/zram.rst -- 2.21.0

6 years

1
0
0 0

[PATCH] selftests: dma-buf: Adding kernel config fragment CONFIG_UDMABUF=y

by Naresh Kamboju

The test case drivers/dma-buf/udmabuf need this kernel config enabled CONFIG_UDMABUF=y Signed-off-by: Naresh Kamboju <naresh.kamboju(a)linaro.org> --- tools/testing/selftests/drivers/dma-buf/config | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/drivers/dma-buf/config diff --git a/tools/testing/selftests/drivers/dma-buf/config b/tools/testing/selftests/drivers/dma-buf/config new file mode 100644 index 000000000000..d708515cff1b --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/config @@ -0,0 +1 @@ +CONFIG_UDMABUF=y -- 2.17.1

6 years

2
1
0 0

[PATCH] selftests/powerpc: ppc_asm.h: typo in the header guard

by Denis Efremov

The guard macro __PPC_ASM_H in the header ppc_asm.h doesn't match the #ifndef macro _PPC_ASM_H. The patch makes them the same. Signed-off-by: Denis Efremov <efremov(a)linux.com> --- tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h index d2c0a911f55e..2b488b78c4f2 100644 --- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _PPC_ASM_H -#define __PPC_ASM_H +#define _PPC_ASM_H #include <ppc-asm.h> #ifndef r1 -- 2.21.0

6 years

2
1
0 0

[PATCH] selftests: timestamping: Fix SIOCGSTAMP undeclared build failure

by Shuah Khan

Add missing linux/sockios.h include to fix the following SIOCGSTAMP undeclared build error. In addition, remove the local defines for SIOCGSTAMPNS and SIOCSHWTSTAMP and pick them up from linux/sockios.h. timestamping.c:249:19: error: SIOCGSTAMP undeclared if (ioctl(sock, SIOCGSTAMP, &tv)) Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> --- .../selftests/networking/timestamping/timestamping.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c index 0fbed67bf4f6..aca3491174a1 100644 --- a/tools/testing/selftests/networking/timestamping/timestamping.c +++ b/tools/testing/selftests/networking/timestamping/timestamping.c @@ -32,6 +32,7 @@ #include <asm/types.h> #include <linux/net_tstamp.h> #include <linux/errqueue.h> +#include <linux/sockios.h> #ifndef SO_TIMESTAMPING # define SO_TIMESTAMPING 37 @@ -42,14 +43,6 @@ # define SO_TIMESTAMPNS 35 #endif -#ifndef SIOCGSTAMPNS -# define SIOCGSTAMPNS 0x8907 -#endif - -#ifndef SIOCSHWTSTAMP -# define SIOCSHWTSTAMP 0x89b0 -#endif - static void usage(const char *error) { if (error) -- 2.17.1

6 years

1
0
0 0

[PATCH v3 1/2] pid: add pidfd_open()

by Christian Brauner

This adds the pidfd_open() syscall. It allows a caller to retrieve pollable pidfds for a process which did not get created via CLONE_PIDFD, i.e. for a process that is created via traditional fork()/clone() calls that is only referenced by a PID: int pidfd = pidfd_open(1234, 0); ret = pidfd_send_signal(pidfd, SIGSTOP, NULL, 0); With the introduction of pidfds through CLONE_PIDFD it is possible to created pidfds at process creation time. However, a lot of processes get created with traditional PID-based calls such as fork() or clone() (without CLONE_PIDFD). For these processes a caller can currently not create a pollable pidfd. This is a problem for Android's low memory killer (LMK) and service managers such as systemd. Both are examples of tools that want to make use of pidfds to get reliable notification of process exit for non-parents (pidfd polling) and race-free signal sending (pidfd_send_signal()). They intend to switch to this API for process supervision/management as soon as possible. Having no way to get pollable pidfds from PID-only processes is one of the biggest blockers for them in adopting this api. With pidfd_open() making it possible to retrieve pidfds for PID-based processes we enable them to adopt this api. In line with Arnd's recent changes to consolidate syscall numbers across architectures, I have added the pidfd_open() syscall to all architectures at the same time. Signed-off-by: Christian Brauner <christian(a)brauner.io> Reviewed-by: Oleg Nesterov <oleg(a)redhat.com> Acked-by: Arnd Bergmann <arnd(a)arndb.de> Cc: "Eric W. Biederman" <ebiederm(a)xmission.com> Cc: Kees Cook <keescook(a)chromium.org> Cc: Joel Fernandes (Google) <joel(a)joelfernandes.org> Cc: Thomas Gleixner <tglx(a)linutronix.de> Cc: Jann Horn <jannh(a)google.com> Cc: David Howells <dhowells(a)redhat.com> Cc: Andy Lutomirsky <luto(a)kernel.org> Cc: Andrew Morton <akpm(a)linux-foundation.org> Cc: Aleksa Sarai <cyphar(a)cyphar.com> Cc: Linus Torvalds <torvalds(a)linux-foundation.org> Cc: Al Viro <viro(a)zeniv.linux.org.uk> Cc: linux-api(a)vger.kernel.org --- v1: - kbuild test robot <lkp(a)intel.com>: - add missing entry for pidfd_open to arch/arm/tools/syscall.tbl - Oleg Nesterov <oleg(a)redhat.com>: - use simpler thread-group leader check v2: - Oleg Nesterov <oleg(a)redhat.com>: - avoid using additional variable - remove unneeded comment - Arnd Bergmann <arnd(a)arndb.de>: - switch from 428 to 434 since the new mount api has taken it - bump syscall numbers in arch/arm64/include/asm/unistd.h - Joel Fernandes (Google) <joel(a)joelfernandes.org>: - switch from ESRCH to EINVAL when the passed-in pid does not refer to a thread-group leader - Christian Brauner <christian(a)brauner.io>: - rebase on v5.2-rc1 - adapt syscall number to account for new mount api syscalls v3: - Arnd Bergmann <arnd(a)arndb.de>: - add missing syscall entries for mips-o32 and mips-n64 --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/pid.h | 1 + include/linux/syscalls.h | 1 + include/uapi/asm-generic/unistd.h | 4 +- kernel/fork.c | 2 +- kernel/pid.c | 43 +++++++++++++++++++++ 23 files changed, 68 insertions(+), 3 deletions(-) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 9e7704e44f6d..1db9bbcfb84e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -473,3 +473,4 @@ 541 common fsconfig sys_fsconfig 542 common fsmount sys_fsmount 543 common fspick sys_fspick +544 common pidfd_open sys_pidfd_open diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index aaf479a9e92d..81e6e1817c45 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -447,3 +447,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 70e6882853c0..e8f7d95a1481 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 434 +#define __NR_compat_syscalls 435 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c39e90600bb3..7a3158ccd68e 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -886,6 +886,8 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig) __SYSCALL(__NR_fsmount, sys_fsmount) #define __NR_fspick 433 __SYSCALL(__NR_fspick, sys_fspick) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index e01df3f2f80d..ecc44926737b 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -354,3 +354,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7e3d0734b2f3..9a3eb2558568 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -433,3 +433,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 26339e417695..ad706f83c755 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -439,3 +439,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 0e2dd68ade57..97035e19ad03 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -372,3 +372,4 @@ 431 n32 fsconfig sys_fsconfig 432 n32 fsmount sys_fsmount 433 n32 fspick sys_fspick +434 n32 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 5eebfa0d155c..d7292722d3b0 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -348,3 +348,4 @@ 431 n64 fsconfig sys_fsconfig 432 n64 fsmount sys_fsmount 433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 3cc1374e02d0..dba084c92f14 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -421,3 +421,4 @@ 431 o32 fsconfig sys_fsconfig 432 o32 fsmount sys_fsmount 433 o32 fspick sys_fspick +434 o32 pidfd_open sys_pidfd_open diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index c9e377d59232..5022b9e179c2 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -430,3 +430,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 103655d84b4b..f2c3bda2d39f 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -515,3 +515,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e822b2964a83..6ebacfeaf853 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig sys_fsconfig 432 common fsmount sys_fsmount sys_fsmount 433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 016a727d4357..834c9c7d79fa 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -436,3 +436,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index e047480b1605..c58e71f21129 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -479,3 +479,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ad968b7bac72..43e4429a5272 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -438,3 +438,4 @@ 431 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig 432 i386 fsmount sys_fsmount __ia32_sys_fsmount 433 i386 fspick sys_fspick __ia32_sys_fspick +434 i386 pidfd_open sys_pidfd_open __ia32_sys_pidfd_open diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index b4e6f9e6204a..1bee0a77fdd3 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -355,6 +355,7 @@ 431 common fsconfig __x64_sys_fsconfig 432 common fsmount __x64_sys_fsmount 433 common fspick __x64_sys_fspick +434 common pidfd_open __x64_sys_pidfd_open # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 5fa0ee1c8e00..782b81945ccc 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -404,3 +404,4 @@ 431 common fsconfig sys_fsconfig 432 common fsmount sys_fsmount 433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open diff --git a/include/linux/pid.h b/include/linux/pid.h index 3c8ef5a199ca..c938a92eab99 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -67,6 +67,7 @@ struct pid extern struct pid init_struct_pid; extern const struct file_operations pidfd_fops; +extern int pidfd_create(struct pid *pid); static inline struct pid *get_pid(struct pid *pid) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e2870fe1be5b..989055e0b501 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -929,6 +929,7 @@ asmlinkage long sys_clock_adjtime32(clockid_t which_clock, struct old_timex32 __user *tx); asmlinkage long sys_syncfs(int fd); asmlinkage long sys_setns(int fd, int nstype); +asmlinkage long sys_pidfd_open(pid_t pid, unsigned int flags); asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg, unsigned int vlen, unsigned flags); asmlinkage long sys_process_vm_readv(pid_t pid, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index a87904daf103..e5684a4512c0 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -844,9 +844,11 @@ __SYSCALL(__NR_fsconfig, sys_fsconfig) __SYSCALL(__NR_fsmount, sys_fsmount) #define __NR_fspick 433 __SYSCALL(__NR_fspick, sys_fspick) +#define __NR_pidfd_open 434 +__SYSCALL(__NR_pidfd_open, sys_pidfd_open) #undef __NR_syscalls -#define __NR_syscalls 434 +#define __NR_syscalls 435 /* * 32 bit systems traditionally used different diff --git a/kernel/fork.c b/kernel/fork.c index b4cba953040a..c3df226f47a1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1724,7 +1724,7 @@ const struct file_operations pidfd_fops = { * Return: On success, a cloexec pidfd is returned. * On error, a negative errno number will be returned. */ -static int pidfd_create(struct pid *pid) +int pidfd_create(struct pid *pid) { int fd; diff --git a/kernel/pid.c b/kernel/pid.c index 89548d35eefb..8fc9d94f6ac1 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -37,6 +37,7 @@ #include <linux/syscalls.h> #include <linux/proc_ns.h> #include <linux/proc_fs.h> +#include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/idr.h> @@ -450,6 +451,48 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) return idr_get_next(&ns->idr, &nr); } +/** + * pidfd_open() - Open new pid file descriptor. + * + * @pid: pid for which to retrieve a pidfd + * @flags: flags to pass + * + * This creates a new pid file descriptor with the O_CLOEXEC flag set for + * the process identified by @pid. Currently, the process identified by + * @pid must be a thread-group leader. This restriction currently exists + * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot + * be used with CLONE_THREAD) and pidfd polling (only supports thread group + * leaders). + * + * Return: On success, a cloexec pidfd is returned. + * On error, a negative errno number will be returned. + */ +SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) +{ + int fd, ret; + struct pid *p; + + if (flags) + return -EINVAL; + + if (pid <= 0) + return -EINVAL; + + p = find_get_pid(pid); + if (!p) + return -ESRCH; + + ret = 0; + rcu_read_lock(); + if (!pid_task(p, PIDTYPE_TGID)) + ret = -EINVAL; + rcu_read_unlock(); + + fd = ret ?: pidfd_create(p); + put_pid(p); + return fd; +} + void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ -- 2.21.0

6 years

2
4
0 0

[PATCH AUTOSEL 4.19 31/60] bpf: lpm_trie: check left child of last leftmost node for NULL

by Sasha Levin

From: Jonathan Lemon <jonathan.lemon(a)gmail.com> [ Upstream commit da2577fdd0932ea4eefe73903f1130ee366767d2 ] If the leftmost parent node of the tree has does not have a child on the left side, then trie_get_next_key (and bpftool map dump) will not look at the child on the right. This leads to the traversal missing elements. Lookup is not affected. Update selftest to handle this case. Reproducer: bpftool map create /sys/fs/bpf/lpm type lpm_trie key 6 \ value 1 entries 256 name test_lpm flags 1 bpftool map update pinned /sys/fs/bpf/lpm key 8 0 0 0 0 0 value 1 bpftool map update pinned /sys/fs/bpf/lpm key 16 0 0 0 0 128 value 2 bpftool map dump pinned /sys/fs/bpf/lpm Returns only 1 element. (2 expected) Fixes: b471f2f1de8b ("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE") Signed-off-by: Jonathan Lemon <jonathan.lemon(a)gmail.com> Acked-by: Martin KaFai Lau <kafai(a)fb.com> Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/bpf/lpm_trie.c | 9 +++-- tools/testing/selftests/bpf/test_lpm_map.c | 41 ++++++++++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 4f3138e6ecb2..1a8b208f6c55 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -676,9 +676,14 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) * have exact two children, so this function will never return NULL. */ for (node = search_root; node;) { - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + if (node->flags & LPM_TREE_NODE_FLAG_IM) { + node = rcu_dereference(node->child[0]); + } else { next_node = node; - node = rcu_dereference(node->child[0]); + node = rcu_dereference(node->child[0]); + if (!node) + node = rcu_dereference(next_node->child[1]); + } } do_copy: next_key->prefixlen = next_node->prefixlen; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 02d7c871862a..006be3963977 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void) /* add one more element (total two) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.0.0", key_p->data); + inet_pton(AF_INET, "192.168.128.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && - key_p->data[1] == 168 && key_p->data[2] == 0); + key_p->data[1] == 168 && key_p->data[2] == 128); memset(next_key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); @@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void) /* Add one more element (total three) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.128.0", key_p->data); + inet_pton(AF_INET, "192.168.0.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); @@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void) assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && errno == ENOENT); + /* Add one more element (total five) */ + key_p->prefixlen = 28; + inet_pton(AF_INET, "192.168.1.128", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && + next_key_p->data[3] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + /* no exact matching key should return the first one in post order */ key_p->prefixlen = 22; inet_pton(AF_INET, "192.168.1.0", key_p->data); -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 58/95] bpf: fix div64 overflow tests to properly detect errors

by Sasha Levin

From: "Naveen N. Rao" <naveen.n.rao(a)linux.vnet.ibm.com> [ Upstream commit 3e0682695199bad51dd898fe064d1564637ff77a ] If the result of the division is LLONG_MIN, current tests do not detect the error since the return value is truncated to a 32-bit value and ends up being 0. Signed-off-by: Naveen N. Rao <naveen.n.rao(a)linux.vnet.ibm.com> Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- .../testing/selftests/bpf/verifier/div_overflow.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c index bd3f38dbe796..acab4f00819f 100644 --- a/tools/testing/selftests/bpf/verifier/div_overflow.c +++ b/tools/testing/selftests/bpf/verifier/div_overflow.c @@ -29,8 +29,11 @@ "DIV64 overflow, check 1", .insns = { BPF_MOV64_IMM(BPF_REG_1, -1), - BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), - BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1), + BPF_LD_IMM64(BPF_REG_2, LLONG_MIN), + BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -40,8 +43,11 @@ { "DIV64 overflow, check 2", .insns = { - BPF_LD_IMM64(BPF_REG_0, LLONG_MIN), - BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1), + BPF_LD_IMM64(BPF_REG_1, LLONG_MIN), + BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 48/95] bpf: lpm_trie: check left child of last leftmost node for NULL

by Sasha Levin

From: Jonathan Lemon <jonathan.lemon(a)gmail.com> [ Upstream commit da2577fdd0932ea4eefe73903f1130ee366767d2 ] If the leftmost parent node of the tree has does not have a child on the left side, then trie_get_next_key (and bpftool map dump) will not look at the child on the right. This leads to the traversal missing elements. Lookup is not affected. Update selftest to handle this case. Reproducer: bpftool map create /sys/fs/bpf/lpm type lpm_trie key 6 \ value 1 entries 256 name test_lpm flags 1 bpftool map update pinned /sys/fs/bpf/lpm key 8 0 0 0 0 0 value 1 bpftool map update pinned /sys/fs/bpf/lpm key 16 0 0 0 0 128 value 2 bpftool map dump pinned /sys/fs/bpf/lpm Returns only 1 element. (2 expected) Fixes: b471f2f1de8b ("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE") Signed-off-by: Jonathan Lemon <jonathan.lemon(a)gmail.com> Acked-by: Martin KaFai Lau <kafai(a)fb.com> Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- kernel/bpf/lpm_trie.c | 9 +++-- tools/testing/selftests/bpf/test_lpm_map.c | 41 ++++++++++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 93a5cbbde421..3b03a7342f3c 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -715,9 +715,14 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) * have exact two children, so this function will never return NULL. */ for (node = search_root; node;) { - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + if (node->flags & LPM_TREE_NODE_FLAG_IM) { + node = rcu_dereference(node->child[0]); + } else { next_node = node; - node = rcu_dereference(node->child[0]); + node = rcu_dereference(node->child[0]); + if (!node) + node = rcu_dereference(next_node->child[1]); + } } do_copy: next_key->prefixlen = next_node->prefixlen; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 02d7c871862a..006be3963977 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -573,13 +573,13 @@ static void test_lpm_get_next_key(void) /* add one more element (total two) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.0.0", key_p->data); + inet_pton(AF_INET, "192.168.128.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && - key_p->data[1] == 168 && key_p->data[2] == 0); + key_p->data[1] == 168 && key_p->data[2] == 128); memset(next_key_p, 0, key_size); assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); @@ -592,7 +592,7 @@ static void test_lpm_get_next_key(void) /* Add one more element (total three) */ key_p->prefixlen = 24; - inet_pton(AF_INET, "192.168.128.0", key_p->data); + inet_pton(AF_INET, "192.168.0.0", key_p->data); assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); memset(key_p, 0, key_size); @@ -643,6 +643,41 @@ static void test_lpm_get_next_key(void) assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && errno == ENOENT); + /* Add one more element (total five) */ + key_p->prefixlen = 28; + inet_pton(AF_INET, "192.168.1.128", key_p->data); + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); + + memset(key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && + key_p->data[1] == 168 && key_p->data[2] == 0); + + memset(next_key_p, 0, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && + next_key_p->data[3] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && + next_key_p->data[1] == 168); + + memcpy(key_p, next_key_p, key_size); + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && + errno == ENOENT); + /* no exact matching key should return the first one in post order */ key_p->prefixlen = 22; inet_pton(AF_INET, "192.168.1.0", key_p->data); -- 2.20.1

6 years

1
0
0 0

[RFC PATCH 1/1] Revert "rseq/selftests: arm: use udf instruction for RSEQ_SIG"

by Mathieu Desnoyers

This reverts commit 2b845d4b4acd9422bbb668989db8dc36dfc8f438. That commit introduces build issues for programs compiled in Thumb mode. Rather than try to be clever and emit a valid trap instruction on arm32, which requires special care about big/little endian handling on that architecture, just emit plain data. Data in the instruction stream is technically expected on arm32: this is how literal pools are implemented. Reverting to the prior behavior does exactly that. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> CC: Peter Zijlstra <peterz(a)infradead.org> CC: Thomas Gleixner <tglx(a)linutronix.de> CC: Joel Fernandes <joelaf(a)google.com> CC: Catalin Marinas <catalin.marinas(a)arm.com> CC: Dave Watson <davejwatson(a)fb.com> CC: Will Deacon <will.deacon(a)arm.com> CC: Shuah Khan <shuah(a)kernel.org> CC: Andi Kleen <andi(a)firstfloor.org> CC: linux-kselftest(a)vger.kernel.org CC: "H . Peter Anvin" <hpa(a)zytor.com> CC: Chris Lameter <cl(a)linux.com> CC: Russell King <linux(a)arm.linux.org.uk> CC: Michael Kerrisk <mtk.manpages(a)gmail.com> CC: "Paul E . McKenney" <paulmck(a)linux.vnet.ibm.com> CC: Paul Turner <pjt(a)google.com> CC: Boqun Feng <boqun.feng(a)gmail.com> CC: Josh Triplett <josh(a)joshtriplett.org> CC: Steven Rostedt <rostedt(a)goodmis.org> CC: Ben Maurer <bmaurer(a)fb.com> CC: linux-api(a)vger.kernel.org CC: Andy Lutomirski <luto(a)amacapital.net> CC: Andrew Morton <akpm(a)linux-foundation.org> CC: Linus Torvalds <torvalds(a)linux-foundation.org> CC: Carlos O'Donell <carlos(a)redhat.com> CC: Florian Weimer <fweimer(a)redhat.com> --- tools/testing/selftests/rseq/rseq-arm.h | 52 ++------------------------------- 1 file changed, 2 insertions(+), 50 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 84f28f147fb6..5f262c54364f 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -5,54 +5,7 @@ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers(a)efficios.com> */ -/* - * RSEQ_SIG uses the udf A32 instruction with an uncommon immediate operand - * value 0x5de3. This traps if user-space reaches this instruction by mistake, - * and the uncommon operand ensures the kernel does not move the instruction - * pointer to attacker-controlled code on rseq abort. - * - * The instruction pattern in the A32 instruction set is: - * - * e7f5def3 udf #24035 ; 0x5de3 - * - * This translates to the following instruction pattern in the T16 instruction - * set: - * - * little endian: - * def3 udf #243 ; 0xf3 - * e7f5 b.n <7f5> - * - * pre-ARMv6 big endian code: - * e7f5 b.n <7f5> - * def3 udf #243 ; 0xf3 - * - * ARMv6+ -mbig-endian generates mixed endianness code vs data: little-endian - * code and big-endian data. Ensure the RSEQ_SIG data signature matches code - * endianness. Prior to ARMv6, -mbig-endian generates big-endian code and data - * (which match), so there is no need to reverse the endianness of the data - * representation of the signature. However, the choice between BE32 and BE8 - * is done by the linker, so we cannot know whether code and data endianness - * will be mixed before the linker is invoked. - */ - -#define RSEQ_SIG_CODE 0xe7f5def3 - -#ifndef __ASSEMBLER__ - -#define RSEQ_SIG_DATA \ - ({ \ - int sig; \ - asm volatile ("b 2f\n\t" \ - "1: .inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \ - "2:\n\t" \ - "ldr %[sig], 1b\n\t" \ - : [sig] "=r" (sig)); \ - sig; \ - }) - -#define RSEQ_SIG RSEQ_SIG_DATA - -#endif +#define RSEQ_SIG 0x53053053 #define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") #define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc") @@ -125,8 +78,7 @@ do { \ __rseq_str(table_label) ":\n\t" \ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ - ".arm\n\t" \ - ".inst " __rseq_str(RSEQ_SIG_CODE) "\n\t" \ + ".word " __rseq_str(RSEQ_SIG) "\n\t" \ __rseq_str(label) ":\n\t" \ teardown \ "b %l[" __rseq_str(abort_label) "]\n\t" -- 2.11.0

6 years

3
5
0 0

selftests: bpf: test_libbpf.sh failed at file test_l4lb.o

by Naresh Kamboju

selftests: bpf test_libbpf.sh failed running Linux -next kernel 20190618 and 20190619. Here is the log from x86_64, # selftests bpf test_libbpf.sh bpf: test_libbpf.sh_ # # [0] libbpf BTF is required, but is missing or corrupted. libbpf: BTF_is # # test_libbpf failed at file test_l4lb.o failed: at_file # # selftests test_libbpf [FAILED] test_libbpf: [FAILED]_ # [FAIL] 29 selftests bpf test_libbpf.sh selftests: bpf_test_libbpf.sh [FAIL] Full test log, https://qa-reports.linaro.org/lkft/linux-next-oe/build/next-20190619/testru… Test results comparison, https://qa-reports.linaro.org/lkft/linux-next-oe/tests/kselftest/bpf_test_l… Good linux -next tag: next-20190617 Bad linux -next tag: next-20190618 git branch master git commit 1c6b40509daf5190b1fd2c758649f7df1da4827b git repo https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git Best regards Naresh Kamboju

6 years

4
7
0 0

Fwd: BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70

by Jeffrin Thalakkottoor

---------- Forwarded message --------- From: Jeffrin Thalakkottoor <jeffrin(a)rajagiritech.edu.in> Date: Fri, Jun 14, 2019 at 12:06 AM Subject: BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70 To: <axboe(a)kernel.dk>, <jejb(a)linux.ibm.com>, <martin.petersen(a)oracle.com> Cc: lkml <linux-kernel(a)vger.kernel.org>, <linux-ide(a)vger.kernel.org>, <linux-scsi(a)vger.kernel.org> hello , [ 55.169278] ================================================================== [ 55.169899] BUG: KASAN: global-out-of-bounds in ata_exec_internal_sg+0x50f/0xc70 [libata] [ 55.170039] Read of size 16 at addr ffffffffc0723500 by task scsi_eh_1/149 [ 55.186354] The buggy address belongs to the variable: [ 55.186972] cdb.48295+0x0/0xfffffffffffeab00 [libata] [ 55.187171] Memory state around the buggy address: [ 55.187290] ffffffffc0723400: 00 00 fa fa fa fa fa fa 00 00 fa fa fa fa fa fa [ 55.187417] ffffffffc0723480: 00 00 00 00 00 00 00 00 05 fa fa fa fa fa fa fa [ 55.187544] >ffffffffc0723500: 00 04 fa fa fa fa fa fa 00 00 fa fa fa fa fa fa [ 55.187686] ^ [ 55.187810] ffffffffc0723580: 00 00 05 fa fa fa fa fa 00 00 00 00 00 00 00 00 [ 55.187940] ffffffffc0723600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 55.188060] ================================================================== output of dmesg with the filename kasan.txt attached -- software engineer rajagiri school of engineering and technology

6 years

1
0
0 0

[PATCH v17 00/15] arm64: untag user pointers passed to the kernel

by Andrey Konovalov

=== Overview arm64 has a feature called Top Byte Ignore, which allows to embed pointer tags into the top byte of each pointer. Userspace programs (such as HWASan, a memory debugging tool [1]) might use this feature and pass tagged user pointers to the kernel through syscalls or other interfaces. Right now the kernel is already able to handle user faults with tagged pointers, due to these patches: 1. 81cddd65 ("arm64: traps: fix userspace cache maintenance emulation on a tagged pointer") 2. 7dcd9dd8 ("arm64: hw_breakpoint: fix watchpoint matching for tagged pointers") 3. 276e9327 ("arm64: entry: improve data abort handling of tagged pointers") This patchset extends tagged pointer support to syscall arguments. As per the proposed ABI change [3], tagged pointers are only allowed to be passed to syscalls when they point to memory ranges obtained by anonymous mmap() or sbrk() (see the patchset [3] for more details). For non-memory syscalls this is done by untaging user pointers when the kernel performs pointer checking to find out whether the pointer comes from userspace (most notably in access_ok). The untagging is done only when the pointer is being checked, the tag is preserved as the pointer makes its way through the kernel and stays tagged when the kernel dereferences the pointer when perfoming user memory accesses. The mmap and mremap (only new_addr) syscalls do not currently accept tagged addresses. Architectures may interpret the tag as a background colour for the corresponding vma. Other memory syscalls (mprotect, etc.) don't do user memory accesses but rather deal with memory ranges, and untagged pointers are better suited to describe memory ranges internally. Thus for memory syscalls we untag pointers completely when they enter the kernel. === Other approaches One of the alternative approaches to untagging that was considered is to completely strip the pointer tag as the pointer enters the kernel with some kind of a syscall wrapper, but that won't work with the countless number of different ioctl calls. With this approach we would need a custom wrapper for each ioctl variation, which doesn't seem practical. An alternative approach to untagging pointers in memory syscalls prologues is to inspead allow tagged pointers to be passed to find_vma() (and other vma related functions) and untag them there. Unfortunately, a lot of find_vma() callers then compare or subtract the returned vma start and end fields against the pointer that was being searched. Thus this approach would still require changing all find_vma() callers. === Testing The following testing approaches has been taken to find potential issues with user pointer untagging: 1. Static testing (with sparse [2] and separately with a custom static analyzer based on Clang) to track casts of __user pointers to integer types to find places where untagging needs to be done. 2. Static testing with grep to find parts of the kernel that call find_vma() (and other similar functions) or directly compare against vm_start/vm_end fields of vma. 3. Static testing with grep to find parts of the kernel that compare user pointers with TASK_SIZE or other similar consts and macros. 4. Dynamic testing: adding BUG_ON(has_tag(addr)) to find_vma() and running a modified syzkaller version that passes tagged pointers to the kernel. Based on the results of the testing the requried patches have been added to the patchset. === Notes This patchset is meant to be merged together with "arm64 relaxed ABI" [3]. This patchset is a prerequisite for ARM's memory tagging hardware feature support [4]. This patchset has been merged into the Pixel 2 & 3 kernel trees and is now being used to enable testing of Pixel phones with HWASan. Thanks! [1] http://clang.llvm.org/docs/HardwareAssistedAddressSanitizerDesign.html [2] https://github.com/lucvoo/sparse-dev/commit/5f960cb10f56ec2017c128ef9d16060… [3] https://lkml.org/lkml/2019/3/18/819 [4] https://community.arm.com/processors/b/blog/posts/arm-a-profile-architectur… === History Changes in v17: - The "uaccess: add noop untagged_addr definition" patch is dropped, as it was merged into upstream named as "uaccess: add noop untagged_addr definition". - Merged "mm, arm64: untag user pointers in do_pages_move" into "mm, arm64: untag user pointers passed to memory syscalls". - Added "arm64: Introduce prctl() options to control the tagged user addresses ABI" patch from Catalin. - Add tags_lib.so to tools/testing/selftests/arm64/.gitignore. - Added a comment clarifying untagged in mremap. - Moved untagging back into mlx4_get_umem_mr() for the IB patch. Changes in v16: - Moved untagging for memory syscalls from arm64 wrappers back to generic code. - Dropped untagging for the following memory syscalls: brk, mmap, munmap; mremap (only dropped for new_address); mmap_pgoff (not used on arm64); remap_file_pages (deprecated); shmat, shmdt (work on shared memory). - Changed kselftest to LD_PRELOAD a shared library that overrides malloc to return tagged pointers. - Rebased onto 5.2-rc3. Changes in v15: - Removed unnecessary untagging from radeon_ttm_tt_set_userptr(). - Removed unnecessary untagging from amdgpu_ttm_tt_set_userptr(). - Moved untagging to validate_range() in userfaultfd code. - Moved untagging to ib_uverbs_(re)reg_mr() from mlx4_get_umem_mr(). - Rebased onto 5.1. Changes in v14: - Moved untagging for most memory syscalls to an arm64 specific implementation, instead of doing that in the common code. - Dropped "net, arm64: untag user pointers in tcp_zerocopy_receive", since the provided user pointers don't come from an anonymous map and thus are not covered by this ABI relaxation. - Dropped "kernel, arm64: untag user pointers in prctl_set_mm*". - Moved untagging from __check_mem_type() to tee_shm_register(). - Updated untagging for the amdgpu and radeon drivers to cover the MMU notifier, as suggested by Felix. - Since this ABI relaxation doesn't actually allow tagged instruction pointers, dropped the following patches: - Dropped "tracing, arm64: untag user pointers in seq_print_user_ip". - Dropped "uprobes, arm64: untag user pointers in find_active_uprobe". - Dropped "bpf, arm64: untag user pointers in stack_map_get_build_id_offset". - Rebased onto 5.1-rc7 (37624b58). Changes in v13: - Simplified untagging in tcp_zerocopy_receive(). - Looked at find_vma() callers in drivers/, which allowed to identify a few other places where untagging is needed. - Added patch "mm, arm64: untag user pointers in get_vaddr_frames". - Added patch "drm/amdgpu, arm64: untag user pointers in amdgpu_ttm_tt_get_user_pages". - Added patch "drm/radeon, arm64: untag user pointers in radeon_ttm_tt_pin_userptr". - Added patch "IB/mlx4, arm64: untag user pointers in mlx4_get_umem_mr". - Added patch "media/v4l2-core, arm64: untag user pointers in videobuf_dma_contig_user_get". - Added patch "tee/optee, arm64: untag user pointers in check_mem_type". - Added patch "vfio/type1, arm64: untag user pointers". Changes in v12: - Changed untagging in tcp_zerocopy_receive() to also untag zc->address. - Fixed untagging in prctl_set_mm* to only untag pointers for vma lookups and validity checks, but leave them as is for actual user space accesses. - Updated the link to the v2 of the "arm64 relaxed ABI" patchset [3]. - Dropped the documentation patch, as the "arm64 relaxed ABI" patchset [3] handles that. Changes in v11: - Added "uprobes, arm64: untag user pointers in find_active_uprobe" patch. - Added "bpf, arm64: untag user pointers in stack_map_get_build_id_offset" patch. - Fixed "tracing, arm64: untag user pointers in seq_print_user_ip" to correctly perform subtration with a tagged addr. - Moved untagged_addr() from SYSCALL_DEFINE3(mprotect) and SYSCALL_DEFINE4(pkey_mprotect) to do_mprotect_pkey(). - Moved untagged_addr() definition for other arches from include/linux/memory.h to include/linux/mm.h. - Changed untagging in strn*_user() to perform userspace accesses through tagged pointers. - Updated the documentation to mention that passing tagged pointers to memory syscalls is allowed. - Updated the test to use malloc'ed memory instead of stack memory. Changes in v10: - Added "mm, arm64: untag user pointers passed to memory syscalls" back. - New patch "fs, arm64: untag user pointers in fs/userfaultfd.c". - New patch "net, arm64: untag user pointers in tcp_zerocopy_receive". - New patch "kernel, arm64: untag user pointers in prctl_set_mm*". - New patch "tracing, arm64: untag user pointers in seq_print_user_ip". Changes in v9: - Rebased onto 4.20-rc6. - Used u64 instead of __u64 in type casts in the untagged_addr macro for arm64. - Added braces around (addr) in the untagged_addr macro for other arches. Changes in v8: - Rebased onto 65102238 (4.20-rc1). - Added a note to the cover letter on why syscall wrappers/shims that untag user pointers won't work. - Added a note to the cover letter that this patchset has been merged into the Pixel 2 kernel tree. - Documentation fixes, in particular added a list of syscalls that don't support tagged user pointers. Changes in v7: - Rebased onto 17b57b18 (4.19-rc6). - Dropped the "arm64: untag user address in __do_user_fault" patch, since the existing patches already handle user faults properly. - Dropped the "usb, arm64: untag user addresses in devio" patch, since the passed pointer must come from a vma and therefore be untagged. - Dropped the "arm64: annotate user pointers casts detected by sparse" patch (see the discussion to the replies of the v6 of this patchset). - Added more context to the cover letter. - Updated Documentation/arm64/tagged-pointers.txt. Changes in v6: - Added annotations for user pointer casts found by sparse. - Rebased onto 050cdc6c (4.19-rc1+). Changes in v5: - Added 3 new patches that add untagging to places found with static analysis. - Rebased onto 44c929e1 (4.18-rc8). Changes in v4: - Added a selftest for checking that passing tagged pointers to the kernel succeeds. - Rebased onto 81e97f013 (4.18-rc1+). Changes in v3: - Rebased onto e5c51f30 (4.17-rc6+). - Added linux-arch@ to the list of recipients. Changes in v2: - Rebased onto 2d618bdf (4.17-rc3+). - Removed excessive untagging in gup.c. - Removed untagging pointers returned from __uaccess_mask_ptr. Changes in v1: - Rebased onto 4.17-rc1. Changes in RFC v2: - Added "#ifndef untagged_addr..." fallback in linux/uaccess.h instead of defining it for each arch individually. - Updated Documentation/arm64/tagged-pointers.txt. - Dropped "mm, arm64: untag user addresses in memory syscalls". - Rebased onto 3eb2ce82 (4.16-rc7). Signed-off-by: Andrey Konovalov <andreyknvl(a)google.com> Andrey Konovalov (14): arm64: untag user pointers in access_ok and __uaccess_mask_ptr lib, arm64: untag user pointers in strn*_user mm, arm64: untag user pointers passed to memory syscalls mm, arm64: untag user pointers in mm/gup.c mm, arm64: untag user pointers in get_vaddr_frames fs, arm64: untag user pointers in copy_mount_options userfaultfd, arm64: untag user pointers drm/amdgpu, arm64: untag user pointers drm/radeon, arm64: untag user pointers in radeon_gem_userptr_ioctl IB/mlx4, arm64: untag user pointers in mlx4_get_umem_mr media/v4l2-core, arm64: untag user pointers in videobuf_dma_contig_user_get tee/shm, arm64: untag user pointers in tee_shm_register vfio/type1, arm64: untag user pointers in vaddr_get_pfn selftests, arm64: add a selftest for passing tagged pointers to kernel Catalin Marinas (1): arm64: Introduce prctl() options to control the tagged user addresses ABI arch/arm64/include/asm/processor.h | 6 ++ arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/include/asm/uaccess.h | 11 ++- arch/arm64/kernel/process.c | 67 +++++++++++++++++++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 + drivers/gpu/drm/radeon/radeon_gem.c | 2 + drivers/infiniband/hw/mlx4/mr.c | 7 +- drivers/media/v4l2-core/videobuf-dma-contig.c | 9 +-- drivers/tee/tee_shm.c | 1 + drivers/vfio/vfio_iommu_type1.c | 2 + fs/namespace.c | 2 +- fs/userfaultfd.c | 22 +++--- include/uapi/linux/prctl.h | 5 ++ kernel/sys.c | 16 +++++ lib/strncpy_from_user.c | 3 +- lib/strnlen_user.c | 3 +- mm/frame_vector.c | 2 + mm/gup.c | 4 ++ mm/madvise.c | 2 + mm/mempolicy.c | 3 + mm/migrate.c | 2 +- mm/mincore.c | 2 + mm/mlock.c | 4 ++ mm/mprotect.c | 2 + mm/mremap.c | 7 ++ mm/msync.c | 2 + tools/testing/selftests/arm64/.gitignore | 2 + tools/testing/selftests/arm64/Makefile | 22 ++++++ .../testing/selftests/arm64/run_tags_test.sh | 12 ++++ tools/testing/selftests/arm64/tags_lib.c | 62 +++++++++++++++++ tools/testing/selftests/arm64/tags_test.c | 18 +++++ 32 files changed, 282 insertions(+), 25 deletions(-) create mode 100644 tools/testing/selftests/arm64/.gitignore create mode 100644 tools/testing/selftests/arm64/Makefile create mode 100755 tools/testing/selftests/arm64/run_tags_test.sh create mode 100644 tools/testing/selftests/arm64/tags_lib.c create mode 100644 tools/testing/selftests/arm64/tags_test.c -- 2.22.0.rc2.383.gf4fbbf30c2-goog

6 years

10
75
0 0

[PATCH 4.19 47/90] kselftest/cgroup: fix incorrect test_core skip

by Greg Kroah-Hartman

[ Upstream commit f97f3f8839eb9de5843066d80819884f7722c8c5 ] The test_core will skip the test_cgcore_no_internal_process_constraint_on_threads test case if the 'cpu' controller missing in root's subtree_control. In fact we need to set the 'cpu' in subtree_control, to make the testing meaningful. ./test_core ... ok 4 # skip test_cgcore_no_internal_process_constraint_on_threads ... Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index d78f1c5366d3..79053a4f4783 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -198,7 +198,7 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo char *parent = NULL, *child = NULL; if (cg_read_strstr(root, "cgroup.controllers", "cpu") || - cg_read_strstr(root, "cgroup.subtree_control", "cpu")) { + cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; } -- 2.20.1

6 years

1
0
0 0

[PATCH 4.19 46/90] kselftest/cgroup: fix unexpected testing failure on test_core

by Greg Kroah-Hartman

[ Upstream commit 00e38a5d753d7788852f81703db804a60a84c26e ] The cgroup testing relys on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, some test cases will be failed as following: $sudo ./test_core not ok 1 test_cgcore_internal_process_constraint ok 2 test_cgcore_top_down_constraint_enable not ok 3 test_cgcore_top_down_constraint_disable ... To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index be59f9c34ea2..d78f1c5366d3 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -376,6 +376,11 @@ int main(int argc, char *argv[]) if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH 4.19 45/90] kselftest/cgroup: fix unexpected testing failure on test_memcontrol

by Greg Kroah-Hartman

[ Upstream commit f6131f28057d4fd8922599339e701a2504e0f23d ] The cgroup testing relies on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, all test cases will be failed as following: $ sudo ./test_memcontrol not ok 1 test_memcg_subtree_control not ok 2 test_memcg_current ok 3 # skip test_memcg_min not ok 4 test_memcg_low not ok 5 test_memcg_high not ok 6 test_memcg_max not ok 7 test_memcg_oom_events ok 8 # skip test_memcg_swap_max not ok 9 test_memcg_sock not ok 10 test_memcg_oom_group_leaf_events not ok 11 test_memcg_oom_group_parent_events not ok 12 test_memcg_oom_group_score_events To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Jay Kamat <jgkamat(a)fb.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_memcontrol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 6f339882a6ca..c19a97dd02d4 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1205,6 +1205,10 @@ int main(int argc, char **argv) if (cg_read_strstr(root, "cgroup.controllers", "memory")) ksft_exit_skip("memory controller isn't available\n"); + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH 5.1 059/121] kselftest/cgroup: fix incorrect test_core skip

by Greg Kroah-Hartman

[ Upstream commit f97f3f8839eb9de5843066d80819884f7722c8c5 ] The test_core will skip the test_cgcore_no_internal_process_constraint_on_threads test case if the 'cpu' controller missing in root's subtree_control. In fact we need to set the 'cpu' in subtree_control, to make the testing meaningful. ./test_core ... ok 4 # skip test_cgcore_no_internal_process_constraint_on_threads ... Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index d78f1c5366d3..79053a4f4783 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -198,7 +198,7 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo char *parent = NULL, *child = NULL; if (cg_read_strstr(root, "cgroup.controllers", "cpu") || - cg_read_strstr(root, "cgroup.subtree_control", "cpu")) { + cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; } -- 2.20.1

6 years

1
0
0 0

[PATCH 5.1 058/121] kselftest/cgroup: fix unexpected testing failure on test_core

by Greg Kroah-Hartman

[ Upstream commit 00e38a5d753d7788852f81703db804a60a84c26e ] The cgroup testing relys on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, some test cases will be failed as following: $sudo ./test_core not ok 1 test_cgcore_internal_process_constraint ok 2 test_cgcore_top_down_constraint_enable not ok 3 test_cgcore_top_down_constraint_disable ... To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index be59f9c34ea2..d78f1c5366d3 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -376,6 +376,11 @@ int main(int argc, char *argv[]) if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH 5.1 057/121] kselftest/cgroup: fix unexpected testing failure on test_memcontrol

by Greg Kroah-Hartman

[ Upstream commit f6131f28057d4fd8922599339e701a2504e0f23d ] The cgroup testing relies on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, all test cases will be failed as following: $ sudo ./test_memcontrol not ok 1 test_memcg_subtree_control not ok 2 test_memcg_current ok 3 # skip test_memcg_min not ok 4 test_memcg_low not ok 5 test_memcg_high not ok 6 test_memcg_max not ok 7 test_memcg_oom_events ok 8 # skip test_memcg_swap_max not ok 9 test_memcg_sock not ok 10 test_memcg_oom_group_leaf_events not ok 11 test_memcg_oom_group_parent_events not ok 12 test_memcg_oom_group_score_events To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Jay Kamat <jgkamat(a)fb.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_memcontrol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 6f339882a6ca..c19a97dd02d4 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1205,6 +1205,10 @@ int main(int argc, char **argv) if (cg_read_strstr(root, "cgroup.controllers", "memory")) ksft_exit_skip("memory controller isn't available\n"); + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH] selftests/tpm2: Install run-time Python modules

by Daniel Díaz

When ordinarily running the tests, upon `make install', the following error is encountered: ImportError: No module named tpm2_tests because the Python files are not installed at the moment. Fix this by adding both Python modules as accompanying TEST_FILES in the Makefile. Signed-off-by: Daniel Díaz <daniel.diaz(a)linaro.org> --- tools/testing/selftests/tpm2/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 9dd848427a7b..bf401f725eef 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -2,3 +2,4 @@ include ../lib.mk TEST_PROGS := test_smoke.sh test_space.sh +TEST_FILES := tpm2.py tpm2_tests.py -- 2.20.1

6 years

2
1
0 0

[PATCH v2.5 0/3] firmware: Add support for loading compressed files

by Takashi Iwai

[resubmitted with the missing patch] Hi, here are the rest and the main part of patches to add the support for loading the compressed firmware files. The patch was slightly refactored for more easily enhancing for other compression formats (if anyone wants). Also the selftest patch is included. The functionality doesn't change from the previous patchset. thanks, Takashi === Takashi Iwai (3): firmware: Factor out the paged buffer handling code firmware: Add support for loading compressed files selftests: firmware: Add compressed firmware tests drivers/base/firmware_loader/Kconfig | 18 ++ drivers/base/firmware_loader/fallback.c | 61 +------ drivers/base/firmware_loader/firmware.h | 12 +- drivers/base/firmware_loader/main.c | 199 +++++++++++++++++++++- tools/testing/selftests/firmware/fw_filesystem.sh | 73 ++++++-- tools/testing/selftests/firmware/fw_lib.sh | 7 + tools/testing/selftests/firmware/fw_run_tests.sh | 1 + 7 files changed, 295 insertions(+), 76 deletions(-) -- 2.16.4

6 years

3
8
0 0

[PATCH net-next] selftests/net: make udpgso_bench skip unsupported testcases

by Willem de Bruijn

From: Willem de Bruijn <willemb(a)google.com> Kselftest can be run against older kernels. Instead of failing hard when a feature is unsupported, return the KSFT_SKIP exit code. Specifically, do not fail hard on missing udp zerocopy. The udp gso bench test runs multiple test cases from a single script. Fail if any case fails, else return skip if any test is skipped. Link: https://lore.kernel.org/lkml/20190618171516.GA17547@kroah.com/ Signed-off-by: Willem de Bruijn <willemb(a)google.com> --- tools/testing/selftests/net/udpgso_bench.sh | 75 +++++++++++-------- tools/testing/selftests/net/udpgso_bench_tx.c | 18 ++++- 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 4df1cd8d69d2..80b5d352702e 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -3,9 +3,47 @@ # # Run a series of udpgso benchmarks -GREEN='\033[0;92m' -RED='\033[0;31m' -NC='\033[0m' # No Color +readonly GREEN='\033[0;92m' +readonly YELLOW='\033[0;33m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly KSFT_PASS=0 +readonly KSFT_FAIL=1 +readonly KSFT_SKIP=4 + +num_pass=0 +num_err=0 +num_skip=0 + +kselftest_test_exitcode() { + local -r exitcode=$1 + + if [[ ${exitcode} -eq ${KSFT_PASS} ]]; then + num_pass=$(( $num_pass + 1 )) + elif [[ ${exitcode} -eq ${KSFT_SKIP} ]]; then + num_skip=$(( $num_skip + 1 )) + else + num_err=$(( $num_err + 1 )) + fi +} + +kselftest_exit() { + echo -e "$(basename $0): PASS=${num_pass} SKIP=${num_skip} FAIL=${num_err}" + + if [[ $num_err -ne 0 ]]; then + echo -e "$(basename $0): ${RED}FAIL${NC}" + exit ${KSFT_FAIL} + fi + + if [[ $num_skip -ne 0 ]]; then + echo -e "$(basename $0): ${YELLOW}SKIP${NC}" + exit ${KSFT_SKIP} + fi + + echo -e "$(basename $0): ${GREEN}PASS${NC}" + exit ${KSFT_PASS} +} wake_children() { local -r jobs="$(jobs -p)" @@ -29,93 +67,66 @@ run_in_netns() { local -r args=$@ ./in_netns.sh $0 __subprocess ${args} + kselftest_test_exitcode $? } run_udp() { local -r args=$@ - local errors=0 echo "udp" run_in_netns ${args} - errors=$(( $errors + $? )) echo "udp gso" run_in_netns ${args} -S 0 - errors=$(( $errors + $? )) echo "udp gso zerocopy" run_in_netns ${args} -S 0 -z - errors=$(( $errors + $? )) echo "udp gso timestamp" run_in_netns ${args} -S 0 -T - errors=$(( $errors + $? )) echo "udp gso zerocopy audit" run_in_netns ${args} -S 0 -z -a - errors=$(( $errors + $? )) echo "udp gso timestamp audit" run_in_netns ${args} -S 0 -T -a - errors=$(( $errors + $? )) echo "udp gso zerocopy timestamp audit" run_in_netns ${args} -S 0 -T -z -a - errors=$(( $errors + $? )) - - return $errors } run_tcp() { local -r args=$@ - local errors=0 echo "tcp" run_in_netns ${args} -t - errors=$(( $errors + $? )) echo "tcp zerocopy" run_in_netns ${args} -t -z - errors=$(( $errors + $? )) # excluding for now because test fails intermittently # add -P option to include poll() to reduce possibility of lost messages #echo "tcp zerocopy audit" #run_in_netns ${args} -t -z -P -a - #errors=$(( $errors + $? )) - - return $errors } run_all() { local -r core_args="-l 3" local -r ipv4_args="${core_args} -4 -D 127.0.0.1" local -r ipv6_args="${core_args} -6 -D ::1" - local errors=0 echo "ipv4" run_tcp "${ipv4_args}" - errors=$(( $errors + $? )) run_udp "${ipv4_args}" - errors=$(( $errors + $? )) echo "ipv6" run_tcp "${ipv4_args}" - errors=$(( $errors + $? )) run_udp "${ipv6_args}" - errors=$(( $errors + $? )) - - return $errors } if [[ $# -eq 0 ]]; then run_all - if [ $? -ne 0 ]; then - echo -e "$(basename $0): ${RED}FAIL${NC}" - exit 1 - fi - - echo -e "$(basename $0): ${GREEN}PASS${NC}" + kselftest_exit elif [[ $1 == "__subprocess" ]]; then shift run_one $@ diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index dfa83ad57206..ada99496634a 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <unistd.h> +#include "../kselftest.h" + #ifndef ETH_MAX_MTU #define ETH_MAX_MTU 0xFFFFU #endif @@ -45,6 +47,10 @@ #define MSG_ZEROCOPY 0x4000000 #endif +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #define NUM_PKT 100 static bool cfg_cache_trash; @@ -603,7 +609,7 @@ int main(int argc, char **argv) { unsigned long num_msgs, num_sends; unsigned long tnow, treport, tstop; - int fd, i, val; + int fd, i, val, ret; parse_opts(argc, argv); @@ -623,8 +629,16 @@ int main(int argc, char **argv) if (cfg_zerocopy) { val = 1; - if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) + + ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, + &val, sizeof(val)); + if (ret) { + if (errno == ENOPROTOOPT || errno == ENOTSUPP) { + fprintf(stderr, "SO_ZEROCOPY not supported"); + exit(KSFT_SKIP); + } error(1, errno, "setsockopt zerocopy"); + } } if (cfg_connected && -- 2.22.0.410.gd8fdbe21b5-goog

6 years

2
1
0 0

4.19: udpgso_bench_tx: setsockopt zerocopy: Unknown error 524

by Naresh Kamboju

selftests: net: udpgso_bench.sh failed on 4.19, 4.14, 4.9 and 4.4 branches. PASS on stable branch 5.1, mainline and next. This failure is started happening on 4.19 and older kernel branches after kselftest upgrade to version 5.1 Is there any possibilities to backport ? Error: udpgso_bench_tx: setsockopt zerocopy: Unknown error 524 Test output: ----------------- selftests: net: udpgso_bench.sh ipv4 tcp tcp rx: 469 MB/s 7930 calls/s tcp tx: 469 MB/s 7961 calls/s 7961 msg/s tcp rx: 470 MB/s 7941 calls/s tcp tx: 470 MB/s 7977 calls/s 7977 msg/s tcp rx: 470 MB/s 7933 calls/s tcp tx: 470 MB/s 7975 calls/s 7975 msg/s tcp zerocopy tcp tx: 357 MB/s 6064 calls/s 6064 msg/s tcp rx: 357 MB/s 6052 calls/s tcp tx: 352 MB/s 5981 calls/s 5981 msg/s tcp rx: 352 MB/s 5979 calls/s tcp tx: 350 MB/s 5937 calls/s 5937 msg/s tcp rx: 350 MB/s 5938 calls/s udp udp rx: 23 MB/s 16505 calls/s udp tx: 23 MB/s 16464 calls/s 392 msg/s udp rx: 23 MB/s 16500 calls/s udp tx: 23 MB/s 16506 calls/s 393 msg/s udp rx: 23 MB/s 16396 calls/s udp gso udp rx: 536 MB/s 9097 calls/s udp tx: 545 MB/s 9246 calls/s 9246 msg/s udp rx: 545 MB/s 9256 calls/s udp tx: 545 MB/s 9256 calls/s 9256 msg/s udp rx: 545 MB/s 9259 calls/s udp tx: 545 MB/s 9258 calls/s 9258 msg/s udp rx: 545 MB/s 9252 calls/s udp gso zerocopy ./udpgso_bench_tx: setsockopt zerocopy: Unknown error 524 ipv6 tcp tcp tx: 470 MB/s 7979 calls/s 7979 msg/s tcp rx: 470 MB/s 7947 calls/s tcp rx: 471 MB/s 7979 calls/s tcp tx: 514 MB/s 8721 calls/s 8721 msg/s tcp zerocopy tcp tx: 392 MB/s 6658 calls/s 6658 msg/s tcp rx: 392 MB/s 6399 calls/s tcp rx: 350 MB/s 5936 calls/s tcp tx: 350 MB/s 5945 calls/s 5945 msg/s tcp rx: 350 MB/s 5937 calls/s tcp tx: 350 MB/s 5940 calls/s 5940 msg/s udp udp rx: 20 MB/s 14802 calls/s udp tx: 20 MB/s 14921 calls/s 347 msg/s udp rx: 24 MB/s 17797 calls/s udp tx: 24 MB/s 17802 calls/s 414 msg/s udp rx: 17 MB/s 12453 calls/s udp tx: 17 MB/s 12470 calls/s 290 msg/s udp rx: 17 MB/s 12409 calls/s udp tx: 545 MB/s 9257 calls/s 9257 msg/s udp rx: 545 MB/s 9249 calls/s udp tx: 545 MB/s 9248 calls/s 9248 msg/s udp rx: 545 MB/s 9254 calls/s udp tx: 545 MB/s 9254 calls/s 9254 msg/s udp rx: 545 MB/s 9260 calls/s udp gso zerocopy ./udpgso_bench_tx: setsockopt zerocopy: Unknown error 524 not ok 1.. selftests: net: udpgso_bench.sh [FAIL] selftests: net_udpgso_bench.sh [FAIL] Best regards Naresh Kamboju

6 years

4
12
0 0

[PATCH v1 06/22] docs: blockdev: convert to ReST

by Mauro Carvalho Chehab

Rename the blockdev documentation files to ReST, add an index for them and adjust in order to produce a nice html output via the Sphinx build system. The drbd sub-directory contains some graphs and data flows. Add those too to the documentation. At its new index.rst, let's add a :orphan: while this is not linked to the main index.rst file, in order to avoid build warnings. Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung(a)kernel.org> --- .../admin-guide/kernel-parameters.txt | 18 +- ...structure-v9.txt => data-structure-v9.rst} | 6 +- Documentation/blockdev/drbd/figures.rst | 28 +++ .../blockdev/drbd/{README.txt => index.rst} | 15 +- .../blockdev/{floppy.txt => floppy.rst} | 88 ++++---- Documentation/blockdev/index.rst | 16 ++ Documentation/blockdev/{nbd.txt => nbd.rst} | 2 +- .../blockdev/{paride.txt => paride.rst} | 194 +++++++++-------- .../blockdev/{ramdisk.txt => ramdisk.rst} | 55 ++--- Documentation/blockdev/{zram.txt => zram.rst} | 195 ++++++++++++------ MAINTAINERS | 8 +- drivers/block/Kconfig | 8 +- drivers/block/floppy.c | 2 +- drivers/block/zram/Kconfig | 6 +- tools/testing/selftests/zram/README | 2 +- 15 files changed, 398 insertions(+), 245 deletions(-) rename Documentation/blockdev/drbd/{data-structure-v9.txt => data-structure-v9.rst} (94%) create mode 100644 Documentation/blockdev/drbd/figures.rst rename Documentation/blockdev/drbd/{README.txt => index.rst} (55%) rename Documentation/blockdev/{floppy.txt => floppy.rst} (81%) create mode 100644 Documentation/blockdev/index.rst rename Documentation/blockdev/{nbd.txt => nbd.rst} (96%) rename Documentation/blockdev/{paride.txt => paride.rst} (81%) rename Documentation/blockdev/{ramdisk.txt => ramdisk.rst} (84%) rename Documentation/blockdev/{zram.txt => zram.rst} (76%) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 873062810484..20780fbc948d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1249,7 +1249,7 @@ See also Documentation/fault-injection/. floppy= [HW] - See Documentation/blockdev/floppy.txt. + See Documentation/blockdev/floppy.rst. force_pal_cache_flush [IA-64] Avoid check_sal_cache_flush which may hang on @@ -2238,7 +2238,7 @@ memblock=debug [KNL] Enable memblock debug messages. load_ramdisk= [RAM] List of ramdisks to load from floppy - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. lockd.nlm_grace_period=P [NFS] Assign grace period. Format: <integer> @@ -3283,7 +3283,7 @@ pcd. [PARIDE] See header of drivers/block/paride/pcd.c. - See also Documentation/blockdev/paride.txt. + See also Documentation/blockdev/paride.rst. pci=option[,option...] [PCI] various PCI subsystem options. @@ -3527,7 +3527,7 @@ needed on a platform with proper driver support. pd. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at boot time. @@ -3542,10 +3542,10 @@ and performance comparison. pf. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pg. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pirq= [SMP,APIC] Manual mp-table setup See Documentation/x86/i386/IO-APIC.rst. @@ -3657,7 +3657,7 @@ prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk before loading. - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. psi= [KNL] Enable or disable pressure stall information tracking. @@ -3679,7 +3679,7 @@ pstore.backend= Specify the name of the pstore backend to use pt. [PARIDE] - See Documentation/blockdev/paride.txt. + See Documentation/blockdev/paride.rst. pti= [X86_64] Control Page Table Isolation of user and kernel address spaces. Disabling this feature @@ -3708,7 +3708,7 @@ See Documentation/admin-guide/md.rst. ramdisk_size= [RAM] Sizes of RAM disks in kilobytes - See Documentation/blockdev/ramdisk.txt. + See Documentation/blockdev/ramdisk.rst. random.trust_cpu={on,off} [KNL] Enable or disable trusting the use of the diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/blockdev/drbd/data-structure-v9.rst similarity index 94% rename from Documentation/blockdev/drbd/data-structure-v9.txt rename to Documentation/blockdev/drbd/data-structure-v9.rst index 1e52a0e32624..66036b901644 100644 --- a/Documentation/blockdev/drbd/data-structure-v9.txt +++ b/Documentation/blockdev/drbd/data-structure-v9.rst @@ -1,3 +1,7 @@ +================================ +kernel data structure for DRBD-9 +================================ + This describes the in kernel data structure for DRBD-9. Starting with Linux v3.14 we are reorganizing DRBD to use this data structure. @@ -10,7 +14,7 @@ device is represented by a block device locally. The DRBD objects are interconnected to form a matrix as depicted below; a drbd_peer_device object sits at each intersection between a drbd_device and a -drbd_connection: +drbd_connection:: /--------------+---------------+.....+---------------\ | resource | device | | device | diff --git a/Documentation/blockdev/drbd/figures.rst b/Documentation/blockdev/drbd/figures.rst new file mode 100644 index 000000000000..3e3fd4b8a478 --- /dev/null +++ b/Documentation/blockdev/drbd/figures.rst @@ -0,0 +1,28 @@ +.. The here included files are intended to help understand the implementation + +Data flows that Relate some functions, and write packets +======================================================== + +.. kernel-figure:: DRBD-8.3-data-packets.svg + :alt: DRBD-8.3-data-packets.svg + :align: center + +.. kernel-figure:: DRBD-data-packets.svg + :alt: DRBD-data-packets.svg + :align: center + + +Sub graphs of DRBD's state transitions +====================================== + +.. kernel-figure:: conn-states-8.dot + :alt: conn-states-8.dot + :align: center + +.. kernel-figure:: disk-states-8.dot + :alt: disk-states-8.dot + :align: center + +.. kernel-figure:: node-states-8.dot + :alt: node-states-8.dot + :align: center diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/index.rst similarity index 55% rename from Documentation/blockdev/drbd/README.txt rename to Documentation/blockdev/drbd/index.rst index 627b0a1bf35e..68ecd5c113e9 100644 --- a/Documentation/blockdev/drbd/README.txt +++ b/Documentation/blockdev/drbd/index.rst @@ -1,4 +1,9 @@ +========================================== +Distributed Replicated Block Device - DRBD +========================================== + Description +=========== DRBD is a shared-nothing, synchronously replicated block device. It is designed to serve as a building block for high availability @@ -7,10 +12,8 @@ Description Please visit http://www.drbd.org to find out more. -The here included files are intended to help understand the implementation +.. toctree:: + :maxdepth: 1 -DRBD-8.3-data-packets.svg, DRBD-data-packets.svg - relates some functions, and write packets. - -conn-states-8.dot, disk-states-8.dot, node-states-8.dot - The sub graphs of DRBD's state transitions + data-structure-v9 + figures diff --git a/Documentation/blockdev/floppy.txt b/Documentation/blockdev/floppy.rst similarity index 81% rename from Documentation/blockdev/floppy.txt rename to Documentation/blockdev/floppy.rst index e2240f5ab64d..4a8f31cf4139 100644 --- a/Documentation/blockdev/floppy.txt +++ b/Documentation/blockdev/floppy.rst @@ -1,35 +1,37 @@ -This file describes the floppy driver. +============= +Floppy Driver +============= FAQ list: ========= - A FAQ list may be found in the fdutils package (see below), and also +A FAQ list may be found in the fdutils package (see below), and also at <http://fdutils.linux.lu/faq.html>. LILO configuration options (Thinkpad users, read this) ====================================================== - The floppy driver is configured using the 'floppy=' option in +The floppy driver is configured using the 'floppy=' option in lilo. This option can be typed at the boot prompt, or entered in the lilo configuration file. - Example: If your kernel is called linux-2.6.9, type the following line -at the lilo boot prompt (if you have a thinkpad): +Example: If your kernel is called linux-2.6.9, type the following line +at the lilo boot prompt (if you have a thinkpad):: linux-2.6.9 floppy=thinkpad You may also enter the following line in /etc/lilo.conf, in the description -of linux-2.6.9: +of linux-2.6.9:: append = "floppy=thinkpad" - Several floppy related options may be given, example: +Several floppy related options may be given, example:: linux-2.6.9 floppy=daring floppy=two_fdc append = "floppy=daring floppy=two_fdc" - If you give options both in the lilo config file and on the boot +If you give options both in the lilo config file and on the boot prompt, the option strings of both places are concatenated, the boot prompt options coming last. That's why there are also options to restore the default behavior. @@ -38,21 +40,23 @@ restore the default behavior. Module configuration options ============================ - If you use the floppy driver as a module, use the following syntax: -modprobe floppy floppy="<options>" +If you use the floppy driver as a module, use the following syntax:: -Example: - modprobe floppy floppy="omnibook messages" + modprobe floppy floppy="<options>" - If you need certain options enabled every time you load the floppy driver, -you can put: +Example:: - options floppy floppy="omnibook messages" + modprobe floppy floppy="omnibook messages" + +If you need certain options enabled every time you load the floppy driver, +you can put:: + + options floppy floppy="omnibook messages" in a configuration file in /etc/modprobe.d/. - The floppy driver related options are: +The floppy driver related options are: floppy=asus_pci Sets the bit mask to allow only units 0 and 1. (default) @@ -70,8 +74,7 @@ in a configuration file in /etc/modprobe.d/. Tells the floppy driver that you have only one floppy controller. (default) - floppy=two_fdc - floppy=<address>,two_fdc + floppy=two_fdc / floppy=<address>,two_fdc Tells the floppy driver that you have two floppy controllers. The second floppy controller is assumed to be at <address>. This option is not needed if the second controller is at address @@ -84,8 +87,7 @@ in a configuration file in /etc/modprobe.d/. floppy=0,thinkpad Tells the floppy driver that you don't have a Thinkpad. - floppy=omnibook - floppy=nodma + floppy=omnibook / floppy=nodma Tells the floppy driver not to use Dma for data transfers. This is needed on HP Omnibooks, which don't have a workable DMA channel for the floppy driver. This option is also useful @@ -144,14 +146,16 @@ in a configuration file in /etc/modprobe.d/. described in the physical CMOS), or if your BIOS uses non-standard CMOS types. The CMOS types are: - 0 - Use the value of the physical CMOS - 1 - 5 1/4 DD - 2 - 5 1/4 HD - 3 - 3 1/2 DD - 4 - 3 1/2 HD - 5 - 3 1/2 ED - 6 - 3 1/2 ED - 16 - unknown or not installed + == ================================== + 0 Use the value of the physical CMOS + 1 5 1/4 DD + 2 5 1/4 HD + 3 3 1/2 DD + 4 3 1/2 HD + 5 3 1/2 ED + 6 3 1/2 ED + 16 unknown or not installed + == ================================== (Note: there are two valid types for ED drives. This is because 5 was initially chosen to represent floppy *tapes*, and 6 for ED drives. @@ -162,8 +166,7 @@ in a configuration file in /etc/modprobe.d/. Print a warning message when an unexpected interrupt is received. (default) - floppy=no_unexpected_interrupts - floppy=L40SX + floppy=no_unexpected_interrupts / floppy=L40SX Don't print a message when an unexpected interrupt is received. This is needed on IBM L40SX laptops in certain video modes. (There seems to be an interaction between video and floppy. The unexpected @@ -199,47 +202,54 @@ in a configuration file in /etc/modprobe.d/. Sets the floppy DMA channel to <nr> instead of 2. floppy=slow - Use PS/2 stepping rate: - " PS/2 floppies have much slower step rates than regular floppies. + Use PS/2 stepping rate:: + + PS/2 floppies have much slower step rates than regular floppies. It's been recommended that take about 1/4 of the default speed - in some more extreme cases." + in some more extreme cases. Supporting utilities and additional documentation: ================================================== - Additional parameters of the floppy driver can be configured at +Additional parameters of the floppy driver can be configured at runtime. Utilities which do this can be found in the fdutils package. This package also contains a new version of mtools which allows to access high capacity disks (up to 1992K on a high density 3 1/2 disk!). It also contains additional documentation about the floppy driver. The latest version can be found at fdutils homepage: + http://fdutils.linux.lu The fdutils releases can be found at: + http://fdutils.linux.lu/download.html + http://www.tux.org/pub/knaff/fdutils/ + ftp://metalab.unc.edu/pub/Linux/utils/disk-management/ Reporting problems about the floppy driver ========================================== - If you have a question or a bug report about the floppy driver, mail +If you have a question or a bug report about the floppy driver, mail me at Alain.Knaff(a)poboxes.com . If you post to Usenet, preferably use comp.os.linux.hardware. As the volume in these groups is rather high, be sure to include the word "floppy" (or "FLOPPY") in the subject line. If the reported problem happens when mounting floppy disks, be sure to mention also the type of the filesystem in the subject line. - Be sure to read the FAQ before mailing/posting any bug reports! +Be sure to read the FAQ before mailing/posting any bug reports! - Alain +Alain Changelog ========= -10-30-2004 : Cleanup, updating, add reference to module configuration. +10-30-2004 : + Cleanup, updating, add reference to module configuration. James Nelson <james4765(a)gmail.com> -6-3-2000 : Original Document +6-3-2000 : + Original Document diff --git a/Documentation/blockdev/index.rst b/Documentation/blockdev/index.rst new file mode 100644 index 000000000000..a9af6ed8b4aa --- /dev/null +++ b/Documentation/blockdev/index.rst @@ -0,0 +1,16 @@ +:orphan: + +=========================== +The Linux RapidIO Subsystem +=========================== + +.. toctree:: + :maxdepth: 1 + + floppy + nbd + paride + ramdisk + zram + + drbd/index diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.rst similarity index 96% rename from Documentation/blockdev/nbd.txt rename to Documentation/blockdev/nbd.rst index db242ea2bce8..d78dfe559dcf 100644 --- a/Documentation/blockdev/nbd.txt +++ b/Documentation/blockdev/nbd.rst @@ -1,3 +1,4 @@ +================================== Network Block Device (TCP version) ================================== @@ -28,4 +29,3 @@ max_part nbds_max Number of block devices that should be initialized (default: 16). - diff --git a/Documentation/blockdev/paride.txt b/Documentation/blockdev/paride.rst similarity index 81% rename from Documentation/blockdev/paride.txt rename to Documentation/blockdev/paride.rst index ee6717e3771d..87b4278bf314 100644 --- a/Documentation/blockdev/paride.txt +++ b/Documentation/blockdev/paride.rst @@ -1,15 +1,17 @@ - - Linux and parallel port IDE devices +=================================== +Linux and parallel port IDE devices +=================================== PARIDE v1.03 (c) 1997-8 Grant Guenther <grant(a)torque.net> 1. Introduction +=============== Owing to the simplicity and near universality of the parallel port interface to personal computers, many external devices such as portable hard-disk, CD-ROM, LS-120 and tape drives use the parallel port to connect to their host computer. While some devices (notably scanners) use ad-hoc methods -to pass commands and data through the parallel port interface, most +to pass commands and data through the parallel port interface, most external devices are actually identical to an internal model, but with a parallel-port adapter chip added in. Some of the original parallel port adapters were little more than mechanisms for multiplexing a SCSI bus. @@ -28,47 +30,50 @@ were to open up a parallel port CD-ROM drive, for instance, one would find a standard ATAPI CD-ROM drive, a power supply, and a single adapter that interconnected a standard PC parallel port cable and a standard IDE cable. It is usually possible to exchange the CD-ROM device with -any other device using the IDE interface. +any other device using the IDE interface. The document describes the support in Linux for parallel port IDE devices. It does not cover parallel port SCSI devices, "ditto" tape -drives or scanners. Many different devices are supported by the +drives or scanners. Many different devices are supported by the parallel port IDE subsystem, including: - MicroSolutions backpack CD-ROM - MicroSolutions backpack PD/CD - MicroSolutions backpack hard-drives - MicroSolutions backpack 8000t tape drive - SyQuest EZ-135, EZ-230 & SparQ drives - Avatar Shark - Imation Superdisk LS-120 - Maxell Superdisk LS-120 - FreeCom Power CD - Hewlett-Packard 5GB and 8GB tape drives - Hewlett-Packard 7100 and 7200 CD-RW drives + - MicroSolutions backpack CD-ROM + - MicroSolutions backpack PD/CD + - MicroSolutions backpack hard-drives + - MicroSolutions backpack 8000t tape drive + - SyQuest EZ-135, EZ-230 & SparQ drives + - Avatar Shark + - Imation Superdisk LS-120 + - Maxell Superdisk LS-120 + - FreeCom Power CD + - Hewlett-Packard 5GB and 8GB tape drives + - Hewlett-Packard 7100 and 7200 CD-RW drives as well as most of the clone and no-name products on the market. To support such a wide range of devices, PARIDE, the parallel port IDE subsystem, is actually structured in three parts. There is a base paride module which provides a registry and some common methods for -accessing the parallel ports. The second component is a set of -high-level drivers for each of the different types of supported devices: +accessing the parallel ports. The second component is a set of +high-level drivers for each of the different types of supported devices: + === ============= pd IDE disk pcd ATAPI CD-ROM pf ATAPI disk pt ATAPI tape pg ATAPI generic + === ============= (Currently, the pg driver is only used with CD-R drives). The high-level drivers function according to the relevant standards. The third component of PARIDE is a set of low-level protocol drivers for each of the parallel port IDE adapter chips. Thanks to the interest -and encouragement of Linux users from many parts of the world, +and encouragement of Linux users from many parts of the world, support is available for almost all known adapter protocols: + ==== ====================================== ==== aten ATEN EH-100 (HK) bpck Microsolutions backpack (US) comm DataStor (old-type) "commuter" adapter (TW) @@ -83,9 +88,11 @@ support is available for almost all known adapter protocols: ktti KT Technology PHd adapter (SG) on20 OnSpec 90c20 (US) on26 OnSpec 90c26 (US) + ==== ====================================== ==== 2. Using the PARIDE subsystem +============================= While configuring the Linux kernel, you may choose either to build the PARIDE drivers into your kernel, or to build them as modules. @@ -94,10 +101,10 @@ In either case, you will need to select "Parallel port IDE device support" as well as at least one of the high-level drivers and at least one of the parallel port communication protocols. If you do not know what kind of parallel port adapter is used in your drive, you could -begin by checking the file names and any text files on your DOS +begin by checking the file names and any text files on your DOS installation floppy. Alternatively, you can look at the markings on the adapter chip itself. That's usually sufficient to identify the -correct device. +correct device. You can actually select all the protocol modules, and allow the PARIDE subsystem to try them all for you. @@ -105,8 +112,9 @@ subsystem to try them all for you. For the "brand-name" products listed above, here are the protocol and high-level drivers that you would use: + ================ ============ ====== ======== Manufacturer Model Driver Protocol - + ================ ============ ====== ======== MicroSolutions CD-ROM pcd bpck MicroSolutions PD drive pf bpck MicroSolutions hard-drive pd bpck @@ -119,8 +127,10 @@ and high-level drivers that you would use: Hewlett-Packard 5GB Tape pt epat Hewlett-Packard 7200e (CD) pcd epat Hewlett-Packard 7200e (CD-R) pg epat + ================ ============ ====== ======== 2.1 Configuring built-in drivers +--------------------------------- We recommend that you get to know how the drivers work and how to configure them as loadable modules, before attempting to compile a @@ -143,7 +153,7 @@ protocol identification number and, for some devices, the drive's chain ID. While your system is booting, a number of messages are displayed on the console. Like all such messages, they can be reviewed with the 'dmesg' command. Among those messages will be -some lines like: +some lines like:: paride: bpck registered as protocol 0 paride: epat registered as protocol 1 @@ -158,10 +168,10 @@ the last two digits of the drive's serial number (but read MicroSolutions' documentation about this). As an example, let's assume that you have a MicroSolutions PD/CD drive -with unit ID number 36 connected to the parallel port at 0x378, a SyQuest -EZ-135 connected to the chained port on the PD/CD drive and also an -Imation Superdisk connected to port 0x278. You could give the following -options on your boot command: +with unit ID number 36 connected to the parallel port at 0x378, a SyQuest +EZ-135 connected to the chained port on the PD/CD drive and also an +Imation Superdisk connected to port 0x278. You could give the following +options on your boot command:: pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36 @@ -169,24 +179,27 @@ In the last option, pf.drive1 configures device /dev/pf1, the 0x378 is the parallel port base address, the 0 is the protocol registration number and 36 is the chain ID. -Please note: while PARIDE will work both with and without the +Please note: while PARIDE will work both with and without the PARPORT parallel port sharing system that is included by the "Parallel port support" option, PARPORT must be included and enabled if you want to use chains of devices on the same parallel port. 2.2 Loading and configuring PARIDE as modules +---------------------------------------------- It is much faster and simpler to get to understand the PARIDE drivers -if you use them as loadable kernel modules. +if you use them as loadable kernel modules. -Note 1: using these drivers with the "kerneld" automatic module loading -system is not recommended for beginners, and is not documented here. +Note 1: + using these drivers with the "kerneld" automatic module loading + system is not recommended for beginners, and is not documented here. -Note 2: if you build PARPORT support as a loadable module, PARIDE must -also be built as loadable modules, and PARPORT must be loaded before the -PARIDE modules. +Note 2: + if you build PARPORT support as a loadable module, PARIDE must + also be built as loadable modules, and PARPORT must be loaded before + the PARIDE modules. -To use PARIDE, you must begin by +To use PARIDE, you must begin by:: insmod paride @@ -195,8 +208,8 @@ among other tasks. Then, load as many of the protocol modules as you think you might need. As you load each module, it will register the protocols that it supports, -and print a log message to your kernel log file and your console. For -example: +and print a log message to your kernel log file and your console. For +example:: # insmod epat paride: epat registered as protocol 0 @@ -205,22 +218,22 @@ example: paride: k971 registered as protocol 2 Finally, you can load high-level drivers for each kind of device that -you have connected. By default, each driver will autoprobe for a single +you have connected. By default, each driver will autoprobe for a single device, but you can support up to four similar devices by giving their individual co-ordinates when you load the driver. For example, if you had two no-name CD-ROM drives both using the KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc -you could give the following command: +you could give the following command:: # insmod pcd drive0=0x378,1 drive1=0x3bc,1 For most adapters, giving a port address and protocol number is sufficient, -but check the source files in linux/drivers/block/paride for more +but check the source files in linux/drivers/block/paride for more information. (Hopefully someone will write some man pages one day !). As another example, here's what happens when PARPORT is installed, and -a SyQuest EZ-135 is attached to port 0x378: +a SyQuest EZ-135 is attached to port 0x378:: # insmod paride paride: version 1.0 installed @@ -237,46 +250,47 @@ Note that the last line is the output from the generic partition table scanner - in this case it reports that it has found a disk with one partition. 2.3 Using a PARIDE device +-------------------------- Once the drivers have been loaded, you can access PARIDE devices in the same way as their traditional counterparts. You will probably need to create the device "special files". Here is a simple script that you can -cut to a file and execute: +cut to a file and execute:: -#!/bin/bash -# -# mkd -- a script to create the device special files for the PARIDE subsystem -# -function mkdev { - mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1 -} -# -function pd { - D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) ) - mkdev pd$D b 45 $[ $1 * 16 ] - for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - do mkdev pd$D$P b 45 $[ $1 * 16 + $P ] - done -} -# -cd /dev -# -for u in 0 1 2 3 ; do pd $u ; done -for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done -for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done -for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done -for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done -for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done -# -# end of mkd + #!/bin/bash + # + # mkd -- a script to create the device special files for the PARIDE subsystem + # + function mkdev { + mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1 + } + # + function pd { + D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) ) + mkdev pd$D b 45 $[ $1 * 16 ] + for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + do mkdev pd$D$P b 45 $[ $1 * 16 + $P ] + done + } + # + cd /dev + # + for u in 0 1 2 3 ; do pd $u ; done + for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done + for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done + for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done + for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done + for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done + # + # end of mkd With the device files and drivers in place, you can access PARIDE devices -like any other Linux device. For example, to mount a CD-ROM in pcd0, use: +like any other Linux device. For example, to mount a CD-ROM in pcd0, use:: mount /dev/pcd0 /cdrom If you have a fresh Avatar Shark cartridge, and the drive is pda, you -might do something like: +might do something like:: fdisk /dev/pda -- make a new partition table with partition 1 of type 83 @@ -289,41 +303,46 @@ might do something like: Devices like the Imation superdisk work in the same way, except that they do not have a partition table. For example to make a 120MB -floppy that you could share with a DOS system: +floppy that you could share with a DOS system:: mkdosfs /dev/pf0 mount /dev/pf0 /mnt 2.4 The pf driver +------------------ The pf driver is intended for use with parallel port ATAPI disk devices. The most common devices in this category are PD drives and LS-120 drives. Traditionally, media for these devices are not partitioned. Consequently, the pf driver does not support partitioned -media. This may be changed in a future version of the driver. +media. This may be changed in a future version of the driver. 2.5 Using the pt driver +------------------------ The pt driver for parallel port ATAPI tape drives is a minimal driver. -It does not yet support many of the standard tape ioctl operations. +It does not yet support many of the standard tape ioctl operations. For best performance, a block size of 32KB should be used. You will probably want to set the parallel port delay to 0, if you can. 2.6 Using the pg driver +------------------------ The pg driver can be used in conjunction with the cdrecord program to create CD-ROMs. Please get cdrecord version 1.6.1 or later -from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media -your parallel port should ideally be set to EPP mode, and the "port delay" -should be set to 0. With those settings it is possible to record at 2x +from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media +your parallel port should ideally be set to EPP mode, and the "port delay" +should be set to 0. With those settings it is possible to record at 2x speed without any buffer underruns. If you cannot get the driver to work in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only. 3. Troubleshooting +================== 3.1 Use EPP mode if you can +---------------------------- The most common problems that people report with the PARIDE drivers concern the parallel port CMOS settings. At this time, none of the @@ -332,6 +351,7 @@ If you are able to do so, please set your parallel port into EPP mode using your CMOS setup procedure. 3.2 Check the port delay +------------------------- Some parallel ports cannot reliably transfer data at full speed. To offset the errors, the PARIDE protocol modules introduce a "port @@ -347,23 +367,25 @@ read the comments at the beginning of the driver source files in linux/drivers/block/paride. 3.3 Some drives need a printer reset +------------------------------------- There appear to be a number of "noname" external drives on the market that do not always power up correctly. We have noticed this with some drives based on OnSpec and older Freecom adapters. In these rare cases, the adapter can often be reinitialised by issuing a "printer reset" on -the parallel port. As the reset operation is potentially disruptive in -multiple device environments, the PARIDE drivers will not do it -automatically. You can however, force a printer reset by doing: +the parallel port. As the reset operation is potentially disruptive in +multiple device environments, the PARIDE drivers will not do it +automatically. You can however, force a printer reset by doing:: insmod lp reset=1 rmmod lp If you have one of these marginal cases, you should probably build your paride drivers as modules, and arrange to do the printer reset -before loading the PARIDE drivers. +before loading the PARIDE drivers. 3.4 Use the verbose option and dmesg if you need help +------------------------------------------------------ While a lot of testing has gone into these drivers to make them work as smoothly as possible, problems will arise. If you do have problems, @@ -373,7 +395,7 @@ clues, then please make sure that only one drive is hooked to your system, and that either (a) PARPORT is enabled or (b) no other device driver is using your parallel port (check in /proc/ioports). Then, load the appropriate drivers (you can load several protocol modules if you want) -as in: +as in:: # insmod paride # insmod epat @@ -394,12 +416,14 @@ by e-mail to grant(a)torque.net, or join the linux-parport mailing list and post your report there. 3.5 For more information or help +--------------------------------- You can join the linux-parport mailing list by sending a mail message -to +to: + linux-parport-request(a)torque.net -with the single word +with the single word:: subscribe @@ -412,6 +436,4 @@ have in your mail headers, when sending mail to the list server. You might also find some useful information on the linux-parport web pages (although they are not always up to date) at - http://web.archive.org/web/*/http://www.torque.net/parport/ - - + http://web.archive.org/web/%2E/http://www.torque.net/parport/ diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.rst similarity index 84% rename from Documentation/blockdev/ramdisk.txt rename to Documentation/blockdev/ramdisk.rst index 501e12e0323e..b7c2268f8dec 100644 --- a/Documentation/blockdev/ramdisk.txt +++ b/Documentation/blockdev/ramdisk.rst @@ -1,7 +1,8 @@ +========================================== Using the RAM disk block device with Linux ------------------------------------------- +========================================== -Contents: +.. Contents: 1) Overview 2) Kernel Command Line Parameters @@ -42,7 +43,7 @@ rescue floppy disk. 2a) Kernel Command Line Parameters ramdisk_size=N - ============== + Size of the ramdisk. This parameter tells the RAM disk driver to set up RAM disks of N k size. The default is 4096 (4 MB). @@ -50,16 +51,13 @@ default is 4096 (4 MB). 2b) Module parameters rd_nr - ===== - /dev/ramX devices created. + /dev/ramX devices created. max_part - ======== - Maximum partition number. + Maximum partition number. rd_size - ======= - See ramdisk_size. + See ramdisk_size. 3) Using "rdev -r" ------------------ @@ -71,11 +69,11 @@ to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit prompt/wait sequence is to be given before trying to read the RAM disk. Since the RAM disk dynamically grows as data is being written into it, a size field is not required. Bits 11 to 13 are not currently used and may as well be zero. -These numbers are no magical secrets, as seen below: +These numbers are no magical secrets, as seen below:: -./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF -./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 -./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 + ./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF + ./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000 + ./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000 Consider a typical two floppy disk setup, where you will have the kernel on disk one, and have already put a RAM disk image onto disk #2. @@ -92,20 +90,23 @@ sequence so that you have a chance to switch floppy disks. The command line equivalent is: "prompt_ramdisk=1" Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word. -So to create disk one of the set, you would do: +So to create disk one of the set, you would do:: /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0 /usr/src/linux# rdev /dev/fd0 /dev/fd0 /usr/src/linux# rdev -r /dev/fd0 49152 -If you make a boot disk that has LILO, then for the above, you would use: +If you make a boot disk that has LILO, then for the above, you would use:: + append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1" -Since the default start = 0 and the default prompt = 1, you could use: + +Since the default start = 0 and the default prompt = 1, you could use:: + append = "load_ramdisk=1" 4) An Example of Creating a Compressed RAM Disk ----------------------------------------------- +----------------------------------------------- To create a RAM disk image, you will need a spare block device to construct it on. This can be the RAM disk device itself, or an @@ -120,11 +121,11 @@ a) Decide on the RAM disk size that you want. Say 2 MB for this example. Create it by writing to the RAM disk device. (This step is not currently required, but may be in the future.) It is wise to zero out the area (esp. for disks) so that maximal compression is achieved for - the unused blocks of the image that you are about to create. + the unused blocks of the image that you are about to create:: dd if=/dev/zero of=/dev/ram0 bs=1k count=2048 -b) Make a filesystem on it. Say ext2fs for this example. +b) Make a filesystem on it. Say ext2fs for this example:: mke2fs -vm0 /dev/ram0 2048 @@ -133,11 +134,11 @@ c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...) d) Compress the contents of the RAM disk. The level of compression will be approximately 50% of the space used by the files. Unused - space on the RAM disk will compress to almost nothing. + space on the RAM disk will compress to almost nothing:: dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz -e) Put the kernel onto the floppy +e) Put the kernel onto the floppy:: dd if=zImage of=/dev/fd0 bs=1k @@ -146,13 +147,13 @@ f) Put the RAM disk image onto the floppy, after the kernel. Use an offset (possibly larger) kernel onto the same floppy later without overlapping the RAM disk image. An offset of 400 kB for kernels about 350 kB in size would be reasonable. Make sure offset+size of ram_image.gz is - not larger than the total space on your floppy (usually 1440 kB). + not larger than the total space on your floppy (usually 1440 kB):: dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400 g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc. For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would - have 2^15 + 2^14 + 400 = 49552. + have 2^15 + 2^14 + 400 = 49552:: rdev /dev/fd0 /dev/fd0 rdev -r /dev/fd0 49552 @@ -160,15 +161,17 @@ g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc. That is it. You now have your boot/root compressed RAM disk floppy. Some users may wish to combine steps (d) and (f) by using a pipe. --------------------------------------------------------------------------- + Paul Gortmaker 12/95 Changelog: ---------- -10-22-04 : Updated to reflect changes in command line options, remove +10-22-04 : + Updated to reflect changes in command line options, remove obsolete references, general cleanup. James Nelson (james4765(a)gmail.com) -12-95 : Original Document +12-95 : + Original Document diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.rst similarity index 76% rename from Documentation/blockdev/zram.txt rename to Documentation/blockdev/zram.rst index 4df0ce271085..2111231c9c0f 100644 --- a/Documentation/blockdev/zram.txt +++ b/Documentation/blockdev/zram.rst @@ -1,7 +1,9 @@ +======================================== zram: Compressed RAM based block devices ----------------------------------------- +======================================== -* Introduction +Introduction +============ The zram module creates RAM based block devices named /dev/zram<id> (<id> = 0, 1, ...). Pages written to these disks are compressed and stored @@ -12,9 +14,11 @@ use as swap disks, various caches under /var and maybe many more :) Statistics for individual zram devices are exported through sysfs nodes at /sys/block/zram<id>/ -* Usage +Usage +===== There are several ways to configure and manage zram device(-s): + a) using zram and zram_control sysfs attributes b) using zramctl utility, provided by util-linux (util-linux(a)vger.kernel.org). @@ -22,7 +26,7 @@ In this document we will describe only 'manual' zram configuration steps, IOW, zram and zram_control sysfs attributes. In order to get a better idea about zramctl please consult util-linux -documentation, zramctl man-page or `zramctl --help'. Please be informed +documentation, zramctl man-page or `zramctl --help`. Please be informed that zram maintainers do not develop/maintain util-linux or zramctl, should you have any questions please contact util-linux(a)vger.kernel.org @@ -30,19 +34,23 @@ Following shows a typical sequence of steps for using zram. WARNING ======= + For the sake of simplicity we skip error checking parts in most of the examples below. However, it is your sole responsibility to handle errors. zram sysfs attributes always return negative values in case of errors. The list of possible return codes: --EBUSY -- an attempt to modify an attribute that cannot be changed once -the device has been initialised. Please reset device first; --ENOMEM -- zram was not able to allocate enough memory to fulfil your -needs; --EINVAL -- invalid input has been provided. + +======== ============================================================= +-EBUSY an attempt to modify an attribute that cannot be changed once + the device has been initialised. Please reset device first; +-ENOMEM zram was not able to allocate enough memory to fulfil your + needs; +-EINVAL invalid input has been provided. +======== ============================================================= If you use 'echo', the returned value that is changed by 'echo' utility, -and, in general case, something like: +and, in general case, something like:: echo 3 > /sys/block/zram0/max_comp_streams if [ $? -ne 0 ]; @@ -51,7 +59,11 @@ and, in general case, something like: should suffice. -1) Load Module: +1) Load Module +============== + +:: + modprobe zram num_devices=4 This creates 4 devices: /dev/zram{0,1,2,3} @@ -59,6 +71,8 @@ num_devices parameter is optional and tells zram how many devices should be pre-created. Default: 1. 2) Set max number of compression streams +======================================== + Regardless the value passed to this attribute, ZRAM will always allocate multiple compression streams - one per online CPUs - thus allowing several concurrent compression operations. The number of @@ -66,16 +80,20 @@ allocated compression streams goes down when some of the CPUs become offline. There is no single-compression-stream mode anymore, unless you are running a UP system or has only 1 CPU online. -To find out how many streams are currently available: +To find out how many streams are currently available:: + cat /sys/block/zram0/max_comp_streams 3) Select compression algorithm +=============================== + Using comp_algorithm device attribute one can see available and currently selected (shown in square brackets) compression algorithms, change selected compression algorithm (once the device is initialised there is no way to change compression algorithm). -Examples: +Examples:: + #show supported compression algorithms cat /sys/block/zram0/comp_algorithm lzo [lz4] @@ -83,20 +101,23 @@ Examples: #select lzo compression algorithm echo lzo > /sys/block/zram0/comp_algorithm -For the time being, the `comp_algorithm' content does not necessarily +For the time being, the `comp_algorithm` content does not necessarily show every compression algorithm supported by the kernel. We keep this list primarily to simplify device configuration and one can configure a new device with a compression algorithm that is not listed in -`comp_algorithm'. The thing is that, internally, ZRAM uses Crypto API +`comp_algorithm`. The thing is that, internally, ZRAM uses Crypto API and, if some of the algorithms were built as modules, it's impossible to list all of them using, for instance, /proc/crypto or any other method. This, however, has an advantage of permitting the usage of custom crypto compression modules (implementing S/W or H/W compression). 4) Set Disksize +=============== + Set disk size by writing the value to sysfs node 'disksize'. The value can be either in bytes or you can use mem suffixes. -Examples: +Examples:: + # Initialize /dev/zram0 with 50MB disksize echo $((50*1024*1024)) > /sys/block/zram0/disksize @@ -111,10 +132,13 @@ since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the size of the disk when not in use so a huge zram is wasteful. 5) Set memory limit: Optional +============================= + Set memory limit by writing the value to sysfs node 'mem_limit'. The value can be either in bytes or you can use mem suffixes. In addition, you could change the value in runtime. -Examples: +Examples:: + # limit /dev/zram0 with 50MB memory echo $((50*1024*1024)) > /sys/block/zram0/mem_limit @@ -126,7 +150,11 @@ Examples: # To disable memory limit echo 0 > /sys/block/zram0/mem_limit -6) Activate: +6) Activate +=========== + +:: + mkswap /dev/zram0 swapon /dev/zram0 @@ -134,6 +162,7 @@ Examples: mount /dev/zram1 /tmp 7) Add/remove zram devices +========================== zram provides a control interface, which enables dynamic (on-demand) device addition and removal. @@ -142,37 +171,44 @@ In order to add a new /dev/zramX device, perform read operation on hot_add attribute. This will return either new device's device id (meaning that you can use /dev/zram<id>) or error code. -Example: +Example:: + cat /sys/class/zram-control/hot_add 1 To remove the existing /dev/zramX device (where X is a device id) -execute +execute:: + echo X > /sys/class/zram-control/hot_remove -8) Stats: +8) Stats +======== + Per-device statistics are exported as various nodes under /sys/block/zram<id>/ A brief description of exported device attributes. For more details please read Documentation/ABI/testing/sysfs-block-zram. +====================== ====== =============================================== Name access description ----- ------ ----------- +====================== ====== =============================================== disksize RW show and set the device's disk size initstate RO shows the initialization state of the device reset WO trigger device reset -mem_used_max WO reset the `mem_used_max' counter (see later) -mem_limit WO specifies the maximum amount of memory ZRAM can use - to store the compressed data -writeback_limit WO specifies the maximum amount of write IO zram can - write out to backing device as 4KB unit +mem_used_max WO reset the `mem_used_max` counter (see later) +mem_limit WO specifies the maximum amount of memory ZRAM can + use to store the compressed data +writeback_limit WO specifies the maximum amount of write IO zram + can write out to backing device as 4KB unit writeback_limit_enable RW show and set writeback_limit feature -max_comp_streams RW the number of possible concurrent compress operations +max_comp_streams RW the number of possible concurrent compress + operations comp_algorithm RW show and change the compression algorithm compact WO trigger memory compaction debug_stat RO this file is used for zram debugging purposes backing_dev RW set up backend storage for zram to write out idle WO mark allocated slot as idle +====================== ====== =============================================== User space is advised to use the following files to read the device statistics. @@ -188,23 +224,31 @@ The stat file represents device's I/O statistics not accounted by block layer and, thus, not available in zram<id>/stat file. It consists of a single line of text and contains the following stats separated by whitespace: - failed_reads the number of failed reads - failed_writes the number of failed writes - invalid_io the number of non-page-size-aligned I/O requests + + ============= ============================================================= + failed_reads The number of failed reads + failed_writes The number of failed writes + invalid_io The number of non-page-size-aligned I/O requests notify_free Depending on device usage scenario it may account + a) the number of pages freed because of swap slot free - notifications or b) the number of pages freed because of - REQ_OP_DISCARD requests sent by bio. The former ones are - sent to a swap block device when a swap slot is freed, - which implies that this disk is being used as a swap disk. + notifications + b) the number of pages freed because of + REQ_OP_DISCARD requests sent by bio. The former ones are + sent to a swap block device when a swap slot is freed, + which implies that this disk is being used as a swap disk. + The latter ones are sent by filesystem mounted with discard option, whenever some data blocks are getting discarded. + ============= ============================================================= File /sys/block/zram<id>/mm_stat The stat file represents device's mm statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ================ ============================================================= orig_data_size uncompressed size of data stored in this disk. This excludes same-element-filled pages (same_pages) since no memory is allocated for them. @@ -223,58 +267,71 @@ line of text and contains the following stats separated by whitespace: No memory is allocated for such pages. pages_compacted the number of pages freed during compaction huge_pages the number of incompressible pages + ================ ============================================================= File /sys/block/zram<id>/bd_stat The stat file represents device's backing device statistics. It consists of a single line of text and contains the following stats separated by whitespace: + + ============== ============================================================= bd_count size of data written in backing device. Unit: 4K bytes bd_reads the number of reads from backing device Unit: 4K bytes bd_writes the number of writes to backing device Unit: 4K bytes + ============== ============================================================= + +9) Deactivate +============= + +:: -9) Deactivate: swapoff /dev/zram0 umount /dev/zram1 -10) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset +10) Reset +========= + + Write any positive value to 'reset' sysfs node:: + + echo 1 > /sys/block/zram0/reset + echo 1 > /sys/block/zram1/reset This frees all the memory allocated for the given device and resets the disksize to zero. You must set the disksize again before reusing the device. -* Optional Feature +Optional Feature +================ -= writeback +writeback +--------- With CONFIG_ZRAM_WRITEBACK, zram can write idle/incompressible page to backing storage rather than keeping it in memory. -To use the feature, admin should set up backing device via +To use the feature, admin should set up backing device via:: - "echo /dev/sda5 > /sys/block/zramX/backing_dev" + echo /dev/sda5 > /sys/block/zramX/backing_dev before disksize setting. It supports only partition at this moment. -If admin want to use incompressible page writeback, they could do via +If admin want to use incompressible page writeback, they could do via:: - "echo huge > /sys/block/zramX/write" + echo huge > /sys/block/zramX/write To use idle page writeback, first, user need to declare zram pages -as idle. +as idle:: - "echo all > /sys/block/zramX/idle" + echo all > /sys/block/zramX/idle From now on, any pages on zram are idle pages. The idle mark will be removed until someone request access of the block. IOW, unless there is access request, those pages are still idle pages. -Admin can request writeback of those idle pages at right timing via +Admin can request writeback of those idle pages at right timing via:: - "echo idle > /sys/block/zramX/writeback" + echo idle > /sys/block/zramX/writeback With the command, zram writeback idle pages from memory to the storage. @@ -285,7 +342,7 @@ to guarantee storage health for entire product life. To overcome the concern, zram supports "writeback_limit" feature. The "writeback_limit_enable"'s default value is 0 so that it doesn't limit any writeback. IOW, if admin want to apply writeback budget, he should -enable writeback_limit_enable via +enable writeback_limit_enable via:: $ echo 1 > /sys/block/zramX/writeback_limit_enable @@ -296,7 +353,7 @@ until admin set the budget via /sys/block/zramX/writeback_limit. assigned via /sys/block/zramX/writeback_limit is meaninless.) If admin want to limit writeback as per-day 400M, he could do it -like below. +like below:: $ MB_SHIFT=20 $ 4K_SHIFT=12 @@ -305,16 +362,16 @@ like below. $ echo 1 > /sys/block/zram0/writeback_limit_enable If admin want to allow further write again once the bugdet is exausted, -he could do it like below +he could do it like below:: $ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \ /sys/block/zram0/writeback_limit -If admin want to see remaining writeback budget since he set, +If admin want to see remaining writeback budget since he set:: $ cat /sys/block/zramX/writeback_limit -If admin want to disable writeback limit, he could do +If admin want to disable writeback limit, he could do:: $ echo 0 > /sys/block/zramX/writeback_limit_enable @@ -326,25 +383,35 @@ budget in next setting is user's job. If admin want to measure writeback count in a certain period, he could know it via /sys/block/zram0/bd_stat's 3rd column. -= memory tracking +memory tracking +=============== With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the zram block. It could be useful to catch cold or incompressible pages of the process with*pagemap. + If you enable the feature, you could see block state via -/sys/kernel/debug/zram/zram0/block_state". The output is as follows, +/sys/kernel/debug/zram/zram0/block_state". The output is as follows:: 300 75.033841 .wh. 301 63.806904 s... 302 63.806919 ..hi -First column is zram's block index. -Second column is access time since the system was booted -Third column is state of the block. -(s: same page -w: written page to backing store -h: huge page -i: idle page) +First column + zram's block index. +Second column + access time since the system was booted +Third column + state of the block: + + s: + same page + w: + written page to backing store + h: + huge page + i: + idle page First line of above example says 300th block is accessed at 75.033841sec and the block's state is huge so it is written back to the backing diff --git a/MAINTAINERS b/MAINTAINERS index b2254bc8e495..163327d6a856 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10961,7 +10961,7 @@ M: Josef Bacik <josef(a)toxicpanda.com> S: Maintained L: linux-block(a)vger.kernel.org L: nbd(a)other.debian.org -F: Documentation/blockdev/nbd.txt +F: Documentation/blockdev/nbd.rst F: drivers/block/nbd.c F: include/trace/events/nbd.h F: include/uapi/linux/nbd.h @@ -11963,7 +11963,7 @@ PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES M: Tim Waugh <tim(a)cyberelk.net> L: linux-parport(a)lists.infradead.org (subscribers-only) S: Maintained -F: Documentation/blockdev/paride.txt +F: Documentation/blockdev/paride.rst F: drivers/block/paride/ PARISC ARCHITECTURE @@ -13245,7 +13245,7 @@ F: drivers/net/wireless/ralink/rt2x00/ RAMDISK RAM BLOCK DEVICE DRIVER M: Jens Axboe <axboe(a)kernel.dk> S: Maintained -F: Documentation/blockdev/ramdisk.txt +F: Documentation/blockdev/ramdisk.rst F: drivers/block/brd.c RANCHU VIRTUAL BOARD FOR MIPS @@ -17584,7 +17584,7 @@ R: Sergey Senozhatsky <sergey.senozhatsky.work(a)gmail.com> L: linux-kernel(a)vger.kernel.org S: Maintained F: drivers/block/zram/ -F: Documentation/blockdev/zram.txt +F: Documentation/blockdev/zram.rst ZS DECSTATION Z85C30 SERIAL DRIVER M: "Maciej W. Rozycki" <macro(a)linux-mips.org> diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 96ec7e0fc1ea..c43690b973d8 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -31,7 +31,7 @@ config BLK_DEV_FD If you want to use the floppy disk drive(s) of your PC under Linux, say Y. Information about this driver, especially important for IBM Thinkpad users, is contained in - <file:Documentation/blockdev/floppy.txt>. + <file:Documentation/blockdev/floppy.rst>. That file also contains the location of the Floppy driver FAQ as well as location of the fdutils package used to configure additional parameters of the driver at run time. @@ -96,7 +96,7 @@ config PARIDE your computer's parallel port. Most of them are actually IDE devices using a parallel port IDE adapter. This option enables the PARIDE subsystem which contains drivers for many of these external drives. - Read <file:Documentation/blockdev/paride.txt> for more information. + Read <file:Documentation/blockdev/paride.rst> for more information. If you have said Y to the "Parallel-port support" configuration option, you may share a single port between your printer and other @@ -261,7 +261,7 @@ config BLK_DEV_NBD userland (making server and client physically the same computer, communicating using the loopback network device). - Read <file:Documentation/blockdev/nbd.txt> for more information, + Read <file:Documentation/blockdev/nbd.rst> for more information, especially about where to find the server code, which runs in user space and does not need special kernel support. @@ -303,7 +303,7 @@ config BLK_DEV_RAM during the initial install of Linux. Note that the kernel command line option "ramdisk=XX" is now obsolete. - For details, read <file:Documentation/blockdev/ramdisk.txt>. + For details, read <file:Documentation/blockdev/ramdisk.rst>. To compile this driver as a module, choose M here: the module will be called brd. An alias "rd" has been defined diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9fb9b312ab6b..af02ca97dcd6 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4424,7 +4424,7 @@ static int __init floppy_setup(char *str) pr_cont("\n"); } else DPRINT("botched floppy option\n"); - DPRINT("Read Documentation/blockdev/floppy.txt\n"); + DPRINT("Read Documentation/blockdev/floppy.rst\n"); return 0; } diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 1ffc64770643..e06b99d54816 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -12,7 +12,7 @@ config ZRAM It has several use cases, for example: /tmp storage, use as swap disks and maybe many more. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. config ZRAM_WRITEBACK bool "Write back incompressible or idle page to backing device" @@ -26,7 +26,7 @@ config ZRAM_WRITEBACK With /sys/block/zramX/{idle,writeback}, application could ask idle page's writeback to the backing device to save in memory. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. config ZRAM_MEMORY_TRACKING bool "Track zRam block status" @@ -36,4 +36,4 @@ config ZRAM_MEMORY_TRACKING of zRAM. Admin could see the information via /sys/kernel/debug/zram/zramX/block_state. - See Documentation/blockdev/zram.txt for more information. + See Documentation/blockdev/zram.rst for more information. diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README index 7972cc512408..5fa378391d3b 100644 --- a/tools/testing/selftests/zram/README +++ b/tools/testing/selftests/zram/README @@ -37,4 +37,4 @@ Commands required for testing: - mkfs/ mkfs.ext4 For more information please refer: -kernel-source-tree/Documentation/blockdev/zram.txt +kernel-source-tree/Documentation/blockdev/zram.rst -- 2.21.0

6 years

1
0
0 0

kernel/workqueue.c:3030 __flush_work+0x2c2/0x2d0

by Naresh Kamboju

Kernel warning while running kernel selftest bpf test_sockmap test case on x86_64 and arm64. The kernel warning log pops up continuously. Linux version 5.1.10-rc2 Steps to reproduce: Boot stable rc 5.1.10-rc2 kernel on x86_64 or arm64 cd selftests/bpf ./test_sockmap [ 37.600406] WARNING: CPU: 3 PID: 57 at /usr/src/kernel/kernel/workqueue.c:3030 __flush_work+0x2c2/0x2d0 [ 37.610034] Modules linked in: x86_pkg_temp_thermal fuse [ 37.615371] CPU: 3 PID: 57 Comm: kworker/3:1 Not tainted 5.1.10-rc2 #1 [ 37.615454] WARNING: CPU: 0 PID: 5 at /usr/src/kernel/kernel/workqueue.c:3030 __flush_work+0x2c2/0x2d0 [ 37.621892] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS 2.0b 07/27/2017 [ 37.621895] Workqueue: events sk_psock_destroy_deferred [ 37.631183] Modules linked in: x86_pkg_temp_thermal fuse [ 37.638654] RIP: 0010:__flush_work+0x2c2/0x2d0 [ 37.638655] Code: c6 00 31 c0 e9 71 ff ff ff 41 8b 0c 24 49 8b 54 24 08 83 e1 08 49 0f ba 2c 24 03 80 c9 f0 e9 d2 fe ff ff 0f 0b e9 50 ff ff ff <0f> 0b 31 c0 e9 47 ff ff ff e8 90 9d fd ff 0f 1f 44 00 00 55 31 f6 [ 37.643879] CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.1.10-rc2 #1 [ 37.643880] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS 2.0b 07/27/2017 [ 37.649183] RSP: 0018:ffffb038c1a23ca0 EFLAGS: 00010246 [ 37.653630] Workqueue: events sk_psock_destroy_deferred [ 37.672375] RAX: 0000000000000000 RBX: ffff9e73d9492068 RCX: 0000000000000006 [ 37.672376] RDX: 0000000000000006 RSI: 0000000000000001 RDI: ffff9e73d9492068 [ 37.678805] RIP: 0010:__flush_work+0x2c2/0x2d0 [ 37.678807] Code: c6 00 31 c0 e9 71 ff ff ff 41 8b 0c 24 49 8b 54 24 08 83 e1 08 49 0f ba 2c 24 03 80 c9 f0 e9 d2 fe ff ff 0f 0b e9 50 ff ff ff <0f> 0b 31 c0 e9 47 ff ff ff e8 90 9d fd ff 0f 1f 44 00 00 55 31 f6 [ 37.686274] RBP: ffffb038c1a23d68 R08: 0000000000000000 R09: 0000000000000000 [ 37.686275] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9e73d9492068 [ 37.691494] RSP: 0018:ffffb038c18fbca0 EFLAGS: 00010246 [ 37.696720] R13: 0000000000000001 R14: ffffb038c1a23d98 R15: ffffffff9a490d40 [ 37.696721] FS: 0000000000000000(0000) GS:ffff9e73dfb80000(0000) knlGS:0000000000000000 [ 37.703851] RAX: 0000000000000000 RBX: ffff9e73d9490868 RCX: 0000000000000006 [ 37.703852] RDX: 0000000000000006 RSI: 0000000000000001 RDI: ffff9e73d9490868 [ 37.710976] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 37.710977] CR2: 00007f38680ca8a0 CR3: 00000002ee614006 CR4: 00000000003606e0 [ 37.715419] RBP: ffffb038c18fbd68 R08: 0000000000000000 R09: 0000000000000000 [ 37.715420] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9e73d9490868 [ 37.734156] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 37.734157] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 37.741282] R13: 0000000000000001 R14: ffffb038c18fbd98 R15: ffffffff9a490d40 [ 37.741283] FS: 0000000000000000(0000) GS:ffff9e73dfa00000(0000) knlGS:0000000000000000 [ 37.748405] Call Trace: [ 37.748410] ? work_busy+0xc0/0xc0 [ 37.753621] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 37.753622] CR2: 00007f38680c9788 CR3: 000000045454a004 CR4: 00000000003606f0 [ 37.760746] ? mark_held_locks+0x4d/0x80 [ 37.768823] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 37.768824] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 37.775946] ? __cancel_work_timer+0x11a/0x1d0 [ 37.783071] Call Trace: [ 37.783075] ? work_busy+0xc0/0xc0 [ 37.788808] ? cancel_delayed_work_sync+0x13/0x20 [ 37.788810] ? lockdep_hardirqs_on+0xf6/0x190 [ 37.795934] ? mark_held_locks+0x4d/0x80 [ 37.803055] ? __cancel_work_timer+0x11a/0x1d0 [ 37.803057] ? work_busy+0xc0/0xc0 [ 37.810179] ? __cancel_work_timer+0x11a/0x1d0 [ 37.817303] __cancel_work_timer+0x134/0x1d0 [ 37.824453] ? cancel_delayed_work_sync+0x13/0x20 [ 37.824455] ? lockdep_hardirqs_on+0xf6/0x190 [ 37.831579] cancel_delayed_work_sync+0x13/0x20 [ 37.839654] ? __cancel_work_timer+0x11a/0x1d0 [ 37.839657] ? work_busy+0xc0/0xc0 [ 37.842100] strp_done+0x1c/0x50 [ 37.845497] __cancel_work_timer+0x134/0x1d0 [ 37.851242] sk_psock_destroy_deferred+0x34/0x1c0 [ 37.858372] cancel_delayed_work_sync+0x13/0x20 [ 37.862292] process_one_work+0x281/0x610 [ 37.869415] strp_done+0x1c/0x50 [ 37.876540] worker_thread+0x3c/0x3f0 [ 37.880975] sk_psock_destroy_deferred+0x34/0x1c0 [ 37.883419] ? __kthread_parkme+0x61/0x90 [ 37.886819] process_one_work+0x281/0x610 [ 37.891514] kthread+0x12c/0x150 [ 37.895868] worker_thread+0x3c/0x3f0 [ 37.899783] ? process_one_work+0x610/0x610 [ 37.904221] kthread+0x12c/0x150 [ 37.907615] ? kthread_park+0x90/0x90 [ 37.907618] ret_from_fork+0x3a/0x50 [ 37.912052] ? process_one_work+0x610/0x610 [ 37.916355] irq event stamp: 57860 [ 37.921058] ? kthread_park+0x90/0x90 [ 37.921060] ret_from_fork+0x3a/0x50 [ 37.925407] hardirqs last enabled at (57859): [<ffffffff9a4949ba>] __cancel_work_timer+0x11a/0x1d0 [ 37.925409] hardirqs last disabled at (57860): [<ffffffff9a401bab>] trace_hardirqs_off_thunk+0x1a/0x1c [ 37.929944] irq event stamp: 47474 [ 37.934378] softirqs last enabled at (57812): [<ffffffff9add14d5>] release_sock+0x85/0xb0 [ 37.934379] softirqs last disabled at (57810): [<ffffffff9add140a>] __release_sock+0xda/0x120 [ 37.937773] hardirqs last enabled at (47473): [<ffffffff9a4949ba>] __cancel_work_timer+0x11a/0x1d0 [ 37.937775] hardirqs last disabled at (47474): [<ffffffff9a401bab>] trace_hardirqs_off_thunk+0x1a/0x1c [ 37.940998] ---[ end trace ae349dc9a55c8bc8 ]--- [ 37.941056] WARNING: CPU: 3 PID: 57 at /usr/src/kernel/kernel/workqueue.c:3030 __flush_work+0x2c2/0x2d0 [ 37.945263] softirqs last enabled at (47440): [<ffffffff9add14d5>] release_sock+0x85/0xb0 [ 37.945264] softirqs last disabled at (47438): [<ffffffff9add140a>] __release_sock+0xda/0x120 [ 37.949968] Modules linked in: x86_pkg_temp_thermal fuse [ 37.954493] ---[ end trace ae349dc9a55c8bc9 ]--- [ 37.954522] WARNING: CPU: 0 PID: 5 at /usr/src/kernel/kernel/workqueue.c:3030 __flush_work+0x2c2/0x2d0 [...] metadata: git branch: linux-5.1.y git repo: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git git commit: b7eabc3862b8717f2bcc47f3f3830ec575423c8c git describe: v5.1.9-157-gb7eabc3862b8 make_kernelversion: 5.1.10-rc2 kernel-config: http://snapshots.linaro.org/openembedded/lkft/lkft/sumo/intel-corei7-64/lkf… kernel-defconfig: http://snapshots.linaro.org/openembedded/lkft/lkft/sumo/intel-corei7-64/lkf… build-url: https://ci.linaro.org/job/openembedded-lkft-linux-stable-rc-5.1/DISTRO=lkft… build-location: http://snapshots.linaro.org/openembedded/lkft/lkft/sumo/intel-corei7-64/lkf… toolchain: x86_64-linaro-linux 7.% series: lkft email-notification: '' kselftest__url: https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.1.tar.xz kselftest__version: '5.1' kselftest__revision: '5.1' Full test log, https://lkft.validation.linaro.org/scheduler/job/775857#L1114 Best regards Naresh Kamboju

6 years

1
1
0 0

Fixing Python 3 syntax errors in tpm2_tests.py

by Christian Clauss

Hello, As my very first contribution to the Linux Kernel, I would like to fix six Python 3 syntax errors in the file ./tools/testing/selftests/tpm2/tpm2_tests.py All six of these errors are of the same form: except ProtocolError, e: To fix the syntax errors, I propose to change the comma (,) to “as” like: except ProtocolError as e: These changes are important because the current form is compatible with Python 2 but is a syntax error in Python 3. The proposed form is compatible with both Python 2 and Python3. This conversion is required because Python 2 will reach its end of life in less than 200 days. The kernel contains at least five other files where I am able to detect Python 3 syntax errors but after studying in detail the process of making kernel modification, I believe that it is best to start with tpm2_tests.py because the changes are straightforward and uncontroversial — all issues are of the same form, and I can detect no other issues (such as undefined names) to fix in that file. If I succeed in getting the modifications to tpm2_tests.py through the review process then I can try the remaining files in turn. I would be interested to know if this is a worthwhile effort. Is there already another initiative to resolve these issue before yearend? Thanks for any advise that you can provide, Chris Clauss

6 years

2
1
0 0

[PATCH net-next v3 0/3] UDP GSO audit tests

by Fred Klassen

Updates to UDP GSO selftests ot optionally stress test CMSG subsytem, and report the reliability and performance of both TX Timestamping and ZEROCOPY messages. Fred Klassen (3): net/udpgso_bench_tx: options to exercise TX CMSG net/udpgso_bench.sh add UDP GSO audit tests net/udpgso_bench.sh test fails on error tools/testing/selftests/net/udpgso_bench.sh | 52 ++++- tools/testing/selftests/net/udpgso_bench_tx.c | 291 ++++++++++++++++++++++++-- 2 files changed, 327 insertions(+), 16 deletions(-) -- 2.11.0

6 years

3
5
0 0

stable-rc: ptrace: peeksiginfo failed on 4.19, 4.14, 4.9 and 4.4

by Naresh Kamboju

selftests: ptrace: peeksiginfo failed on x86_64, i386, arm64 and arm. FAILED on stable rc branches 4.19, 4.14, 4.9 and 4.4. PASS on mainline, next and 5.1 stable rc branch. Test output: ------------------ cd /opt/kselftests/mainline/ptrace ./peeksiginfo Error (peeksiginfo.c:143): Only 0 signals were read The git bisect show that below commit caused this test to fail. git bisect bad 5b6b0eac235ef1f915f24eda6d501a754022cbf0 is the first bad commit commit 5b6b0eac235ef1f915f24eda6d501a754022cbf0 Author: Eric W. Biederman <ebiederm(a)xmission.com> Date: Tue May 28 18:46:37 2019 -0500 signal/ptrace: Don't leak unitialized kernel memory with PTRACE_PEEK_SIGINFO commit f6e2aa91a46d2bc79fce9b93a988dbe7655c90c0 upstream. Recently syzbot in conjunction with KMSAN reported that ptrace_peek_siginfo can copy an uninitialized siginfo to userspace. Inspecting ptrace_peek_siginfo confirms this. The problem is that off when initialized from args.off can be initialized to a negaive value. At which point the "if (off >= 0)" test to see if off became negative fails because off started off negative. Prevent the core problem by adding a variable found that is only true if a siginfo is found and copied to a temporary in preparation for being copied to userspace. Prevent args.off from being truncated when being assigned to off by testing that off is <= the maximum possible value of off. Convert off to an unsigned long so that we should not have to truncate args.off, we have well defined overflow behavior so if we add another check we won't risk fighting undefined compiler behavior, and so that we have a type whose maximum value is easy to test for. Cc: Andrei Vagin <avagin(a)gmail.com> Cc: stable(a)vger.kernel.org Reported-by: syzbot+0d602a1b0d8c95bdf299(a)syzkaller.appspotmail.com Fixes: 84c751bd4aeb ("ptrace: add ability to retrieve signals without removing from a queue (v4)") Signed-off-by: "Eric W. Biederman" <ebiederm(a)xmission.com> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> :040000 040000 ff9f3109f210274d0b87851d226c35e7305ce44a b36de2c855fe2a0b332f145f0966dc1a0304d4bd M kernel Test case link, https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/too… Test output log link, https://lkft.validation.linaro.org/scheduler/job/777223#L1084 Test results comparison on different branches, https://qa-reports.linaro.org/_/comparetest/?project=22&project=6&project=5… Best regards Naresh Kamboju

6 years

4
3
0 0

BUG: kernel NULL pointer dereference, address: 00000000

by Naresh Kamboju

While running selftest bpf: test_sockmap the kernel BUG found on i386 and arm kernel running on Linux version 5.2.0-rc5-next-20190617 steps to reproduce, cd /opt/kselftests/default-in-kernel/bpf ./test_sockmap [ 33.666964] BUG: kernel NULL pointer dereference, address: 00000000 [ 33.673246] #PF: supervisor read access in kernel mode [ 33.678392] #PF: error_code(0x0000) - not-present page [ 33.683539] *pde = 00000000 [ 33.686435] Oops: 0000 [#1] SMP [ 33.689593] CPU: 1 PID: 619 Comm: test_sockmap Not tainted 5.2.0-rc5-next-20190617 #1 [ 33.697431] Hardware name: Supermicro SYS-5019S-ML/X11SSH-F, BIOS 2.0b 07/27/2017 [ 33.704914] EIP: memcpy+0x1d/0x30 [ 33.708240] Code: 59 58 eb 85 90 90 90 90 90 90 90 90 90 3e 8d 74 26 00 55 89 e5 57 56 89 c7 53 89 d6 89 cb c1 e9 02 f3 a5 89 d9 83 e1 03 74 02 <f3> a4 5b 5e 5f 5d c3 8d b6 00 00 00 00 8d bf 00 00 00 00 3e 8d 74 [ 33.726985] EAX: f1faf000 EBX: 00000001 ECX: 00000001 EDX: 00000000 [ 33.733249] ESI: 00000000 EDI: f1faf000 EBP: f2e6d99c ESP: f2e6d990 [ 33.739505] DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010202 [ 33.746283] CR0: 80050033 CR2: 00000000 CR3: 31fae000 CR4: 003406d0 [ 33.752542] DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 [ 33.758807] DR6: fffe0ff0 DR7: 00000400 [ 33.762638] Call Trace: [ 33.765084] bpf_msg_push_data+0x635/0x660 [ 33.769183] ? _raw_spin_unlock_irqrestore+0x2f/0x50 [ 33.774150] ? lockdep_hardirqs_on+0xec/0x1a0 [ 33.778512] ___bpf_prog_run+0xa0d/0x15a0 [ 33.782523] ? __lock_acquire+0x1fe/0x1ec0 [ 33.786621] __bpf_prog_run32+0x4b/0x70 [ 33.790462] ? sk_psock_msg_verdict+0x5/0x290 [ 33.794819] sk_psock_msg_verdict+0xad/0x290 [ 33.799091] ? sk_psock_msg_verdict+0xad/0x290 [ 33.803537] ? lockdep_hardirqs_on+0xec/0x1a0 [ 33.807887] ? __local_bh_enable_ip+0x78/0xf0 [ 33.812238] tcp_bpf_send_verdict+0x29c/0x3b0 [ 33.816590] tcp_bpf_sendpage+0x233/0x3d0 [ 33.820603] ? __lock_acquire+0x1fe/0x1ec0 [ 33.824703] ? __lock_acquire+0x1fe/0x1ec0 [ 33.828801] ? find_held_lock+0x27/0xa0 [ 33.832640] ? lock_release+0x92/0x290 [ 33.836392] ? find_get_entry+0x136/0x300 [ 33.840397] ? touch_atime+0x34/0xd0 [ 33.843978] ? copy_page_to_iter+0x245/0x400 [ 33.848248] ? lockdep_hardirqs_on+0xec/0x1a0 [ 33.852600] ? tcp_bpf_send_verdict+0x3b0/0x3b0 [ 33.857132] inet_sendpage+0x53/0x1f0 [ 33.860789] ? inet_recvmsg+0x1e0/0x1e0 [ 33.864620] ? kernel_sendpage+0x40/0x40 [ 33.868536] kernel_sendpage+0x1e/0x40 [ 33.872282] sock_sendpage+0x24/0x30 [ 33.875861] pipe_to_sendpage+0x59/0xa0 [ 33.879692] ? direct_splice_actor+0x40/0x40 [ 33.883962] __splice_from_pipe+0xde/0x1c0 [ 33.888055] ? direct_splice_actor+0x40/0x40 [ 33.892342] ? direct_splice_actor+0x40/0x40 [ 33.896635] splice_from_pipe+0x59/0x80 [ 33.900466] ? splice_from_pipe+0x80/0x80 [ 33.904469] ? generic_splice_sendpage+0x20/0x20 [ 33.909080] generic_splice_sendpage+0x18/0x20 [ 33.913516] ? direct_splice_actor+0x40/0x40 [ 33.917782] direct_splice_actor+0x2d/0x40 [ 33.921880] splice_direct_to_actor+0x127/0x240 [ 33.926403] ? generic_pipe_buf_nosteal+0x10/0x10 [ 33.931105] do_splice_direct+0x7e/0xc0 [ 33.934944] do_sendfile+0x20d/0x3e0 [ 33.938522] sys_sendfile+0xac/0xd0 [ 33.942015] do_fast_syscall_32+0x8e/0x320 [ 33.946114] entry_SYSENTER_32+0x70/0xc8 [ 33.950039] EIP: 0xb7fa67a1 [ 33.952830] Code: 8b 98 60 cd ff ff 85 d2 89 c8 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 1c 24 c3 8b 3c 24 c3 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 [ 33.971567] EAX: ffffffda EBX: 00000018 ECX: 0000001c EDX: 00000000 [ 33.977823] ESI: 00000001 EDI: 00000018 EBP: 00000001 ESP: bfcaa6d4 [ 33.984083] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000206 [ 33.990869] Modules linked in: x86_pkg_temp_thermal fuse [ 33.996181] CR2: 0000000000000000 [ 33.999500] ---[ end trace 0ef7a1496c65bde8 ]--- - Naresh

6 years

1
0
0 0

[PATCH v4 00/18] kunit: introduce KUnit, the Linux kernel unit testing framework

by Brendan Higgins

## TLDR A quick follow up to yesterday's revision. I got some feedback that I wanted to incorporate before anyone else read the update. For this reason, I will leave a TLDR of the biggest changes since v2. Biggest things to look out for (since v2): - KUnit core now outputs results in TAP14. - Heavily reworked tools/testing/kunit/kunit.py - Changed how parsing works. - Added testing. - Greg, Logan, you might want to re-review this. - Added documentation on how to use KUnit on non-UML kernels. You can see the docs rendered here[1]. There is still some discussion going on on the [PATCH v2 00/17] thread, but I wanted to get some of these updates out before they got too stale (and too difficult for me to keep track of). I hope no one minds. ## Background This patch set proposes KUnit, a lightweight unit testing and mocking framework for the Linux kernel. Unlike Autotest and kselftest, KUnit is a true unit testing framework; it does not require installing the kernel on a test machine or in a VM (however, KUnit still allows you to run tests on test machines or in VMs if you want) and does not require tests to be written in userspace running on a host kernel. Additionally, KUnit is fast: From invocation to completion KUnit can run several dozen tests in under a second. Currently, the entire KUnit test suite for KUnit runs in under a second from the initial invocation (build time excluded). KUnit is heavily inspired by JUnit, Python's unittest.mock, and Googletest/Googlemock for C++. KUnit provides facilities for defining unit test cases, grouping related test cases into test suites, providing common infrastructure for running tests, mocking, spying, and much more. ## What's so special about unit testing? A unit test is supposed to test a single unit of code in isolation, hence the name. There should be no dependencies outside the control of the test; this means no external dependencies, which makes tests orders of magnitudes faster. Likewise, since there are no external dependencies, there are no hoops to jump through to run the tests. Additionally, this makes unit tests deterministic: a failing unit test always indicates a problem. Finally, because unit tests necessarily have finer granularity, they are able to test all code paths easily solving the classic problem of difficulty in exercising error handling code. ## Is KUnit trying to replace other testing frameworks for the kernel? No. Most existing tests for the Linux kernel are end-to-end tests, which have their place. A well tested system has lots of unit tests, a reasonable number of integration tests, and some end-to-end tests. KUnit is just trying to address the unit test space which is currently not being addressed. ## More information on KUnit There is a bunch of documentation near the end of this patch set that describes how to use KUnit and best practices for writing unit tests. For convenience I am hosting the compiled docs here[2]. Additionally for convenience, I have applied these patches to a branch[3]. The repo may be cloned with: git clone https://kunit.googlesource.com/linux This patchset is on the kunit/rfc/v5.1/v4 branch. ## Changes Since Last Version As I mentioned above, there are a significant number of updates since v2: - Converted KUnit core to print test results in TAP14 format as suggested by Greg and Frank. - Heavily reworked tools/testing/kunit/kunit.py - Changed how parsing works. - Added testing. - Added documentation on how to use KUnit on non-UML kernels. You can see the docs rendered here[1]. - Added a new set of EXPECTs and ASSERTs for pointer comparison. - Removed more function indirection as suggested by Logan. - Added a new patch that adds `kunit_try_catch_throw` to objtool's noreturn list. - Fixed a number of minorish issues pointed out by Shuah, Masahiro, and kbuild bot. Nevertheless, there are only a couple of minor updates since v3: - Added more context to the changelog on the objtool patch, as per Peter's request. - Moved all KUnit documentation under the Documentation/dev-tools/ directory as per Jonathan's suggestion. [1] https://google.github.io/kunit-docs/third_party/kernel/docs/usage.html#kuni… [2] https://google.github.io/kunit-docs/third_party/kernel/docs/ [3] https://kunit.googlesource.com/linux/+/kunit/rfc/v5.1/v4 -- 2.21.0.1020.gf2820cf01a-goog

6 years

3
35
0 0

[PATCH] selftests/bpf: signedness bug in enable_all_controllers()

by Dan Carpenter

The "len" variable needs to be signed for the error handling to work properly. Fixes: 596092ef8bea ("selftests/bpf: enable all available cgroup v2 controllers") Signed-off-by: Dan Carpenter <dan.carpenter(a)oracle.com> --- tools/testing/selftests/bpf/cgroup_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 0d89f0396be4..e95c33e333a4 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -47,7 +47,7 @@ int enable_all_controllers(char *cgroup_path) char buf[PATH_MAX]; char *c, *c2; int fd, cfd; - size_t len; + ssize_t len; snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path); fd = open(path, O_RDONLY); -- 2.20.1

6 years

3
2
0 0

[PATCH AUTOSEL 4.19 20/39] selftests: vm: install test_vmalloc.sh for run_vmtests

by Sasha Levin

From: Naresh Kamboju <naresh.kamboju(a)linaro.org> [ Upstream commit bc2cce3f2ebcae02aa4bb29e3436bf75ee674c32 ] Add test_vmalloc.sh to TEST_FILES to make sure it gets installed for run_vmtests. Fixed below error: ./run_vmtests: line 217: ./test_vmalloc.sh: No such file or directory Tested with: make TARGETS=vm install INSTALL_PATH=$PWD/x Signed-off-by: Naresh Kamboju <naresh.kamboju(a)linaro.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/vm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index dc68340a6a96..2cf3dc49bd03 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -24,6 +24,8 @@ TEST_GEN_FILES += virtual_address_range TEST_PROGS := run_vmtests +TEST_FILES := test_vmalloc.sh + KSFT_KHDR_INSTALL := 1 include ../lib.mk -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 4.19 19/39] kselftest/cgroup: fix incorrect test_core skip

by Sasha Levin

From: Alex Shi <alex.shi(a)linux.alibaba.com> [ Upstream commit f97f3f8839eb9de5843066d80819884f7722c8c5 ] The test_core will skip the test_cgcore_no_internal_process_constraint_on_threads test case if the 'cpu' controller missing in root's subtree_control. In fact we need to set the 'cpu' in subtree_control, to make the testing meaningful. ./test_core ... ok 4 # skip test_cgcore_no_internal_process_constraint_on_threads ... Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index d78f1c5366d3..79053a4f4783 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -198,7 +198,7 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo char *parent = NULL, *child = NULL; if (cg_read_strstr(root, "cgroup.controllers", "cpu") || - cg_read_strstr(root, "cgroup.subtree_control", "cpu")) { + cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; } -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 4.19 18/39] kselftest/cgroup: fix unexpected testing failure on test_core

by Sasha Levin

From: Alex Shi <alex.shi(a)linux.alibaba.com> [ Upstream commit 00e38a5d753d7788852f81703db804a60a84c26e ] The cgroup testing relys on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, some test cases will be failed as following: $sudo ./test_core not ok 1 test_cgcore_internal_process_constraint ok 2 test_cgcore_top_down_constraint_enable not ok 3 test_cgcore_top_down_constraint_disable ... To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index be59f9c34ea2..d78f1c5366d3 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -376,6 +376,11 @@ int main(int argc, char *argv[]) if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 4.19 17/39] kselftest/cgroup: fix unexpected testing failure on test_memcontrol

by Sasha Levin

From: Alex Shi <alex.shi(a)linux.alibaba.com> [ Upstream commit f6131f28057d4fd8922599339e701a2504e0f23d ] The cgroup testing relies on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, all test cases will be failed as following: $ sudo ./test_memcontrol not ok 1 test_memcg_subtree_control not ok 2 test_memcg_current ok 3 # skip test_memcg_min not ok 4 test_memcg_low not ok 5 test_memcg_high not ok 6 test_memcg_max not ok 7 test_memcg_oom_events ok 8 # skip test_memcg_swap_max not ok 9 test_memcg_sock not ok 10 test_memcg_oom_group_leaf_events not ok 11 test_memcg_oom_group_parent_events not ok 12 test_memcg_oom_group_score_events To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Mike Rapoport <rppt(a)linux.vnet.ibm.com> Cc: Jay Kamat <jgkamat(a)fb.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_memcontrol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 6f339882a6ca..c19a97dd02d4 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -1205,6 +1205,10 @@ int main(int argc, char **argv) if (cg_read_strstr(root, "cgroup.controllers", "memory")) ksft_exit_skip("memory controller isn't available\n"); + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 40/59] selftests: set sysctl bc_forwarding properly in router_broadcast.sh

by Sasha Levin

From: Xin Long <lucien.xin(a)gmail.com> [ Upstream commit 67c0aaa1eaec60e9dab301012bdebe6726ae04bd ] sysctl setting bc_forwarding for $rp2 is needed when ping_test_from h2, otherwise the bc packets from $rp2 won't be forwarded. This patch is to add this setting for $rp2. Also, as ping_test_from does grep "$from" only, which could match some unexpected output, some test case doesn't really work, like: # ping_test_from $h2 198.51.200.255 198.51.200.2 PING 198.51.200.255 from 198.51.100.2 veth3: 56(84) bytes of data. 64 bytes from 198.51.100.1: icmp_seq=1 ttl=64 time=0.336 ms When doing grep $form (198.51.200.2), the output could still match. So change to grep "bytes from $from" instead. Fixes: 40f98b9af943 ("selftests: add a selftest for directed broadcast forwarding") Signed-off-by: Xin Long <lucien.xin(a)gmail.com> Signed-off-by: David S. Miller <davem(a)davemloft.net> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/net/forwarding/router_broadcast.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh index 9a678ece32b4..4eac0a06f451 100755 --- a/tools/testing/selftests/net/forwarding/router_broadcast.sh +++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh @@ -145,16 +145,19 @@ bc_forwarding_disable() { sysctl_set net.ipv4.conf.all.bc_forwarding 0 sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0 + sysctl_set net.ipv4.conf.$rp2.bc_forwarding 0 } bc_forwarding_enable() { sysctl_set net.ipv4.conf.all.bc_forwarding 1 sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1 + sysctl_set net.ipv4.conf.$rp2.bc_forwarding 1 } bc_forwarding_restore() { + sysctl_restore net.ipv4.conf.$rp2.bc_forwarding sysctl_restore net.ipv4.conf.$rp1.bc_forwarding sysctl_restore net.ipv4.conf.all.bc_forwarding } @@ -171,7 +174,7 @@ ping_test_from() log_info "ping $dip, expected reply from $from" ip vrf exec $(master_name_get $oif) \ $PING -I $oif $dip -c 10 -i 0.1 -w $PING_TIMEOUT -b 2>&1 \ - | grep $from &> /dev/null + | grep "bytes from $from" > /dev/null check_err_fail $fail $? } -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 32/59] selftests: vm: install test_vmalloc.sh for run_vmtests

by Sasha Levin

From: Naresh Kamboju <naresh.kamboju(a)linaro.org> [ Upstream commit bc2cce3f2ebcae02aa4bb29e3436bf75ee674c32 ] Add test_vmalloc.sh to TEST_FILES to make sure it gets installed for run_vmtests. Fixed below error: ./run_vmtests: line 217: ./test_vmalloc.sh: No such file or directory Tested with: make TARGETS=vm install INSTALL_PATH=$PWD/x Signed-off-by: Naresh Kamboju <naresh.kamboju(a)linaro.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/vm/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e13eb6cc8901..05306c58ff9f 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -25,6 +25,8 @@ TEST_GEN_FILES += virtual_address_range TEST_PROGS := run_vmtests +TEST_FILES := test_vmalloc.sh + KSFT_KHDR_INSTALL := 1 include ../lib.mk -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 31/59] userfaultfd: selftest: fix compiler warning

by Sasha Levin

From: Alakesh Haloi <alakesh.haloi(a)gmail.com> [ Upstream commit 98a13a8d253999cf25eb16d901c35fbd2a8455c4 ] Fixes following compiler warning userfaultfd.c: In function ‘usage’: userfaultfd.c:126:2: warning: format not a string literal and no format arguments [-Wformat-security] fprintf(stderr, examples); Signed-off-by: Alakesh Haloi <alakesh.haloi(a)gmail.com> Reviewed-by: Peter Xu <peterx(a)redhat.com> Reviewed-by: Mike Rapoport <rppt(a)linux.ibm.com> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/vm/userfaultfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 5d1db824f73a..b3e6497b080c 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -123,7 +123,7 @@ static void usage(void) fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb_shared, shmem\n\n"); fprintf(stderr, "Examples:\n\n"); - fprintf(stderr, examples); + fprintf(stderr, "%s", examples); exit(1); } -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 30/59] kselftest/cgroup: fix incorrect test_core skip

by Sasha Levin

From: Alex Shi <alex.shi(a)linux.alibaba.com> [ Upstream commit f97f3f8839eb9de5843066d80819884f7722c8c5 ] The test_core will skip the test_cgcore_no_internal_process_constraint_on_threads test case if the 'cpu' controller missing in root's subtree_control. In fact we need to set the 'cpu' in subtree_control, to make the testing meaningful. ./test_core ... ok 4 # skip test_cgcore_no_internal_process_constraint_on_threads ... Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index d78f1c5366d3..79053a4f4783 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -198,7 +198,7 @@ static int test_cgcore_no_internal_process_constraint_on_threads(const char *roo char *parent = NULL, *child = NULL; if (cg_read_strstr(root, "cgroup.controllers", "cpu") || - cg_read_strstr(root, "cgroup.subtree_control", "cpu")) { + cg_write(root, "cgroup.subtree_control", "+cpu")) { ret = KSFT_SKIP; goto cleanup; } -- 2.20.1

6 years

1
0
0 0

[PATCH AUTOSEL 5.1 29/59] kselftest/cgroup: fix unexpected testing failure on test_core

by Sasha Levin

From: Alex Shi <alex.shi(a)linux.alibaba.com> [ Upstream commit 00e38a5d753d7788852f81703db804a60a84c26e ] The cgroup testing relys on the root cgroup's subtree_control setting, If the 'memory' controller isn't set, some test cases will be failed as following: $sudo ./test_core not ok 1 test_cgcore_internal_process_constraint ok 2 test_cgcore_top_down_constraint_enable not ok 3 test_cgcore_top_down_constraint_disable ... To correct this unexpected failure, this patch write the 'memory' to subtree_control of root to get a right result. Signed-off-by: Alex Shi <alex.shi(a)linux.alibaba.com> Cc: Shuah Khan <shuah(a)kernel.org> Cc: Tejun Heo <tj(a)kernel.org> Cc: Roman Gushchin <guro(a)fb.com> Cc: Claudio Zumbo <claudioz(a)fb.com> Cc: Claudio <claudiozumbo(a)gmail.com> Cc: linux-kselftest(a)vger.kernel.org Cc: linux-kernel(a)vger.kernel.org Reviewed-by: Roman Gushchin <guro(a)fb.com> Acked-by: Tejun Heo <tj(a)kernel.org> Signed-off-by: Shuah Khan <skhan(a)linuxfoundation.org> Signed-off-by: Sasha Levin <sashal(a)kernel.org> --- tools/testing/selftests/cgroup/test_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index be59f9c34ea2..d78f1c5366d3 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -376,6 +376,11 @@ int main(int argc, char *argv[]) if (cg_find_unified_root(root, sizeof(root))) ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { switch (tests[i].fn(root)) { case KSFT_PASS: -- 2.20.1

6 years

1
0
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-kselftest-mirror