One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and access-size restrictions than system memory. Use memremap, as the phram driver operates on RAM.
This fixes an unaligned access on ARM64, which could be triggered with e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1
Unable to handle kernel paging request at virtual address ffffffc01208bfbf Mem abort info: ESR = 0x96000021 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000021 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000 [ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, pmd=0000000100b43003, pte=0068000022221717 Internal error: Oops: 96000021 [#1] PREEMPT SMP CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1 Hardware name: AXM56xx Victoria (DT) pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) pc : __memcpy+0x168/0x230 lr : phram_read+0x68/0xb0 [phram] sp : ffffffc0138f3bd0 x29: ffffffc0138f3bd0 x28: 0000000034a50090 x27: 0000000000000000 x26: ffffff81176ce000 x25: 0000000000000000 x24: 0000000000000000 x23: ffffffc0138f3cb8 x22: ffffff8109475000 x21: 0000000000000000 x20: ffffff81176ce000 x19: 0000000000001fff x18: 0000000000000020 x17: 0000000000000000 x16: 0000000000000000 x15: ffffff8125861410 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffffff81176cffff x4 : ffffffc01208bfff x3 : ffffff81176cff80 x2 : ffffffffffffffef x1 : ffffffc01208bfc0 x0 : ffffff81176ce000 Call trace: __memcpy+0x168/0x230 mtd_read_oob_std+0x80/0x90 mtd_read_oob+0x8c/0x150 mtd_read+0x54/0x80 mtdchar_read+0xdc/0x2c0 vfs_read+0xb8/0x1e4 ksys_read+0x78/0x10c __arm64_sys_read+0x28/0x34 do_el0_svc+0x94/0x1f0 el0_svc+0x20/0x30 el0_sync_handler+0x1a4/0x1c0 el0_sync+0x180/0x1c0 Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e) ---[ end trace 5707221d643416b6 ]---
Signed-off-by: Petr Malat oss@malat.biz --- drivers/mtd/devices/phram.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index d503821a3e60..25d3674b4e51 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -83,7 +83,7 @@ static void unregister_devices(void)
list_for_each_entry_safe(this, safe, &phram_list, list) { mtd_device_unregister(&this->mtd); - iounmap(this->mtd.priv); + memunmap(this->mtd.priv); kfree(this->mtd.name); kfree(this); } @@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e goto out0;
ret = -EIO; - new->mtd.priv = ioremap(start, len); + new->mtd.priv = memremap(start, len, MEMREMAP_WB); if (!new->mtd.priv) { - pr_err("ioremap failed\n"); + pr_err("memremap failed\n"); goto out1; }
@@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e return 0;
out2: - iounmap(new->mtd.priv); + memunmap(new->mtd.priv); out1: kfree(new); out0:
From: Petr Malat
Sent: 23 May 2022 15:28
One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and size access restriction than the system memory. Use memremap as phram driver operates on RAM.
Does that actually help? The memcpy() is still likely to issue unaligned accesses that the hardware can't handle.
David
This fixes an unaligned access on ARM64, which could be triggered with e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1
Unable to handle kernel paging request at virtual address ffffffc01208bfbf Mem abort info: ESR = 0x96000021 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000021 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000 [ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, pmd=0000000100b43003, pte=0068000022221717 Internal error: Oops: 96000021 [#1] PREEMPT SMP CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1 Hardware name: AXM56xx Victoria (DT) pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) pc : __memcpy+0x168/0x230 lr : phram_read+0x68/0xb0 [phram] sp : ffffffc0138f3bd0 x29: ffffffc0138f3bd0 x28: 0000000034a50090 x27: 0000000000000000 x26: ffffff81176ce000 x25: 0000000000000000 x24: 0000000000000000 x23: ffffffc0138f3cb8 x22: ffffff8109475000 x21: 0000000000000000 x20: ffffff81176ce000 x19: 0000000000001fff x18: 0000000000000020 x17: 0000000000000000 x16: 0000000000000000 x15: ffffff8125861410 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffffff81176cffff x4 : ffffffc01208bfff x3 : ffffff81176cff80 x2 : ffffffffffffffef x1 : ffffffc01208bfc0 x0 : ffffff81176ce000 Call trace: __memcpy+0x168/0x230 mtd_read_oob_std+0x80/0x90 mtd_read_oob+0x8c/0x150 mtd_read+0x54/0x80 mtdchar_read+0xdc/0x2c0 vfs_read+0xb8/0x1e4 ksys_read+0x78/0x10c __arm64_sys_read+0x28/0x34 do_el0_svc+0x94/0x1f0 el0_svc+0x20/0x30 el0_sync_handler+0x1a4/0x1c0 el0_sync+0x180/0x1c0 Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e) ---[ end trace 5707221d643416b6 ]---
Signed-off-by: Petr Malat oss@malat.biz
drivers/mtd/devices/phram.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index d503821a3e60..25d3674b4e51 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -83,7 +83,7 @@ static void unregister_devices(void)
list_for_each_entry_safe(this, safe, &phram_list, list) { mtd_device_unregister(&this->mtd);
- iounmap(this->mtd.priv);
+ memunmap(this->mtd.priv);
kfree(this->mtd.name); kfree(this); }
@@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e goto out0;
ret = -EIO;
- new->mtd.priv = ioremap(start, len);
+ new->mtd.priv = memremap(start, len, MEMREMAP_WB);
if (!new->mtd.priv) {
- pr_err("ioremap failed\n");
+ pr_err("memremap failed\n");
goto out1; }
@@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e return 0;
out2:
- iounmap(new->mtd.priv);
+ memunmap(new->mtd.priv);
out1: kfree(new); out0: -- 2.30.2
- Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
Hi!
On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote:
From: Petr Malat
Sent: 23 May 2022 15:28
One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and size access restriction than the system memory. Use memremap as phram driver operates on RAM.
Does that actually help? The memcpy() is still likely to issue unaligned accesses that the hardware can't handle.
Yes, it solves the issue. Memcpy can cause unaligned accesses only on platforms that can handle them. And on ARM64 they are handled only for RAM and not for device memory (__pgprot(PROT_DEVICE_*)). Petr
From: Petr Malat
Sent: 23 May 2022 16:28
Hi!
On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote:
From: Petr Malat
Sent: 23 May 2022 15:28
One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and size access restriction than the system memory. Use memremap as phram driver operates on RAM.
Does that actually help? The memcpy() is still likely to issue unaligned accesses that the hardware can't handle.
Yes, it solves the issue. Memcpy can cause unaligned access only on platforms, which can handle it. And on ARM64 it's handled only for RAM and not for a device memory (__pgprot(PROT_DEVICE_*)).
Does mapping it as memory cause it to be cached? So the hardware only sees cache line reads (which are aligned) and the cpu support for misaligned memory accesses then stop the faults?
On x86 (which I know a lot more about) memcpy() has a nasty habit of getting implemented as 'rep movsb' relying on the cpu to speed it up. But that doesn't happen for uncached addresses - so you get very slow byte copies. OTOH misaligned PCIe transfers generate TLP that have the correct byte enables for the end words. Provided the PCIe target isn't broken they are fine.
David
- Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Mon, May 23, 2022 at 04:09:20PM +0000, David Laight wrote:
From: Petr Malat
Sent: 23 May 2022 16:28 On Mon, May 23, 2022 at 02:51:41PM +0000, David Laight wrote:
From: Petr Malat
Sent: 23 May 2022 15:28
One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and size access restriction than the system memory. Use memremap as phram driver operates on RAM.
Does that actually help? The memcpy() is still likely to issue unaligned accesses that the hardware can't handle.
Yes, it solves the issue. Memcpy can cause unaligned access only on platforms, which can handle it. And on ARM64 it's handled only for RAM and not for a device memory (__pgprot(PROT_DEVICE_*)).
Does mapping it as memory cause it to be cached? So the hardware only sees cache line reads (which are aligned) and the cpu support for misaligned memory accesses then stop the faults?
Yes, this is controlled by the MEMREMAP_WB flag, which sets up a mapping that "matches the default mapping for System RAM on the architecture. This is usually a read-allocate write-back cache."
On x86 (which I know a lot more about) memcpy() has a nasty habit of getting implemented as 'rep movsb' relying on the cpu to speed it up. But that doesn't happen for uncached addresses - so you get very slow byte copies. OTOH misaligned PCIe transfers generate TLP that have the correct byte enables for the end words. Provided the PCIe target isn't broken they are fine.
With memremap one should get the same behavior and performance as with the system memory and it seems to be a good choice for "Physical system RAM" MTD driver, but if one uses it for actual IO memory, he should use ioremap, memcpy_toio and memcpy_fromio. Using these prevents the crash on arm64 as well, but could lead to a performance degradation on some platforms.
If you think there could be users using the driver for a real IO mem, I can provide both behaviors and let the user choose with an option. Petr
Hi!
On Mon, May 23, 2022 at 04:09:20PM +0000, David Laight wrote:
On x86 (which I know a lot more about) memcpy() has a nasty habit of getting implemented as 'rep movsb' relying on the cpu to speed it up. But that doesn't happen for uncached addresses - so you get very slow byte copies.
I have measured the performance with (patched) and without (orig) my change. My change improves the performance on X86_64 and ARM. On MIPS64 it stays the same:
Tests
=====
All runtimes are in milliseconds, average real-time of 3 runs, time measured with bash time built-in. The measured process ran in SCHED_FIFO with priority 99. Page cache was flushed before every run, but all involved program images were in tmpfs (no swap).
- dd r512    dd if=/dev/TESTDEV of=/dev/null bs=512
- dd r1MB    dd if=/dev/TESTDEV of=/dev/null bs=1M
- dd w512    dd of=/dev/TESTDEV if=/tmpfs/img bs=512
- dd w1MB    dd of=/dev/TESTDEV if=/tmpfs/img bs=1M
- flashcp    flashcp /tmpfs/img /dev/TESTDEV
- flasherase flash_eraseall -q /dev/TESTDEV
Results
=======
All times are in ms

ARCH       |     MIPS64      |       ARM       |     X86_64
CPU        |   CN6335p2.2    |    v7 TI K2     |   Xeon D-1548
Dev. size  |      32MB       |      128MB      |      256MB
-----------+-------+---------+-------+---------+-------+---------
in ms      | Orig  | Patched | Orig  | Patched | Orig  | Patched
dd r512    |   131 |     130 |  1101 |     543 | 22906 |     281
dd r1MB    |    65 |      65 |   655 |     122 | 22715 |      70
dd w512    |  1150 |    1150 |  1136 |    1042 | 28067 |     412
dd w1MB    |   104 |     104 |   396 |     244 | 27761 |     122
flashcp    |   100 |      99 |  1438 |     568 | 78455 |     270
flasherase |    21 |      21 |   208 |      77 | 27707 |      57
BR, Petr
On Mon, May 23, 2022 at 04:28:25PM +0200, Petr Malat wrote:
One can't use memcpy on memory obtained by ioremap, because IO memory may have different alignment and size access restriction than the system memory. Use memremap as phram driver operates on RAM.
This fixes an unaligned access on ARM64, which could be triggered with e.g. dd if=/dev/phram/by-name/testdev bs=8190 count=1
Unable to handle kernel paging request at virtual address ffffffc01208bfbf Mem abort info: ESR = 0x96000021 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000021 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000000cd5000 [ffffffc01208bfbf] pgd=00000002fffff003, p4d=00000002fffff003, pud=00000002fffff003, pmd=0000000100b43003, pte=0068000022221717 Internal error: Oops: 96000021 [#1] PREEMPT SMP CPU: 2 PID: 14768 Comm: dd Tainted: G O 5.10.116-f13ddced70 #1 Hardware name: AXM56xx Victoria (DT) pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--) pc : __memcpy+0x168/0x230 lr : phram_read+0x68/0xb0 [phram] sp : ffffffc0138f3bd0 x29: ffffffc0138f3bd0 x28: 0000000034a50090 x27: 0000000000000000 x26: ffffff81176ce000 x25: 0000000000000000 x24: 0000000000000000 x23: ffffffc0138f3cb8 x22: ffffff8109475000 x21: 0000000000000000 x20: ffffff81176ce000 x19: 0000000000001fff x18: 0000000000000020 x17: 0000000000000000 x16: 0000000000000000 x15: ffffff8125861410 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffffff81176cffff x4 : ffffffc01208bfff x3 : ffffff81176cff80 x2 : ffffffffffffffef x1 : ffffffc01208bfc0 x0 : ffffff81176ce000 Call trace: __memcpy+0x168/0x230 mtd_read_oob_std+0x80/0x90 mtd_read_oob+0x8c/0x150 mtd_read+0x54/0x80 mtdchar_read+0xdc/0x2c0 vfs_read+0xb8/0x1e4 ksys_read+0x78/0x10c __arm64_sys_read+0x28/0x34 do_el0_svc+0x94/0x1f0 el0_svc+0x20/0x30 el0_sync_handler+0x1a4/0x1c0 el0_sync+0x180/0x1c0 Code: a984346c a9c4342c f1010042 54fffee8 (a97c3c8e) ---[ end trace 5707221d643416b6 ]---
Signed-off-by: Petr Malat oss@malat.biz
drivers/mtd/devices/phram.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index d503821a3e60..25d3674b4e51 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -83,7 +83,7 @@ static void unregister_devices(void) list_for_each_entry_safe(this, safe, &phram_list, list) { mtd_device_unregister(&this->mtd);
- iounmap(this->mtd.priv);
+ memunmap(this->mtd.priv);
kfree(this->mtd.name); kfree(this); }
@@ -99,9 +99,9 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e goto out0; ret = -EIO;
- new->mtd.priv = ioremap(start, len);
+ new->mtd.priv = memremap(start, len, MEMREMAP_WB);
if (!new->mtd.priv) {
- pr_err("ioremap failed\n");
+ pr_err("memremap failed\n");
goto out1; }
@@ -129,7 +129,7 @@ static int register_device(char *name, phys_addr_t start, size_t len, uint32_t e return 0; out2:
- iounmap(new->mtd.priv);
+ memunmap(new->mtd.priv);
out1: kfree(new); out0: -- 2.30.2
<formletter>
This is not the correct way to submit patches for inclusion in the stable kernel tree. Please read: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html for how to do this properly.
</formletter>
linux-stable-mirror@lists.linaro.org