6.16-stable review patch. If anyone has any objections, please let me know.
------------------
From: Jocelyn Falempe jfalempe@redhat.com
[ Upstream commit 9af8f2b469c0438620832f3729a3c5c03853b56b ]
On 32bits ARM, u64 divided by a constant is not optimized to a multiply by inverse by the compiler [1]. So do the multiply by inverse explicitly for this architecture.
Link: https://github.com/llvm/llvm-project/issues/37280 [1] Reported-by: Andrei Lalaev andrey.lalaev@gmail.com Closes: https://lore.kernel.org/dri-devel/c0a2771c-f3f5-4d4c-aa82-d673b3c5cb46@gmail... Fixes: 675008f196ca ("drm/panic: Use a decimal fifo to avoid u64 by u64 divide") Reviewed-by: Alice Ryhl aliceryhl@google.com Signed-off-by: Jocelyn Falempe jfalempe@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/drm_panic_qr.rs | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 18492daae4b3..b9cc64458437 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -381,6 +381,26 @@ struct DecFifo { len: usize, }
+// On arm32 architecture, dividing an `u64` by a constant will generate a call +// to `__aeabi_uldivmod` which is not present in the kernel. +// So use the multiply by inverse method for this architecture. +fn div10(val: u64) -> u64 { + if cfg!(target_arch = "arm") { + let val_h = val >> 32; + let val_l = val & 0xFFFFFFFF; + let b_h: u64 = 0x66666666; + let b_l: u64 = 0x66666667; + + let tmp1 = val_h * b_l + ((val_l * b_l) >> 32); + let tmp2 = val_l * b_h + (tmp1 & 0xffffffff); + let tmp3 = val_h * b_h + (tmp1 >> 32) + (tmp2 >> 32); + + tmp3 >> 2 + } else { + val / 10 + } +} + impl DecFifo { fn push(&mut self, data: u64, len: usize) { let mut chunk = data; @@ -389,7 +409,7 @@ impl DecFifo { } for i in 0..len { self.decimals[i] = (chunk % 10) as u8; - chunk /= 10; + chunk = div10(chunk); } self.len += len; }