On Tue, May 24, 2011 at 11:45:04PM -0400, Nicolas Pitre wrote:
On Tue, 24 May 2011, Michael Hope wrote:
On Tue, May 24, 2011 at 10:33 AM, Michael Casadevall mcasadevall@ubuntu.com wrote:
On 05/19/2011 10:56 AM, David Gilbert wrote:
On 19 May 2011 16:49, Ken Werner ken@linux.vnet.ibm.com wrote:
On 05/19/2011 12:40 PM, David Rusling wrote:
Is this going to end up in a blueprint? This is the last loose end of the SMP / atomic memory operations work and I'd like to see it happen.
Hi,
Yep, there is one (kind of a skeleton) in place at: https://blueprints.launchpad.net/linaro-toolchain-misc/+spec/64-bit-sync-pri...
Which I'll be filling out in the next few days.
Dave
Is there a timeline for this feature? It's been requested by members of the ARM Server Club to have this implemented, and it's important that this makes it into the Ubuntu 11.10 release. Michael
Hi Michael. The topics for this planning cycle are listed here: https://wiki.linaro.org/Cycles/1111/TechnicalTopics/Toolchain
64-bit sync primitives are medium priority so they will be achieved in the next six months.
A draft of what is in whose queue is at: https://wiki.linaro.org/Cycles/1111/TechnicalTopics/Toolchain/Planning
The primitives are second in Dave's queue so should be started in the next three months.
FWIW, here's what the kernel part might look like, i.e. for compatibility with pre ARMv6k systems (beware, only compile tested):
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e8d8856..53830a7 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -383,7 +383,7 @@ ENDPROC(__pabt_svc) .endm .macro kuser_cmpxchg_check -#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) +#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) #ifndef CONFIG_MMU #warning "NPTL on non MMU needs fixing" #else @@ -392,7 +392,7 @@ ENDPROC(__pabt_svc) @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. cmp r2, #TASK_SIZE
- blhs kuser_cmpxchg_fixup
- blhs kuser_cmpxchg64_fixup
#endif #endif .endm @@ -797,6 +797,139 @@ __kuser_helper_start: /*
- Reference prototype:
- int __kernel_cmpxchg64(int64_t *oldval, int64_t *newval, int64_t *ptr)
- Input:
- r0 = pointer to oldval
- r1 = pointer to newval
- r2 = pointer to target value
- lr = return address
- Output:
- r0 = returned value (zero or non-zero)
- C flag = set if r0 == 0, clear if r0 != 0
- Clobbered:
- r3, flags
- Definition and user space usage example:
- typedef int (__kernel_cmpxchg64_t)(const int64_t *oldval,
const int64_t *newval,
volatile int64_t *ptr);
- #define __kernel_cmpxchg64 (*(__kernel_cmpxchg64_t *)0xffff0f60)
- Atomically store newval in *ptr if *ptr is equal to oldval for user space.
- Return zero if *ptr was changed or non-zero if no exchange happened.
- The C flag is also set if *ptr was changed to allow for assembly
- optimization in the calling code.
* Do not attempt to call this function unless __kernel_helper_version >= 5. *
- Notes:
- This routine already includes memory barriers as needed.
- Due to the length of some sequences, this spans 2 regular kuser
"slots" so 0xffff0f80 is not used as a valid entry point.
- */
+__kuser_cmpxchg64: @ 0xffff0f60
+#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
- /*
* Poor you. No fast solution possible...
* The kernel itself must perform the operation.
* A special ghost syscall is used for that (see traps.c).
*/
- stmfd sp!, {r7, lr}
- ldr r7, 1f @ it's 20 bits
- swi __ARM_NR_cmpxchg64
- ldmfd sp!, {r7, pc}
+1: .word __ARM_NR_cmpxchg64
+#elif defined(CONFIG_CPU_32v6K)
- stmfd sp!, {r4, r5, r6, r7}
- ldrd r4, r5, [r0] @ load old val
- ldrd r6, r7, [r1] @ load new val
- smp_dmb arm
+1: ldrexd r0, r1, [r2] @ load current val
- eors r3, r0, r4 @ compare with oldval (1)
- eoreqs r3, r1, r5 @ compare with oldval (2)
- strexdeq r3, r6, r7, [r2] @ store newval if eq
- teqeq r3, #1 @ success?
- beq 1b @ if no then retry
- smp_dmb arm
- rsbs r0, r3, #0 @ set returned val and C flag
- ldmfd sp!, {r4, r5, r6, r7}
- usr_ret lr
+#elif !defined(CONFIG_SMP)
+#ifdef CONFIG_MMU
- /*
* The only thing that can break atomicity in this cmpxchg64
* implementation is either an IRQ or a data abort exception
* causing another process/thread to be scheduled in the middle
* of the critical sequence. To prevent this, code is added to
* the IRQ and data abort exception handlers to set the pc back
* to the beginning of the critical section if it is found to be
* within that critical section (see kuser_cmpxchg64_fixup).
*/
- stmfd sp!, {r4, r5, r6, lr}
- ldmia r0, {r4, r5} @ load old val
- ldmia r1, {r6, lr} @ load new val
+1: ldmia r2, {r0, r1} @ load current val
- eors r3, r0, r4 @ compare with oldval (1)
- eoreqs r3, r1, r5 @ compare with oldval (2)
+2: stmeqia r2, {r6, lr} @ store newval if eq
- rsbs r0, r3, #0 @ set return val and C flag
- ldmfd sp!, {r4, r5, r6, pc}
- .text
+kuser_cmpxchg64_fixup:
- @ Called from kuser_cmpxchg_fixup.
- @ r2 = address of interrupted insn (must be preserved).
- @ sp = saved regs. r7 and r8 are clobbered.
- @ 1b = first critical insn, 2b = last critical insn.
- @ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b.
- mov r7, #0xffff0fff
- sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64)))
- subs r8, r2, r7
- rsbcss r8, r8, #(2b - 1b)
- strcs r7, [sp, #S_PC]
+#if __LINUX_ARM_ARCH__ < 6
- b kuser_cmpxchg32_fixup
Can we just have movcs pc,lr here, and put kuser_cmpxchg32_fixup immediately after?
This would allow us to skip the branch, and the initial "mov r7" in the kuser_cmpxchg32_fixup code.
+#else
- mov pc, lr
+#endif
- .previous
+#else +#warning "NPTL on non MMU needs fixing"
- mov r0, #-1
- adds r0, r0, #0
- usr_ret lr
+#endif
+#else +#error "incoherent kernel configuration" +#endif
- /* pad to next slot */
- .rept (16 - (. - __kuser_cmpxchg64)/4)
- .word 0
- .endr
- .align 5
+/*
- Reference prototype:
- void __kernel_memory_barrier(void)
- Input:
@@ -921,7 +1054,7 @@ __kuser_cmpxchg: @ 0xffff0fc0 usr_ret lr .text -kuser_cmpxchg_fixup: +kuser_cmpxchg32_fixup: @ Called from kuser_cmpxchg_check macro. @ r2 = address of interrupted insn (must be preserved). @ sp = saved regs. r7 and r8 are clobbered.
There's a fair amount of duplicated logic from the 32-bit case. Is it worth trying to merge the two?
Since we can reasonably expect there to be no additional cmpxchg helpers in the future, the answer may be "no"...
Cheers ---Dave