On kernels since 5.14.0, ALSA playback to the FireWire RME Fireface 800 audio interface results in a deadlock involving snd_pcm_period_elapsed(), freezing the system.
On kernels 5.0.0 to 5.13.19 the interface works just fine, as it does with the RME driver.
Distributions tested: Ubuntu Manjaro Arch Fedora
FireWire chipsets tested: LSI FW643 TI XIO2213B
Platforms tested: Intel i5 4570 on AsRock H97 Pro4 Intel i5 12600K on MSI MS-7D25
The behavior was also observed on the RME forum: https://forum.rme-audio.de/viewtopic.php?pid=190472#p190472
Shortened traces of 6.10.0-rc7 (Arch linux-mainline):
RIP: 0010:tasklet_unlock_spin_wait (./arch/x86/include/asm/bitops.h:206 ./arch/x86/include/asm/bitops.h:238 ./include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/softirq.c:851) <NMI> ? watchdog_hardlockup_check.cold (kernel/watchdog.c:200) ? __perf_event_overflow (kernel/events/core.c:9737 (discriminator 2)) ? handle_pmi_common (arch/x86/events/intel/core.c:3061 (discriminator 1)) ? intel_pmu_handle_irq (./arch/x86/include/asm/paravirt.h:192 arch/x86/events/intel/core.c:2428 arch/x86/events/intel/core.c:3127) ? perf_event_nmi_handler (arch/x86/events/core.c:1744 arch/x86/events/core.c:1730) ? nmi_handle (arch/x86/kernel/nmi.c:151) ? default_do_nmi (arch/x86/kernel/nmi.c:352 (discriminator 61)) ? exc_nmi (arch/x86/kernel/nmi.c:546) ? end_repeat_nmi (arch/x86/entry/entry_64.S:1408) ? tasklet_unlock_spin_wait (./arch/x86/include/asm/bitops.h:206 ./arch/x86/include/asm/bitops.h:238 ./include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/softirq.c:851) ? tasklet_unlock_spin_wait (./arch/x86/include/asm/bitops.h:206 ./arch/x86/include/asm/bitops.h:238 ./include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/softirq.c:851) ? tasklet_unlock_spin_wait (./arch/x86/include/asm/bitops.h:206 ./arch/x86/include/asm/bitops.h:238 ./include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/softirq.c:851) </NMI> <TASK> ohci_flush_iso_completions (./include/linux/interrupt.h:740 drivers/firewire/ohci.c:3530) firewire_ohci amdtp_domain_stream_pcm_pointer (sound/firewire/amdtp-stream.c:1858) snd_firewire_lib snd_pcm_update_hw_ptr0 (sound/core/pcm_lib.c:304) snd_pcm snd_pcm_status64 (sound/core/pcm_native.c:1034) snd_pcm snd_pcm_status_user64 (./include/linux/uaccess.h:191 sound/core/pcm_native.c:1096) snd_pcm snd_pcm_ioctl (sound/core/pcm_native.c:3401 (discriminator 1)) snd_pcm __x64_sys_ioctl (fs/ioctl.c:51 fs/ioctl.c:907 fs/ioctl.c:893 fs/ioctl.c:893) do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) ? snd_pcm_status_user64 (sound/core/pcm_native.c:1096 (discriminator 1)) snd_pcm ? futex_wake (kernel/futex/waitwake.c:173) ? do_futex (kernel/futex/syscalls.c:107 (discriminator 1)) ? __x64_sys_futex (kernel/futex/syscalls.c:179 kernel/futex/syscalls.c:160 kernel/futex/syscalls.c:160) ? syscall_exit_to_user_mode (kernel/entry/common.c:221) ? do_syscall_64 (./arch/x86/include/asm/cpufeature.h:178 arch/x86/entry/common.c:98) ? do_futex (kernel/futex/syscalls.c:107 (discriminator 1)) ? __x64_sys_futex (kernel/futex/syscalls.c:179 kernel/futex/syscalls.c:160 kernel/futex/syscalls.c:160) ? syscall_exit_to_user_mode (kernel/entry/common.c:221) ? do_syscall_64 (./arch/x86/include/asm/cpufeature.h:178 arch/x86/entry/common.c:98) ? syscall_exit_to_user_mode (kernel/entry/common.c:221) ? do_syscall_64 (./arch/x86/include/asm/cpufeature.h:178 arch/x86/entry/common.c:98) ? do_syscall_64 (./arch/x86/include/asm/cpufeature.h:178 arch/x86/entry/common.c:98) ? __irq_exit_rcu (kernel/softirq.c:620 (discriminator 1) kernel/softirq.c:639 (discriminator 1)) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
RIP: 0010:native_queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380 (discriminator 3)) <NMI> ? watchdog_hardlockup_check.cold (kernel/watchdog.c:200) ? __perf_event_overflow (kernel/events/core.c:9737 (discriminator 2)) ? handle_pmi_common (arch/x86/events/intel/core.c:3061 (discriminator 1)) ? intel_pmu_handle_irq (./arch/x86/include/asm/paravirt.h:192 arch/x86/events/intel/core.c:2428 arch/x86/events/intel/core.c:3127) ? perf_event_nmi_handler (arch/x86/events/core.c:1744 arch/x86/events/core.c:1730) ? nmi_handle (arch/x86/kernel/nmi.c:151) ? default_do_nmi (arch/x86/kernel/nmi.c:352 (discriminator 61)) ? exc_nmi (arch/x86/kernel/nmi.c:546) ? end_repeat_nmi (arch/x86/entry/entry_64.S:1408) ? native_queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380 (discriminator 3)) ? native_queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380 (discriminator 3)) ? native_queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380 (discriminator 3)) </NMI> <IRQ> _raw_spin_lock_irqsave (./arch/x86/include/asm/paravirt.h:584 ./arch/x86/include/asm/qspinlock.h:51 ./include/asm-generic/qspinlock.h:114 ./include/linux/spinlock.h:187 ./include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) snd_pcm_period_elapsed (sound/core/pcm_lib.c:1905) snd_pcm process_rx_packets (sound/firewire/amdtp-stream.c:1164) snd_firewire_lib irq_target_callback (sound/firewire/amdtp-stream.c:1549) snd_firewire_lib handle_it_packet (drivers/firewire/ohci.c:2786 drivers/firewire/ohci.c:2974) firewire_ohci context_tasklet (drivers/firewire/ohci.c:1127) firewire_ohci tasklet_action_common.isra.0 (kernel/softirq.c:789) handle_softirqs (kernel/softirq.c:554) __irq_exit_rcu (kernel/softirq.c:589 kernel/softirq.c:428 kernel/softirq.c:637) common_interrupt (arch/x86/kernel/irq.c:278 (discriminator 35)) </IRQ> <TASK> asm_common_interrupt (./arch/x86/include/asm/idtentry.h:693)
It can be induced by direct ALSA playback to the device: mpv --audio-device=alsa/sysdefault:CARD=Fireface800 Spor-Ignition.flac Time to occurrence ranges from two seconds to 30 minutes.
Loading the CPU appears to increase the likelihood: stress --cpu $(nproc) So does switching between applications via workspaces (only tested Xfce).
The regression has been traced to these two commits: 7ba5ca32fe6e8d2e153fb5602997336517b34743 b5b519965c4c364ae65c49fe9f11d222dd70a9c2
I am currently testing a simple patch, in essence reverting both commits. Behaves well so far (stable), will likely send it in tomorrow.
#regzbot introduced: 7ba5ca32fe6e