This reverts commit 2dc016599cfa9672a147528ca26d70c3654a5423.
Users are reporting regressions in regulatory domain detection and
channel availability.
The problem this was trying to resolve was fixed in firmware anyway:
QCA6174 hw3.0: sdio-4.4.1: add firmware.bin_WLAN.RMH.4.4.1-00042
https://github.com/kvalo/ath10k-firmware/commit/4d382787f0efa77dba40394e0bc…
Link: https://bbs.archlinux.org/viewtopic.php?id=254535
Link: http://lists.infradead.org/pipermail/ath10k/2020-April/014871.html
Link: http://lists.infradead.org/pipermail/ath10k/2020-May/015152.html
Fixes: 2dc016599cfa ("ath: add support for special 0x0 regulatory domain")
Cc: <stable(a)vger.kernel.org>
Cc: Wen Gong <wgong(a)codeaurora.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
drivers/net/wireless/ath/regd.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireless/ath/regd.c b/drivers/net/wireless/ath/regd.c
index bee9110b91f3..20f4f8ea9f89 100644
--- a/drivers/net/wireless/ath/regd.c
+++ b/drivers/net/wireless/ath/regd.c
@@ -666,14 +666,14 @@ ath_regd_init_wiphy(struct ath_regulatory *reg,
/*
* Some users have reported their EEPROM programmed with
- * 0x8000 or 0x0 set, this is not a supported regulatory
- * domain but since we have more than one user with it we
- * need a solution for them. We default to 0x64, which is
- * the default Atheros world regulatory domain.
+ * 0x8000 set, this is not a supported regulatory domain
+ * but since we have more than one user with it we need
+ * a solution for them. We default to 0x64, which is the
+ * default Atheros world regulatory domain.
*/
static void ath_regd_sanitize(struct ath_regulatory *reg)
{
- if (reg->current_rd != COUNTRY_ERD_FLAG && reg->current_rd != 0)
+ if (reg->current_rd != COUNTRY_ERD_FLAG)
return;
printk(KERN_DEBUG "ath: EEPROM regdomain sanitized\n");
reg->current_rd = 0x64;
--
2.27.0.rc0.183.gde8f92d652-goog
The i801 controller provides a locking mechanism that the OS is supposed
to use to safely share the SMBus with ACPI AML or other firmware.
Previously, Linux attempted to get out of the way of ACPI AML entirely,
but left the bus locked if it used it before the first AML access. This
causes AML implementations that *do* attempt to safely share the bus
to time out if Linux uses it first; notably, this regressed ACPI video
backlight controls on 2015 iMacs after 01590f361e started instantiating
SPD EEPROMs on boot.
Commit 065b6211a8 fixed the immediate problem of leaving the bus locked,
but we can do better. The controller does have a proper locking mechanism,
so let's use it as intended. Since we can't rely on the BIOS doing this
properly, we implement the following logic:
- If ACPI AML uses the bus at all, we make a note and disable power
management. The latter matches already existing behavior.
- When we want to use the bus, we attempt to lock it first. If the
locking attempt times out, *and* ACPI hasn't tried to use the bus at
all yet, we cautiously go ahead and assume the BIOS forgot to unlock
the bus after boot. This preserves existing behavior.
- We always unlock the bus after a transfer.
- If ACPI AML tries to use the bus (except trying to lock it) while
we're in the middle of a transfer, or after we've determined
locking is broken, we know we cannot safely share the bus and give up.
Upon first usage of SMBus by ACPI AML, if nothing has gone horribly
wrong so far, users will see:
i801_smbus 0000:00:1f.4: SMBus controller is shared with ACPI AML. This seems safe so far.
If locking the SMBus times out, users will see:
i801_smbus 0000:00:1f.4: BIOS left SMBus locked
And if ACPI AML tries to use the bus concurrently with Linux, or it
previously used the bus and we failed to subsequently lock it as
above, the driver will give up and users will get:
i801_smbus 0000:00:1f.4: BIOS uses SMBus unsafely
i801_smbus 0000:00:1f.4: Driver SMBus register access inhibited
This fixes the regression introduced by 01590f361e, and further allows
safely sharing the SMBus on 2015 iMacs. Tested by running `i2cdump` in a
loop while changing backlight levels via the ACPI video device.
Fixes: 01590f361e ("i2c: i801: Instantiate SPD EEPROMs automatically")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Hector Martin <marcan(a)marcan.st>
---
drivers/i2c/busses/i2c-i801.c | 96 ++++++++++++++++++++++++++++-------
1 file changed, 79 insertions(+), 17 deletions(-)
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 04a1e38f2a6f..03be6310d6d7 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -287,11 +287,18 @@ struct i801_priv {
#endif
struct platform_device *tco_pdev;
+ /* BIOS left the controller marked busy. */
+ bool inuse_stuck;
/*
- * If set to true the host controller registers are reserved for
- * ACPI AML use. Protected by acpi_lock.
+ * If set to true, ACPI AML uses the host controller registers.
+ * Protected by acpi_lock.
*/
- bool acpi_reserved;
+ bool acpi_usage;
+ /*
+ * If set to true, ACPI AML uses the host controller registers in an
+ * unsafe way. Protected by acpi_lock.
+ */
+ bool acpi_unsafe;
struct mutex acpi_lock;
};
@@ -854,10 +861,37 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
int hwpec;
int block = 0;
int ret = 0, xact = 0;
+ int timeout = 0;
struct i801_priv *priv = i2c_get_adapdata(adap);
+ /*
+ * The controller provides a bit that implements a mutex mechanism
+ * between users of the bus. First, try to lock the hardware mutex.
+ * If this doesn't work, we give up trying to do this, but then
+ * bail if ACPI uses SMBus at all.
+ */
+ if (!priv->inuse_stuck) {
+ while (inb_p(SMBHSTSTS(priv)) & SMBHSTSTS_INUSE_STS) {
+ if (++timeout >= MAX_RETRIES) {
+ dev_warn(&priv->pci_dev->dev,
+ "BIOS left SMBus locked\n");
+ priv->inuse_stuck = true;
+ break;
+ }
+ usleep_range(250, 500);
+ }
+ }
+
mutex_lock(&priv->acpi_lock);
- if (priv->acpi_reserved) {
+ if (priv->acpi_usage && priv->inuse_stuck && !priv->acpi_unsafe) {
+ priv->acpi_unsafe = true;
+
+ dev_warn(&priv->pci_dev->dev, "BIOS uses SMBus unsafely\n");
+ dev_warn(&priv->pci_dev->dev,
+ "Driver SMBus register access inhibited\n");
+ }
+
+ if (priv->acpi_unsafe) {
mutex_unlock(&priv->acpi_lock);
return -EBUSY;
}
@@ -1639,6 +1673,16 @@ static bool i801_acpi_is_smbus_ioport(const struct i801_priv *priv,
address <= pci_resource_end(priv->pci_dev, SMBBAR);
}
+static acpi_status
+i801_acpi_do_access(u32 function, acpi_physical_address address,
+ u32 bits, u64 *value)
+{
+ if ((function & ACPI_IO_MASK) == ACPI_READ)
+ return acpi_os_read_port(address, (u32 *)value, bits);
+ else
+ return acpi_os_write_port(address, (u32)*value, bits);
+}
+
static acpi_status
i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
u64 *value, void *handler_context, void *region_context)
@@ -1648,17 +1692,38 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
acpi_status status;
/*
- * Once BIOS AML code touches the OpRegion we warn and inhibit any
- * further access from the driver itself. This device is now owned
- * by the system firmware.
+ * Non-i801 accesses pass through.
*/
- mutex_lock(&priv->acpi_lock);
+ if (!i801_acpi_is_smbus_ioport(priv, address))
+ return i801_acpi_do_access(function, address, bits, value);
- if (!priv->acpi_reserved && i801_acpi_is_smbus_ioport(priv, address)) {
- priv->acpi_reserved = true;
+ if (!mutex_trylock(&priv->acpi_lock)) {
+ mutex_lock(&priv->acpi_lock);
+ /*
+ * This better be a read of the status register to acquire
+ * the lock...
+ */
+ if (!priv->acpi_unsafe &&
+ !(address == SMBHSTSTS(priv) &&
+ (function & ACPI_IO_MASK) == ACPI_READ)) {
+ /*
+ * Uh-oh, ACPI AML is trying to do something with the
+ * controller without locking it properly.
+ */
+ priv->acpi_unsafe = true;
+
+ dev_warn(&pdev->dev, "BIOS uses SMBus unsafely\n");
+ dev_warn(&pdev->dev,
+ "Driver SMBus register access inhibited\n");
+ }
+ }
- dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
- dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+ if (!priv->acpi_usage) {
+ priv->acpi_usage = true;
+
+ if (!priv->acpi_unsafe)
+ dev_info(&pdev->dev,
+ "SMBus controller is shared with ACPI AML. This seems safe so far.\n");
/*
* BIOS is accessing the host controller so prevent it from
@@ -1667,10 +1732,7 @@ i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
pm_runtime_get_sync(&pdev->dev);
}
- if ((function & ACPI_IO_MASK) == ACPI_READ)
- status = acpi_os_read_port(address, (u32 *)value, bits);
- else
- status = acpi_os_write_port(address, (u32)*value, bits);
+ status = i801_acpi_do_access(function, address, bits, value);
mutex_unlock(&priv->acpi_lock);
@@ -1706,7 +1768,7 @@ static void i801_acpi_remove(struct i801_priv *priv)
ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
mutex_lock(&priv->acpi_lock);
- if (priv->acpi_reserved)
+ if (priv->acpi_usage)
pm_runtime_put(&priv->pci_dev->dev);
mutex_unlock(&priv->acpi_lock);
}
--
2.32.0
When booting with ACPI unavailable or disabled, get_smp_config() ends up
calling MP_processor_info() for each CPU found in the MPS
table. Previously, this resulted in boot_cpu_physical_apicid getting
unconditionally overwritten by the apicid of whatever processor had the
CPU_BOOTPROCESSOR flag. This occurred even if boot_cpu_physical_apicid
had already been more reliably determined in register_lapic_address() by
calling read_apic_id() from the actual boot processor.
Ordinariliy, this is not a problem because the boot processor really is
the one with the CPU_BOOTPROCESSOR flag. However, kexec is an exception
in which the kernel may be booted from any processor regardless of the
MPS table contents. In this case, boot_cpu_physical_apicid may not
indicate the actual boot processor.
This was particularly problematic when the second kernel was booted with
NR_CPUS fewer than the number of physical processors. It's the job of
generic_processor_info() to decide which CPUs to bring up in this case.
That obviously must include the real boot processor which it takes care
to save a slot for. It relies upon the contents of
boot_cpu_physical_apicid to do this, which if incorrect, may result in
the boot processor getting left out.
This condition can be discovered by smp_sanity_check() and rectified by
adding the boot processor to the phys_cpu_present_map with the warning
"weird, boot CPU (#%d) not listed by the BIOS". However, commit
3e730dad3b6da ("x86/apic: Unify interrupt mode setup for SMP-capable
system") caused setup_local_APIC() to be called before this could happen
resulting in a BUG_ON(!apic->apic_id_registered()):
[ 0.655452] ------------[ cut here ]------------
[ 0.660610] Kernel BUG at setup_local_APIC+0x74/0x280 [verbose debug info unavailable]
[ 0.669466] invalid opcode: 0000 [#1] SMP
[ 0.673948] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.109.Ar-16509018.eostrunkkernel419 #1
[ 0.683670] Hardware name: Quanta Quanta LY6 (1LY6UZZ0FBC), BIOS 1.0.6.0-e7d6a55 11/26/2015
[ 0.693007] RIP: 0010:setup_local_APIC+0x74/0x280
[ 0.698264] Code: 80 e4 fe bf f0 00 00 00 89 c6 48 8b 05 0f 1a 8e 00 ff 50 10 e8 12 53 fd ff 48 8b 05 00 1a 8e 00 ff 90 a0 00 00 00 85 c0 75 02 <0f> 0b 48 8b 05 ed 19 8e 00 41 be 00 02 00 00 ff 90 b0 00 00 00 48
[ 0.719251] RSP: 0000:ffffffff81a03e20 EFLAGS: 00010246
[ 0.725091] RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000
[ 0.733066] RDX: 0000000000000000 RSI: 000000000000000f RDI: 0000000000000020
[ 0.741041] RBP: ffffffff81a03e98 R08: 0000000000000002 R09: 0000000000000000
[ 0.749014] R10: ffffffff81a204e0 R11: ffffffff81b50ea7 R12: 0000000000000000
[ 0.756989] R13: ffffffff81aef920 R14: ffffffff81af60a0 R15: 0000000000000000
[ 0.764965] FS: 0000000000000000(0000) GS:ffff888036800000(0000) knlGS:0000000000000000
[ 0.774007] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.780427] CR2: ffff888035c01000 CR3: 0000000035a08000 CR4: 00000000000006b0
[ 0.788401] Call Trace:
[ 0.791137] ? amd_iommu_prepare+0x15/0x2a
[ 0.795717] apic_bsp_setup+0x55/0x75
[ 0.799808] apic_intr_mode_init+0x169/0x16e
[ 0.804579] x86_late_time_init+0x10/0x17
[ 0.809062] start_kernel+0x37e/0x3fe
[ 0.813154] x86_64_start_reservations+0x2a/0x2c
[ 0.818316] x86_64_start_kernel+0x72/0x75
[ 0.822886] secondary_startup_64+0xa4/0xb0
[ 0.827564] ---[ end trace 237b64da0fd9b22e ]---
This change avoids these issues by only setting boot_cpu_physical_apicid
from the MPS table if it is not already set, which can occur in the
construct_default_ISA_mptable() path. Otherwise,
boot_cpu_physical_apicid will already have been set in
register_lapic_address() and should therefore remain untouched.
Looking through all the places where boot_cpu_physical_apicid is
accessed, nearly all of them assume that boot_cpu_physical_apicid should
match read_apic_id() on the booting processor. The only place that might
intend to use the BSP apicid listed in the MPS table is amd_numa_init(),
which explicitly requires boot_cpu_physical_apicid to be the lowest
apicid of all processors. Ironically, due to the early exit short
circuit in early_get_smp_config(), it instead gets
boot_cpu_physical_apicid = read_apic_id() rather than the MPS table
BSP. The behaviour of amd_numa_init() is therefore unaffected by this
change.
Fixes: 3e730dad3b6da ("x86/apic: Unify interrupt mode setup for SMP-capable system")
Signed-off-by: Kevin Mitchell <kevmitch(a)arista.com>
Cc: <stable(a)vger.kernel.org>
---
arch/x86/kernel/mpparse.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index afac7ccce72f..6f22f09bfe11 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -64,7 +64,8 @@ static void __init MP_processor_info(struct mpc_cpu *m)
if (m->cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
- boot_cpu_physical_apicid = m->apicid;
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = m->apicid;
}
pr_info("Processor #%d%s\n", m->apicid, bootup_cpu);
--
2.26.2
From: Joerg Roedel <jroedel(a)suse.de>
Allow a runtime opt-out of kexec support for architecture code in case
the kernel is running in an environment where kexec is not properly
supported yet.
This will be used on x86 when the kernel is running as an SEV-ES
guest. SEV-ES guests need special handling for kexec to hand over all
CPUs to the new kernel. This requires special hypervisor support and
handling code in the guest which is not yet implemented.
Cc: stable(a)vger.kernel.org # v5.10+
Signed-off-by: Joerg Roedel <jroedel(a)suse.de>
---
include/linux/kexec.h | 1 +
kernel/kexec.c | 14 ++++++++++++++
kernel/kexec_file.c | 9 +++++++++
3 files changed, 24 insertions(+)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0c994ae37729..85c30dcd0bdc 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -201,6 +201,7 @@ int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
unsigned long buf_len);
#endif
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
+bool arch_kexec_supported(void);
extern int kexec_add_buffer(struct kexec_buf *kbuf);
int kexec_locate_mem_hole(struct kexec_buf *kbuf);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..d03134160458 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -195,11 +195,25 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
* that to happen you need to do that yourself.
*/
+bool __weak arch_kexec_supported(void)
+{
+ return true;
+}
+
static inline int kexec_load_check(unsigned long nr_segments,
unsigned long flags)
{
int result;
+ /*
+ * The architecture may support kexec in general, but the kernel could
+ * run in an environment where it is not (yet) possible to execute a new
+ * kernel. Allow the architecture code to opt-out of kexec support when
+ * it is running in such an environment.
+ */
+ if (!arch_kexec_supported())
+ return -ENOSYS;
+
/* We only trust the superuser with rebooting the system. */
if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
return -EPERM;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 33400ff051a8..96d08a512e9c 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -358,6 +358,15 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
int ret = 0, i;
struct kimage **dest_image, *image;
+ /*
+ * The architecture may support kexec in general, but the kernel could
+ * run in an environment where it is not (yet) possible to execute a new
+ * kernel. Allow the architecture code to opt-out of kexec support when
+ * it is running in such an environment.
+ */
+ if (!arch_kexec_supported())
+ return -ENOSYS;
+
/* We only trust the superuser with rebooting the system. */
if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
return -EPERM;
--
2.31.1
From: Alex Leibovich <alexl(a)marvell.com>
Automatic Clock Gating is a feature used for the power
consumption optimisation. It turned out that
during early init phase it may prevent the stable voltage
switch to 1.8V - due to that on some platfroms an endless
printout in dmesg can be observed:
"mmc1: 1.8V regulator output did not became stable"
Fix the problem by disabling the ACG at very beginning
of the sdhci_init and let that be enabled later.
Fixes: 3a3748dba881 ("mmc: sdhci-xenon: Add Marvell Xenon SDHC core functionality")
Signed-off-by: Alex Leibovich <alexl(a)marvell.com>
Signed-off-by: Marcin Wojtas <mw(a)semihalf.com>
Cc: stable(a)vger.kernel.org
---
drivers/mmc/host/sdhci-xenon.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/sdhci-xenon.c b/drivers/mmc/host/sdhci-xenon.c
index c67611fdaa8a..4b05f6fdefb4 100644
--- a/drivers/mmc/host/sdhci-xenon.c
+++ b/drivers/mmc/host/sdhci-xenon.c
@@ -168,7 +168,12 @@ static void xenon_reset_exit(struct sdhci_host *host,
/* Disable tuning request and auto-retuning again */
xenon_retune_setup(host);
- xenon_set_acg(host, true);
+ /*
+ * The ACG should be turned off at the early init time, in order
+ * to solve a possile issues with the 1.8V regulator stabilization.
+ * The feature is enabled in later stage.
+ */
+ xenon_set_acg(host, false);
xenon_set_sdclk_off_idle(host, sdhc_id, false);
--
2.29.0
On 2021-07-30 12:35, Anders Roxell wrote:
> From: Robin Murphy <robin.murphy(a)arm.com>
>
>> Now that PCI inbound window restrictions are handled generically between
>> the of_pci resource parsing and the IOMMU layer, and described in the
>> Juno DT, we can finally enable the PCIe SMMU without the risk of DMA
>> mappings inadvertently allocating unusable addresses.
>>
>> Similarly, the relevant support for IOMMU mappings for peripheral
>> transfers has been hooked up in the pl330 driver for ages, so we can
>> happily enable the DMA SMMU without that breaking anything either.
>>
>> Signed-off-by: Robin Murphy <robin.murphy(a)arm.com>
>
> When we build a kernel with 64k page size and run the ltp syscalls we
> sporadically see a kernel crash while doing a mkfs on a connected SATA
> drive. This is happening every third test run on any juno-r2 device in
> the lab with the same kernel image (stable-rc 5.13.y, mainline and next)
> with gcc-11.
Hmm, I guess 64K pages might make a difference in that we'll chew
through IOVA space a lot faster with small mappings...
I'll have to try to reproduce this locally, since the interesting thing
would be knowing what DMA address it was trying to use that went wrong,
but IOMMU tracepoints and/or dma-debug are going to generate an crazy
amount of data to sift through and try to correlate - having done it
before it's not something I'd readily ask someone else to do for me :)
On a hunch, though, does it make any difference if you remove the first
entry from the PCIe "dma-ranges" (the 0x2c1c0000 one)?
Robin.
> Here is a snippet of the boot log [1]:
>
> + mkfs -t ext4 /dev/disk/by-id/ata-SanDisk_SDSSDA120G_165192443611
> mke2fs 1.43.8 (1-Jan-2018)
> Discarding device blocks: 4096/29305200
> [ 55.344291] ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x6
> frozen
> [ 55.351423] ata1.00: irq_stat 0x00020002, failed to transmit command
> FIS
> [ 55.358205] ata1.00: failed command: DATA SET MANAGEMENT
> [ 55.363561] ata1.00: cmd 06/01:01:00:00:00/00:00:00:00:00/a0 tag 12
> dma 512 out
> [ 55.363561] res ec/ff:00:00:00:00/00:00:00:00:ec/00 Emask
> 0x12 (ATA bus error)
> [ 55.378955] ata1.00: status: { Busy }
> [ 55.382658] ata1.00: error: { ICRC UNC AMNF IDNF ABRT }
> [ 55.387947] ata1: hard resetting link
> [ 55.391653] ata1: controller in dubious state, performing PORT_RST
> [ 57.588447] ata1: SATA link up 3.0 Gbps (SStatus 123 SControl 0)
> [ 57.613471] ata1.00: configured for UDMA/100
> [ 57.617866] ata1.00: device reported invalid CHS sector 0
> [ 57.623397] ata1: EH complete
>
>
> When we revert this patch we don't see any issue.
>
> Reported-by: Linux Kernel Functional Testing <lkft(a)linaro.org>
>
> Cheers,
> Anders
> [1]
> https://qa-reports.linaro.org/lkft/linux-stable-rc-linux-5.13.y/build/v5.13…
>
Some boot partitions on the Samsung 4GB KLM4G1YE4C "4YMD1R" and "M4G1YC"
cards appear broken when accessed randomly. CMD6 to switch back to the main
partition randomly stalls after CMD18 access to the boot partition 1, and
the card never comes back online. The accesses to the boot partitions work
several times before this happens, but eventually the card access hangs
while initializing the card.
Some problematic eMMC cards are found in the Samsung GT-S7710 (Skomer)
and SGH-I407 (Kyle) mobile phones.
I tried using only single blocks with CMD17 on the boot partitions with the
result that it crashed even faster.
After a bit of root cause analysis it turns out that these old eMMC cards
probably cannot do hardware busy detection (monitoring DAT0) properly.
The card survives on older kernels, but this is because recent kernels have
added busy detection handling for the SoC used in these phones, exposing
the issue.
Construct a quirk that makes the MMC cord avoid using the ->card_busy()
callback if the card is listed with MMC_QUIRK_BROKEN_HW_BUSY_DETECT and
register the known problematic cards. The core changes are pretty
straight-forward with a helper inline to check of we can use hardware
busy detection.
On the MMCI host we have to counter the fact that if the host was able to
use busy detect, it would be used unsolicited in the command IRQ callback.
Rewrite this so that MMCI will not attempt to use hardware busy detection
in the command IRQ until:
- A card is attached to the host and
- We know that the card can handle this busy detection
I have glanced over the ->card_busy() callbacks on some other hosts and
they seem to mostly read a register reflecting the value of DAT0 for this
which works fine with the quirk in this patch. However if the error appear
on other hosts they might need additional fixes.
After applying this patch, the main partition can be accessed and mounted
without problems on Samsung GT-S7710 and SGH-I407.
Fixes: cb0335b778c7 ("mmc: mmci: add busy_complete callback")
Cc: stable(a)vger.kernel.org
Cc: phone-devel(a)vger.kernel.org
Cc: Ludovic Barre <ludovic.barre(a)st.com>
Cc: Stephan Gerhold <stephan(a)gerhold.net>
Reported-by: newbyte(a)disroot.org
Signed-off-by: Linus Walleij <linus.walleij(a)linaro.org>
---
ChangeLog v2->v3:
- Rebase on v5.14-rc1
- Reword the commit message slightly.
ChangeLog v1->v2:
- Rewrite to reflect the actual problem of broken busy detection.
---
drivers/mmc/core/core.c | 8 ++++----
drivers/mmc/core/core.h | 17 +++++++++++++++++
drivers/mmc/core/mmc_ops.c | 4 ++--
drivers/mmc/core/quirks.h | 21 +++++++++++++++++++++
drivers/mmc/host/mmci.c | 22 ++++++++++++++++++++--
include/linux/mmc/card.h | 1 +
6 files changed, 65 insertions(+), 8 deletions(-)
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 95fedcf56e4a..e08dd9ea3d46 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -232,7 +232,7 @@ static void __mmc_start_request(struct mmc_host *host, struct mmc_request *mrq)
* And bypass I/O abort, reset and bus suspend operations.
*/
if (sdio_is_io_busy(mrq->cmd->opcode, mrq->cmd->arg) &&
- host->ops->card_busy) {
+ mmc_hw_busy_detect(host)) {
int tries = 500; /* Wait aprox 500ms at maximum */
while (host->ops->card_busy(host) && --tries)
@@ -1200,7 +1200,7 @@ int mmc_set_uhs_voltage(struct mmc_host *host, u32 ocr)
*/
if (!host->ops->start_signal_voltage_switch)
return -EPERM;
- if (!host->ops->card_busy)
+ if (!mmc_hw_busy_detect(host))
pr_warn("%s: cannot verify signal voltage switch\n",
mmc_hostname(host));
@@ -1220,7 +1220,7 @@ int mmc_set_uhs_voltage(struct mmc_host *host, u32 ocr)
* after the response of cmd11, but wait 1 ms to be sure
*/
mmc_delay(1);
- if (host->ops->card_busy && !host->ops->card_busy(host)) {
+ if (mmc_hw_busy_detect(host) && !host->ops->card_busy(host)) {
err = -EAGAIN;
goto power_cycle;
}
@@ -1241,7 +1241,7 @@ int mmc_set_uhs_voltage(struct mmc_host *host, u32 ocr)
* Failure to switch is indicated by the card holding
* dat[0:3] low
*/
- if (host->ops->card_busy && host->ops->card_busy(host))
+ if (mmc_hw_busy_detect(host) && host->ops->card_busy(host))
err = -EAGAIN;
power_cycle:
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index 0c4de2030b3f..6a5619eed4a6 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -181,4 +181,21 @@ static inline int mmc_flush_cache(struct mmc_host *host)
return 0;
}
+/**
+ * mmc_hw_busy_detect() - Can we use hw busy detection?
+ * @host: the host in question
+ */
+static inline bool mmc_hw_busy_detect(struct mmc_host *host)
+{
+ struct mmc_card *card = host->card;
+ bool has_ops;
+ bool able = true;
+
+ has_ops = (host->ops->card_busy != NULL);
+ if (card)
+ able = !(card->quirks & MMC_QUIRK_BROKEN_HW_BUSY_DETECT);
+
+ return (has_ops && able);
+}
+
#endif
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 973756ed4016..546fc799a8e5 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -435,7 +435,7 @@ static int mmc_busy_cb(void *cb_data, bool *busy)
u32 status = 0;
int err;
- if (host->ops->card_busy) {
+ if (mmc_hw_busy_detect(host)) {
*busy = host->ops->card_busy(host);
return 0;
}
@@ -597,7 +597,7 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
* when it's not allowed to poll by using CMD13, then we need to rely on
* waiting the stated timeout to be sufficient.
*/
- if (!send_status && !host->ops->card_busy) {
+ if (!send_status && !mmc_hw_busy_detect(host)) {
mmc_delay(timeout_ms);
goto out_tim;
}
diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h
index d68e6e513a4f..8da6526f0eb0 100644
--- a/drivers/mmc/core/quirks.h
+++ b/drivers/mmc/core/quirks.h
@@ -99,6 +99,27 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = {
MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc,
MMC_QUIRK_TRIM_BROKEN),
+ /*
+ * Some older Samsung eMMCs have broken hardware busy detection.
+ * Enabling this feature in the host controller can make the card
+ * accesses lock up completely.
+ */
+ MMC_FIXUP("4YMD1R", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ /* Samsung KLMxGxxE4x eMMCs from 2012: 4, 8, 16, 32 and 64 GB */
+ MMC_FIXUP("M4G1YC", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ MMC_FIXUP("M8G1WA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ MMC_FIXUP("MAG2WA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ MMC_FIXUP("MBG4WA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ MMC_FIXUP("MAG2WA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+ MMC_FIXUP("MCG8WA", CID_MANFID_SAMSUNG, CID_OEMID_ANY, add_quirk_mmc,
+ MMC_QUIRK_BROKEN_HW_BUSY_DETECT),
+
END_FIXUP
};
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 984d35055156..3046917b2b67 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -347,6 +347,24 @@ static int mmci_card_busy(struct mmc_host *mmc)
return busy;
}
+/* Use this if the MMCI variant AND the card supports it */
+static bool mmci_use_busy_detect(struct mmci_host *host)
+{
+ struct mmc_card *card = host->mmc->card;
+
+ if (!host->variant->busy_detect)
+ return false;
+
+ /* We don't allow this until we know that the card can handle it */
+ if (!card)
+ return false;
+
+ if (card->quirks & MMC_QUIRK_BROKEN_HW_BUSY_DETECT)
+ return false;
+
+ return true;
+}
+
static void mmci_reg_delay(struct mmci_host *host)
{
/*
@@ -1381,7 +1399,7 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
return;
/* Handle busy detection on DAT0 if the variant supports it. */
- if (busy_resp && host->variant->busy_detect)
+ if (busy_resp && mmci_use_busy_detect(host))
if (!host->ops->busy_complete(host, status, err_msk))
return;
@@ -1725,7 +1743,7 @@ static void mmci_set_max_busy_timeout(struct mmc_host *mmc)
struct mmci_host *host = mmc_priv(mmc);
u32 max_busy_timeout = 0;
- if (!host->variant->busy_detect)
+ if (!mmci_use_busy_detect(host))
return;
if (host->variant->busy_timeout && mmc->actual_clock)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 74e6c0624d27..525a39951c6d 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -280,6 +280,7 @@ struct mmc_card {
/* for byte mode */
#define MMC_QUIRK_NONSTD_SDIO (1<<2) /* non-standard SDIO card attached */
/* (missing CIA registers) */
+#define MMC_QUIRK_BROKEN_HW_BUSY_DETECT (1<<3) /* Disable hardware busy detection on DAT0 */
#define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */
#define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */
#define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */
--
2.31.1