The reason for this is to properly support the SPI NOR chip on the Jetson Xavier NX module. Prior to this, it would time out on all transfers and sometimes even trigger a CBB fault, locking up the entire unit. With this, reading and writing to the flash memory works as expected.
This also fixes the tegra210-quad SPI driver to properly use the DMA engine's memory space instead of the SPI controller's. Without this, enabling DMA on the controllers results in MMU faults.
The driver change has only been tested on tegra210 / p3450 and tegra194 / p3518, as those are the only available test platforms. Tegra234 and Tegra241 should also be verified. I have a p3766 for tegra234, but the QSPI flash memory is firewalled by mb1 on all publicly available bootloaders, and no other SPI devices are part of the devkit.
--- Changes in v2: - Drop bindings patches - Add patch to use dma memory space instead of the spi controllers when dma is enabled. - Drop iommu properties from final patch - Link to v1: https://lore.kernel.org/r/20260515-tegra194-qspi-iommu-v1-0-57dfb63cd3d6@gma...
--- Aaron Kling (2): spi: tegra210-quad: Allocate DMA memory for DMA engine arm64: tegra: Enable DMA Support on Tegra194 QSPI
arch/arm64/boot/dts/nvidia/tegra194.dtsi | 4 ++++ drivers/spi/spi-tegra210-quad.c | 29 ++++++++++++++++++----------- 2 files changed, 22 insertions(+), 11 deletions(-) --- base-commit: c1ecb239fa3456529a32255359fc78b69eb9d847 change-id: 20260515-tegra194-qspi-iommu-e4e4644d5fdf
Best regards,
From: Aaron Kling webgeek1234@gmail.com
When the SPI controllers are running in DMA mode, it is the DMA engine that performs the memory accesses rather than the SPI controller. Pass the DMA engine's struct device pointer to the DMA API to make sure the correct DMA operations are used.
Suggested-by: Thierry Reding treding@nvidia.com Signed-off-by: Aaron Kling webgeek1234@gmail.com --- drivers/spi/spi-tegra210-quad.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index db28dd556484b2..588a929a97850a 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -226,11 +226,13 @@ struct tegra_qspi { struct completion xfer_completion; struct spi_transfer *curr_xfer;
+ struct device *rx_dma_dev; struct dma_chan *rx_dma_chan; u32 *rx_dma_buf; dma_addr_t rx_dma_phys; struct dma_async_tx_descriptor *rx_dma_desc;
+ struct device *tx_dma_dev; struct dma_chan *tx_dma_chan; u32 *tx_dma_buf; dma_addr_t tx_dma_phys; @@ -574,15 +576,15 @@ static int tegra_qspi_dma_map_xfer(struct tegra_qspi *tqspi, struct spi_transfer len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
if (t->tx_buf) { - t->tx_dma = dma_map_single(tqspi->dev, (void *)tx_buf, len, DMA_TO_DEVICE); - if (dma_mapping_error(tqspi->dev, t->tx_dma)) + t->tx_dma = dma_map_single(tqspi->tx_dma_dev, (void *)tx_buf, len, DMA_TO_DEVICE); + if (dma_mapping_error(tqspi->tx_dma_dev, t->tx_dma)) return -ENOMEM; }
if (t->rx_buf) { - t->rx_dma = dma_map_single(tqspi->dev, (void *)rx_buf, len, DMA_FROM_DEVICE); - if (dma_mapping_error(tqspi->dev, t->rx_dma)) { - dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE); + t->rx_dma = dma_map_single(tqspi->rx_dma_dev, (void *)rx_buf, len, DMA_FROM_DEVICE); + if (dma_mapping_error(tqspi->rx_dma_dev, t->rx_dma)) { + dma_unmap_single(tqspi->tx_dma_dev, t->tx_dma, len, DMA_TO_DEVICE); return -ENOMEM; } } @@ -597,9 +599,9 @@ static void tegra_qspi_dma_unmap_xfer(struct tegra_qspi *tqspi, struct spi_trans len = DIV_ROUND_UP(tqspi->curr_dma_words * tqspi->bytes_per_word, 4) * 4;
if (t->tx_buf) - dma_unmap_single(tqspi->dev, t->tx_dma, len, DMA_TO_DEVICE); + dma_unmap_single(tqspi->tx_dma_dev, t->tx_dma, len, DMA_TO_DEVICE); if (t->rx_buf) - dma_unmap_single(tqspi->dev, t->rx_dma, len, DMA_FROM_DEVICE); + dma_unmap_single(tqspi->rx_dma_dev, t->rx_dma, len, DMA_FROM_DEVICE); }
static int tegra_qspi_start_dma_based_transfer(struct tegra_qspi *tqspi, struct spi_transfer *t) @@ -745,7 +747,7 @@ static int tegra_qspi_start_cpu_based_transfer(struct tegra_qspi *qspi, struct s static void tegra_qspi_deinit_dma(struct tegra_qspi *tqspi) { if (tqspi->tx_dma_buf) { - dma_free_coherent(tqspi->dev, tqspi->dma_buf_size, + dma_free_coherent(tqspi->tx_dma_dev, tqspi->dma_buf_size, tqspi->tx_dma_buf, tqspi->tx_dma_phys); tqspi->tx_dma_buf = NULL; } @@ -756,7 +758,7 @@ static void tegra_qspi_deinit_dma(struct tegra_qspi *tqspi) }
if (tqspi->rx_dma_buf) { - dma_free_coherent(tqspi->dev, tqspi->dma_buf_size, + dma_free_coherent(tqspi->rx_dma_dev, tqspi->dma_buf_size, tqspi->rx_dma_buf, tqspi->rx_dma_phys); tqspi->rx_dma_buf = NULL; } @@ -782,6 +784,7 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi) }
tqspi->rx_dma_chan = dma_chan; + tqspi->rx_dma_dev = dmaengine_get_dma_device(tqspi->rx_dma_chan);
dma_chan = dma_request_chan(tqspi->dev, "tx"); if (IS_ERR(dma_chan)) { @@ -790,15 +793,19 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi) }
tqspi->tx_dma_chan = dma_chan; + tqspi->tx_dma_dev = dmaengine_get_dma_device(tqspi->tx_dma_chan); } else { if (!device_iommu_mapped(tqspi->dev)) { dev_warn(tqspi->dev, "IOMMU not enabled in device-tree, falling back to PIO mode\n"); return 0; } + + tqspi->rx_dma_dev = tqspi->dev; + tqspi->tx_dma_dev = tqspi->dev; }
- dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL); + dma_buf = dma_alloc_coherent(tqspi->rx_dma_dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL); if (!dma_buf) { err = -ENOMEM; goto err_out; @@ -807,7 +814,7 @@ static int tegra_qspi_init_dma(struct tegra_qspi *tqspi) tqspi->rx_dma_buf = dma_buf; tqspi->rx_dma_phys = dma_phys;
- dma_buf = dma_alloc_coherent(tqspi->dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL); + dma_buf = dma_alloc_coherent(tqspi->tx_dma_dev, tqspi->dma_buf_size, &dma_phys, GFP_KERNEL); if (!dma_buf) { err = -ENOMEM; goto err_out;
From: Aaron Kling webgeek1234@gmail.com
Without DMA enabled, PIO mode is used, and flash storage such as the one on the p3668 module times out and cannot complete any transfers. In some cases, these timeouts cause hangs and CBB faults.
Signed-off-by: Aaron Kling webgeek1234@gmail.com --- arch/arm64/boot/dts/nvidia/tegra194.dtsi | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 1d659454a6f9fe..e2ddbc6715d5e8 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -923,6 +923,8 @@ spi@3270000 { <&bpmp TEGRA194_CLK_QSPI0_PM>; clock-names = "qspi", "qspi_out"; resets = <&bpmp TEGRA194_RESET_QSPI0>; + dmas = <&gpcdma 5>, <&gpcdma 5>; + dma-names = "rx", "tx"; status = "disabled"; };
@@ -1013,6 +1015,8 @@ spi@3300000 { <&bpmp TEGRA194_CLK_QSPI1_PM>; clock-names = "qspi", "qspi_out"; resets = <&bpmp TEGRA194_RESET_QSPI1>; + dmas = <&gpcdma 6>, <&gpcdma 6>; + dma-names = "rx", "tx"; status = "disabled"; };
linaro-mm-sig@lists.linaro.org