On Mon, Nov 11, 2024 at 07:32:44PM +0530, Jyothi Kumar Seerapu wrote:
The I2C driver gets an interrupt upon transfer completion. For multiple messages in a single transfer, N interrupts will be received for N messages, leading to significant software interrupt latency. To mitigate this latency, utilize Block Event Interrupt (BEI)
Please rewrite this to the tone that the reader doesn't know what Block Event Interrupt is, or that it exists.
only when an interrupt is necessary. This means large transfers can be split into multiple chunks of 8 messages internally, without expecting interrupts for the first 7 message completions, only the last one will trigger an interrupt indicating 8 messages completed.
By implementing BEI, multi-message transfers can be divided into chunks of 8 messages, improving overall transfer time.
You already wrote this in the paragraph above.
Where is this number 8 coming from btw?
This optimization reduces transfer time from 168 ms to 48 ms for a series of 200 I2C write messages in a single transfer, with a clock frequency support of 100 kHz.
BEI optimizations are currently implemented for I2C write transfers only, as there is no use case for multiple I2C read messages in a single transfer at this time.
Signed-off-by: Jyothi Kumar Seerapu quic_jseerapu@quicinc.com
v1 -> v2:
- Moved gi2c_gpi_xfer->msg_idx_cnt to separate local variable.
- Updated goto labels for error scenarios in geni_i2c_gpi function
- memset tx_multi_xfer to 0.
- Removed passing current msg index to geni_i2c_gpi.
- Fixed kernel test robot reported compilation issues.
drivers/i2c/busses/i2c-qcom-geni.c | 203 +++++++++++++++++++++++++---- 1 file changed, 178 insertions(+), 25 deletions(-)
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 7a22e1f46e60..04a7d926dadc 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -100,6 +100,10 @@ struct geni_i2c_dev { struct dma_chan *rx_c; bool gpi_mode; bool abort_done;
- bool is_tx_multi_xfer;
- u32 num_msgs;
- u32 tx_irq_cnt;
- struct gpi_i2c_config *gpi_config;
}; struct geni_i2c_desc { @@ -500,6 +504,7 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, static void i2c_gpi_cb_result(void *cb, const struct dmaengine_result *result) { struct geni_i2c_dev *gi2c = cb;
- struct gpi_multi_xfer *tx_multi_xfer;
if (result->result != DMA_TRANS_NOERROR) { dev_err(gi2c->se.dev, "DMA txn failed:%d\n", result->result); @@ -508,7 +513,21 @@ static void i2c_gpi_cb_result(void *cb, const struct dmaengine_result *result) dev_dbg(gi2c->se.dev, "DMA xfer has pending: %d\n", result->residue); }
- complete(&gi2c->done);
- if (gi2c->is_tx_multi_xfer) {
Wouldn't it be cleaner to treat the !is_tx_multi_xfer case as a multi-xfer of length 1?
tx_multi_xfer = &gi2c->gpi_config->multi_xfer;
/*
* Send Completion for last message or multiple of NUM_MSGS_PER_IRQ.
*/
if ((tx_multi_xfer->irq_msg_cnt == gi2c->num_msgs - 1) ||
(!((tx_multi_xfer->irq_msg_cnt + 1) % NUM_MSGS_PER_IRQ))) {
tx_multi_xfer->irq_cnt++;
complete(&gi2c->done);
Why? You're removing the wait_for_completion_timeout() from geni_i2c_gpi_xfer() when is_tx_multi_xfer is set.
}
tx_multi_xfer->irq_msg_cnt++;
- } else {
complete(&gi2c->done);
- }
} static void geni_i2c_gpi_unmap(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, @@ -526,7 +545,42 @@ static void geni_i2c_gpi_unmap(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, } } -static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, +/**
- gpi_i2c_multi_desc_unmap() - unmaps the buffers post multi message TX transfers
- @dev: pointer to the corresponding dev node
- @gi2c: i2c dev handle
- @msgs: i2c messages array
- @peripheral: pointer to the gpi_i2c_config
- */
+static void gpi_i2c_multi_desc_unmap(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[],
struct gpi_i2c_config *peripheral)
+{
- u32 msg_xfer_cnt, wr_idx = 0;
- struct gpi_multi_xfer *tx_multi_xfer = &peripheral->multi_xfer;
- /*
* In error case, need to unmap all messages based on the msg_idx_cnt.
* Non-error case unmap all the processed messages.
What is the benefit of this optimization, compared to keeping things simple and just unmap all buffers at the end of geni_i2c_gpi_xfer()?
*/
- if (gi2c->err)
msg_xfer_cnt = tx_multi_xfer->msg_idx_cnt;
- else
msg_xfer_cnt = tx_multi_xfer->irq_cnt * NUM_MSGS_PER_IRQ;
- /* Unmap the processed DMA buffers based on the received interrupt count */
- for (; tx_multi_xfer->unmap_msg_cnt < msg_xfer_cnt; tx_multi_xfer->unmap_msg_cnt++) {
if (tx_multi_xfer->unmap_msg_cnt == gi2c->num_msgs)
break;
wr_idx = tx_multi_xfer->unmap_msg_cnt % QCOM_GPI_MAX_NUM_MSGS;
geni_i2c_gpi_unmap(gi2c, &msgs[tx_multi_xfer->unmap_msg_cnt],
tx_multi_xfer->dma_buf[wr_idx],
tx_multi_xfer->dma_addr[wr_idx],
NULL, (dma_addr_t)NULL);
tx_multi_xfer->freed_msg_cnt++;
- }
+}
+static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], struct dma_slave_config *config, dma_addr_t *dma_addr_p, void **buf, unsigned int op, struct dma_chan *dma_chan) { @@ -538,26 +592,48 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, enum dma_transfer_direction dma_dirn; struct dma_async_tx_descriptor *desc; int ret;
- struct gpi_multi_xfer *gi2c_gpi_xfer;
- dma_cookie_t cookie;
- u32 msg_idx;
peripheral = config->peripheral_config;
- dma_buf = i2c_get_dma_safe_msg_buf(msg, 1);
- if (!dma_buf)
return -ENOMEM;
- gi2c_gpi_xfer = &peripheral->multi_xfer;
- dma_buf = gi2c_gpi_xfer->dma_buf[gi2c_gpi_xfer->buf_idx];
- addr = gi2c_gpi_xfer->dma_addr[gi2c_gpi_xfer->buf_idx];
- msg_idx = gi2c_gpi_xfer->msg_idx_cnt;
- dma_buf = i2c_get_dma_safe_msg_buf(&msgs[msg_idx], 1);
- if (!dma_buf) {
ret = -ENOMEM;
goto out;
- }
if (op == I2C_WRITE) map_dirn = DMA_TO_DEVICE; else map_dirn = DMA_FROM_DEVICE;
- addr = dma_map_single(gi2c->se.dev->parent, dma_buf, msg->len, map_dirn);
- addr = dma_map_single(gi2c->se.dev->parent, dma_buf,
if (dma_mapping_error(gi2c->se.dev->parent, addr)) {msgs[msg_idx].len, map_dirn);
i2c_put_dma_safe_msg_buf(dma_buf, msg, false);
return -ENOMEM;
i2c_put_dma_safe_msg_buf(dma_buf, &msgs[msg_idx], false);
ret = -ENOMEM;
goto out;
- }
- if (gi2c->is_tx_multi_xfer) {
if (((msg_idx + 1) % NUM_MSGS_PER_IRQ))
peripheral->flags |= QCOM_GPI_BLOCK_EVENT_IRQ;
else
peripheral->flags &= ~QCOM_GPI_BLOCK_EVENT_IRQ;
/* BEI bit to be cleared for last TRE */
if (msg_idx == gi2c->num_msgs - 1)
}peripheral->flags &= ~QCOM_GPI_BLOCK_EVENT_IRQ;
/* set the length as message for rx txn */
- peripheral->rx_len = msg->len;
- peripheral->rx_len = msgs[msg_idx].len; peripheral->op = op;
ret = dmaengine_slave_config(dma_chan, config); @@ -575,7 +651,8 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, else dma_dirn = DMA_DEV_TO_MEM;
- desc = dmaengine_prep_slave_single(dma_chan, addr, msg->len, dma_dirn, flags);
- desc = dmaengine_prep_slave_single(dma_chan, addr, msgs[msg_idx].len,
if (!desc) { dev_err(gi2c->se.dev, "prep_slave_sg failed\n"); ret = -EIO;dma_dirn, flags);
@@ -585,15 +662,48 @@ static int geni_i2c_gpi(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, desc->callback_result = i2c_gpi_cb_result; desc->callback_param = gi2c;
- dmaengine_submit(desc);
- *buf = dma_buf;
- *dma_addr_p = addr;
- if (!((msgs[msg_idx].flags & I2C_M_RD) && op == I2C_WRITE)) {
gi2c_gpi_xfer->msg_idx_cnt++;
gi2c_gpi_xfer->buf_idx = (msg_idx + 1) % QCOM_GPI_MAX_NUM_MSGS;
- }
- cookie = dmaengine_submit(desc);
- if (dma_submit_error(cookie)) {
dev_err(gi2c->se.dev,
"%s: dmaengine_submit failed (%d)\n", __func__, cookie);
ret = -EINVAL;
goto err_config;
- }
- if (gi2c->is_tx_multi_xfer) {
dma_async_issue_pending(gi2c->tx_c);
if ((msg_idx == (gi2c->num_msgs - 1)) ||
(gi2c_gpi_xfer->msg_idx_cnt >=
QCOM_GPI_MAX_NUM_MSGS + gi2c_gpi_xfer->freed_msg_cnt)) {
ret = gpi_multi_desc_process(gi2c->se.dev, gi2c_gpi_xfer,
A function call straight into the GPI driver? I'm not entirely familiar with the details of the dmaengine API, but this doesn't look correct.
gi2c->num_msgs, XFER_TIMEOUT,
&gi2c->done);
if (ret) {
dev_err(gi2c->se.dev,
"I2C multi write msg transfer timeout: %d\n",
ret);
gi2c->err = ret;
goto err_config;
}
}
- } else {
/* Non multi descriptor message transfer */
*buf = dma_buf;
*dma_addr_p = addr;
- } return 0;
err_config:
- dma_unmap_single(gi2c->se.dev->parent, addr, msg->len, map_dirn);
- i2c_put_dma_safe_msg_buf(dma_buf, msg, false);
- dma_unmap_single(gi2c->se.dev->parent, addr,
msgs[msg_idx].len, map_dirn);
- i2c_put_dma_safe_msg_buf(dma_buf, &msgs[msg_idx], false);
+out:
- gi2c->err = ret; return ret;
} @@ -605,6 +715,7 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i unsigned long time_left; dma_addr_t tx_addr, rx_addr; void *tx_buf = NULL, *rx_buf = NULL;
- struct gpi_multi_xfer *tx_multi_xfer; const struct geni_i2c_clk_fld *itr = gi2c->clk_fld;
config.peripheral_config = &peripheral; @@ -618,6 +729,34 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.set_config = 1; peripheral.multi_msg = false;
- gi2c->gpi_config = &peripheral;
- gi2c->num_msgs = num;
- gi2c->is_tx_multi_xfer = false;
- gi2c->tx_irq_cnt = 0;
- tx_multi_xfer = &peripheral.multi_xfer;
- memset(tx_multi_xfer, 0, sizeof(struct gpi_multi_xfer));
- /*
* If number of write messages are four and higher then
Why four?
* configure hardware for multi descriptor transfers with BEI.
*/
- if (num >= MIN_NUM_OF_MSGS_MULTI_DESC) {
gi2c->is_tx_multi_xfer = true;
for (i = 0; i < num; i++) {
if (msgs[i].flags & I2C_M_RD) {
/*
* Multi descriptor transfer with BEI
* support is enabled for write transfers.
* Add BEI optimization support for read
* transfers later.
Prefix this comment with "TODO:"
*/
gi2c->is_tx_multi_xfer = false;
break;
}
}
- }
- for (i = 0; i < num; i++) { gi2c->cur = &msgs[i]; gi2c->err = 0;
@@ -628,14 +767,16 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i peripheral.stretch = 1; peripheral.addr = msgs[i].addr;
if (i > 0 && (!(msgs[i].flags & I2C_M_RD)))
peripheral.multi_msg = false;
ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
if (ret) goto err;ret = geni_i2c_gpi(gi2c, msgs, &config, &tx_addr, &tx_buf, I2C_WRITE, gi2c->tx_c);
if (msgs[i].flags & I2C_M_RD) {
ret = geni_i2c_gpi(gi2c, &msgs[i], &config,
ret = geni_i2c_gpi(gi2c, msgs, &config, &rx_addr, &rx_buf, I2C_READ, gi2c->rx_c); if (ret) goto err;
@@ -643,18 +784,26 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dma_async_issue_pending(gi2c->rx_c); }
dma_async_issue_pending(gi2c->tx_c);
time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
if (!time_left)
gi2c->err = -ETIMEDOUT;
if (!gi2c->is_tx_multi_xfer) {
dma_async_issue_pending(gi2c->tx_c);
time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT);
By making this conditional on !is_tx_multi_xfer transfers, what makes the loop wait for the transfer to complete before you below unmap the buffers?
if (!time_left) {
dev_err(gi2c->se.dev, "%s:I2C timeout\n", __func__);
gi2c->err = -ETIMEDOUT;
}
}
if (gi2c->err) { ret = gi2c->err; goto err; }
geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, rx_addr);
if (!gi2c->is_tx_multi_xfer) {
geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, rx_addr);
} else if (gi2c->tx_irq_cnt != tx_multi_xfer->irq_cnt) {
gi2c->tx_irq_cnt = tx_multi_xfer->irq_cnt;
gpi_i2c_multi_desc_unmap(gi2c, msgs, &peripheral);
}}
return num; @@ -663,7 +812,11 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "GPI transfer failed: %d\n", ret); dmaengine_terminate_sync(gi2c->rx_c); dmaengine_terminate_sync(gi2c->tx_c);
- geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, rx_addr);
- if (gi2c->is_tx_multi_xfer)
gpi_i2c_multi_desc_unmap(gi2c, msgs, &peripheral);
- else
geni_i2c_gpi_unmap(gi2c, &msgs[i], tx_buf, tx_addr, rx_buf, rx_addr);
As above, it would be nice if multi-xfer was just a special case with a single buffer; rather than inflating the cyclomatic complexity.
Regards, Bjorn
return ret; } -- 2.17.1
linaro-mm-sig@lists.linaro.org