The BAM has 3 channels - tx, rx and command. The command channel
is used for register reads/writes, the tx channel for data writes
and the rx channel for data reads. Currently, the driver assumes the
transfer is complete once all the command descriptors have
completed. Sometimes, there is a race condition between data channel
(tx/rx) and command channel completion. In these cases,
the data present in the buffer is not valid during the small window
between command descriptor completion and data descriptor
completion.
This patch generates the NAND transfer completion only when both
(data and command) DMA channels have completed all their DMA
descriptors. It assigns a completion callback to the last
DMA descriptor of each channel and waits for completion.
Fixes: 8d6b6d7e135e ("mtd: nand: qcom: support for command descriptor formation")
Cc: stable(a)vger.kernel.org
Acked-by: Miquel Raynal <miquel.raynal(a)bootlin.com>
Signed-off-by: Abhishek Sahu <absahu(a)codeaurora.org>
---
* Changes from v3:
1. NONE
* Changes from v2:
1. Changed commit message and comments slightly
2. Renamed first_chan_done to wait_second_completion and set
it before submitting the descriptors
3. Mark for stable tree
* Changes from v1:
NONE
drivers/mtd/nand/raw/qcom_nandc.c | 53 ++++++++++++++++++++++++++++++++++++++-
1 file changed, 52 insertions(+), 1 deletion(-)
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 2375780..fc20149 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -213,6 +213,8 @@
#define QPIC_PER_CW_CMD_SGL 32
#define QPIC_PER_CW_DATA_SGL 8
+#define QPIC_NAND_COMPLETION_TIMEOUT msecs_to_jiffies(2000)
+
/*
* Flags used in DMA descriptor preparation helper functions
* (i.e. read_reg_dma/write_reg_dma/read_data_dma/write_data_dma)
@@ -245,6 +247,11 @@
* @tx_sgl_start - start index in data sgl for tx.
* @rx_sgl_pos - current index in data sgl for rx.
* @rx_sgl_start - start index in data sgl for rx.
+ * @wait_second_completion - wait for second DMA desc completion before making
+ * the NAND transfer completion.
+ * @txn_done - completion for NAND transfer.
+ * @last_data_desc - last DMA desc in data channel (tx/rx).
+ * @last_cmd_desc - last DMA desc in command channel.
*/
struct bam_transaction {
struct bam_cmd_element *bam_ce;
@@ -258,6 +265,10 @@ struct bam_transaction {
u32 tx_sgl_start;
u32 rx_sgl_pos;
u32 rx_sgl_start;
+ bool wait_second_completion;
+ struct completion txn_done;
+ struct dma_async_tx_descriptor *last_data_desc;
+ struct dma_async_tx_descriptor *last_cmd_desc;
};
/*
@@ -504,6 +515,8 @@ static void free_bam_transaction(struct qcom_nand_controller *nandc)
bam_txn->data_sgl = bam_txn_buf;
+ init_completion(&bam_txn->txn_done);
+
return bam_txn;
}
@@ -523,11 +536,33 @@ static void clear_bam_transaction(struct qcom_nand_controller *nandc)
bam_txn->tx_sgl_start = 0;
bam_txn->rx_sgl_pos = 0;
bam_txn->rx_sgl_start = 0;
+ bam_txn->last_data_desc = NULL;
+ bam_txn->wait_second_completion = false;
sg_init_table(bam_txn->cmd_sgl, nandc->max_cwperpage *
QPIC_PER_CW_CMD_SGL);
sg_init_table(bam_txn->data_sgl, nandc->max_cwperpage *
QPIC_PER_CW_DATA_SGL);
+
+ reinit_completion(&bam_txn->txn_done);
+}
+
+/* Callback for DMA descriptor completion */
+static void qpic_bam_dma_done(void *data)
+{
+ struct bam_transaction *bam_txn = data;
+
+ /*
+ * In case of data transfer with NAND, 2 callbacks will be generated.
+ * One for command channel and another one for data channel.
+ * If current transaction has data descriptors
+ * (i.e. wait_second_completion is true), then set this to false
+ * and wait for second DMA descriptor completion.
+ */
+ if (bam_txn->wait_second_completion)
+ bam_txn->wait_second_completion = false;
+ else
+ complete(&bam_txn->txn_done);
}
static inline struct qcom_nand_host *to_qcom_nand_host(struct nand_chip *chip)
@@ -756,6 +791,12 @@ static int prepare_bam_async_desc(struct qcom_nand_controller *nandc,
desc->dma_desc = dma_desc;
+ /* update last data/command descriptor */
+ if (chan == nandc->cmd_chan)
+ bam_txn->last_cmd_desc = dma_desc;
+ else
+ bam_txn->last_data_desc = dma_desc;
+
list_add_tail(&desc->node, &nandc->desc_list);
return 0;
@@ -1273,10 +1314,20 @@ static int submit_descs(struct qcom_nand_controller *nandc)
cookie = dmaengine_submit(desc->dma_desc);
if (nandc->props->is_bam) {
+ bam_txn->last_cmd_desc->callback = qpic_bam_dma_done;
+ bam_txn->last_cmd_desc->callback_param = bam_txn;
+ if (bam_txn->last_data_desc) {
+ bam_txn->last_data_desc->callback = qpic_bam_dma_done;
+ bam_txn->last_data_desc->callback_param = bam_txn;
+ bam_txn->wait_second_completion = true;
+ }
+
dma_async_issue_pending(nandc->tx_chan);
dma_async_issue_pending(nandc->rx_chan);
+ dma_async_issue_pending(nandc->cmd_chan);
- if (dma_sync_wait(nandc->cmd_chan, cookie) != DMA_COMPLETE)
+ if (!wait_for_completion_timeout(&bam_txn->txn_done,
+ QPIC_NAND_COMPLETION_TIMEOUT))
return -ETIMEDOUT;
} else {
if (dma_sync_wait(nandc->chan, cookie) != DMA_COMPLETE)
--
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc.
is a member of Code Aurora Forum, hosted by The Linux Foundation
The commit 719f6a7040f1bdaf96fcc ("printk: Use the main logbuf in NMI when
logbuf_lock is available") tried to detect when logbuf_lock was taken
on another CPU. Then it looked safe to wait for the lock even in NMI.
It would be safe if other locks were not involved. Ironically the same
commit introduced an ABBA deadlock scenario. It added a spin lock into
nmi_cpu_backtrace() to serialize logs from different CPUs. The effect
is that the NMI handlers are serialized as well. As a result, logbuf_lock
might be blocked by NMI on another CPU:
CPU0 CPU1 CPU2
printk()
vprintk_emit()
spin_lock(&logbuf_lock)
trigger_all_cpu_backtrace()
raise()
nmi_enter()
printk_nmi_enter()
if (this_cpu_read(printk_context)
& PRINTK_SAFE_CONTEXT_MASK)
// false
else
// looks safe to use printk_deferred()
this_cpu_or(printk_context,
PRINTK_NMI_DEFERRED_CONTEXT_MASK);
nmi_cpu_backtrace()
arch_spin_lock(&lock);
show_regs()
nmi_enter()
nmi_cpu_backtrace()
arch_spin_lock(&lock);
printk()
vprintk_func()
vprintk_deferred()
vprintk_emit()
spin_lock(&logbuf_lock)
DEADLOCK: between &logbuf_lock from vprintk_emit() and
&lock from nmi_cpu_backtrace().
CPU0 CPU1
lock(logbuf_lock) lock(lock)
lock(lock) lock(logbuf_lock)
I found this problem when stress testing trigger_all_cpu_backtrace():
the system froze.
Note that lockdep is not able to detect these dependencies because
there is no support for NMI context. Let's stay on the safe side
and always use printk_safe buffers when logbuf_lock is taken
when entering NMI.
Fixes: 719f6a7040f1bdaf96fcc ("printk: Use the main logbuf in NMI when logbuf_lock is available")
Cc: 4.13+ <stable(a)vger.kernel.org> # v4.13+
Signed-off-by: Petr Mladek <pmladek(a)suse.com>
---
kernel/printk/printk_safe.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index 449d67edfa4b..a2ebd749c053 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -310,15 +310,12 @@ void printk_nmi_enter(void)
{
/*
* The size of the extra per-CPU buffer is limited. Use it only when
- * the main one is locked. If this CPU is not in the safe context,
- * the lock must be taken on another CPU and we could wait for it.
+ * the main one is locked.
*/
- if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) &&
- raw_spin_is_locked(&logbuf_lock)) {
+ if (raw_spin_is_locked(&logbuf_lock))
this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
- } else {
+ else
this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK);
- }
}
void printk_nmi_exit(void)
--
2.13.6
SPC5r17 states that the contents of the ADDITIONAL LENGTH field are not
altered based on the allocation length, so always calculate and pack the
full key list length even if the list itself is truncated.
According to Maged:
Yes it fixes the "Storage Spaces Persistent Reservation" test in the
Windows 2016 Server Failover Cluster validation suites when having
many connections that result in more than 8 registrations. I tested
your patch on 4.17 with iblock.
This behaviour can be tested using the libiscsi PrinReadKeys.Truncate
test.
Cc: stable(a)vger.kernel.org
Signed-off-by: David Disseldorp <ddiss(a)suse.de>
Reviewed-by: Mike Christie <mchristi(a)redhat.com>
Tested-by: Maged Mokhtar <mmokhtar(a)petasan.org>
Reviewed-by: Christoph Hellwig <hch(a)lst.de>
---
Changes since v2:
* drop unnecessary braces
* add Christoph's Reviewed-by
Changes since v1:
* CC stable
* mention Maged's Windows PR test fix comment in commit message
* add Reviewed-by and Tested-by tags
drivers/target/target_core_pr.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 01ac306131c1..10db5656fd5d 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -3727,11 +3727,16 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
* Check for overflow of 8byte PRI READ_KEYS payload and
* next reservation key list descriptor.
*/
- if ((add_len + 8) > (cmd->data_length - 8))
- break;
-
- put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
- off += 8;
+ if (off + 8 <= cmd->data_length) {
+ put_unaligned_be64(pr_reg->pr_res_key, &buf[off]);
+ off += 8;
+ }
+ /*
+ * SPC5r17: 6.16.2 READ KEYS service action
+ * The ADDITIONAL LENGTH field indicates the number of bytes in
+ * the Reservation key list. The contents of the ADDITIONAL
+ * LENGTH field are not altered based on the allocation length
+ */
add_len += 8;
}
spin_unlock(&dev->t10_pr.registration_lock);
--
2.13.7
Currently, when all modules, including VMCI and VMware balloon, are built
into the kernel, the initialization of the balloon happens before the
VMCI is probed. As a result, the balloon fails to initialize the VMCI
doorbell, which it uses to get asynchronous requests for balloon size
changes.
The problem can be seen in the logs, in the form of the following
message:
"vmw_balloon: failed to initialize vmci doorbell"
The driver would work correctly but slightly less efficiently, probing
for requests periodically. This patch changes the balloon to be
initialized using late_initcall() instead of module_init() to address
this issue. It does not address a situation in which VMCI is built as a
module and the balloon is built into the kernel.
Fixes: 48e3d668b790 ("VMware balloon: Enable notification via VMCI")
Cc: stable(a)vger.kernel.org
Reviewed-by: Xavier Deguillard <xdeguillard(a)vmware.com>
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
drivers/misc/vmw_balloon.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index a7df4c24a28d..e7cfc85f6961 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -1297,7 +1297,14 @@ static int __init vmballoon_init(void)
return 0;
}
-module_init(vmballoon_init);
+
+/*
+ * Using late_initcall() instead of module_init() allows the balloon to use the
+ * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
+ * VMCI is probed only after the balloon is initialized. If the balloon is used
+ * as a module, late_initcall() is equivalent to module_init().
+ */
+late_initcall(vmballoon_init);
static void __exit vmballoon_exit(void)
{
--
2.17.0
If the hypervisor sets 2MB batching on while batching is cleared,
the balloon code breaks. In this case the legacy mechanism is used with
a 2MB page. The VM would report that a 2MB page is ballooned, and the
hypervisor would only take the first 4KB.
While the hypervisor should not report such settings, make the code more
robust by not enabling 2MB support without batching.
Fixes: 365bd7ef7ec8e ("VMware balloon: Support 2m page ballooning.")
Cc: stable(a)vger.kernel.org
Reviewed-by: Xavier Deguillard <xdeguillard(a)vmware.com>
Signed-off-by: Nadav Amit <nadav.amit(a)gmail.com>
---
drivers/misc/vmw_balloon.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c
index 28e77ab1e136..60ab83d3d0ef 100644
--- a/drivers/misc/vmw_balloon.c
+++ b/drivers/misc/vmw_balloon.c
@@ -341,7 +341,13 @@ static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
success = false;
}
- if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
+ /*
+ * 2MB pages are only supported with batching. If batching is for some
+ * reason disabled, do not use 2MB pages, since otherwise the legacy
+ * mechanism is used with 2MB pages, causing a failure.
+ */
+ if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
+ (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
b->supported_page_sizes = 2;
else
b->supported_page_sizes = 1;
--
2.17.0
The patch titled
Subject: slub: fix __kmem_cache_empty for !CONFIG_SLUB_DEBUG
has been added to the -mm tree. Its filename is
slub-fix-__kmem_cache_empty-for-config_slub_debug.patch
This patch should soon appear at
http://ozlabs.org/~akpm/mmots/broken-out/slub-fix-__kmem_cache_empty-for-co…
and later at
http://ozlabs.org/~akpm/mmotm/broken-out/slub-fix-__kmem_cache_empty-for-co…
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next and is updated
there every 3-4 working days
------------------------------------------------------
From: Shakeel Butt <shakeelb(a)google.com>
Subject: slub: fix __kmem_cache_empty for !CONFIG_SLUB_DEBUG
f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
causes crashes when using slub, as described at
http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQ…
For !CONFIG_SLUB_DEBUG, SLUB does not maintain the number of slabs
allocated per node for a kmem_cache. Thus, slabs_node() in
__kmem_cache_empty() will always return 0. So, in such a situation, it is
required to check per-cpu slabs to determine whether a kmem_cache is empty
or not.
Please note that __kmem_cache_shutdown() and __kmem_cache_shrink() are not
affected by !CONFIG_SLUB_DEBUG as they call flush_all() to clear per-cpu
slabs.
Link: http://lkml.kernel.org/r/20180619213352.71740-1-shakeelb@google.com
Link: http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQ…
Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()")
Signed-off-by: Shakeel Butt <shakeelb(a)google.com>
Reported-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Tested-by: Jason A. Donenfeld <Jason(a)zx2c4.com>
Cc: Christoph Lameter <cl(a)linux.com>
Cc: Pekka Enberg <penberg(a)kernel.org>
Cc: David Rientjes <rientjes(a)google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim(a)lge.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
mm/slub.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)
diff -puN mm/slub.c~slub-fix-__kmem_cache_empty-for-config_slub_debug mm/slub.c
--- a/mm/slub.c~slub-fix-__kmem_cache_empty-for-config_slub_debug
+++ a/mm/slub.c
@@ -3673,9 +3673,23 @@ static void free_partial(struct kmem_cac
bool __kmem_cache_empty(struct kmem_cache *s)
{
- int node;
+ int cpu, node;
struct kmem_cache_node *n;
+ /*
+ * slabs_node will always be 0 for !CONFIG_SLUB_DEBUG. So, manually
+ * check slabs for all cpus.
+ */
+ if (!IS_ENABLED(CONFIG_SLUB_DEBUG)) {
+ for_each_online_cpu(cpu) {
+ struct kmem_cache_cpu *c;
+
+ c = per_cpu_ptr(s->cpu_slab, cpu);
+ if (c->page || slub_percpu_partial(c))
+ return false;
+ }
+ }
+
for_each_kmem_cache_node(s, node, n)
if (n->nr_partial || slabs_node(s, node))
return false;
_
Patches currently in -mm which might be from shakeelb(a)google.com are
slub-fix-__kmem_cache_empty-for-config_slub_debug.patch