This patch looks up existing nodes instead of always creating them when dlm_midcomms_addr() is called. The idea is to create midcomms nodes when user space is informed that a node has joined the cluster. That is the case when dlm_midcomms_addr() is called; however, user space can call it multiple times to add several address configurations to one node, e.g. when using SCTP. These repeated calls need to be filtered out, and we do that by looking up whether the node already exists. Because of the configfs entry it is guaranteed that this function is only called once at a time.
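To illustrate the filtering, here is a minimal userspace sketch of the lookup-before-create pattern (hypothetical names, a plain list instead of the SRCU-protected hash table; not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct node {
	int nodeid;
	struct node *next;
};

static struct node *nodes;	/* stands in for the node hash table */

/* idempotent: repeated calls for the same nodeid create only one node */
static int node_addr_add(int nodeid)
{
	struct node *n;

	for (n = nodes; n; n = n->next)
		if (n->nodeid == nodeid)
			return 0;	/* already known, e.g. a second SCTP address */

	n = malloc(sizeof(*n));
	if (!n)
		return -1;
	n->nodeid = nodeid;
	n->next = nodes;
	nodes = n;
	return 0;
}

int main(void)
{
	struct node *n;

	node_addr_add(2);
	node_addr_add(2);	/* filtered out, no duplicate node */
	for (n = nodes; n; n = n->next)
		printf("node %d\n", n->nodeid);
	return 0;
}

The second call returns early, mirroring the new __find_node() check before kmalloc() in the diff below.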
Cc: stable@vger.kernel.org
Fixes: 63e711b08160 ("fs: dlm: create midcomms nodes when configure")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/midcomms.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index f641b36a36db..455265c6ba53 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -337,13 +337,21 @@ static struct midcomms_node *nodeid2node(int nodeid)
 
 int dlm_midcomms_addr(int nodeid, struct sockaddr_storage *addr, int len)
 {
-	int ret, r = nodeid_hash(nodeid);
+	int ret, idx, r = nodeid_hash(nodeid);
 	struct midcomms_node *node;
 
 	ret = dlm_lowcomms_addr(nodeid, addr, len);
 	if (ret)
 		return ret;
 
+	idx = srcu_read_lock(&nodes_srcu);
+	node = __find_node(nodeid, r);
+	if (node) {
+		srcu_read_unlock(&nodes_srcu, idx);
+		return 0;
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+
 	node = kmalloc(sizeof(*node), GFP_NOFS);
 	if (!node)
 		return -ENOMEM;
The idea of commit 63e711b08160 ("fs: dlm: create midcomms nodes when configure") is to tie the midcomms node lifetime to the time a node joins and leaves the cluster. Currently we can hit the following warning:
[10844.611495] ------------[ cut here ]------------
[10844.615913] WARNING: CPU: 4 PID: 84304 at fs/dlm/midcomms.c:1263 dlm_midcomms_remove_member+0x13f/0x180 [dlm]
or end up in a state where a midcomms node usage count becomes negative:
[ 260.830782] node 2 users dec count -1
The first warning happens when a specific node does not exist; it was probably removed by dlm_midcomms_close(), which is called when a node leaves the cluster. The second kernel log message probably occurs when dlm_midcomms_addr() is called after a node joined the cluster, but due to fencing the node left the cluster without being removed from the lockspace. If the node rejoins the cluster after having been removed because of fencing, the first call triggered by user space is to remove the node from the lockspaces. In both cases, if the node wasn't found or its user count is zero, we should skip any further midcomms handling in dlm_midcomms_remove_member().
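The guard can be pictured with this small standalone sketch (hypothetical, reduced to a single node and no locking; not the kernel code):

#include <stdio.h>

struct node {
	int exists;	/* cleared when dlm_midcomms_close() removes the node */
	int users;
};

static void remove_member(struct node *n)
{
	if (!n->exists)
		return;	/* node already removed, nothing to do */

	if (!n->users) {
		/* node was created by the addr callback but never
		 * added to a lockspace, so there is nothing to drop
		 */
		return;
	}

	n->users--;
	printf("users dec count %d\n", n->users);
}

int main(void)
{
	struct node n = { .exists = 1, .users = 0 };

	remove_member(&n);	/* would previously have underflowed to -1 */
	return 0;
}

With the two early returns the counter can no longer go negative, which is exactly what the diff below adds.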
Cc: stable@vger.kernel.org
Fixes: 63e711b08160 ("fs: dlm: create midcomms nodes when configure")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/midcomms.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 455265c6ba53..4ad71e97cec2 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1268,12 +1268,23 @@ void dlm_midcomms_remove_member(int nodeid)
 
 	idx = srcu_read_lock(&nodes_srcu);
 	node = nodeid2node(nodeid);
-	if (WARN_ON_ONCE(!node)) {
+	/* in case of dlm_midcomms_close() removes node */
+	if (!node) {
 		srcu_read_unlock(&nodes_srcu, idx);
 		return;
 	}
 
 	spin_lock(&node->state_lock);
+	/* case of dlm_midcomms_addr() created node but
+	 * was not added before because dlm_midcomms_close()
+	 * removed the node
+	 */
+	if (!node->users) {
+		spin_unlock(&node->state_lock);
+		srcu_read_unlock(&nodes_srcu, idx);
+		return;
+	}
+
 	node->users--;
 	pr_debug("node %d users dec count %d\n", nodeid, node->users);
In case we are running a forced shutdown in either the midcomms or the lowcomms implementation, we make sure that all per-node midcomms information is reset.
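A minimal model of the idea, walking every node and resetting its per-node state (hypothetical types, a plain array instead of the RCU hash lists; not the kernel code):

#include <stdio.h>

enum node_state { ESTABLISHED, CLOSED };

struct node {
	int nodeid;
	enum node_state state;
	unsigned int seq_send;
	unsigned int seq_next;
};

#define NUM_NODES 2

static struct node nodes[NUM_NODES] = {
	{ .nodeid = 1, .state = ESTABLISHED, .seq_send = 7, .seq_next = 7 },
	{ .nodeid = 2, .state = ESTABLISHED, .seq_send = 3, .seq_next = 5 },
};

/* forget connection state and any in-flight sequence numbers */
static void node_reset(struct node *n)
{
	n->state = CLOSED;
	n->seq_send = 0;
	n->seq_next = 0;
}

static void force_shutdown(void)
{
	int i;

	for (i = 0; i < NUM_NODES; i++)
		node_reset(&nodes[i]);
}

int main(void)
{
	int i;

	force_shutdown();
	for (i = 0; i < NUM_NODES; i++)
		printf("node %d state %d seq_send %u seq_next %u\n",
		       nodes[i].nodeid, nodes[i].state,
		       nodes[i].seq_send, nodes[i].seq_next);
	return 0;
}

After a forced shutdown no node keeps stale sequence state, matching the midcomms_node_reset() loop the diff below runs after dlm_lowcomms_shutdown().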
Cc: stable@vger.kernel.org
Fixes: 63e711b08160 ("fs: dlm: create midcomms nodes when configure")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/midcomms.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 4ad71e97cec2..6bc8d7f89b2c 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1405,10 +1405,16 @@ void dlm_midcomms_shutdown(void)
 			midcomms_shutdown(node);
 		}
 	}
-	srcu_read_unlock(&nodes_srcu, idx);
-	mutex_unlock(&close_lock);
 
 	dlm_lowcomms_shutdown();
+
+	for (i = 0; i < CONN_HASH_SIZE; i++) {
+		hlist_for_each_entry_rcu(node, &node_hash[i], hlist) {
+			midcomms_node_reset(node);
+		}
+	}
+	srcu_read_unlock(&nodes_srcu, idx);
+	mutex_unlock(&close_lock);
 }
 
 int dlm_midcomms_close(int nodeid)
In case of a final DLM message we should not send an ack out after that final message. This patch moves the ack sending to before the message is transmitted. If it is the final message and the receiving node has transitioned into the DLM_CLOSED state, another ack message would be received afterwards and would turn the receiving node into DLM_ESTABLISHED again.
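The ordering problem can be modeled with a tiny receiver state machine (a hypothetical sketch of the behaviour described above, not the kernel code):

#include <stdio.h>

enum peer_state { DLM_ESTABLISHED, DLM_CLOSED };

static enum peer_state peer;

static void recv_final_msg(void)
{
	peer = DLM_CLOSED;	/* the final message tears the peer down */
}

static void recv_ack(void)
{
	peer = DLM_ESTABLISHED;	/* incoming traffic re-establishes the peer */
}

int main(void)
{
	/* old order: final message first, ack afterwards */
	peer = DLM_ESTABLISHED;
	recv_final_msg();
	recv_ack();
	printf("old order: %s\n",
	       peer == DLM_CLOSED ? "closed" : "wrongly established");

	/* fixed order: ack first, then the final message */
	peer = DLM_ESTABLISHED;
	recv_ack();
	recv_final_msg();
	printf("new order: %s\n",
	       peer == DLM_CLOSED ? "closed" : "wrongly established");
	return 0;
}

Sending the ack before the (possibly final) message leaves the peer in DLM_CLOSED, which is what moving dlm_send_ack_threshold() achieves in the diff below.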
Cc: stable@vger.kernel.org
Fixes: 1696c75f1864 ("fs: dlm: add send ack threshold and append acks to msgs")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/midcomms.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 6bc8d7f89b2c..2247ebb61be1 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1038,15 +1038,15 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 		break;
 	case DLM_VERSION_3_2:
+		/* send ack back if necessary */
+		dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD);
+
 		msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation,
 					       ppc);
 		if (!msg) {
 			dlm_free_mhandle(mh);
 			goto err;
 		}
-
-		/* send ack back if necessary */
-		dlm_send_ack_threshold(node, DLM_SEND_ACK_BACK_MSG_THRESHOLD);
 		break;
 	default:
 		dlm_free_mhandle(mh);
This patch fixes the following compiler warning when compiling with -Wformat-truncation:
fs/dlm/debug_fs.c:1031:50: warning: '_queued_asts' directive output may be truncated writing 12 bytes into a region of size between 8 and 72
We simply increase the additional number of bytes to 13, because the longest suffix "_queued_asts" (12 characters plus the terminating NUL) does not fit into 8 extra bytes when the whole lockspace name length is used.
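The arithmetic can be verified with a few lines of standalone C (a sketch assuming DLM_LOCKSPACE_LEN is 64 as in the uapi header; not the kernel code):

#include <stdio.h>
#include <string.h>

#define DLM_LOCKSPACE_LEN 64
#define DLM_DEBUG_NAME_LEN (DLM_LOCKSPACE_LEN + 13)

int main(void)
{
	char name[DLM_DEBUG_NAME_LEN];
	char ls_name[DLM_LOCKSPACE_LEN + 1];

	/* maximum-length lockspace name */
	memset(ls_name, 'x', DLM_LOCKSPACE_LEN);
	ls_name[DLM_LOCKSPACE_LEN] = '\0';

	printf("suffix needs %zu bytes\n", strlen("_queued_asts") + 1);
	snprintf(name, sizeof(name), "%s_queued_asts", ls_name);
	printf("%s\n", name);	/* fits, no truncation */
	return 0;
}

64 name bytes plus 12 suffix characters plus the NUL is exactly DLM_LOCKSPACE_LEN + 13, so the new buffer size is sufficient for the longest format name.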
Fixes: 541adb0d4d10 ("fs: dlm: debugfs for queued callbacks")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/debug_fs.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 5aabcb6f0f15..698d6b7a20f8 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -20,6 +20,7 @@
 #include "lock.h"
 #include "ast.h"
 
+#define DLM_DEBUG_NAME_LEN (DLM_LOCKSPACE_LEN + 13)
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
 static struct mutex debug_buf_lock;
@@ -973,7 +974,7 @@ void dlm_delete_debug_comms_file(void *ctx)
 
 void dlm_create_debug_file(struct dlm_ls *ls)
 {
-	char name[DLM_LOCKSPACE_LEN + 8];
+	char name[DLM_DEBUG_NAME_LEN];
 
 	/* format 1 */
 
@@ -986,7 +987,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	/* format 2 */
 
 	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_locks", ls->ls_name);
+	snprintf(name, DLM_DEBUG_NAME_LEN, "%s_locks", ls->ls_name);
 
 	ls->ls_debug_locks_dentry = debugfs_create_file(name, 0644,
@@ -997,7 +998,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	/* format 3 */
 
 	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_all", ls->ls_name);
+	snprintf(name, DLM_DEBUG_NAME_LEN, "%s_all", ls->ls_name);
 
 	ls->ls_debug_all_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
@@ -1008,7 +1009,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	/* format 4 */
 
 	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_toss", ls->ls_name);
+	snprintf(name, DLM_DEBUG_NAME_LEN, "%s_toss", ls->ls_name);
 
 	ls->ls_debug_toss_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
@@ -1017,7 +1018,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 						       &format4_fops);
 
 	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name);
+	snprintf(name, DLM_DEBUG_NAME_LEN, "%s_waiters", ls->ls_name);
 
 	ls->ls_debug_waiters_dentry = debugfs_create_file(name, 0644,
@@ -1028,7 +1029,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	/* format 5 */
 
 	memset(name, 0, sizeof(name));
-	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_queued_asts", ls->ls_name);
+	snprintf(name, DLM_DEBUG_NAME_LEN, "%s_queued_asts", ls->ls_name);
 
 	ls->ls_debug_queued_asts_dentry = debugfs_create_file(name, 0644,
If there is a burst of messages, the receive worker fills up the processing queue, but we are too slow to process the dlm messages. This patch slows down the receive worker so the buffer stays on the socket layer and the sender is told to back off. This is done with a threshold: when it is exceeded, all pending messages are processed via a flush_workqueue() before the next buffers are fetched from the socket. The flush only occurs when we have a burst, like in my dlm_bursttest [0] testcase, which creates 1 million locks on module init and unlocks them on module exit. If we keep adding new messages to the processqueue faster than they are processed, we soon run out of memory. A standalone sketch of this backpressure idea follows the diff below. The testcase to reproduce this issue can be found at:
https://gitlab.com/netcoder/linux-public/-/blob/dlm_test_burst/fs/dlm/dlm_bu...

Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
 fs/dlm/lowcomms.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index f7bc22e74db2..67f8dd8a05ef 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -63,6 +63,7 @@
 #include "config.h"
 
 #define DLM_SHUTDOWN_WAIT_TIMEOUT msecs_to_jiffies(5000)
+#define DLM_MAX_PROCESS_BUFFERS 24
 #define NEEDED_RMEM (4*1024*1024)
 
 struct connection {
@@ -194,6 +195,7 @@ static const struct dlm_proto_ops *dlm_proto_ops;
 #define DLM_IO_END 1
 #define DLM_IO_EOF 2
 #define DLM_IO_RESCHED 3
+#define DLM_IO_FLUSH 4
 
 static void process_recv_sockets(struct work_struct *work);
 static void process_send_sockets(struct work_struct *work);
@@ -202,6 +204,7 @@ static void process_dlm_messages(struct work_struct *work);
 static DECLARE_WORK(process_work, process_dlm_messages);
 static DEFINE_SPINLOCK(processqueue_lock);
 static bool process_dlm_messages_pending;
+static atomic_t processqueue_count;
 static LIST_HEAD(processqueue);
 
 bool dlm_lowcomms_is_running(void)
@@ -874,6 +877,7 @@ static void process_dlm_messages(struct work_struct *work)
 	}
 
 	list_del(&pentry->list);
+	atomic_dec(&processqueue_count);
 	spin_unlock(&processqueue_lock);
 
 	for (;;) {
@@ -891,6 +895,7 @@ static void process_dlm_messages(struct work_struct *work)
 		}
 
 		list_del(&pentry->list);
+		atomic_dec(&processqueue_count);
 		spin_unlock(&processqueue_lock);
 	}
 }
@@ -962,6 +967,7 @@ static int receive_from_sock(struct connection *con, int buflen)
 			con->rx_leftover);
 
 	spin_lock(&processqueue_lock);
+	ret = atomic_inc_return(&processqueue_count);
 	list_add_tail(&pentry->list, &processqueue);
 	if (!process_dlm_messages_pending) {
 		process_dlm_messages_pending = true;
@@ -969,6 +975,9 @@ static int receive_from_sock(struct connection *con, int buflen)
 	}
 	spin_unlock(&processqueue_lock);
 
+	if (ret > DLM_MAX_PROCESS_BUFFERS)
+		return DLM_IO_FLUSH;
+
 	return DLM_IO_SUCCESS;
 }
 
@@ -1503,6 +1512,9 @@ static void process_recv_sockets(struct work_struct *work)
 		wake_up(&con->shutdown_wait);
 		/* CF_RECV_PENDING cleared */
 		break;
+	case DLM_IO_FLUSH:
+		flush_workqueue(process_workqueue);
+		fallthrough;
 	case DLM_IO_RESCHED:
 		cond_resched();
 		queue_work(io_workqueue, &con->rwork);
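As mentioned above, here is a standalone model of the threshold-based backpressure (single-threaded sketch, plain int instead of atomic_t, flush_processqueue() standing in for flush_workqueue(); not the kernel code):

#include <stdio.h>

#define DLM_MAX_PROCESS_BUFFERS 24

static int processqueue_count;

/* stand-in for flush_workqueue(): drain everything that is queued */
static void flush_processqueue(void)
{
	processqueue_count = 0;
}

/* receive path: queue one buffer and ask the caller to flush above
 * the threshold, so the socket buffer fills up and the sender backs off
 */
static int receive_one(void)
{
	if (++processqueue_count > DLM_MAX_PROCESS_BUFFERS)
		return 1;	/* DLM_IO_FLUSH */
	return 0;		/* DLM_IO_SUCCESS */
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (receive_one()) {
			printf("flush after %d queued buffers\n",
			       processqueue_count);
			flush_processqueue();
		}
	}
	return 0;
}

In this model the queue length is bounded by the threshold plus one; in the real patch the atomic counter lets the concurrent receive and processing workers make the same decision safely.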