On 10-Jul-2023, at 7:24 AM, Steven Rostedt rostedt@goodmis.org wrote:
!! External Email
On Mon, 3 Jul 2023 15:52:26 -0400 Steven Rostedt rostedt@goodmis.org wrote:
On Mon, 3 Jul 2023 18:51:22 +0000 Ajay Kaher akaher@vmware.com wrote:
We can also look to see if we can implement this with RCU. What exactly is this rwsem protecting?
- struct eventfs_file holds the meta-data for file or dir.
https://github.com/intel-lab-lkp/linux/blob/dfe0dc15a73261ed83cdc728e43f4b3d...
- eventfs_rwsem is supposed to protect the 'linked list which is made of struct eventfs_file' and the elements of struct eventfs_file.
RCU is usually the perfect solution for protecting linked lists though. I'll take a look at this when I get back to work.
So I did the below patch on top of this series. If you could fold this into the appropriate patches, it should get us closer to an acceptable solution.
What I did was:
- Moved the struct eventfs_file and eventfs_inode into event_inode.c as it
really should not be exposed to all users.
- Added a recursion check to eventfs_remove_rec() as it is really
dangerous to have unchecked recursion in the kernel (we do have a fixed size stack).
- Removed all the eventfs_rwsem code and replaced it with an srcu lock for
the readers, and a mutex to synchronize the writers of the list.
- Added an eventfs_mutex that is used for the modifications of the
dentry itself (as well as modifying the list from item 3 above).
- Have the free use srcu callbacks. After the srcu grace periods are done,
it adds the eventfs_file onto a llist (lockless linked list) and wakes up a work queue. Then the work queue does the freeing (this needs to be done in task/workqueue context, as srcu callbacks are done in softirq context).
This appears to pass through some of my instance stress tests as well as the in tree ftrace selftests.
Awesome :)
I have manually applied the patches and the ftracetest results are the same as v3. No more complaints from lockdep.
I will merge this into the appropriate patches of v3 and send v4 soon.
You have renamed eventfs_create_dir() to create_dir(), and kept eventfs_create_dir() as just a wrapper that takes the lock; same for eventfs_create_file(). However, these wrappers are not used anywhere, so I will drop them.
I was trying to have an independent lock for each instance of events, as a common lock for every instance of events is not a must.
Something was broken in your mail (I guess the Cc list), and it either couldn't reach lkml or was ignored by lkml. I just wanted to track the auto test results from linux-kselftest.
-Ajay
fs/tracefs/event_inode.c | 333 ++++++++++++++++++++++---------------------- include/linux/tracefs.h | 26 --- kernel/trace/trace.h | 1 kernel/trace/trace_events.c | 6 4 files changed, 179 insertions(+), 187 deletions(-)
Index: linux-trace.git/fs/tracefs/event_inode.c
--- linux-trace.git.orig/fs/tracefs/event_inode.c 2023-07-07 22:04:44.490812310 -0400 +++ linux-trace.git/fs/tracefs/event_inode.c 2023-07-09 21:48:28.162874719 -0400 @@ -16,71 +16,69 @@ #include <linux/fsnotify.h> #include <linux/fs.h> #include <linux/namei.h> +#include <linux/workqueue.h> #include <linux/security.h> #include <linux/tracefs.h> #include <linux/kref.h> #include <linux/delay.h> #include "internal.h"
-/**
- eventfs_dentry_to_rwsem - Return corresponding eventfs_rwsem
- @dentry: a pointer to dentry
- helper function to return crossponding eventfs_rwsem for given dentry
- */
-static struct rw_semaphore *eventfs_dentry_to_rwsem(struct dentry *dentry) -{
if (S_ISDIR(dentry->d_inode->i_mode))
return (struct rw_semaphore *)dentry->d_inode->i_private;
else
return (struct rw_semaphore *)dentry->d_parent->d_inode->i_private;
-} +struct eventfs_inode {
struct list_head e_top_files;
+};
-/**
- eventfs_down_read - acquire read lock function
- @eventfs_rwsem: a pointer to rw_semaphore
- helper function to perform read lock. Nested locking requires because
- lookup(), release() requires read lock, these could be called directly
- or from open(), remove() which already hold the read/write lock.
- */
-static void eventfs_down_read(struct rw_semaphore *eventfs_rwsem) -{
down_read_nested(eventfs_rwsem, SINGLE_DEPTH_NESTING);
-} +struct eventfs_file {
const char *name;
struct dentry *d_parent;
struct dentry *dentry;
struct list_head list;
struct eventfs_inode *ei;
const struct file_operations *fop;
const struct inode_operations *iop;
union {
struct rcu_head rcu;
struct llist_node llist; /* For freeing after RCU */
};
void *data;
umode_t mode;
bool created;
+};
-/**
- eventfs_up_read - release read lock function
- @eventfs_rwsem: a pointer to rw_semaphore
- helper function to release eventfs_rwsem lock if locked
- */
-static void eventfs_up_read(struct rw_semaphore *eventfs_rwsem) -{
up_read(eventfs_rwsem);
-} +static DEFINE_MUTEX(eventfs_mutex); +DEFINE_STATIC_SRCU(eventfs_srcu);
-/**
- eventfs_down_write - acquire write lock function
- @eventfs_rwsem: a pointer to rw_semaphore
- helper function to perform write lock on eventfs_rwsem
- */
-static void eventfs_down_write(struct rw_semaphore *eventfs_rwsem) +static struct dentry *create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fop)
{
while (!down_write_trylock(eventfs_rwsem))
msleep(10);
-}
struct tracefs_inode *ti;
struct dentry *dentry;
struct inode *inode;
-/**
- eventfs_up_write - release write lock function
- @eventfs_rwsem: a pointer to rw_semaphore
- helper function to perform write lock on eventfs_rwsem
- */
-static void eventfs_up_write(struct rw_semaphore *eventfs_rwsem) -{
up_write(eventfs_rwsem);
if (!(mode & S_IFMT))
mode |= S_IFREG;
if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
dentry = eventfs_start_creating(name, parent);
if (IS_ERR(dentry))
return dentry;
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
return eventfs_failed_creating(dentry);
inode->i_mode = mode;
inode->i_fop = fop;
inode->i_private = data;
ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
return eventfs_end_creating(dentry);
}
/** @@ -111,21 +109,30 @@ static struct dentry *eventfs_create_fil struct dentry *parent, void *data, const struct file_operations *fop) {
struct tracefs_inode *ti; struct dentry *dentry;
struct inode *inode; if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL;
if (!(mode & S_IFMT))
mode |= S_IFREG;
mutex_lock(&eventfs_mutex);
dentry = create_file(name, mode, parent, data, fop);
mutex_unlock(&eventfs_mutex);
if (WARN_ON_ONCE(!S_ISREG(mode)))
return NULL;
return dentry;
+}
dentry = eventfs_start_creating(name, parent);
+static struct dentry *create_dir(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fop,
const struct inode_operations *iop)
+{
struct tracefs_inode *ti;
struct dentry *dentry;
struct inode *inode;
WARN_ON(!S_ISDIR(mode));
dentry = eventfs_start_creating(name, parent); if (IS_ERR(dentry)) return dentry;
@@ -134,13 +141,17 @@ static struct dentry *eventfs_create_fil return eventfs_failed_creating(dentry);
inode->i_mode = mode;
inode->i_op = iop; inode->i_fop = fop; inode->i_private = data; ti = get_tracefs(inode); ti->flags |= TRACEFS_EVENT_INODE;
inc_nlink(inode); d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
inc_nlink(dentry->d_parent->d_inode);
fsnotify_mkdir(dentry->d_parent->d_inode, dentry); return eventfs_end_creating(dentry);
}
@@ -175,37 +186,18 @@ static struct dentry *eventfs_create_dir const struct file_operations *fop, const struct inode_operations *iop) {
struct tracefs_inode *ti; struct dentry *dentry;
struct inode *inode; if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; WARN_ON(!S_ISDIR(mode));
dentry = eventfs_start_creating(name, parent);
if (IS_ERR(dentry))
return dentry;
inode = tracefs_get_inode(dentry->d_sb);
if (unlikely(!inode))
return eventfs_failed_creating(dentry);
mutex_lock(&eventfs_mutex);
dentry = create_dir(name, mode, parent, data, fop, iop);
mutex_unlock(&eventfs_mutex);
inode->i_mode = mode;
inode->i_op = iop;
inode->i_fop = fop;
inode->i_private = data;
ti = get_tracefs(inode);
ti->flags |= TRACEFS_EVENT_INODE;
inc_nlink(inode);
d_instantiate(dentry, inode);
inc_nlink(dentry->d_parent->d_inode);
fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
return eventfs_end_creating(dentry);
return dentry;
}
/** @@ -241,13 +233,14 @@ static void eventfs_post_create_dir(stru { struct eventfs_file *ef_child; struct tracefs_inode *ti;
int idx;
eventfs_down_read((struct rw_semaphore *) ef->data);
/* srcu lock already held */ /* fill parent-child relation */
list_for_each_entry(ef_child, &ef->ei->e_top_files, list) {
list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
srcu_read_lock_held(&eventfs_srcu)) { ef_child->d_parent = ef->dentry; }
eventfs_up_read((struct rw_semaphore *) ef->data); ti = get_tracefs(ef->dentry->d_inode); ti->private = ef->ei;
@@ -271,40 +264,43 @@ static struct dentry *eventfs_root_looku struct eventfs_inode *ei; struct eventfs_file *ef; struct dentry *ret = NULL;
struct rw_semaphore *eventfs_rwsem;
int idx; ti = get_tracefs(dir); if (!(ti->flags & TRACEFS_EVENT_INODE)) return NULL; ei = ti->private;
eventfs_rwsem = (struct rw_semaphore *) dir->i_private;
eventfs_down_read(eventfs_rwsem);
list_for_each_entry(ef, &ei->e_top_files, list) {
idx = srcu_read_lock(&eventfs_srcu);
list_for_each_entry_srcu(ef, &ei->e_top_files, list,
srcu_read_lock_held(&eventfs_srcu)) { if (strcmp(ef->name, dentry->d_name.name)) continue; ret = simple_lookup(dir, dentry, flags); if (ef->created) continue;
mutex_lock(&eventfs_mutex); ef->created = true; if (ef->ei)
ef->dentry = eventfs_create_dir(ef->name, ef->mode, ef->d_parent,
ef->data, ef->fop, ef->iop);
ef->dentry = create_dir(ef->name, ef->mode, ef->d_parent,
ef->data, ef->fop, ef->iop); else
ef->dentry = eventfs_create_file(ef->name, ef->mode, ef->d_parent,
ef->data, ef->fop);
ef->dentry = create_file(ef->name, ef->mode, ef->d_parent,
ef->data, ef->fop); if (IS_ERR_OR_NULL(ef->dentry)) { ef->created = false;
mutex_unlock(&eventfs_mutex); } else { if (ef->ei) eventfs_post_create_dir(ef); ef->dentry->d_fsdata = ef;
mutex_unlock(&eventfs_mutex); dput(ef->dentry); } break; }
eventfs_up_read(eventfs_rwsem);
srcu_read_unlock(&eventfs_srcu, idx); return ret;
}
@@ -318,21 +314,20 @@ static int eventfs_release(struct inode struct tracefs_inode *ti; struct eventfs_inode *ei; struct eventfs_file *ef;
struct dentry *dentry = file_dentry(file);
struct rw_semaphore *eventfs_rwsem;
int idx; ti = get_tracefs(inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; ei = ti->private;
eventfs_rwsem = eventfs_dentry_to_rwsem(dentry);
eventfs_down_read(eventfs_rwsem);
list_for_each_entry(ef, &ei->e_top_files, list) {
idx = srcu_read_lock(&eventfs_srcu);
list_for_each_entry_srcu(ef, &ei->e_top_files, list,
srcu_read_lock_held(&eventfs_srcu)) { if (ef->created) dput(ef->dentry); }
eventfs_up_read(eventfs_rwsem);
srcu_read_unlock(&eventfs_srcu, idx); return dcache_dir_close(inode, file);
}
@@ -352,30 +347,30 @@ static int dcache_dir_open_wrapper(struc struct eventfs_file *ef; struct inode *f_inode = file_inode(file); struct dentry *dentry = file_dentry(file);
struct rw_semaphore *eventfs_rwsem;
int idx; ti = get_tracefs(f_inode); if (!(ti->flags & TRACEFS_EVENT_INODE)) return -EINVAL; ei = ti->private;
eventfs_rwsem = eventfs_dentry_to_rwsem(dentry);
eventfs_down_read(eventfs_rwsem);
list_for_each_entry(ef, &ei->e_top_files, list) {
idx = srcu_read_lock(&eventfs_srcu);
list_for_each_entry_rcu(ef, &ei->e_top_files, list) { if (ef->created) { dget(ef->dentry); continue; }
mutex_lock(&eventfs_mutex); ef->created = true; inode_lock(dentry->d_inode); if (ef->ei)
ef->dentry = eventfs_create_dir(ef->name, ef->mode, dentry,
ef->data, ef->fop, ef->iop);
ef->dentry = create_dir(ef->name, ef->mode, dentry,
ef->data, ef->fop, ef->iop); else
ef->dentry = eventfs_create_file(ef->name, ef->mode, dentry,
ef->data, ef->fop);
ef->dentry = create_file(ef->name, ef->mode, dentry,
ef->data, ef->fop); inode_unlock(dentry->d_inode); if (IS_ERR_OR_NULL(ef->dentry)) {
@@ -385,8 +380,9 @@ static int dcache_dir_open_wrapper(struc eventfs_post_create_dir(ef); ef->dentry->d_fsdata = ef; }
mutex_unlock(&eventfs_mutex); }
eventfs_up_read(eventfs_rwsem);
srcu_read_unlock(&eventfs_srcu, idx); return dcache_dir_open(inode, file);
}
@@ -463,13 +459,11 @@ static struct eventfs_file *eventfs_prep
- @parent: a pointer to the parent dentry for this file. This should be a
directory dentry if set. If this parameter is NULL, then the
directory will be created in the root of the tracefs filesystem.
- @eventfs_rwsem: a pointer to rw_semaphore
- This function creates the top of the trace event directory.
*/ struct dentry *eventfs_create_events_dir(const char *name,
struct dentry *parent,
struct rw_semaphore *eventfs_rwsem)
struct dentry *parent)
{ struct dentry *dentry = tracefs_start_creating(name, parent); struct eventfs_inode *ei; @@ -489,7 +483,6 @@ struct dentry *eventfs_create_events_dir return ERR_PTR(-ENOMEM); }
init_rwsem(eventfs_rwsem); INIT_LIST_HEAD(&ei->e_top_files); ti = get_tracefs(inode);
@@ -499,7 +492,6 @@ struct dentry *eventfs_create_events_dir inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = &eventfs_root_dir_inode_operations; inode->i_fop = &eventfs_file_operations;
inode->i_private = eventfs_rwsem; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode);
@@ -513,15 +505,13 @@ struct dentry *eventfs_create_events_dir
- eventfs_add_subsystem_dir - add eventfs subsystem_dir to list to create later
- @name: a pointer to a string containing the name of the file to create.
- @parent: a pointer to the parent dentry for this dir.
- @eventfs_rwsem: a pointer to rw_semaphore
- This function adds eventfs subsystem dir to list.
- And all these dirs are created on the fly when they are looked up,
- and the dentry and inodes will be removed when they are done.
*/ struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
struct dentry *parent,
struct rw_semaphore *eventfs_rwsem)
struct dentry *parent)
{ struct tracefs_inode *ti_parent; struct eventfs_inode *ei_parent; @@ -536,16 +526,15 @@ struct eventfs_file *eventfs_add_subsyst ef = eventfs_prepare_ef(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, &eventfs_file_operations,
&eventfs_root_dir_inode_operations,
(void *) eventfs_rwsem);
&eventfs_root_dir_inode_operations, NULL); if (IS_ERR(ef)) return ef;
eventfs_down_write(eventfs_rwsem);
mutex_lock(&eventfs_mutex); list_add_tail(&ef->list, &ei_parent->e_top_files); ef->d_parent = parent;
eventfs_up_write(eventfs_rwsem);
mutex_unlock(&eventfs_mutex); return ef;
}
@@ -553,15 +542,13 @@ struct eventfs_file *eventfs_add_subsyst
- eventfs_add_dir - add eventfs dir to list to create later
- @name: a pointer to a string containing the name of the file to create.
- @ef_parent: a pointer to the parent eventfs_file for this dir.
- @eventfs_rwsem: a pointer to rw_semaphore
- This function adds eventfs dir to list.
- And all these dirs are created on the fly when they are looked up,
- and the dentry and inodes will be removed when they are done.
*/ struct eventfs_file *eventfs_add_dir(const char *name,
struct eventfs_file *ef_parent,
struct rw_semaphore *eventfs_rwsem)
struct eventfs_file *ef_parent)
{ struct eventfs_file *ef;
@@ -571,16 +558,15 @@ struct eventfs_file *eventfs_add_dir(con ef = eventfs_prepare_ef(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, &eventfs_file_operations,
&eventfs_root_dir_inode_operations,
(void *) eventfs_rwsem);
&eventfs_root_dir_inode_operations, NULL); if (IS_ERR(ef)) return ef;
eventfs_down_write(eventfs_rwsem);
mutex_lock(&eventfs_mutex); list_add_tail(&ef->list, &ef_parent->ei->e_top_files); ef->d_parent = ef_parent->dentry;
eventfs_up_write(eventfs_rwsem);
mutex_unlock(&eventfs_mutex); return ef;
}
@@ -608,7 +594,6 @@ int eventfs_add_top_file(const char *nam struct tracefs_inode *ti; struct eventfs_inode *ei; struct eventfs_file *ef;
struct rw_semaphore *eventfs_rwsem; if (!parent) return -EINVAL;
@@ -629,11 +614,10 @@ int eventfs_add_top_file(const char *nam if (IS_ERR(ef)) return -ENOMEM;
eventfs_rwsem = (struct rw_semaphore *) parent->d_inode->i_private;
eventfs_down_write(eventfs_rwsem);
mutex_lock(&eventfs_mutex); list_add_tail(&ef->list, &ei->e_top_files); ef->d_parent = parent;
eventfs_up_write(eventfs_rwsem);
mutex_unlock(&eventfs_mutex); return 0;
}
@@ -658,7 +642,6 @@ int eventfs_add_file(const char *name, u const struct file_operations *fop) { struct eventfs_file *ef;
struct rw_semaphore *eventfs_rwsem; if (!ef_parent) return -EINVAL;
@@ -670,14 +653,42 @@ int eventfs_add_file(const char *name, u if (IS_ERR(ef)) return -ENOMEM;
eventfs_rwsem = (struct rw_semaphore *) ef_parent->data;
eventfs_down_write(eventfs_rwsem);
mutex_lock(&eventfs_mutex); list_add_tail(&ef->list, &ef_parent->ei->e_top_files); ef->d_parent = ef_parent->dentry;
eventfs_up_write(eventfs_rwsem);
mutex_unlock(&eventfs_mutex); return 0;
}
+static LLIST_HEAD(free_list);
+static void eventfs_workfn(struct work_struct *work) +{
struct eventfs_file *ef, *tmp;
struct llist_node *llnode;
llnode = llist_del_all(&free_list);
llist_for_each_entry_safe(ef, tmp, llnode, llist) {
if (ef->created && ef->dentry)
dput(ef->dentry);
kfree(ef->name);
kfree(ef->ei);
kfree(ef);
}
+}
+DECLARE_WORK(eventfs_work, eventfs_workfn);
+static void free_ef(struct rcu_head *head) +{
struct eventfs_file *ef = container_of(head, struct eventfs_file, rcu);
if (!llist_add(&ef->llist, &free_list))
return;
queue_work(system_unbound_wq, &eventfs_work);
+}
/**
- eventfs_remove_rec - remove eventfs dir or file from list
- @ef: a pointer to eventfs_file to be removed.
@@ -685,51 +696,51 @@ int eventfs_add_file(const char *name, u
- This function recursively remove eventfs_file which
- contains info of file or dir.
*/ -static void eventfs_remove_rec(struct eventfs_file *ef) +static void eventfs_remove_rec(struct eventfs_file *ef, int level) {
struct eventfs_file *ef_child, *n;
struct eventfs_file *ef_child; if (!ef) return;
/*
* Check recursion depth. It should never be greater than 3:
* 0 - events/
* 1 - events/group/
* 2 - events/group/event/
* 3 - events/group/event/file
*/
if (WARN_ON_ONCE(level > 3))
return; if (ef->ei) { /* search for nested folders or files */
list_for_each_entry_safe(ef_child, n, &ef->ei->e_top_files, list) {
eventfs_remove_rec(ef_child);
list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list,
lockdep_is_held(&eventfs_mutex)) {
eventfs_remove_rec(ef_child, level + 1); }
kfree(ef->ei); }
if (ef->created && ef->dentry) {
if (ef->created && ef->dentry) d_invalidate(ef->dentry);
dput(ef->dentry);
}
list_del(&ef->list);
kfree(ef->name);
kfree(ef);
list_del_rcu(&ef->list);
call_srcu(&eventfs_srcu, &ef->rcu, free_ef);
}
/**
- eventfs_remove - remove eventfs dir or file from list
- @ef: a pointer to eventfs_file to be removed.
- This function acquire the eventfs_rwsem lock and call eventfs_remove_rec()
- This function acquire the eventfs_mutex lock and calls eventfs_remove_rec()
*/ void eventfs_remove(struct eventfs_file *ef) {
struct rw_semaphore *eventfs_rwsem;
if (!ef) return;
if (ef->ei)
eventfs_rwsem = (struct rw_semaphore *) ef->data;
else
eventfs_rwsem = (struct rw_semaphore *) ef->d_parent->d_inode->i_private;
eventfs_down_write(eventfs_rwsem);
eventfs_remove_rec(ef);
eventfs_up_write(eventfs_rwsem);
mutex_lock(&eventfs_mutex);
eventfs_remove_rec(ef, 0);
mutex_unlock(&eventfs_mutex);
}
/** Index: linux-trace.git/include/linux/tracefs.h =================================================================== --- linux-trace.git.orig/include/linux/tracefs.h 2023-07-07 22:04:44.490812310 -0400 +++ linux-trace.git/include/linux/tracefs.h 2023-07-07 22:04:44.486812271 -0400 @@ -21,22 +21,7 @@ struct file_operations;
#ifdef CONFIG_TRACING
-struct eventfs_inode {
struct list_head e_top_files;
-};
-struct eventfs_file {
const char *name;
struct dentry *d_parent;
struct dentry *dentry;
struct list_head list;
struct eventfs_inode *ei;
const struct file_operations *fop;
const struct inode_operations *iop;
void *data;
umode_t mode;
bool created;
-}; +struct eventfs_file;
struct dentry *eventfs_start_creating(const char *name, struct dentry *parent);
@@ -45,16 +30,13 @@ struct dentry *eventfs_failed_creating(s struct dentry *eventfs_end_creating(struct dentry *dentry);
struct dentry *eventfs_create_events_dir(const char *name,
struct dentry *parent,
struct rw_semaphore *eventfs_rwsem);
struct dentry *parent);
struct eventfs_file *eventfs_add_subsystem_dir(const char *name,
struct dentry *parent,
struct rw_semaphore *eventfs_rwsem);
struct dentry *parent);
struct eventfs_file *eventfs_add_dir(const char *name,
struct eventfs_file *ef_parent,
struct rw_semaphore *eventfs_rwsem);
struct eventfs_file *ef_parent);
int eventfs_add_file(const char *name, umode_t mode, struct eventfs_file *ef_parent, void *data, Index: linux-trace.git/kernel/trace/trace.h =================================================================== --- linux-trace.git.orig/kernel/trace/trace.h 2023-07-07 22:04:44.490812310 -0400 +++ linux-trace.git/kernel/trace/trace.h 2023-07-07 22:04:44.486812271 -0400 @@ -359,7 +359,6 @@ struct trace_array { struct dentry *options; struct dentry *percpu_dir; struct dentry *event_dir;
struct rw_semaphore eventfs_rwsem; struct trace_options *topts; struct list_head systems; struct list_head events;
Index: linux-trace.git/kernel/trace/trace_events.c
--- linux-trace.git.orig/kernel/trace/trace_events.c 2023-07-07 22:04:44.490812310 -0400 +++ linux-trace.git/kernel/trace/trace_events.c 2023-07-07 22:04:44.486812271 -0400 @@ -2337,7 +2337,7 @@ event_subsystem_dir(struct trace_array * } else __get_system(system);
dir->ef = eventfs_add_subsystem_dir(name, parent, &tr->eventfs_rwsem);
dir->ef = eventfs_add_subsystem_dir(name, parent); if (IS_ERR(dir->ef)) { pr_warn("Failed to create system directory %s\n", name); __put_system(system);
@@ -2439,7 +2439,7 @@ event_create_dir(struct dentry *parent, return -ENOMEM;
name = trace_event_name(call);
file->ef = eventfs_add_dir(name, ef_subsystem, &tr->eventfs_rwsem);
file->ef = eventfs_add_dir(name, ef_subsystem); if (IS_ERR(file->ef)) { pr_warn("Could not create tracefs '%s' directory\n", name); return -1;
@@ -3647,7 +3647,7 @@ create_event_toplevel_files(struct dentr if (!entry) return -ENOMEM;
d_events = eventfs_create_events_dir("events", parent, &tr->eventfs_rwsem);
d_events = eventfs_create_events_dir("events", parent); if (IS_ERR(d_events)) { pr_warn("Could not create tracefs 'events' directory\n"); return -ENOMEM;
!! External Email: This email originated from outside of the organization. Do not click links or open attachments unless you recognize the sender.
On Mon, 10 Jul 2023 18:53:53 +0000 Ajay Kaher akaher@vmware.com wrote:
On 10-Jul-2023, at 7:24 AM, Steven Rostedt rostedt@goodmis.org wrote:
!! External Email
On Mon, 3 Jul 2023 15:52:26 -0400 Steven Rostedt rostedt@goodmis.org wrote:
On Mon, 3 Jul 2023 18:51:22 +0000 Ajay Kaher akaher@vmware.com wrote:
We can also look to see if we can implement this with RCU. What exactly is this rwsem protecting?
- struct eventfs_file holds the meta-data for file or dir.
https://github.com/intel-lab-lkp/linux/blob/dfe0dc15a73261ed83cdc728e43f4b3d...
- eventfs_rwsem is supposed to protect the 'linked list which is made of struct eventfs_file' and the elements of struct eventfs_file.
RCU is usually the perfect solution for protecting linked lists though. I'll take a look at this when I get back to work.
So I did the below patch on top of this series. If you could fold this into the appropriate patches, it should get us closer to an acceptable solution.
What I did was:
- Moved the struct eventfs_file and eventfs_inode into event_inode.c as it
really should not be exposed to all users.
- Added a recursion check to eventfs_remove_rec() as it is really
dangerous to have unchecked recursion in the kernel (we do have a fixed size stack).
- Removed all the eventfs_rwsem code and replaced it with an srcu lock for
the readers, and a mutex to synchronize the writers of the list.
- Added an eventfs_mutex that is used for the modifications of the
dentry itself (as well as modifying the list from item 3 above).
- Have the free use srcu callbacks. After the srcu grace periods are done,
it adds the eventfs_file onto a llist (lockless linked list) and wakes up a work queue. Then the work queue does the freeing (this needs to be done in task/workqueue context, as srcu callbacks are done in softirq context).
This appears to pass through some of my instance stress tests as well as the in tree ftrace selftests.
Awesome :)
I have manually applied the patches and the ftracetest results are the same as v3. No more complaints from lockdep.
I will merge this into the appropriate patches of v3 and send v4 soon.
You have renamed eventfs_create_dir() to create_dir(), and kept eventfs_create_dir() as just a wrapper that takes the lock; same for eventfs_create_file(). However, these wrappers are not used anywhere, so I will drop them.
Ah, I thought that because they started with "eventfs_" that they were used for some fops pointer. Note, I try to avoid using the "eventfs_" naming for static functions that are not exported elsewhere.
I was trying to have an independent lock for each instance of events, as a common lock for every instance of events is not a must.
We can find a way to make the lock for the root later. Let's get it working first before we optimize it. I do not want to expose any locking to the users of this interface.
Something was broken in your mail (I guess the Cc list), and it either couldn't reach lkml or was ignored by lkml. I just wanted to track the auto test results from linux-kselftest.
Yeah, claws-mail has an issue with some emails with quotes in it (sometimes drops the second quote). Sad part is, it happens after I hit send, and it is not part of the email. I'll send this reply now, but I bet it's going to happen again.
Let's see :-/ I checked the To and Cc's and they all have the proper quotes. Let's see what ends up in my "Sent" folder.
-- Steve
On Mon, 10 Jul 2023 15:06:06 -0400 Steven Rostedt rostedt@goodmis.org wrote:
Something was broken in your mail (I guess the Cc list), and it either couldn't reach lkml or was ignored by lkml. I just wanted to track the auto test results from linux-kselftest.
Yeah, claws-mail has an issue with some emails with quotes in it (sometimes drops the second quote). Sad part is, it happens after I hit send, and it is not part of the email. I'll send this reply now, but I bet it's going to happen again.
Let's see :-/ I checked the To and Cc's and they all have the proper quotes. Let's see what ends up in my "Sent" folder.
Sorry for the spam, but I just upgraded my claws-mail from 3.19.0 to 3.19.1 and I just want to see if it fails again.
-- Steve
On Mon, 10 Jul 2023 18:53:53 +0000 Ajay Kaher akaher@vmware.com wrote:
Something was broken in your mail (I guess the Cc list), and it either couldn't reach lkml or was ignored by lkml. I just wanted to track the auto test results from linux-kselftest.
Anyway, I pushed your series plus this as a commit to:
https://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git/log/?h...
Which should trigger some of the zero-day bots.
I also ran all my testing with lockdep enabled and nothing triggered.
-- Steve
On Mon, 10 Jul 2023 18:53:53 +0000 Ajay Kaher akaher@vmware.com wrote:
Something was broken in your mail (I guess the Cc list), and it either couldn't reach lkml or was ignored by lkml. I just wanted to track the auto test results from linux-kselftest.
Below is the report from the tree I pushed. I guess I forgot to remove an "idx" variable, and it also caught the unused functions you mentioned.
-- Steve
tree: git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace trace/rfc/eventfs head: 1dc48374bb8ad8aec6d7244267f9b36e0512d3bb commit: 1dc48374bb8ad8aec6d7244267f9b36e0512d3bb [28/28] tracefs: Add RCU and global mutex for eventfs config: x86_64-kexec (https://download.01.org/0day-ci/archive/20230711/202307111415.tc8g7M63-lkp@i...) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce: (https://download.01.org/0day-ci/archive/20230711/202307111415.tc8g7M63-lkp@i...)
If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot lkp@intel.com | Closes: https://lore.kernel.org/oe-kbuild-all/202307111415.tc8g7M63-lkp@intel.com/
All warnings (new ones prefixed by >>):
fs/tracefs/event_inode.c: In function 'eventfs_post_create_dir':
fs/tracefs/event_inode.c:236:13: warning: unused variable 'idx' [-Wunused-variable]
236 | int idx; | ^~~ fs/tracefs/event_inode.c: At top level: fs/tracefs/event_inode.c:184:23: warning: 'eventfs_create_dir' defined but not used [-Wunused-function] 184 | static struct dentry *eventfs_create_dir(const char *name, umode_t mode, | ^~~~~~~~~~~~~~~~~~ fs/tracefs/event_inode.c:108:23: warning: 'eventfs_create_file' defined but not used [-Wunused-function] 108 | static struct dentry *eventfs_create_file(const char *name, umode_t mode, | ^~~~~~~~~~~~~~~~~~~
vim +/idx +236 fs/tracefs/event_inode.c
225 226 /** 227 * eventfs_post_create_dir - post create dir routine 228 * @ef: eventfs_file of recently created dir 229 * 230 * Files with-in eventfs dir should know dentry of parent dir 231 */ 232 static void eventfs_post_create_dir(struct eventfs_file *ef) 233 { 234 struct eventfs_file *ef_child; 235 struct tracefs_inode *ti;
236 int idx;
237 238 /* srcu lock already held */ 239 /* fill parent-child relation */ 240 list_for_each_entry_srcu(ef_child, &ef->ei->e_top_files, list, 241 srcu_read_lock_held(&eventfs_srcu)) { 242 ef_child->d_parent = ef->dentry; 243 } 244 245 ti = get_tracefs(ef->dentry->d_inode); 246 ti->private = ef->ei; 247 } 248
linux-kselftest-mirror@lists.linaro.org