Re: [PATCH v2 4/7] tracefs: dentry lookup crapectomy

1 Feb 2024


      On Thu, 1 Feb 2024 00:27:19 +0000
Al Viro viro@zeniv.linux.org.uk wrote:
...
On Wed, Jan 31, 2024 at 01:49:22PM -0500, Steven Rostedt wrote:
...
@@ -329,32 +320,29 @@ static struct dentry *create_file(const char *name, umode_t mode,
 
   ti = get_tracefs(inode);
   ti->flags |= TRACEFS_EVENT_INODE;

d_instantiate(dentry, inode);



d_add(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);

Seriously?  stat(2), have it go away from dcache on memory pressure,
lather, rinse, repeat...  Won't *snotify get confused by the stream
of creations of the same thing, with not a removal in sight?
That looks to be cut and paste from the old create in tracefs. I don't know
of a real use case for that. I think we could possibly delete it without
anyone noticing.
...
...

return eventfs_end_creating(dentry);


return dentry;

};
...
...
@@ -371,11 +359,14 @@ static struct dentry *create_dir(struct eventfs_inode *ei, struct dentry *parent
   /* Only directories have ti->private set to an ei, not files */
   ti->private = ei;

dentry->d_fsdata = ei;
   ei->dentry = dentry;	// Remove me!


inc_nlink(inode);


d_instantiate(dentry, inode);


d_add(dentry, inode);
inc_nlink(dentry->d_parent->d_inode);

What will happen when that thing gets evicted from dcache,
gets looked up again, and again, and...?
...
fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
Same re snotify confusion...
Yeah, again, I think it's useless. Doing that is more useless than taring
the tracefs directory ;-)
...
...

return eventfs_end_creating(dentry);


return dentry;

}
 
 static void free_ei(struct eventfs_inode *ei)
@@ -425,7 +416,7 @@ void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry)
 }
...
...
@@ -607,79 +462,55 @@ static struct dentry *eventfs_root_lookup(struct inode *dir,
   				  struct dentry *dentry,
   				  unsigned int flags)
 {

const struct file_operations *fops;
const struct eventfs_entry *entry;
struct eventfs_inode *ei_child;
struct tracefs_inode *ti;
struct eventfs_inode *ei;
struct dentry *ei_dentry = NULL;
struct dentry *ret = NULL;
struct dentry *d;
const char *name = dentry->d_name.name;
umode_t mode;
void *data;
int idx;
int i;
int r;


struct dentry *result = NULL;

ti = get_tracefs(dir);
   if (!(ti->flags & TRACEFS_EVENT_INODE))
Can that ever happen?  I mean, why set ->i_op to something that
has this for ->lookup() on a directory without TRACEFS_EVENT_INODE in
its inode?  It's not as if you ever removed that flag...
That's been there mostly as paranoia. Should probably be switched to:
if (WARN_ON_ONCE(!(ti->flags & TRACEFS_EVENT_INODE)))
...
...

return NULL;



/* Grab srcu to prevent the ei from going away */
idx = srcu_read_lock(&eventfs_srcu);


return ERR_PTR(-EIO);




/*
* Grab the eventfs_mutex to consistent value from ti->private.


* This s


*/

mutex_lock(&eventfs_mutex);
ei = READ_ONCE(ti->private);
if (ei && !ei->is_freed)
ei_dentry = READ_ONCE(ei->dentry);


mutex_unlock(&eventfs_mutex);

if (!ei || !ei_dentry)
goto out;




data = ei->data;


ei = ti->private;
if (!ei || ei->is_freed)
goto enoent;




list_for_each_entry_srcu(ei_child, &ei->children, list,
		 srcu_read_lock_held(&eventfs_srcu)) {




list_for_each_entry(ei_child, &ei->children, list) {
if (strcmp(ei_child->name, name) != 0)
	continue;


ret = simple_lookup(dir, dentry, flags);


if (IS_ERR(ret))


	goto out;


d = create_dir_dentry(ei, ei_child, ei_dentry);


dput(d);




if (ei_child->is_freed)


	goto enoent;  



Out of curiosity - can that happen now?  You've got exclusion with
eventfs_remove_rec(), so you shouldn't be able to catch the moment
between setting ->is_freed and removal from the list...
Yeah, that's from when we just used SRCU. If anything, it too should just
add a WARN_ON_ONCE() to it.
...
...

lookup_dir_entry(dentry, ei, ei_child);

goto out;
}


for (i = 0; i < ei->nr_entries; i++) {
entry = &ei->entries[i];


if (strcmp(name, entry->name) == 0) {


	void *cdata = data;


	mutex_lock(&eventfs_mutex);


	/* If ei->is_freed, then the event itself may be too */


	if (!ei->is_freed)


		r = entry->callback(name, &mode, &cdata, &fops);


	else


		r = -1;


	mutex_unlock(&eventfs_mutex);


	if (r <= 0)


		continue;


	ret = simple_lookup(dir, dentry, flags);


	if (IS_ERR(ret))


		goto out;


	d = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);


	dput(d);


	break;


}




for (int i = 0; i < ei->nr_entries; i++) {
void *data;


umode_t mode;


const struct file_operations *fops;


const struct eventfs_entry *entry = &ei->entries[i];



if (strcmp(name, entry->name) != 0)


	continue;



data = ei->data;


if (entry->callback(name, &mode, &data, &fops) <= 0)


	goto enoent;



lookup_file_dentry(dentry, ei, i, mode, data, fops);


goto out;

}

enoent:
/* Don't cache negative lookups, just return an error */
result = ERR_PTR(-ENOENT);

Huh?  Just return NULL and be done with that - you'll get an
unhashed negative dentry and let the caller turn that into
-ENOENT...
We had a problem here with just returning NULL. It leaves the negative
dentry around and doesn't get refreshed.
I did this:
# cd /sys/kernel/tracing
 # ls events/kprobes/sched/
ls: cannot access 'events/kprobes/sched/': No such file or directory
 # echo 'p:sched schedule' >> kprobe_events
 # ls events/kprobes/sched/
ls: cannot access 'events/kprobes/sched/': No such file or directory
When it should have been:
# ls events/kprobes/sched/
enable  filter  format  hist  hist_debug  id  inject  trigger
Leaving the negative dentry there will have it fail when the directory
exists the next time.
-- Steve
...
...
out:

srcu_read_unlock(&eventfs_srcu, idx);
return ret;


mutex_unlock(&eventfs_mutex);
return result;

}
 
 /*

    

2026

2025

2024

2023

2022

2021

2020

2019

2018

2017

Re: [PATCH v2 4/7] tracefs: dentry lookup crapectomy