From: Arjan van de Ven arjan@linux.intel.com
PowerTOP would like to be able to show who is keeping the disk busy by dirtying data. The most logical spot for this is in the vfs in the mark_inode_dirty() function, doing this on the block level is not possible because by the time the IO hits the block layer the guilty party can no longer be found ("kjournald" and "pdflush" are not useful answers to "who caused this file to be dirty).
The trace point follows the same logic/style as the block_dump code and pretty much dumps the same data, just not to dmesg (and thus to /var/log/messages) but via the trace events streams.
Signed-of-by: Arjan van de Ven arjan@linux.intel.com --- fs/fs-writeback.c | 4 +++ fs/inode.c | 4 +++ include/trace/events/vfs.h | 53 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 0 deletions(-) create mode 100644 include/trace/events/vfs.h
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d5be169..6a2af1d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -26,6 +26,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/buffer_head.h> +#include <trace/events/vfs.h> #include "internal.h"
#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) @@ -900,6 +901,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) sb->s_op->dirty_inode(inode); }
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)) + trace_dirty_inode(inode, current); + /* * make sure that changes are seen by all cpus before we test i_state * -- mikulas diff --git a/fs/inode.c b/fs/inode.c index 722860b..a06184f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1626,3 +1626,7 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_mode = mode; } EXPORT_SYMBOL(inode_init_owner); + +#define CREATE_TRACE_POINTS +#include <trace/events/vfs.h> + diff --git a/include/trace/events/vfs.h b/include/trace/events/vfs.h new file mode 100644 index 0000000..21cf9fb --- /dev/null +++ b/include/trace/events/vfs.h @@ -0,0 +1,53 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vfs + +#if !defined(_TRACE_VFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VFS_H + +/* + * Tracepoint for dirtying an inode: + */ +TRACE_EVENT(dirty_inode, + + TP_PROTO(struct inode *inode, struct task_struct *task), + + TP_ARGS(inode, task), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __array( char, dev, 16 ) + __array( char, file, 32 ) + ), + + TP_fast_assign( + if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { + struct dentry *dentry; + const char *name = "?"; + + dentry = d_find_alias(inode); + if (dentry) { + spin_lock(&dentry->d_lock); + name = (const char *) dentry->d_name.name; + } + + memcpy(__entry->comm, task->comm, TASK_COMM_LEN); + __entry->pid = task->pid; + strlcpy(__entry->file, name, 32); + strlcpy(__entry->dev, inode->i_sb->s_id, 16); + + if (dentry) { + spin_unlock(&dentry->d_lock); + dput(dentry); + } + } + ), + + TP_printk("task=%i (%s) file=%s dev=%s", + __entry->pid, __entry->comm, __entry->file, __entry->dev) +); + +#endif /* _TRACE_VFS_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h>