Hello,
It looks like powertop requires a kernel patch to display VFS ops/sec. The last patch was for kernel v3.3.0. The patch needs to be updated, otherwise powertop always displays 0 VFS ops/sec.
Considering changes starting with kernel v3.9, the kernel side patch would look like as below. I have tested it with powertop and linux-linaro-tracking branch (v3.14.0) and with this patch powertop displays and updates VFS ops/sec properly.
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5584bea..c6a9832 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1141,6 +1141,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return;
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)) + trace_writeback_inode_dirty(inode, flags); + if (unlikely(block_dump > 1)) block_dump___mark_inode_dirty(inode);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 464ea82..7956d8f 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -101,6 +101,35 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, TP_ARGS(inode, flags) );
+/* + * Tracepoint for dirtying an inode; used by PowerTOP + */ +TRACE_EVENT(writeback_inode_dirty, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry( + __field( __kernel_dev_t, dev ) + __field( ino_t, ino ) + __field( u32, flags ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("dev %d:%d ino %lu flags %d %s", MAJOR(__entry->dev), + MINOR(__entry->dev), + (unsigned long) __entry->ino, + __entry->flags, + show_inode_state(__entry->flags) + ) +); + DECLARE_EVENT_CLASS(writeback_write_inode_template,
TP_PROTO(struct inode *inode, struct writeback_control *wbc),
However, there is an alternative thought.
Starting with kernel v3.9, kernel provides a built in trace event called - "writeback_dirty_inode" -
DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags) );
But since the event powertop expects is "writeback_inode_dirty" and the names and output format don't match, to take advantage of the built-in kernel trace event, the below two patches together make it work.
powertop side patch is necessary to match the TRACE_EVENT name. Also, checking for dev > 0 is not necessary, since now there is name instead of device major and minor number in kernel trace.
kernel side patch is necessary to account for DIRTY_PAGES.
powertop side patch -
diff --git a/src/process/do_process.cpp b/src/process/do_process.cpp index ba164eb..2e2cbea 100644 --- a/src/process/do_process.cpp +++ b/src/process/do_process.cpp @@ -625,21 +625,14 @@ void perf_process_bundle::handle_trace_point(void *trace, int cpu, uint64_t time consumer->gpu_ops++; } } - else if (strcmp(event->name, "writeback_inode_dirty") == 0) { - static uint64_t prev_time; + else if (strcmp(event->name, "writeback_dirty_inode") == 0) { + static uint64_t prev_time = 0; class power_consumer *consumer = NULL; - int dev;
consumer = current_consumer(cpu);
- ret = pevent_get_field_val(NULL, event, "dev", &rec, &val, 0); - if (ret < 0) - - return; - dev = (int)val; - if (consumer && strcmp(consumer->name(), - "process")==0 && dev > 0) { + "process")==0) {
consumer->disk_hits++;
@@ -674,7 +667,7 @@ void start_process_measurement(void) perf_events->add_event("workqueue:workqueue_execute_end"); perf_events->add_event("i915:i915_gem_ring_dispatch"); perf_events->add_event("i915:i915_gem_request_submit"); - perf_events->add_event("writeback:writeback_inode_dirty"); + perf_events->add_event("writeback:writeback_dirty_inode"); }
first_stamp = ~0ULL;
kernel side patch -
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5584bea..c2ab700 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1141,6 +1141,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) if ((inode->i_state & flags) == flags) return;
+ if (flags & I_DIRTY_PAGES) + trace_writeback_dirty_inode(inode, flags); + if (unlikely(block_dump > 1)) block_dump___mark_inode_dirty(inode);
I have tested both solutions and both displays and updates VFS ops/sec. However, since second solution changes powertop, that means powertop won't work with pre v3.9 kernel.
Any comment or thought on this?
-- Thanks, -Meraj