There is not yet a clear definition of "lost" time that everyone (or at least a wider group of users) cares about. However, users can perform their own delay accounting by summing the components that interest them. This patch makes cgroup v2 freezing time available as one of those components.
Track the cumulative time that each v2 cgroup spends freezing and expose it to userland via a new core interface file in cgroupfs.
To access this value (reported in microseconds):

  $ mkdir /sys/fs/cgroup/test
  $ cat /sys/fs/cgroup/test/cgroup.freeze.stat.local
  freeze_time_total 0
Ensure consistent freeze time reads with freeze_seq, a per-cgroup sequence counter. Writes are serialized using the css_set_lock.
Signed-off-by: Tiffany Yang ynaffit@google.com --- Documentation/admin-guide/cgroup-v2.rst | 20 ++++++++++++++++++ include/linux/cgroup-defs.h | 17 +++++++++++++++ kernel/cgroup/cgroup.c | 28 +++++++++++++++++++++++++ kernel/cgroup/freezer.c | 10 +++++++-- 4 files changed, 73 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index d9d3cc7df348..e5bc463f8e05 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1027,6 +1027,26 @@ All cgroup core files are prefixed with "cgroup." it's possible to delete a frozen (and empty) cgroup, as well as create new sub-cgroups.
+ cgroup.freeze.stat.local + A read-only flat-keyed file which exists in non-root cgroups. + The following entry is defined: + + freeze_time_total + Cumulative time that this cgroup has spent between freezing and + thawing, regardless of whether by self or ancestor groups. + NB: (not) reaching "frozen" state is not accounted here. + + Using the following ASCII representation of a cgroup's freezer + state, :: + + 1 _____ + frozen 0 __/ __ + ab cd + + .. Originally contributed by Michal Koutný mkoutny@suse.com + + the duration being measured is the span between a and c. + cgroup.kill A write-only single value file which exists in non-root cgroups. The only allowed value is "1". diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6b93a64115fe..a4f9600fc101 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -433,6 +433,23 @@ struct cgroup_freezer_state { * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; + + /* Freeze time data consistency protection */ + seqcount_t freeze_seq; + + /* + * Most recent time the cgroup was requested to freeze. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_time_start_ns; + + /* + * Total duration the cgroup has spent freezing. + * Accesses guarded by freeze_seq counter. Writes serialized + * by css_set_lock. + */ + u64 freeze_time_total_ns; };
struct cgroup { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 312c6a8b55bb..25e008b40992 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4055,6 +4055,27 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, return nbytes; }
+static int cgroup_freeze_local_stat_show(struct seq_file *seq, void *v) +{ + struct cgroup *cgrp = seq_css(seq)->cgroup; + unsigned int sequence; + u64 freeze_time; + + do { + sequence = read_seqcount_begin(&cgrp->freezer.freeze_seq); + freeze_time = cgrp->freezer.freeze_time_total_ns; + /* Add in current freezer interval if the task is now frozen */ + if (test_bit(CGRP_FREEZE, &cgrp->flags)) + freeze_time += (ktime_get_ns() - + cgrp->freezer.freeze_time_start_ns); + } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, sequence)); + + seq_printf(seq, "freeze_time_total %llu\n", + (unsigned long long) freeze_time / NSEC_PER_USEC); + + return 0; +} + static void __cgroup_kill(struct cgroup *cgrp) { struct css_task_iter it; @@ -5360,6 +5381,11 @@ static struct cftype cgroup_base_files[] = { .seq_show = cgroup_freeze_show, .write = cgroup_freeze_write, }, + { + .name = "cgroup.freeze.stat.local", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = cgroup_freeze_local_stat_show, + }, { .name = "cgroup.kill", .flags = CFTYPE_NOT_ON_ROOT, @@ -5763,6 +5789,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; + seqcount_init(&cgrp->freezer.freeze_seq); if (cgrp->freezer.e_freeze) { /* * Set the CGRP_FREEZE flag, so when a process will be @@ -5771,6 +5798,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * consider it frozen immediately. */ set_bit(CGRP_FREEZE, &cgrp->flags); + cgrp->freezer.freeze_time_start_ns = ktime_get_ns(); set_bit(CGRP_FROZEN, &cgrp->flags); }
diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index bf1690a167dd..bbffad570ff7 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -179,10 +179,16 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock); - if (freeze) + write_seqcount_begin(&cgrp->freezer.freeze_seq); + if (freeze) { set_bit(CGRP_FREEZE, &cgrp->flags); - else + cgrp->freezer.freeze_time_start_ns = ktime_get_ns(); + } else { clear_bit(CGRP_FREEZE, &cgrp->flags); + cgrp->freezer.freeze_time_total_ns += (ktime_get_ns() - + cgrp->freezer.freeze_time_start_ns); + } + write_seqcount_end(&cgrp->freezer.freeze_seq); spin_unlock_irq(&css_set_lock);
if (freeze)