The cfg80211 layer uses get_seconds() to read the current time
in its suspend handling. This function is deprecated because of the 32-bit
time_t overflow, and it can cause unexpected behavior when the time
changes due to settimeofday() calls or leap second updates.
In many cases, we want to use monotonic time instead, however cfg80211
explicitly tracks the time spent in suspend, so this changes the
driver over to use ktime_get_boottime_seconds(), which is slightly
slower, but not used in a fastpath here.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
net/wireless/core.h | 2 +-
net/wireless/sysfs.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63eb1b5fdd04..7f52ef569320 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -76,7 +76,7 @@ struct cfg80211_registered_device {
struct cfg80211_scan_request *scan_req; /* protected by RTNL */
struct sk_buff *scan_msg;
struct list_head sched_scan_req_list;
- unsigned long suspend_at;
+ time64_t suspend_at;
struct work_struct scan_done_wk;
struct genl_info *cur_cmd_info;
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 570a2b67ca10..6ab32f6a1961 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -102,7 +102,7 @@ static int wiphy_suspend(struct device *dev)
struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
int ret = 0;
- rdev->suspend_at = get_seconds();
+ rdev->suspend_at = ktime_get_boottime_seconds();
rtnl_lock();
if (rdev->wiphy.registered) {
@@ -130,7 +130,7 @@ static int wiphy_resume(struct device *dev)
int ret = 0;
/* Age scan results with time spent in suspend */
- cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
+ cfg80211_bss_age(rdev, ktime_get_boottime_seconds() - rdev->suspend_at);
rtnl_lock();
if (rdev->wiphy.registered && rdev->ops->resume)
--
2.9.0
Hi,
I just wanted to check if you would be interested in a list of Managed
Service Providers (MSPs) and Managed Security Service Providers (MSSPs)?
• Managed Service Providers (MSP’s) – 25,000 unique companies
• Managed Security Service Providers (MSSP’s) – 7,520 unique
companies
IT Decision Makers – 6million
Business Decision Makers – 10 million
Kindly review and let me know if I can share more information on this.
I look forward to hearing from you.
Regards,
Diana
MSP List Specialist
For Opt-Out reply with “Not Interested”.
The nes infiniband driver uses current_kernel_time() to get a nanosecond
granularity timestamp to initialize its tcp sequence counters. This is
one of only a few remaining users of that deprecated function, so we
should try to get rid of it.
Aside from using a deprecated API, there are several problems I see here:
- Using a CLOCK_REALTIME based time source makes it predictable in
case the time base is synchronized.
- Using a coarse timestamp means it only gets updated once per jiffy,
making it even more predictable in order to avoid having to access
the hardware clock source
- The upper 2 bits are always zero because the nanoseconds are at most
999999999.
For the Linux TCP implementation, we use secure_tcp_seq(), which appears
to be appropriate here as well, and solves all the above problems.
I'm doing the same change in both versions of the nes driver, with
i40iw being a later copy of the same code.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
The above change is just a guess at what it should look like,
please review carefully and Ack/Nak as appropriate.
---
drivers/infiniband/hw/i40iw/i40iw_cm.c | 8 +++++---
drivers/infiniband/hw/nes/nes_cm.c | 8 +++++---
net/core/secure_seq.c | 1 +
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 7b2655128b9f..da221d07f2dd 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -57,6 +57,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip_fib.h>
+#include <net/secure_seq.h>
#include <net/tcp.h>
#include <asm/checksum.h>
@@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
struct i40iw_cm_listener *listener)
{
struct i40iw_cm_node *cm_node;
- struct timespec ts;
int oldarpindex;
int arpindex;
struct net_device *netdev = iwdev->netdev;
@@ -2214,8 +2214,10 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
cm_node->tcp_cntxt.rcv_wnd =
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+ htonl(cm_node->rem_addr[0]),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) :
(iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6cdfbf8c5674..2b67ace5b614 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -58,6 +58,7 @@
#include <net/neighbour.h>
#include <net/route.h>
#include <net/ip_fib.h>
+#include <net/secure_seq.h>
#include <net/tcp.h>
#include <linux/fcntl.h>
@@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
struct nes_cm_listener *listener)
{
struct nes_cm_node *cm_node;
- struct timespec ts;
int oldarpindex = 0;
int arpindex = 0;
struct nes_device *nesdev;
@@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
NES_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr),
+ htonl(cm_node->rem_addr),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
cm_node->tcp_cntxt.rcv_nxt = 0;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 7232274de334..af6ad467ed61 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
&net_secret);
return seq_scale(hash);
}
+EXPORT_SYMBOL_GPL(secure_tcp_seq);
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
--
2.9.0
As Dave Chinner points out, we don't have proper documentation for the
ktime_get() family of interfaces, making it rather unclear which of the
over 30 (!) interfaces one should actually use in a driver or elsewhere
in the kernel.
I wrote up an explanation from how I personally see the interfaces,
documenting what each of the functions do and hopefully making it a bit
clearer which should be used where.
This is the first time I tried writing .rst format documentation, so
in addition to any mistakes in the content, I probably also introduce
nonstandard formatting ;-)
I first tried to add an extra section to
Documentation/timers/timekeeping.txt, but this is currently not included
in the generated API, and it seems useful to have the API docs as part
of what gets generated in
https://www.kernel.org/doc/html/latest/core-api/index.html#core-utilities
instead, so I started a new file there.
I also considered adding the documentation inline in the
include/linux/timekeeping.h header, but couldn't figure out how to do
that in a way that would result both in helpful inline comments as
well as readable html output, so I settled for the latter, with
a small note pointing to it from the header.
Cc: Dave Chinner <david(a)fromorbit.com>
Cc: John Stultz <john.stultz(a)linaro.org>
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Stephen Boyd <sboyd(a)kernel.org>
Cc: Linus Walleij <linus.walleij(a)linaro.org>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
Documentation/core-api/index.rst | 1 +
Documentation/core-api/timekeeping.rst | 185 +++++++++++++++++++++++++++++++++
include/linux/timekeeping.h | 15 +++
3 files changed, 201 insertions(+)
create mode 100644 Documentation/core-api/timekeeping.rst
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index f5a66b72f984..989c97cc232a 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -28,6 +28,7 @@ Core utilities
printk-formats
circular-buffers
gfp_mask-from-fs-io
+ timekeeping
Interfaces for kernel debugging
===============================
diff --git a/Documentation/core-api/timekeeping.rst b/Documentation/core-api/timekeeping.rst
new file mode 100644
index 000000000000..97dafa69dddf
--- /dev/null
+++ b/Documentation/core-api/timekeeping.rst
@@ -0,0 +1,185 @@
+ktime access
+============
+
+Device drivers can read the current time using ktime_get() and the many
+related functions declared in linux/timekeeping.h. As a rule of thumb,
+using an accessor with a shorter name is preferred over one with a longer
+name if both are equally fit for a particular use case.
+
+Basic ktime_t based interfaces
+------------------------------
+
+The recommended simplest form returns an opaque ktime_t, with variants
+that return time for different clock references:
+
+
+.. c:function:: ktime_t ktime_get( void )
+
+ CLOCK_MONOTONIC
+
+ Useful for reliable timestamps and measuring short time intervals
+ accurately. Starts at system boot time but stops during suspend.
+
+.. c:function:: ktime_t ktime_get_boottime( void )
+
+ CLOCK_BOOTTIME
+
+ Like ktime_get(), but does not stop when suspended. This can be
+ used e.g. for key expiration times that need to be synchronized
+ with other machines across a suspend operation.
+
+.. c:function:: ktime_t ktime_get_real( void )
+
+ CLOCK_REALTIME
+
+ Returns the time relative to the UNIX epoch starting in 1970
+ using the Coordinated Universal Time (UTC), same as gettimeofday()
+ in user space. This is used for all timestamps that need to
+ persist across a reboot, like inode times, but should be avoided
+ for internal uses, since it can jump backwards due to a leap
+ second update, NTP adjustment, or settimeofday() operation from
+ user space.
+
+.. c:function:: ktime_t ktime_get_clocktai( void )
+
+ CLOCK_TAI
+
+ Like ktime_get_real(), but uses the International Atomic Time (TAI)
+ reference instead of UTC to avoid jumping on leap second updates.
+ This is rarely useful in the kernel.
+
+.. c:function:: ktime_t ktime_get_raw( void )
+
+ CLOCK_MONOTONIC_RAW
+
+ Like ktime_get(), but runs at the same rate as the hardware
+ clocksource without (NTP) adjustments for clock drift. This is
+ also rarely needed in the kernel.
+
+nanosecond, timespec64, and second output
+-----------------------------------------
+
+For all of the above, there are variants that return the time in a
+different format depending on what is required by the user:
+
+.. c:function:: u64 ktime_get_ns( void )
+ u64 ktime_get_boottime_ns( void )
+ u64 ktime_get_real_ns( void )
+ u64 ktime_get_tai_ns( void )
+ u64 ktime_get_raw_ns( void )
+
+ Same as the plain ktime_get functions, but returning a u64 number
+ of nanoseconds in the respective time reference, which may be
+ more convenient for some callers.
+
+.. c:function:: void ktime_get_ts64( struct timespec64 * )
+ void ktime_get_boottime_ts64( struct timespec64 * )
+ void ktime_get_real_ts64( struct timespec64 * )
+ void ktime_get_clocktai_ts64( struct timespec64 * )
+ void ktime_get_raw_ts64( struct timespec64 * )
+
+ Same as above, but returns the time in a 'struct timespec64', split
+ into seconds and nanoseconds. This can avoid an extra division
+ when printing the time, or when passing it into an external
+ interface that expects a 'timespec' or 'timeval' structure.
+
+.. c:function:: time64_t ktime_get_seconds( void )
+ time64_t ktime_get_boottime_seconds( void )
+ time64_t ktime_get_real_seconds( void )
+ time64_t ktime_get_clocktai_seconds( void )
+ time64_t ktime_get_raw_seconds( void )
+
+ Return a coarse-grained version of the time as a scalar
+ time64_t. This avoids accessing the clock hardware and rounds
+ down the seconds to the full seconds of the last timer tick
+ using the respective reference.
+
+Coarse and fast_ns access
+-------------------------
+
+Some additional variants exist for more specialized cases:
+
+.. c:function:: ktime_t ktime_get_coarse_boottime( void )
+ ktime_t ktime_get_coarse_real( void )
+ ktime_t ktime_get_coarse_clocktai( void )
+ ktime_t ktime_get_coarse_raw( void )
+
+.. c:function:: void ktime_get_coarse_ts64( struct timespec64 * )
+ void ktime_get_coarse_boottime_ts64( struct timespec64 * )
+ void ktime_get_coarse_real_ts64( struct timespec64 * )
+ void ktime_get_coarse_clocktai_ts64( struct timespec64 * )
+ void ktime_get_coarse_raw_ts64( struct timespec64 * )
+
+ These are quicker than the non-coarse versions, but less accurate,
+ corresponding to CLOCK_MONOTONIC_COARSE and CLOCK_REALTIME_COARSE
+ in user space, along with the equivalent boottime/tai/raw
+ timebase not available in user space.
+
+ The time returned here corresponds to the last timer tick, which
+ may be as much as 10ms in the past (for CONFIG_HZ=100), same as
+ reading the 'jiffies' variable. These are only useful when called
+ in a fast path and one still expects better than second accuracy,
+ but can't easily use 'jiffies', e.g. for inode timestamps.
+ Skipping the hardware clock access saves around 100 CPU cycles
+ on most modern machines with a reliable cycle counter, but
+ up to several microseconds on older hardware with an external
+ clocksource.
+
+.. c:function:: u64 ktime_get_mono_fast_ns( void )
+ u64 ktime_get_raw_fast_ns( void )
+ u64 ktime_get_boot_fast_ns( void )
+ u64 ktime_get_real_fast_ns( void )
+
+ These variants are safe to call from any context, including from
+ a non-maskable interrupt (NMI) during a timekeeper update, and
+ while we are entering suspend with the clocksource powered down.
+ This is useful in some tracing or debugging code as well as
+ machine check reporting, but most drivers should never call them,
+ since the time is allowed to jump under certain conditions.
+
+Deprecated time interfaces
+--------------------------
+
+Older kernels used some other interfaces that are now being phased out
+but may appear in third-party drivers being ported here. In particular,
+all interfaces returning a 'struct timeval' or 'struct timespec' have
+been replaced because the tv_sec member overflows in year 2038 on 32-bit
+architectures. These are the recommended replacements:
+
+.. c:function:: void ktime_get_ts( struct timespec * )
+
+ Use ktime_get() or ktime_get_ts64() instead.
+
+.. c:function:: struct timeval do_gettimeofday( void )
+ struct timespec getnstimeofday( void )
+ struct timespec64 getnstimeofday64( void )
+ void ktime_get_real_ts( struct timespec * )
+
+ ktime_get_real_ts64() is a direct replacement, but consider using
+ monotonic time (ktime_get_ts64()) and/or a ktime_t based interface
+ (ktime_get()/ktime_get_real()).
+
+.. c:function:: struct timespec current_kernel_time( void )
+ struct timespec64 current_kernel_time64( void )
+ struct timespec get_monotonic_coarse( void )
+ struct timespec64 get_monotonic_coarse64( void )
+
+ These are replaced by ktime_get_coarse_real_ts64() and
+ ktime_get_coarse_ts64(). However, a lot of code that wants
+ coarse-grained times can use the simple 'jiffies' instead, while
+ some drivers may actually want the higher resolution accessors
+ these days.
+
+.. c:function:: struct timespec getrawmonotonic( void )
+ struct timespec64 getrawmonotonic64( void )
+ struct timespec timekeeping_clocktai( void )
+ struct timespec64 timekeeping_clocktai64( void )
+ struct timespec get_monotonic_boottime( void )
+ struct timespec64 get_monotonic_boottime64( void )
+
+ These are replaced by ktime_get_raw()/ktime_get_raw_ts64(),
+ ktime_get_clocktai()/ktime_get_clocktai_ts64() as well
+ as ktime_get_boottime()/ktime_get_boottime_ts64().
+ However, if the particular choice of clock source is not
+ important for the user, consider converting to
+ ktime_get()/ktime_get_ts64() instead for consistency.
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 86bc2026efce..947b1b8d2d01 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -21,6 +21,21 @@ extern int do_sys_settimeofday64(const struct timespec64 *tv,
const struct timezone *tz);
/*
+ * ktime_get() family: read the current time in a multitude of ways,
+ *
+ * The default time reference is CLOCK_MONOTONIC, starting at
+ * boot time but not counting the time spent in suspend.
+ * For other references, use the functions with "real", "clocktai",
+ * "boottime" and "raw" suffixes.
+ *
+ * To get the time in a different format, use the ones with
+ * "ns", "ts64" and "seconds" suffix.
+ *
+ * See Documentation/core-api/timekeeping.rst for more details.
+ */
+
+
+/*
* timespec64 based interfaces
*/
extern void ktime_get_raw_ts64(struct timespec64 *ts);
--
2.9.0
current_time is one of the few callers of current_kernel_time64(), which
is a wrapper around ktime_get_coarse_real_ts64(). This calls the latter
directly for consistency with the rest of the kernel that is moving to
the ktime_get_ family of time accessors.
An open question is whether we may want to actually call the more
accurate ktime_get_real_ts64() for file systems that save high-resolution
timestamps in their on-disk format. This would add a small but measurable
overhead to each update of the inode stamps but would let inode timestamps
actually have a usable resolution better than one jiffy (1 to 10
milliseconds normally).
I traced the original addition of the current_kernel_time() call to set
the nanosecond fields back to linux-2.5.48, where Andi Kleen added a
patch with subject "nanosecond stat timefields". This adds the original
call to current_kernel_time and the truncation to the resolution of the
file system, but makes no mention of the intended accuracy. At the time,
we had a do_gettimeofday() interface that on some architectures could
return a microsecond-resolution timestamp, but there was no interface
for getting an accurate timestamp in nanosecond resolution, neither inside
the kernel nor from user space. This makes me suspect that the use of
coarse timestamps was never really a conscious decision but instead
a result of whatever API was available 16 years ago.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
fs/inode.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/inode.c b/fs/inode.c
index 2c300e981796..e27bd9334939 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2133,7 +2133,9 @@ EXPORT_SYMBOL(timespec64_trunc);
*/
struct timespec64 current_time(struct inode *inode)
{
- struct timespec64 now = current_kernel_time64();
+ struct timespec64 now;
+
+ ktime_get_coarse_real_ts64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
--
2.9.0
get_seconds() can overflow on 32-bit architectures and is deprecated
because of that. The use in the aacraid driver has the same problem due
to a limited firmware interface, it also overflows in the year 2106.
This changes all calls to get_seconds() to the non-deprecated
ktime_get_real_seconds(), which unfortunately doesn't solve that problem
but gets rid of one user of the deprecated interface.
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
drivers/scsi/aacraid/rx.c | 2 +-
drivers/scsi/aacraid/sa.c | 2 +-
drivers/scsi/aacraid/src.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index 620166694171..576cdf9cc120 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -319,7 +319,7 @@ static void aac_rx_start_adapter(struct aac_dev *dev)
union aac_init *init;
init = dev->init;
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
// We can only use a 32 bit address here
rx_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS, (u32)(ulong)dev->init_pa,
0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL);
diff --git a/drivers/scsi/aacraid/sa.c b/drivers/scsi/aacraid/sa.c
index 882f40353b96..efa96c1c6aa3 100644
--- a/drivers/scsi/aacraid/sa.c
+++ b/drivers/scsi/aacraid/sa.c
@@ -251,7 +251,7 @@ static void aac_sa_start_adapter(struct aac_dev *dev)
* Fill in the remaining pieces of the init.
*/
init = dev->init;
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
/* We can only use a 32 bit address here */
sa_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
(u32)(ulong)dev->init_pa, 0, 0, 0, 0, 0,
diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c
index 4ebb35a29caa..5a299975a289 100644
--- a/drivers/scsi/aacraid/src.c
+++ b/drivers/scsi/aacraid/src.c
@@ -409,7 +409,7 @@ static void aac_src_start_adapter(struct aac_dev *dev)
init = dev->init;
if (dev->comm_interface == AAC_COMM_MESSAGE_TYPE3) {
- init->r8.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r8.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
src_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
lower_32_bits(dev->init_pa),
upper_32_bits(dev->init_pa),
@@ -417,7 +417,7 @@ static void aac_src_start_adapter(struct aac_dev *dev)
(AAC_MAX_HRRQ - 1) * sizeof(struct _rrq),
0, 0, 0, NULL, NULL, NULL, NULL, NULL);
} else {
- init->r7.host_elapsed_seconds = cpu_to_le32(get_seconds());
+ init->r7.host_elapsed_seconds = cpu_to_le32(ktime_get_real_seconds());
// We can only use a 32 bit address here
src_sync_cmd(dev, INIT_STRUCT_BASE_ADDRESS,
(u32)(ulong)dev->init_pa, 0, 0, 0, 0, 0,
--
2.9.0
'struct rusage' contains the run times of a process in 'timeval' format
and is accessed through the wait4() and getrusage() system calls. This
is not a problem for y2038 safety by itself, but causes an issue when
the C library starts using 64-bit time_t on 32-bit architectures because
the structure layout becomes incompatible.
There are four possible ways of dealing with this:
a) deprecate the wait4() and getrusage() system calls, and create
a set of kernel interfaces based around a newly defined structure that
could solve multiple problems at once, e.g. provide more fine-grained
timestamps. The C library could then implement the posix interfaces
on top of the new system calls.
b) Extend the approach taken by the x32 ABI, and use the 64-bit
native structure layout for rusage on all architectures with new
system calls that is otherwise compatible. A downside of this
is that it requires a number of ugly hacks to deal with all the
other fields of the structure also becoming 64 bit wide.
Especially on big-endian architectures, we can't easily use the
union trick from glibc.
c) Change the definition of struct rusage to be independent of
time_t. This is the easiest change, as it does not involve new system
call entry points, but it requires the C library to convert between
the kernel format of the structure and the user space definition.
d) Add a new ABI variant of 'struct rusage' that corresponds to the
current layout with 32-bit counters but 64-bit time_t. This would
minimize the libc changes but require additional kernel code to
handle a third binary layout on 64-bit kernels.
I'm picking approach c) for its simplicity. As pointed out by reviewers,
simply using the kernel structure in user space would not be POSIX
compliant, but I have verified that none of the usual C libraries (glibc,
musl, uclibc-ng, newlib) do that. Instead, they all provide their own
definition of 'struct rusage' to applications in sys/resource.h.
To be on the safe side, I'm only changing the definition inside of
the kernel and for user space with an updated 'time_t'. All existing
users will see the traditional layout that is compatible with what the
C libraries export. A 32-bit application that includes linux/resource.h
but uses an updated C library with 64-bit time_t will now see the low-level
kernel structure that corresponds to the getrusage() system call interface
but that will be different from one defined in sys/resource.h for the
getrusage library interface.
Link: https://patchwork.kernel.org/patch/10077527/
Cc: Paul Eggert <eggert(a)cs.ucla.edu>
Cc: Eric W. Biederman <ebiederm(a)xmission.com>
Signed-off-by: Arnd Bergmann <arnd(a)arndb.de>
---
arch/alpha/kernel/osf_sys.c | 15 +++++++++------
include/uapi/linux/resource.h | 14 ++++++++++++--
kernel/sys.c | 4 ++--
3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index 89faa6f4de47..cad03ee445b3 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -1184,6 +1184,7 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
struct rusage32 __user *, ur)
{
unsigned int status = 0;
+ struct rusage32 r32;
struct rusage r;
long err = kernel_wait4(pid, &status, options, &r);
if (err <= 0)
@@ -1192,12 +1193,14 @@ SYSCALL_DEFINE4(osf_wait4, pid_t, pid, int __user *, ustatus, int, options,
return -EFAULT;
if (!ur)
return err;
- if (put_tv_to_tv32(&ur->ru_utime, &r.ru_utime))
- return -EFAULT;
- if (put_tv_to_tv32(&ur->ru_stime, &r.ru_stime))
- return -EFAULT;
- if (copy_to_user(&ur->ru_maxrss, &r.ru_maxrss,
- sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss)))
+ r32.ru_utime.tv_sec = r.ru_utime.tv_sec;
+ r32.ru_utime.tv_usec = r.ru_utime.tv_usec;
+ r32.ru_stime.tv_sec = r.ru_stime.tv_sec;
+ r32.ru_stime.tv_usec = r.ru_stime.tv_usec;
+ memcpy(&r32.ru_maxrss, &r.ru_maxrss,
+ sizeof(struct rusage32) - offsetof(struct rusage32, ru_maxrss));
+
+ if (copy_to_user(ur, &r32, sizeof(r32)))
return -EFAULT;
return err;
}
diff --git a/include/uapi/linux/resource.h b/include/uapi/linux/resource.h
index cc00fd079631..611d3745c70a 100644
--- a/include/uapi/linux/resource.h
+++ b/include/uapi/linux/resource.h
@@ -22,8 +22,18 @@
#define RUSAGE_THREAD 1 /* only the calling thread */
struct rusage {
- struct timeval ru_utime; /* user time used */
- struct timeval ru_stime; /* system time used */
+#if (__BITS_PER_LONG != 32 || !defined(__USE_TIME_BITS64)) && !defined(__KERNEL__)
+ struct timeval ru_utime; /* user time used */
+ struct timeval ru_stime; /* system time used */
+#else
+ /*
+ * For 32-bit user space with 64-bit time_t, the binary layout
+ * in these fields is incompatible with 'struct timeval', so the
+ * C library has to translate this into the POSIX compatible layout.
+ */
+ struct __kernel_old_timeval ru_utime;
+ struct __kernel_old_timeval ru_stime;
+#endif
__kernel_long_t ru_maxrss; /* maximum resident set size */
__kernel_long_t ru_ixrss; /* integral shared memory size */
__kernel_long_t ru_idrss; /* integral unshared data size */
diff --git a/kernel/sys.c b/kernel/sys.c
index ad692183dfe9..1de538f622e8 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1769,8 +1769,8 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
unlock_task_sighand(p, &flags);
out:
- r->ru_utime = ns_to_timeval(utime);
- r->ru_stime = ns_to_timeval(stime);
+ r->ru_utime = ns_to_kernel_old_timeval(utime);
+ r->ru_stime = ns_to_kernel_old_timeval(stime);
if (who != RUSAGE_CHILDREN) {
struct mm_struct *mm = get_task_mm(p);
--
2.9.0