From: Steven Rostedt (VMware) rostedt@goodmis.org
commit 785e3c0a3a870e72dc530856136ab4c8dd207128 upstream.
The default max PID is set by PID_MAX_DEFAULT, and the tracing infrastructure uses this number to map PIDs to the comm names of the tasks, such output of the trace can show names from the recorded PIDs in the ring buffer. This mapping is also exported to user space via the "saved_cmdlines" file in the tracefs directory.
But currently the mapping expects the PIDs to be less than PID_MAX_DEFAULT, which is the default maximum and not the real maximum. Recently, systemd will increases the maximum value of a PID on the system, and when tasks are traced that have a PID higher than PID_MAX_DEFAULT, its comm is not recorded. This leads to the entire trace to have "<...>" as the comm name, which is pretty useless.
Instead, keep the array mapping the size of PID_MAX_DEFAULT, but instead of just mapping the index to the comm, map a mask of the PID (PID_MAX_DEFAULT - 1) to the comm, and find the full PID from the map_cmdline_to_pid array (that already exists).
This bug goes back to the beginning of ftrace, but hasn't been an issue until user space started increasing the maximum value of PIDs.
Link: https://lkml.kernel.org/r/20210427113207.3c601884@gandalf.local.home
Cc: stable@vger.kernel.org Fixes: bc0c38d139ec7 ("ftrace: latency tracer infrastructure") Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- kernel/trace/trace.c | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-)
--- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1562,14 +1562,13 @@ void trace_stop_cmdline_recording(void);
static int trace_save_cmdline(struct task_struct *tsk) { - unsigned pid, idx; + unsigned tpid, idx;
/* treat recording of idle task as a success */ if (!tsk->pid) return 1;
- if (unlikely(tsk->pid > PID_MAX_DEFAULT)) - return 0; + tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
/* * It's not the end of the world if we don't get @@ -1580,26 +1579,15 @@ static int trace_save_cmdline(struct tas if (!arch_spin_trylock(&trace_cmdline_lock)) return 0;
- idx = savedcmd->map_pid_to_cmdline[tsk->pid]; + idx = savedcmd->map_pid_to_cmdline[tpid]; if (idx == NO_CMDLINE_MAP) { idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
- /* - * Check whether the cmdline buffer at idx has a pid - * mapped. We are going to overwrite that entry so we - * need to clear the map_pid_to_cmdline. Otherwise we - * would read the new comm for the old pid. - */ - pid = savedcmd->map_cmdline_to_pid[idx]; - if (pid != NO_CMDLINE_MAP) - savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; - - savedcmd->map_cmdline_to_pid[idx] = tsk->pid; - savedcmd->map_pid_to_cmdline[tsk->pid] = idx; - + savedcmd->map_pid_to_cmdline[tpid] = idx; savedcmd->cmdline_idx = idx; }
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid; set_cmdline(idx, tsk->comm);
arch_spin_unlock(&trace_cmdline_lock); @@ -1610,6 +1598,7 @@ static int trace_save_cmdline(struct tas static void __trace_find_cmdline(int pid, char comm[]) { unsigned map; + int tpid;
if (!pid) { strcpy(comm, "<idle>"); @@ -1621,16 +1610,16 @@ static void __trace_find_cmdline(int pid return; }
- if (pid > PID_MAX_DEFAULT) { - strcpy(comm, "<...>"); - return; + tpid = pid & (PID_MAX_DEFAULT - 1); + map = savedcmd->map_pid_to_cmdline[tpid]; + if (map != NO_CMDLINE_MAP) { + tpid = savedcmd->map_cmdline_to_pid[map]; + if (tpid == pid) { + strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); + return; + } } - - map = savedcmd->map_pid_to_cmdline[pid]; - if (map != NO_CMDLINE_MAP) - strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); - else - strcpy(comm, "<...>"); + strcpy(comm, "<...>"); }
void trace_find_cmdline(int pid, char comm[])