June 2020 - Linux-stable-mirror

[PATCH 2/4] writeback: Avoid skipping inode writeback

by Jan Kara

Inode's i_io_list list head is used to attach inode to several different lists - wb->{b_dirty, b_dirty_time, b_io, b_more_io}. When flush worker prepares a list of inodes to writeback e.g. for sync(2), it moves inodes to b_io list. Thus it is critical for sync(2) data integrity guarantees that inode is not requeued to any other writeback list when inode is queued for processing by flush worker. That's the reason why writeback_single_inode() does not touch i_io_list (unless the inode is completely clean) and why __mark_inode_dirty() does not touch i_io_list if I_SYNC flag is set. However there are two flaws in the current logic: 1) When inode has only I_DIRTY_TIME set but it is already queued in b_io list due to sync(2), concurrent __mark_inode_dirty(inode, I_DIRTY_SYNC) can still move inode back to b_dirty list resulting in skipping writeback of inode time stamps during sync(2). 2) When inode is on b_dirty_time list and writeback_single_inode() races with __mark_inode_dirty() like: writeback_single_inode() __mark_inode_dirty(inode, I_DIRTY_PAGES) inode->i_state |= I_SYNC __writeback_single_inode() inode->i_state |= I_DIRTY_PAGES; if (inode->i_state & I_SYNC) bail if (!(inode->i_state & I_DIRTY_ALL)) - not true so nothing done We end up with I_DIRTY_PAGES inode on b_dirty_time list and thus standard background writeback will not writeback this inode leading to possible dirty throttling stalls etc. (thanks to Martijn Coenen for this analysis). Fix these problems by tracking whether inode is queued in b_io or b_more_io lists in a new I_SYNC_QUEUED flag. When this flag is set, we know flush worker has queued inode and we should not touch i_io_list. On the other hand we also know that once flush worker is done with the inode it will requeue the inode to appropriate dirty list. When I_SYNC_QUEUED is not set, __mark_inode_dirty() can (and must) move inode to appropriate dirty list. Reported-by: Martijn Coenen <maco(a)android.com> Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") CC: stable(a)vger.kernel.org Signed-off-by: Jan Kara <jack(a)suse.cz> --- fs/fs-writeback.c | 17 ++++++++++++----- include/linux/fs.h | 8 ++++++-- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ff0b18331590..f470c10641c5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -146,6 +146,7 @@ static void inode_io_list_del_locked(struct inode *inode, assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC_QUEUED; list_del_init(&inode->i_io_list); wb_io_lists_depopulated(wb); } @@ -1187,6 +1188,7 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb) inode->dirtied_when = jiffies; } inode_io_list_move_locked(inode, wb, &wb->b_dirty); + inode->i_state &= ~I_SYNC_QUEUED; } static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) @@ -1262,8 +1264,11 @@ static int move_expired_inodes(struct list_head *delaying_queue, break; list_move(&inode->i_io_list, &tmp); moved++; + spin_lock(&inode->i_lock); if (flags & EXPIRE_DIRTY_ATIME) - set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); + inode->i_state |= I_DIRTY_TIME_EXPIRED; + inode->i_state |= I_SYNC_QUEUED; + spin_unlock(&inode->i_lock); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) @@ -1438,6 +1443,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, } else if (inode->i_state & I_DIRTY_TIME) { inode->dirtied_when = jiffies; inode_io_list_move_locked(inode, wb, &wb->b_dirty_time); + inode->i_state &= ~I_SYNC_QUEUED; } else { /* The inode is clean. Remove from writeback lists. */ inode_io_list_del_locked(inode, wb); @@ -2301,11 +2307,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) inode->i_state |= flags; /* - * If the inode is being synced, just update its dirty state. - * The unlocker will place the inode on the appropriate - * superblock list, based upon its state. + * If the inode is queued for writeback by flush worker, just + * update its dirty state. Once the flush worker is done with + * the inode it will place it on the appropriate superblock + * list, based upon its state. */ - if (inode->i_state & I_SYNC) + if (inode->i_state & I_SYNC_QUEUED) goto out_unlock_inode; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 19ef6c88c152..48556efcdcf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2157,6 +2157,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_DONTCACHE Evict inode as soon as it is not used anymore. * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2174,12 +2178,12 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define __I_DIRTY_TIME_EXPIRED 12 -#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) +#define I_SYNC_QUEUED (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- 2.16.4

5 years, 6 months

3
4
0 0

[PATCH] drm/i915/icl+: Fix hotplug interrupt disabling after storm detection

by Imre Deak

Atm, hotplug interrupts on TypeC ports are left enabled after detecting an interrupt storm, fix this. Reported-by: Kunal Joshi <kunal1.joshi(a)intel.com> References: https://gitlab.freedesktop.org/drm/intel/-/issues/351 Bugzilla: https://gitlab.freedesktop.org/drm/intel/-/issues/1964 Cc: Kunal Joshi <kunal1.joshi(a)intel.com> Cc: stable(a)vger.kernel.org Signed-off-by: Imre Deak <imre.deak(a)intel.com> --- drivers/gpu/drm/i915/i915_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8e823ba25f5f..710224d930c5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3132,6 +3132,7 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) val = I915_READ(GEN11_DE_HPD_IMR); val &= ~hotplug_irqs; + val |= ~enabled_irqs & hotplug_irqs; I915_WRITE(GEN11_DE_HPD_IMR, val); POSTING_READ(GEN11_DE_HPD_IMR); -- 2.23.1

5 years, 6 months

2
2
0 0

[PATCH 3/4] writeback: Fix sync livelock due to b_dirty_time processing

by Jan Kara

When we are processing writeback for sync(2), move_expired_inodes() didn't set any inode expiry value (older_than_this). This can result in writeback never completing if there's steady stream of inodes added to b_dirty_time list as writeback rechecks dirty lists after each writeback round whether there's more work to be done. Fix the problem by using sync(2) start time is inode expiry value when processing b_dirty_time list similarly as for ordinarily dirtied inodes. This requires some refactoring of older_than_this handling which simplifies the code noticeably as a bonus. Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") CC: stable(a)vger.kernel.org Signed-off-by: Jan Kara <jack(a)suse.cz> --- fs/fs-writeback.c | 44 ++++++++++++++++------------------------ include/trace/events/writeback.h | 13 ++++++------ 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f470c10641c5..ae17d64a3e18 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -42,7 +42,6 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; - unsigned long *older_than_this; enum writeback_sync_modes sync_mode; unsigned int tagged_writepages:1; unsigned int for_kupdate:1; @@ -1234,16 +1233,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) #define EXPIRE_DIRTY_ATIME 0x0001 /* - * Move expired (dirtied before work->older_than_this) dirty inodes from + * Move expired (dirtied before dirtied_before) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - int flags, - struct wb_writeback_work *work) + int flags, unsigned long dirtied_before) { - unsigned long *older_than_this = NULL; - unsigned long expire_time; LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; @@ -1251,16 +1247,9 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; - if ((flags & EXPIRE_DIRTY_ATIME) == 0) - older_than_this = work->older_than_this; - else if (!work->for_sync) { - expire_time = jiffies - (dirtytime_expire_interval * HZ); - older_than_this = &expire_time; - } while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (older_than_this && - inode_dirtied_after(inode, *older_than_this)) + if (inode_dirtied_after(inode, dirtied_before)) break; list_move(&inode->i_io_list, &tmp); moved++; @@ -1306,18 +1295,22 @@ static int move_expired_inodes(struct list_head *delaying_queue, * | * +--> dequeue for IO */ -static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) +static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before) { int moved; + unsigned long time_expire_jif = dirtied_before; assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, dirtied_before); + if (!work->for_sync) + time_expire_jif = jiffies - dirtytime_expire_interval * HZ; moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, - EXPIRE_DIRTY_ATIME, work); + EXPIRE_DIRTY_ATIME, time_expire_jif); if (moved) wb_io_lists_populated(wb); - trace_writeback_queue_io(wb, work, moved); + trace_writeback_queue_io(wb, work, dirtied_before, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) @@ -1829,7 +1822,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) - queue_io(wb, &work); + queue_io(wb, &work, jiffies); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); blk_finish_plug(&plug); @@ -1849,7 +1842,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, * takes longer than a dirty_writeback_interval interval, then leave a * one-second gap. * - * older_than_this takes precedence over nr_to_write. So we'll only write back + * dirtied_before takes precedence over nr_to_write. So we'll only write back * all dirty pages if they are all attached to "old" mappings. */ static long wb_writeback(struct bdi_writeback *wb, @@ -1857,14 +1850,11 @@ static long wb_writeback(struct bdi_writeback *wb, { unsigned long wb_start = jiffies; long nr_pages = work->nr_pages; - unsigned long oldest_jif; + unsigned long dirtied_before = jiffies; struct inode *inode; long progress; struct blk_plug plug; - oldest_jif = jiffies; - work->older_than_this = &oldest_jif; - blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { @@ -1898,14 +1888,14 @@ static long wb_writeback(struct bdi_writeback *wb, * safe. */ if (work->for_kupdate) { - oldest_jif = jiffies - + dirtied_before = jiffies - msecs_to_jiffies(dirty_expire_interval * 10); } else if (work->for_background) - oldest_jif = jiffies; + dirtied_before = jiffies; trace_writeback_start(wb, work); if (list_empty(&wb->b_io)) - queue_io(wb, work); + queue_io(wb, work, dirtied_before); if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 10f5d1fa7347..7565dcd59697 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -498,8 +498,9 @@ DEFINE_WBC_EVENT(wbc_writepage); TRACE_EVENT(writeback_queue_io, TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work, + unsigned long dirtied_before, int moved), - TP_ARGS(wb, work, moved), + TP_ARGS(wb, work, dirtied_before, moved), TP_STRUCT__entry( __array(char, name, 32) __field(unsigned long, older) @@ -509,19 +510,17 @@ TRACE_EVENT(writeback_queue_io, __field(ino_t, cgroup_ino) ), TP_fast_assign( - unsigned long *older_than_this = work->older_than_this; strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32); - __entry->older = older_than_this ? *older_than_this : 0; - __entry->age = older_than_this ? - (jiffies - *older_than_this) * 1000 / HZ : -1; + __entry->older = dirtied_before; + __entry->age = (jiffies - dirtied_before) * 1000 / HZ; __entry->moved = moved; __entry->reason = work->reason; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); ), TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu", __entry->name, - __entry->older, /* older_than_this in jiffies */ - __entry->age, /* older_than_this in relative milliseconds */ + __entry->older, /* dirtied_before in jiffies */ + __entry->age, /* dirtied_before in relative milliseconds */ __entry->moved, __print_symbolic(__entry->reason, WB_WORK_REASON), (unsigned long)__entry->cgroup_ino -- 2.16.4

5 years, 6 months

2
1
0 0

backport status of two sctp commits

by William Dauchy

Hello, I found the following sctp commits: 582eea230536a6f104097dd46205822005d5fe3a ("sctp: fix possibly using a bad saddr with a given dst") backported in the following stable versions: v5.6.x, v5.5.x, v4.19.x, v4.14.x, v4.9.x, v4.4.x. 5c3e82fe159622e46e91458c1a6509c321a62820 ("sctp: fix refcount bug in sctp_wfree") backported in the following stable versions: v5.6.x, v5.5.x, v4.19.x, v4.14.x, v4.9.x However I cannot find them in v5.4.x yet. I checked stable queue on netdev side (http://patchwork.ozlabs.org/bundle/davem/stable/?state=*) but also main stable queue https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git I was wondering whether it was an oversight or was it expected? Sorry for the noise if I'm mistaken. Best regards, -- William

5 years, 6 months

2
1
0 0

backport status of "ipv4: fix a RCU-list lock in fib_triestat_seq_show"

by William Dauchy

Hello, I found the following commit fbe4e0c1b298b4665ee6915266c9d6c5b934ef4a ("ipv4: fix a RCU-list lock in fib_triestat_seq_show") backported in the following stable versions: v5.6.x, v5.5.x, v4.19.x, v4.14.x, v4.9.x, v4.4.x. However I cannot find it in v5.4.x yet. I checked stable queue on netdev side (http://patchwork.ozlabs.org/bundle/davem/stable/?state=*) but also main stable queue https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git I was wondering whether it was an oversight or it was expected? Sorry for the noise if I'm mistaken. Best regards, -- William

5 years, 6 months

2
1
0 0

[PATCH 05/17] pwm: lpss: Fix get_state runtime-pm reference handling

by Frank Werner-Krippendorf

From: Hans de Goede <hdegoede(a)redhat.com> Before commit cfc4c189bc70 ("pwm: Read initial hardware state at request time"), a driver's get_state callback would get called once per PWM from pwmchip_add(). pwm-lpss' runtime-pm code was relying on this, getting a runtime-pm ref for PWMs which are enabled at probe time from within its get_state callback, before enabling runtime-pm. The change to calling get_state at request time causes a number of problems: 1. PWMs enabled at probe time may get runtime suspended before they are requested, causing e.g. a LCD backlight controlled by the PWM to turn off. 2. When the request happens when the PWM has been runtime suspended, the ctrl register will read all 1 / 0xffffffff, causing get_state to store bogus values in the pwm_state. 3. get_state was using an async pm_runtime_get() call, because it assumed that runtime-pm has not been enabled yet. If shortly after the request an apply call is made, then the pwm_lpss_is_updating() check may trigger because the resume triggered by the pm_runtime_get() call is not complete yet, so the ctrl register still reads all 1 / 0xffffffff. This commit fixes these issues by moving the initial pm_runtime_get() call for PWMs which are enabled at probe time to the pwm_lpss_probe() function; and by making get_state take a runtime-pm ref before reading the ctrl reg. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1828927 Fixes: cfc4c189bc70 ("pwm: Read initial hardware state at request time") Cc: stable(a)vger.kernel.org Signed-off-by: Hans de Goede <hdegoede(a)redhat.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko(a)linux.intel.com> Signed-off-by: Thierry Reding <thierry.reding(a)gmail.com> --- drivers/pwm/pwm-lpss.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 75bbfe5f3bc2..9d965ffe66d1 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -158,7 +158,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } -/* This function gets called once from pwmchip_add to get the initial state */ static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_state *state) { @@ -167,6 +166,8 @@ static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, unsigned long long base_unit, freq, on_time_div; u32 ctrl; + pm_runtime_get_sync(chip->dev); + base_unit_range = BIT(lpwm->info->base_unit_bits); ctrl = pwm_lpss_read(pwm); @@ -187,8 +188,7 @@ static void pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->polarity = PWM_POLARITY_NORMAL; state->enabled = !!(ctrl & PWM_ENABLE); - if (state->enabled) - pm_runtime_get(chip->dev); + pm_runtime_put(chip->dev); } static const struct pwm_ops pwm_lpss_ops = { @@ -202,7 +202,8 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, { struct pwm_lpss_chip *lpwm; unsigned long c; - int ret; + int i, ret; + u32 ctrl; if (WARN_ON(info->npwm > MAX_PWMS)) return ERR_PTR(-ENODEV); @@ -232,6 +233,12 @@ struct pwm_lpss_chip *pwm_lpss_probe(struct device *dev, struct resource *r, return ERR_PTR(ret); } + for (i = 0; i < lpwm->info->npwm; i++) { + ctrl = pwm_lpss_read(&lpwm->chip.pwms[i]); + if (ctrl & PWM_ENABLE) + pm_runtime_get(dev); + } + return lpwm; } EXPORT_SYMBOL_GPL(pwm_lpss_probe); -- 2.20.1

5 years, 6 months

1
0
0 0

Re: [PATCH 1/4] proc/bootconfig: Fix to use correct quotes for value

by Markus Elfring

> Fix /proc/bootconfig to show the correctly choose the > double or single quotes according to the value. I suggest to improve this wording a bit. Regards, Markus

5 years, 6 months

2
1
0 0

[PATCH] smb3: add indatalen that can be a non-zero value to calculation of credit charge in smb2 ioctl

by Namjae Jeon

Some of tests in xfstests failed with cifsd kernel server since commit e80ddeb2f70e. cifsd kernel server validates credit charge from client by calculating it base on max((InputCount + OutputCount) and (MaxInputResponse + MaxOutputResponse)) according to specification. MS-SMB2 specification describe credit charge calculation of smb2 ioctl : If Connection.SupportsMultiCredit is TRUE, the server MUST validate CreditCharge based on the maximum of (InputCount + OutputCount) and (MaxInputResponse + MaxOutputResponse), as specified in section 3.3.5.2.5. If the validation fails, it MUST fail the IOCTL request with STATUS_INVALID_PARAMETER. This patch add indatalen that can be a non-zero value to calculation of credit charge in SMB2_ioctl_init(). Fixes: e80ddeb2f70e ("smb3: fix incorrect number of credits when ioctl MaxOutputResponse > 64K") Cc: Stable <stable(a)vger.kernel.org> Cc: Aurelien Aptel <aaptel(a)suse.com> Cc: Steve French <smfrench(a)gmail.com> Signed-off-by: Namjae Jeon <namjae.jeon(a)samsung.com> --- fs/cifs/smb2pdu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ded96b529a4d..86d894499fbb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2973,7 +2973,9 @@ SMB2_ioctl_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server, * response size smaller. */ req->MaxOutputResponse = cpu_to_le32(max_response_size); - req->sync_hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(max_response_size, SMB2_MAX_BUFFER_SIZE)); + req->sync_hdr.CreditCharge = + cpu_to_le16(DIV_ROUND_UP(max(indatalen, max_response_size), + SMB2_MAX_BUFFER_SIZE)); if (is_fsctl) req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL); else -- 2.17.1

5 years, 6 months

3
2
0 0

[7d2debdcdb3cb93c1e5e] [PATCH v2] proc: Use new_inode not new_inode_pseudo

by Frank Werner-Krippendorf

From: "Eric W. Biederman" <ebiederm(a)xmission.com> Recently syzbot reported that unmounting proc when there is an ongoing inotify watch on the root directory of proc could result in a use after free when the watch is removed after the unmount of proc when the watcher exits. Commit 69879c01a0c3 ("proc: Remove the now unnecessary internal mount of proc") made it easier to unmount proc and allowed syzbot to see the problem, but looking at the code it has been around for a long time. Looking at the code the fsnotify watch should have been removed by fsnotify_sb_delete in generic_shutdown_super. Unfortunately the inode was allocated with new_inode_pseudo instead of new_inode so the inode was not on the sb->s_inodes list. Which prevented fsnotify_unmount_inodes from finding the inode and removing the watch as well as made it so the "VFS: Busy inodes after unmount" warning could not find the inodes to warn about them. Make all of the inodes in proc visible to generic_shutdown_super, and fsnotify_sb_delete by using new_inode instead of new_inode_pseudo. The only functional difference is that new_inode places the inodes on the sb->s_inodes list. I wrote a small test program and I can verify that without changes it can trigger this issue, and by replacing new_inode_pseudo with new_inode the issues goes away. Cc: stable(a)vger.kernel.org Link: https://lkml.kernel.org/r/000000000000d788c905a7dfa3f4@google.com Reported-by: syzbot+7d2debdcdb3cb93c1e5e(a)syzkaller.appspotmail.com Fixes: 0097875bd415 ("proc: Implement /proc/thread-self to point at the directory of the current thread") Fixes: 021ada7dff22 ("procfs: switch /proc/self away from proc_dir_entry") Fixes: 51f0885e5415 ("vfs,proc: guarantee unique inodes in /proc") Signed-off-by: "Eric W. Biederman" <ebiederm(a)xmission.com> --- fs/proc/inode.c | 2 +- fs/proc/self.c | 2 +- fs/proc/thread_self.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index f40c2532c057..28d6105e908e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -617,7 +617,7 @@ const struct inode_operations proc_link_inode_operations = { struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) { - struct inode *inode = new_inode_pseudo(sb); + struct inode *inode = new_inode(sb); if (inode) { inode->i_ino = de->low_ino; diff --git a/fs/proc/self.c b/fs/proc/self.c index ca5158fa561c..72cd69bcaf4a 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -43,7 +43,7 @@ int proc_setup_self(struct super_block *s) inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { - struct inode *inode = new_inode_pseudo(s); + struct inode *inode = new_inode(s); if (inode) { inode->i_ino = self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index ac284f409568..a553273fbd41 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -43,7 +43,7 @@ int proc_setup_thread_self(struct super_block *s) inode_lock(root_inode); thread_self = d_alloc_name(s->s_root, "thread-self"); if (thread_self) { - struct inode *inode = new_inode_pseudo(s); + struct inode *inode = new_inode(s); if (inode) { inode->i_ino = thread_self_inum; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); -- 2.20.1

5 years, 6 months

1
0
0 0

[PATCH] ARM: tegra: Fix restoration of PLLM when exiting suspend

by Jon Hunter

The suspend entry and exit code for 32-bit Tegra devices assumes that the PLLM (which is used to provide the clock for external memory) is always enabled on entry to suspend. Hence, the current code always disables the PLLM on entry to suspend and re-enables the PLLM on exit from suspend. Since the introduction of the Tegra124 EMC driver by commit 73a7f0a90641 ("memory: tegra: Add EMC (external memory controller) driver"), which is used to scale the EMC frequency, PLLM may not be the current clock source for the EMC on entry to suspend and hence may not be enabled. Always enabling the PLLM on exit from suspend can cause the actual status on the PLL to be different from that reported by the common clock framework. On kernels prior to v4.5, the code to set the rate of the PLLM had a test to verify if the PLL was enabled and if the PLL was enabled, setting the rate would fail. Since commit 267b62a96951 ("clk: tegra: pll: Update PLLM handling") the test to see if PLLM is enabled was removed. With these earlier kernels, if the PLLM is disabled on entering suspend and the EMC driver attempts to set the parent of the EMC clock to the PLLM on exiting suspend, then the set rate for the PLLM will fail and in turn cause the resume to fail. We should not be re-enabling the PLLM on resume from suspend unless it was enabled on entry to suspend. Therefore, fix this by saving the state of PLLM on entry to suspend and only re-enable it, if it was already enabled. Fixes: 73a7f0a90641 ("memory: tegra: Add EMC (external memory controller) driver") Cc: stable(a)vger.kernel.org Signed-off-by: Jon Hunter <jonathanh(a)nvidia.com> --- arch/arm/mach-tegra/sleep-tegra30.S | 33 +++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index 3341a12bbb9c..c2f0793a424f 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -337,26 +337,42 @@ ENTRY(tegra30_lp1_reset) add r1, r1, #2 wait_until r1, r7, r3 - /* enable PLLM via PMC */ + /* restore PLLM state */ mov32 r2, TEGRA_PMC_BASE + adr r7, tegra_pllm_status + ldr r1, [r7] + cmp r2, #(1 << 12) + bne _skip_pllm + ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] orr r1, r1, #(1 << 12) str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] pll_enable r1, r0, CLK_RESET_PLLM_BASE, 0 + pll_locked r1, r0, CLK_RESET_PLLM_BASE + +_skip_pllm: pll_enable r1, r0, CLK_RESET_PLLC_BASE, 0 pll_enable r1, r0, CLK_RESET_PLLX_BASE, 0 b _pll_m_c_x_done _no_pll_iddq_exit: - /* enable PLLM via PMC */ + /* restore PLLM state */ mov32 r2, TEGRA_PMC_BASE + adr r7, tegra_pllm_status + ldr r1, [r7] + cmp r2, #(1 << 12) + bne _skip_pllm_no_iddq + ldr r1, [r2, #PMC_PLLP_WB0_OVERRIDE] orr r1, r1, #(1 << 12) str r1, [r2, #PMC_PLLP_WB0_OVERRIDE] pll_enable r1, r0, CLK_RESET_PLLM_BASE, CLK_RESET_PLLM_MISC + pll_locked r1, r0, CLK_RESET_PLLM_BASE + +_skip_pllm_no_iddq: pll_enable r1, r0, CLK_RESET_PLLC_BASE, CLK_RESET_PLLC_MISC pll_enable r1, r0, CLK_RESET_PLLX_BASE, CLK_RESET_PLLX_MISC @@ -364,7 +380,6 @@ _pll_m_c_x_done: pll_enable r1, r0, CLK_RESET_PLLP_BASE, CLK_RESET_PLLP_MISC pll_enable r1, r0, CLK_RESET_PLLA_BASE, CLK_RESET_PLLA_MISC - pll_locked r1, r0, CLK_RESET_PLLM_BASE pll_locked r1, r0, CLK_RESET_PLLP_BASE pll_locked r1, r0, CLK_RESET_PLLA_BASE pll_locked r1, r0, CLK_RESET_PLLC_BASE @@ -526,6 +541,8 @@ __no_dual_emc_chanl: ENDPROC(tegra30_lp1_reset) .align L1_CACHE_SHIFT +tegra_pllm_status: + .word 0 tegra30_sdram_pad_address: .word TEGRA_EMC_BASE + EMC_CFG @0x0 .word TEGRA_EMC_BASE + EMC_ZCAL_INTERVAL @0x4 @@ -624,10 +641,14 @@ tegra30_switch_cpu_to_clk32k: add r1, r1, #2 wait_until r1, r7, r9 - /* disable PLLM via PMC in LP1 */ + /* disable PLLM, if enabled, via PMC in LP1 */ + adr r1, tegra_pllm_status ldr r0, [r4, #PMC_PLLP_WB0_OVERRIDE] - bic r0, r0, #(1 << 12) - str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] + and r2, r0, #(1 << 12) + str r2, [r1] + cmp r2, #(1 << 12) + biceq r0, r0, #(1 << 12) + streq r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ ldr r0, [r5, #CLK_RESET_PLLP_BASE] -- 2.17.1

5 years, 6 months

4
10
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror June 2020