Each Chip-Select (CS) of a Unified Memory Controller (UMC) on AMD EPYC
SoCs has an Address Mask and a Secondary Address Mask register associated
with it. The amd64_edac module logs DIMM sizes on a per-UMC per-CS
granularity during init using these two registers.
Currently, the module considers only the Address Mask register when
computing DIMM sizes. The Secondary Address Mask register is consulted
only for odd CS, and when it is, the Address Mask register is ignored
altogether for that CS. For power-of-two DIMMs this is not an issue,
since only the Address Mask register is used.
For non-power-of-two DIMMs, however, the Secondary Address Mask register
is used in conjunction with the Address Mask register. Since the module
only ever considers one of the two registers for a given CS, the size it
computes is incorrect: the Secondary Address Mask register is not
considered for even CS, and the Address Mask register is not considered
for odd CS.
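In other words, with the helper introduced below, the per-CS size has to
be the sum of what the two masks describe rather than whichever single
mask happens to be selected (a rough sketch of the intent, not the
literal diff):
	/* Rough sketch: both masks contribute to the reported size. */
	size = calculate_cs_size(addr_mask, cs_mode);		/* primary rank */
	if (addr_mask_sec)
		size += calculate_cs_size(addr_mask_sec, cs_mode); /* secondary rank */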
While at it, rename some variables for clarity.
Fixes: 81f5090db843 ("EDAC/amd64: Support asymmetric dual-rank DIMMs")
Signed-off-by: Avadhut Naik <avadhut.naik(a)amd.com>
Cc: stable(a)vger.kernel.org
---
drivers/edac/amd64_edac.c | 56 ++++++++++++++++++++++++---------------
1 file changed, 35 insertions(+), 21 deletions(-)
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 90f0eb7cc5b9..16117fda727f 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1209,7 +1209,9 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
if (csrow_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_PRIMARY;
- /* Asymmetric dual-rank DIMM support. */
+ if (csrow_sec_enabled(2 * dimm, ctrl, pvt))
+ cs_mode |= CS_EVEN_SECONDARY;
+
if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt))
cs_mode |= CS_ODD_SECONDARY;
@@ -1230,12 +1232,10 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt)
return cs_mode;
}
-static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
- int csrow_nr, int dimm)
+static int calculate_cs_size(u32 mask, unsigned int cs_mode)
{
- u32 msb, weight, num_zero_bits;
- u32 addr_mask_deinterleaved;
- int size = 0;
+ int msb, weight, num_zero_bits;
+ u32 deinterleaved_mask = 0;
/*
* The number of zero bits in the mask is equal to the number of bits
@@ -1248,19 +1248,32 @@ static int __addr_mask_to_cs_size(u32 addr_mask_orig, unsigned int cs_mode,
* without swapping with the most significant bit. This can be handled
* by keeping the MSB where it is and ignoring the single zero bit.
*/
- msb = fls(addr_mask_orig) - 1;
- weight = hweight_long(addr_mask_orig);
+ msb = fls(mask) - 1;
+ weight = hweight_long(mask);
num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE);
/* Take the number of zero bits off from the top of the mask. */
- addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1);
+ deinterleaved_mask = GENMASK(msb - num_zero_bits, 1);
+ edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", deinterleaved_mask);
+
+ return (deinterleaved_mask >> 2) + 1;
+}
+
+static int __addr_mask_to_cs_size(u32 addr_mask, u32 addr_mask_sec,
+ unsigned int cs_mode, int csrow_nr, int dimm)
+{
+ int size = 0;
edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm);
- edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig);
- edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved);
+ edac_dbg(1, " Primary AddrMask: 0x%x\n", addr_mask);
/* Register [31:1] = Address [39:9]. Size is in kBs here. */
- size = (addr_mask_deinterleaved >> 2) + 1;
+ size = calculate_cs_size(addr_mask, cs_mode);
+
+ if (addr_mask_sec) {
+ edac_dbg(1, " Secondary AddrMask: 0x%x\n", addr_mask);
+ size += calculate_cs_size(addr_mask_sec, cs_mode);
+ }
/* Return size in MBs. */
return size >> 10;
@@ -1270,7 +1283,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
int cs_mask_nr = csrow_nr;
- u32 addr_mask_orig;
+ u32 addr_mask = 0, addr_mask_sec = 0;
int dimm, size = 0;
/* No Chip Selects are enabled. */
@@ -1308,13 +1321,13 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
if (!pvt->flags.zn_regs_v2)
cs_mask_nr >>= 1;
- /* Asymmetric dual-rank DIMM support. */
- if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY))
- addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- else
- addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr];
+ if (cs_mode & CS_EVEN_PRIMARY || cs_mode & CS_ODD_PRIMARY)
+ addr_mask = pvt->csels[umc].csmasks[cs_mask_nr];
+
+ if (cs_mode & CS_EVEN_SECONDARY || cs_mode & CS_ODD_SECONDARY)
+ addr_mask_sec = pvt->csels[umc].csmasks_sec[cs_mask_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, dimm);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, dimm);
}
static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
@@ -3512,9 +3525,10 @@ static void gpu_get_err_info(struct mce *m, struct err_info *err)
static int gpu_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc,
unsigned int cs_mode, int csrow_nr)
{
- u32 addr_mask_orig = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask = pvt->csels[umc].csmasks[csrow_nr];
+ u32 addr_mask_sec = pvt->csels[umc].csmasks_sec[csrow_nr];
- return __addr_mask_to_cs_size(addr_mask_orig, cs_mode, csrow_nr, csrow_nr >> 1);
+ return __addr_mask_to_cs_size(addr_mask, addr_mask_sec, cs_mode, csrow_nr, csrow_nr >> 1);
}
static void gpu_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl)
base-commit: f1861fb575b028e35e6233295441d535f2e3f240
--
2.43.0
If a new reset event appears before the previous one has been
processed, the device can get stuck in a reset loop. This happens
rarely, but blocks the device when it does, and floods the log with
messages like the following:
lan78xx 2-3:1.0 enp1s0u3: kevent 4 may have been dropped
The only bit that the driver pays attention to in the interrupt data
is "link was reset". If there's a flapping status bit in that endpoint
data (such as if PHY negotiation needs a few tries to get a stable
link), polling at a slower rate allows the state to settle.
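For reference, with the constants introduced below the floor works out to
MIN_INT_URB_INTERVAL_MS * INT_URB_MICROFRAMES_PER_MS = 8 * 8 = 64
microframes, i.e. (per the patch's own units) the interrupt endpoint is
polled no more often than every 8 ms even if the descriptor advertises a
smaller bInterval. A rough sketch of the same computation:
	/* Sketch: enforce an 8 ms lower bound on the polling period. */
	period = max(ep_intr->desc.bInterval, 8 /* ms */ * 8 /* uframes/ms */);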
This is a simplified version of a patch that's been in the Raspberry
Pi downstream kernel since their 4.14 branch, see also:
https://github.com/raspberrypi/linux/issues/2447
Signed-off-by: Fiona Klute <fiona.klute(a)gmx.de>
Cc: kernel-list(a)raspberrypi.com
Cc: stable(a)vger.kernel.org
---
For the stable crew: I've *tested* the patch with 6.12.7 and 6.13.5 on
a Revolution Pi Connect 4 (Raspberry Pi CM4 based device with built-in
LAN7800 as second ethernet port). According to the linked issue for
the RPi downstream kernel, the problem should be present in all
maintained longterm kernel versions, too (based on how long they've
carried a patch).
drivers/net/usb/lan78xx.c | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index a91bf9c7e31d..7bf01a31a932 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -173,6 +173,12 @@
#define INT_EP_GPIO_1 (1)
#define INT_EP_GPIO_0 (0)
+/* highspeed device, so polling interval is in microframes (eight per
+ * millisecond)
+ */
+#define INT_URB_MICROFRAMES_PER_MS 8
+#define MIN_INT_URB_INTERVAL_MS 8
+
static const char lan78xx_gstrings[][ETH_GSTRING_LEN] = {
"RX FCS Errors",
"RX Alignment Errors",
@@ -4527,7 +4533,11 @@ static int lan78xx_probe(struct usb_interface *intf,
if (ret < 0)
goto out4;
- period = ep_intr->desc.bInterval;
+ period = max(ep_intr->desc.bInterval,
+ MIN_INT_URB_INTERVAL_MS * INT_URB_MICROFRAMES_PER_MS);
+ dev_info(&intf->dev,
+ "interrupt urb period set to %d, bInterval is %d\n",
+ period, ep_intr->desc.bInterval);
maxp = usb_maxpacket(dev->udev, dev->pipe_intr);
dev->urb_intr = usb_alloc_urb(0, GFP_KERNEL);
base-commit: dd83757f6e686a2188997cb58b5975f744bb7786
--
2.47.2
There is a nasty regression wrt mt7921e in the last LTS series (6.12).
If your computer crashes or fails to get out of hibernation, then at the
next boot the mt7921e wifi does not work, with dmesg reporting that it
is unable to change power state from d3cold to d0.
The issue is nasty, because rebooting won't help.
The only solution that I have found to the issue is booting a 6.6
kernel. With that the wifi comes back to life. If, at this point, you boot
into 6.12, everything seems to be fine again, until a boot fails and
from that moment on you are without wifi.
The workaround is also not very discoverable. On my machine not even a
hardware reset (power button held for 40 s) helped on its own, without
going through the 6.6 kernel boot.
I think the regression was introduced with 6.12 and I remember having no
issues with previous kernels, but cannot be 100% sure.
Similarly, I do not know if the bug is with the wifi card driver itself
or with something related to PM or PCIe.
The machine on which I am experiencing the issue is an Asus ROG 14
laptop (2022 edition), with AMD CPU and GPU.
Thanks for the attention,
Sergio
From: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
If an input changes state during wake-up and is used as an interrupt
source, the IRQ handler reads the volatile input register to clear the
interrupt mask and deassert the IRQ line. However, the IRQ handler is
triggered before access to the register is granted, causing the read
operation to fail.
As a result, the IRQ handler enters a loop, repeatedly printing the
"failed reading register" message, until `pca953x_resume` is eventually
called, which restores the driver context and enables access to
registers.
Fix this by using DEFINE_NOIRQ_DEV_PM_OPS, which ensures that
`pca953x_resume` is called before the IRQ handler can run.
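For context, a rough sketch of what the NOIRQ variant amounts to (not the
literal macro expansion): the suspend/resume callbacks run in the noirq
phase, while device interrupt handlers are still kept from running, so the
register context is restored before the pca953x IRQ handler can fire.
	/* Rough sketch only -- not the literal macro expansion. */
	static const struct dev_pm_ops pca953x_pm_ops = {
		.suspend_noirq = pm_sleep_ptr(pca953x_suspend),
		.resume_noirq  = pm_sleep_ptr(pca953x_resume),
	};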
Fixes: b76574300504 ("gpio: pca953x: Restore registers after suspend/resume cycle")
Cc: stable(a)vger.kernel.org
Signed-off-by: Emanuele Ghidoli <emanuele.ghidoli(a)toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini(a)toradex.com>
---
drivers/gpio/gpio-pca953x.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index d63c1030e6ac..d39bdc125cfc 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -1252,7 +1252,7 @@ static int pca953x_resume(struct device *dev)
return ret;
}
-static DEFINE_SIMPLE_DEV_PM_OPS(pca953x_pm_ops, pca953x_suspend, pca953x_resume);
+static DEFINE_NOIRQ_DEV_PM_OPS(pca953x_pm_ops, pca953x_suspend, pca953x_resume);
/* convenience to stop overlong match-table lines */
#define OF_653X(__nrgpio, __int) ((void *)(__nrgpio | PCAL653X_TYPE | __int))
--
2.39.5
From: Frode Isaksen <frode(a)meta.com>
Invalidate io_data by setting the request's context to NULL when the USB
request is dequeued or interrupted, and check for NULL io_data in
ffs_epfile_io_complete().
The invalidation of io_data in req->context is done when exiting
ffs_epfile_io(), since io_data is allocated on the stack and becomes
invalid once the function returns.
ffs_epfile_io_complete() may be called after ffs_epfile_io() returns if
wait_for_completion_interruptible() is interrupted.
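A rough sketch of the race being closed (io_data lives on the stack of the
synchronous I/O path, as described above):
	/*
	 * ffs_epfile_io()                        ffs_epfile_io_complete()
	 * ---------------                        ------------------------
	 * req->context = io_data (on stack)
	 * wait_for_completion_interruptible()
	 *   -> interrupted, function returns,
	 *      io_data is now invalid
	 *                                        io_data = req->context  <-- UAF
	 *
	 * With this patch, req->context is set to NULL before returning, and
	 * the completion handler bails out when it sees NULL.
	 */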
This fixes a use-after-free error with the following call stack:
Unable to handle kernel paging request at virtual address ffffffc02f7bbcc0
pc : ffs_epfile_io_complete+0x30/0x48
lr : usb_gadget_giveback_request+0x30/0xf8
Call trace:
ffs_epfile_io_complete+0x30/0x48
usb_gadget_giveback_request+0x30/0xf8
dwc3_remove_requests+0x264/0x2e8
dwc3_gadget_pullup+0x1d0/0x250
kretprobe_trampoline+0x0/0xc4
usb_gadget_remove_driver+0x40/0xf4
usb_gadget_unregister_driver+0xdc/0x178
unregister_gadget_item+0x40/0x6c
ffs_closed+0xd4/0x10c
ffs_data_clear+0x2c/0xf0
ffs_data_closed+0x178/0x1ec
ffs_ep0_release+0x24/0x38
__fput+0xe8/0x27c
Signed-off-by: Frode Isaksen <frode(a)meta.com>
Cc: stable(a)vger.kernel.org
---
v1 -> v2:
Removed WARN_ON() in ffs_epfile_io_complete().
Clarified commit message.
Added stable Cc tag.
drivers/usb/gadget/function/f_fs.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 2dea9e42a0f8..e35d32e7be58 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -738,6 +738,9 @@ static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
{
struct ffs_io_data *io_data = req->context;
+ if (io_data == NULL)
+ return;
+
if (req->status)
io_data->status = req->status;
else
@@ -1126,6 +1129,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
spin_lock_irq(&epfile->ffs->eps_lock);
if (epfile->ep != ep) {
ret = -ESHUTDOWN;
+ req->context = NULL;
goto error_lock;
}
/*
@@ -1140,6 +1144,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
interrupted = io_data->status < 0;
}
+ req->context = NULL;
if (interrupted)
ret = -EINTR;
else if (io_data->read && io_data->status > 0)
--
2.49.0
The late init call just writes to omap4 registers as soon as
CONFIG_MFD_CPCAP is enabled without checking whether the
cpcap driver is actually there or the SoC is indeed an
OMAP4.
Instead, do these things only with the right device combination.
This fixes booting the BT200 with said configuration enabled and a
non-factory X-Loader, and probably also some surprising behavior on
other devices.
Fixes: c145649bf262 ("ARM: OMAP2+: Configure voltage controller for cpcap to low-speed")
CC: <stable(a)vger.kernel.org>
Signed-off-by: Andreas Kemnade <andreas(a)kemnade.info>
---
arch/arm/mach-omap2/pmic-cpcap.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm/mach-omap2/pmic-cpcap.c b/arch/arm/mach-omap2/pmic-cpcap.c
index 4f31e61c0c90..9f9a20274db8 100644
--- a/arch/arm/mach-omap2/pmic-cpcap.c
+++ b/arch/arm/mach-omap2/pmic-cpcap.c
@@ -264,7 +264,11 @@ int __init omap4_cpcap_init(void)
static int __init cpcap_late_init(void)
{
- omap4_vc_set_pmic_signaling(PWRDM_POWER_RET);
+ if (!of_find_compatible_node(NULL, NULL, "motorola,cpcap"))
+ return 0;
+
+ if (soc_is_omap443x() || soc_is_omap446x() || soc_is_omap447x())
+ omap4_vc_set_pmic_signaling(PWRDM_POWER_RET);
return 0;
}
--
2.39.5
Previously, commit ed129ec9057f ("KVM: x86: forcibly leave nested mode
on vCPU reset") addressed an issue where a triple fault occurring in
nested mode could lead to use-after-free scenarios. However, the commit
did not handle the analogous situation for System Management Mode (SMM).
This omission results in triggering a WARN when a vCPU reset occurs
while still in SMM mode, due to the check in kvm_vcpu_reset(). This
situation was reproduced using Syzkaller by:
1) Creating a KVM VM and vCPU
2) Sending a KVM_SMI ioctl to explicitly enter SMM
3) Executing invalid instructions causing consecutive exceptions and
eventually a triple fault
The issue manifests as follows:
WARNING: CPU: 0 PID: 25506 at arch/x86/kvm/x86.c:12112
kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112
Modules linked in:
CPU: 0 PID: 25506 Comm: syz-executor.0 Not tainted
6.1.130-syzkaller-00157-g164fe5dde9b6 #0
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.12.0-1 04/01/2014
RIP: 0010:kvm_vcpu_reset+0x1d2/0x1530 arch/x86/kvm/x86.c:12112
Call Trace:
<TASK>
shutdown_interception+0x66/0xb0 arch/x86/kvm/svm/svm.c:2136
svm_invoke_exit_handler+0x110/0x530 arch/x86/kvm/svm/svm.c:3395
svm_handle_exit+0x424/0x920 arch/x86/kvm/svm/svm.c:3457
vcpu_enter_guest arch/x86/kvm/x86.c:10959 [inline]
vcpu_run+0x2c43/0x5a90 arch/x86/kvm/x86.c:11062
kvm_arch_vcpu_ioctl_run+0x50f/0x1cf0 arch/x86/kvm/x86.c:11283
kvm_vcpu_ioctl+0x570/0xf00 arch/x86/kvm/../../../virt/kvm/kvm_main.c:4122
vfs_ioctl fs/ioctl.c:51 [inline]
__do_sys_ioctl fs/ioctl.c:870 [inline]
__se_sys_ioctl fs/ioctl.c:856 [inline]
__x64_sys_ioctl+0x19a/0x210 fs/ioctl.c:856
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x35/0x80 arch/x86/entry/common.c:81
entry_SYSCALL_64_after_hwframe+0x6e/0xd8
On real hardware, a triple fault triggers a reset, and a reset inherently
takes the CPU out of SMM. Mirror that behavior by explicitly exiting SMM
before the WARN check. Although the subsequent code clears the vCPU
hflags, including the SMM flag, calling kvm_smm_changed() ensures the
exit from SMM is handled correctly and explicitly, aligning precisely
with hardware behavior.
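A rough sketch of the resulting ordering in kvm_vcpu_reset() (the second
argument of kvm_smm_changed() indicates whether SMM is being entered, so
false means the vCPU is leaving SMM):
	if (is_guest_mode(vcpu))
		kvm_leave_nested(vcpu);
	if (is_smm(vcpu))
		kvm_smm_changed(vcpu, false);	/* leave SMM, as a reset would */
	kvm_lapic_reset(vcpu, init_event);
	WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));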
Found by Linux Verification Center (linuxtesting.org) with Syzkaller.
Fixes: ed129ec9057f ("KVM: x86: forcibly leave nested mode on vCPU reset")
Cc: stable(a)vger.kernel.org
Signed-off-by: Mikhail Lobanov <m.lobanov(a)rosa.ru>
---
arch/x86/kvm/x86.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4b64ab350bcd..f1c95c21703a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12409,6 +12409,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (is_guest_mode(vcpu))
kvm_leave_nested(vcpu);
+ if (is_smm(vcpu))
+ kvm_smm_changed(vcpu, false);
+
kvm_lapic_reset(vcpu, init_event);
WARN_ON_ONCE(is_guest_mode(vcpu) || is_smm(vcpu));
--
2.47.2
From: yangge <yangge1116(a)126.com>
If a large amount of CMA memory is configured in the system (for example,
CMA memory accounts for 50% of system memory), starting a virtual machine
with device passthrough will call
pin_user_pages_remote(..., FOLL_LONGTERM, ...) to pin memory.
Normally, if a page is present and in a CMA area, pin_user_pages_remote()
will migrate the page from the CMA area to a non-CMA area because of the
FOLL_LONGTERM flag. But the current code causes the migration to fail due
to unexpected page refcounts, and eventually causes the virtual machine to
fail to start.
Adding a page to an LRU batch increases its refcount by one, and removing
it from the batch decreases it by one. Page migration requires that the
page not be referenced by anything except its page mapping. Before
migrating a page we should therefore try to drain it from the LRU batch in
case it sits there; however, folio_test_lru() is not sufficient to tell
whether the page is in an LRU batch or not, and if the page is in an LRU
batch, the migration will fail.
To solve the problem above, modify the logic of adding to an LRU batch:
before adding a page to an LRU batch, clear its LRU flag, so that whether
the page is in an LRU batch can be checked via folio_test_lru(page).
This is quite valuable, because we likely don't want to blindly drain the
LRU batch simply because there is some unexpected reference on a page, as
described above.
This change makes the LRU flag of a page invisible for longer, which may
impact some programs. For example, as long as a page is in an LRU batch,
we cannot isolate it, and we cannot check whether it is an LRU page.
Further, a page can now only be in exactly one LRU batch. This doesn't
seem to matter much, because when a new page is allocated from the buddy
allocator and added to an LRU batch, or is isolated, its LRU flag may also
be invisible for a long time.
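For reference, the pattern the diff below repeats in each of the affected
paths is roughly the following sketch:
	folio_get(folio);
	if (!folio_test_clear_lru(folio)) {
		/* Not on an LRU list, or already claimed by a batch: back off. */
		folio_put(folio);
		return;
	}
	/* The LRU flag stays clear while the folio sits in the batch. */
	local_lock(&cpu_fbatches.lock);
	...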
Fixes: 9a4e9f3b2d73 ("mm: update get_user_pages_longterm to migrate pages allocated from CMA region")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: yangge <yangge1116(a)126.com>
---
mm/swap.c | 43 +++++++++++++++++++++++++++++++------------
1 file changed, 31 insertions(+), 12 deletions(-)
V4:
Adjust commit message according to David's comments
V3:
Add fixes tag
V2:
Adjust code and commit message according to David's comments
diff --git a/mm/swap.c b/mm/swap.c
index dc205bd..9caf6b0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -211,10 +211,6 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
for (i = 0; i < folio_batch_count(fbatch); i++) {
struct folio *folio = fbatch->folios[i];
- /* block memcg migration while the folio moves between lru */
- if (move_fn != lru_add_fn && !folio_test_clear_lru(folio))
- continue;
-
folio_lruvec_relock_irqsave(folio, &lruvec, &flags);
move_fn(lruvec, folio);
@@ -255,11 +251,16 @@ static void lru_move_tail_fn(struct lruvec *lruvec, struct folio *folio)
void folio_rotate_reclaimable(struct folio *folio)
{
if (!folio_test_locked(folio) && !folio_test_dirty(folio) &&
- !folio_test_unevictable(folio) && folio_test_lru(folio)) {
+ !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
unsigned long flags;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock_irqsave(&lru_rotate.lock, flags);
fbatch = this_cpu_ptr(&lru_rotate.fbatch);
folio_batch_add_and_move(fbatch, folio, lru_move_tail_fn);
@@ -352,11 +353,15 @@ static void folio_activate_drain(int cpu)
void folio_activate(struct folio *folio)
{
- if (folio_test_lru(folio) && !folio_test_active(folio) &&
- !folio_test_unevictable(folio)) {
+ if (!folio_test_active(folio) && !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.activate);
folio_batch_add_and_move(fbatch, folio, folio_activate_fn);
@@ -700,6 +705,11 @@ void deactivate_file_folio(struct folio *folio)
return;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate_file);
folio_batch_add_and_move(fbatch, folio, lru_deactivate_file_fn);
@@ -716,11 +726,16 @@ void deactivate_file_folio(struct folio *folio)
*/
void folio_deactivate(struct folio *folio)
{
- if (folio_test_lru(folio) && !folio_test_unevictable(folio) &&
- (folio_test_active(folio) || lru_gen_enabled())) {
+ if (!folio_test_unevictable(folio) && (folio_test_active(folio) ||
+ lru_gen_enabled())) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_deactivate);
folio_batch_add_and_move(fbatch, folio, lru_deactivate_fn);
@@ -737,12 +752,16 @@ void folio_deactivate(struct folio *folio)
*/
void folio_mark_lazyfree(struct folio *folio)
{
- if (folio_test_lru(folio) && folio_test_anon(folio) &&
- folio_test_swapbacked(folio) && !folio_test_swapcache(folio) &&
- !folio_test_unevictable(folio)) {
+ if (folio_test_anon(folio) && folio_test_swapbacked(folio) &&
+ !folio_test_swapcache(folio) && !folio_test_unevictable(folio)) {
struct folio_batch *fbatch;
folio_get(folio);
+ if (!folio_test_clear_lru(folio)) {
+ folio_put(folio);
+ return;
+ }
+
local_lock(&cpu_fbatches.lock);
fbatch = this_cpu_ptr(&cpu_fbatches.lru_lazyfree);
folio_batch_add_and_move(fbatch, folio, lru_lazyfree_fn);
--
2.7.4