From: Aditya Garg <gargaditya08(a)live.com>
On T2 Macs, the secure boot is handled by the T2 Chip. If enabled, only
macOS and Windows are allowed to boot on these machines. Moreover, loading
UEFI Secure Boot certificates is not supported on these machines on Linux.
An attempt to do so causes a crash with the following logs :-
Call Trace:
<TASK>
page_fault_oops+0x4f/0x2c0
? search_bpf_extables+0x6b/0x80
? search_module_extables+0x50/0x80
? search_exception_tables+0x5b/0x60
kernelmode_fixup_or_oops+0x9e/0x110
__bad_area_nosemaphore+0x155/0x190
bad_area_nosemaphore+0x16/0x20
do_kern_addr_fault+0x8c/0xa0
exc_page_fault+0xd8/0x180
asm_exc_page_fault+0x1e/0x30
(Removed some logs from here)
? __efi_call+0x28/0x30
? switch_mm+0x20/0x30
? efi_call_rts+0x19a/0x8e0
? process_one_work+0x222/0x3f0
? worker_thread+0x4a/0x3d0
? kthread+0x17a/0x1a0
? process_one_work+0x3f0/0x3f0
? set_kthread_struct+0x40/0x40
? ret_from_fork+0x22/0x30
</TASK>
---[ end trace 1f82023595a5927f ]---
efi: Froze efi_rts_wq and disabled EFI Runtime Services
integrity: Couldn't get size: 0x8000000000000015
integrity: MODSIGN: Couldn't get UEFI db list
efi: EFI Runtime Services are disabled!
integrity: Couldn't get size: 0x8000000000000015
integrity: Couldn't get UEFI dbx list
integrity: Couldn't get size: 0x8000000000000015
integrity: Couldn't get mokx list
integrity: Couldn't get size: 0x80000000
As a result of not being able to read or load certificates, secure boot
cannot be enabled. This patch prevents querying of these UEFI variables,
since these Macs seem to use a non-standard EFI hardware.
Cc: stable(a)vger.kernel.org
Signed-off-by: Aditya Garg <gargaditya08(a)live.com>
---
v2 :- Reduce code size of the table.
v3 :- Close the brackets which were left open by mistake.
v4 :- Fix comment style issues, remove blank spaces and limit use of dmi_first_match()
v4 RESEND :- Add stable to cc
.../platform_certs/keyring_handler.h | 8 +++++
security/integrity/platform_certs/load_uefi.c | 35 +++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
index 284558f30..212d894a8 100644
--- a/security/integrity/platform_certs/keyring_handler.h
+++ b/security/integrity/platform_certs/keyring_handler.h
@@ -35,3 +35,11 @@ efi_element_handler_t get_handler_for_mok(const efi_guid_t *sig_type);
efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
#endif
+
+#ifndef UEFI_QUIRK_SKIP_CERT
+#define UEFI_QUIRK_SKIP_CERT(vendor, product) \
+ .matches = { \
+ DMI_MATCH(DMI_BOARD_VENDOR, vendor), \
+ DMI_MATCH(DMI_PRODUCT_NAME, product), \
+ },
+#endif
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c
index 5f45c3c07..c3393b2b1 100644
--- a/security/integrity/platform_certs/load_uefi.c
+++ b/security/integrity/platform_certs/load_uefi.c
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/cred.h>
+#include <linux/dmi.h>
#include <linux/err.h>
#include <linux/efi.h>
#include <linux/slab.h>
@@ -12,6 +13,33 @@
#include "../integrity.h"
#include "keyring_handler.h"
+/*
+ * Apple Macs with T2 Security chip seem to be using a non standard
+ * implementation of Secure Boot. For Linux to run on these machines
+ * Secure Boot needs to be turned off, since the T2 Chip manages
+ * Secure Boot and doesn't allow OS other than macOS or Windows to
+ * boot. If turned off, an attempt to get certificates causes a crash,
+ * so we simply prevent doing the same.
+ */
+static const struct dmi_system_id uefi_skip_cert[] = {
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,2") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,3") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro15,4") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,2") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,3") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookPro16,4") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir8,2") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacBookAir9,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacMini8,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "MacPro7,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,1") },
+ { UEFI_QUIRK_SKIP_CERT("Apple Inc.", "iMac20,2") },
+ { }
+};
+
/*
* Look to see if a UEFI variable called MokIgnoreDB exists and return true if
* it does.
@@ -138,6 +166,13 @@ static int __init load_uefi_certs(void)
unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0;
efi_status_t status;
int rc = 0;
+ const struct dmi_system_id *dmi_id;
+
+ dmi_id = dmi_first_match(uefi_skip_cert);
+ if (dmi_id) {
+ pr_err("Getting UEFI Secure Boot Certs is not supported on T2 Macs.\n");
+ return false;
+ }
if (!efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE))
return false;
--
2.25.1
As Yanming reported in bugzilla:
https://bugzilla.kernel.org/show_bug.cgi?id=215895
I have encountered a bug in F2FS file system in kernel v5.17.
The kernel message is shown below:
kernel BUG at fs/inode.c:611!
Call Trace:
evict+0x282/0x4e0
__dentry_kill+0x2b2/0x4d0
dput+0x2dd/0x720
do_renameat2+0x596/0x970
__x64_sys_rename+0x78/0x90
do_syscall_64+0x3b/0x90
The root cause is: fuzzed inode has both inline_data flag and encrypted
flag, so after it was deleted by rename(), during f2fs_evict_inode(),
it will cause inline data conversion due to flags confilction, then
page cache will be polluted and trigger panic in clear_inode().
This patch tries to fix the issue by do more sanity checks for inline
data inode in sanity_check_inode().
Cc: stable(a)vger.kernel.org
Reported-by: Ming Yan <yanming(a)tju.edu.cn>
Signed-off-by: Chao Yu <chao.yu(a)oppo.com>
---
fs/f2fs/f2fs.h | 7 +++++++
fs/f2fs/inode.c | 3 +--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 27aa93caec06..64c511b498cc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4173,6 +4173,13 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
*/
static inline bool f2fs_post_read_required(struct inode *inode)
{
+ /*
+ * used by sanity_check_inode(), when disk layout fields has not
+ * been synchronized to inmem fields.
+ */
+ if (file_is_encrypt(inode) || file_is_verity(inode) ||
+ F2FS_I(inode)->i_flags & F2FS_COMPR_FL)
+ return true;
return f2fs_encrypted_file(inode) || fsverity_active(inode) ||
f2fs_compressed_file(inode);
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 83639238a1fe..234b8ed02644 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -276,8 +276,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
- if (f2fs_has_inline_data(inode) &&
- (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
+ if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
--
2.25.1
The patch below does not apply to the 5.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e914d8f00391520ecc4495dd0ca0124538ab7119 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan(a)kernel.org>
Date: Thu, 14 Apr 2022 19:13:46 -0700
Subject: [PATCH] mm: fix unexpected zeroed page mapping with zram swap
Two processes under CLONE_VM cloning, user process can be corrupted by
seeing zeroed page unexpectedly.
CPU A CPU B
do_swap_page do_swap_page
SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path
swap_readpage valid data
swap_slot_free_notify
delete zram entry
swap_readpage zeroed(invalid) data
pte_lock
map the *zero data* to userspace
pte_unlock
pte_lock
if (!pte_same)
goto out_nomap;
pte_unlock
return and next refault will
read zeroed data
The swap_slot_free_notify is bogus for CLONE_VM case since it doesn't
increase the refcount of swap slot at copy_mm so it couldn't catch up
whether it's safe or not to discard data from backing device. In the
case, only the lock it could rely on to synchronize swap slot freeing is
page table lock. Thus, this patch gets rid of the swap_slot_free_notify
function. With this patch, CPU A will see correct data.
CPU A CPU B
do_swap_page do_swap_page
SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path
swap_readpage original data
pte_lock
map the original data
swap_free
swap_range_free
bd_disk->fops->swap_slot_free_notify
swap_readpage read zeroed data
pte_unlock
pte_lock
if (!pte_same)
goto out_nomap;
pte_unlock
return
on next refault will see mapped data by CPU B
The concern of the patch would increase memory consumption since it
could keep wasted memory with compressed form in zram as well as
uncompressed form in address space. However, most of cases of zram uses
no readahead and do_swap_page is followed by swap_free so it will free
the compressed form from in zram quickly.
Link: https://lkml.kernel.org/r/YjTVVxIAsnKAXjTd@google.com
Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device")
Reported-by: Ivan Babrou <ivan(a)cloudflare.com>
Tested-by: Ivan Babrou <ivan(a)cloudflare.com>
Signed-off-by: Minchan Kim <minchan(a)kernel.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/page_io.c b/mm/page_io.c
index b417f000b49e..89fbf3cae30f 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -51,54 +51,6 @@ void end_swap_bio_write(struct bio *bio)
bio_put(bio);
}
-static void swap_slot_free_notify(struct page *page)
-{
- struct swap_info_struct *sis;
- struct gendisk *disk;
- swp_entry_t entry;
-
- /*
- * There is no guarantee that the page is in swap cache - the software
- * suspend code (at least) uses end_swap_bio_read() against a non-
- * swapcache page. So we must check PG_swapcache before proceeding with
- * this optimization.
- */
- if (unlikely(!PageSwapCache(page)))
- return;
-
- sis = page_swap_info(page);
- if (data_race(!(sis->flags & SWP_BLKDEV)))
- return;
-
- /*
- * The swap subsystem performs lazy swap slot freeing,
- * expecting that the page will be swapped out again.
- * So we can avoid an unnecessary write if the page
- * isn't redirtied.
- * This is good for real swap storage because we can
- * reduce unnecessary I/O and enhance wear-leveling
- * if an SSD is used as the as swap device.
- * But if in-memory swap device (eg zram) is used,
- * this causes a duplicated copy between uncompressed
- * data in VM-owned memory and compressed data in
- * zram-owned memory. So let's free zram-owned memory
- * and make the VM-owned decompressed page *dirty*,
- * so the page should be swapped out somewhere again if
- * we again wish to reclaim it.
- */
- disk = sis->bdev->bd_disk;
- entry.val = page_private(page);
- if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
- unsigned long offset;
-
- offset = swp_offset(entry);
-
- SetPageDirty(page);
- disk->fops->swap_slot_free_notify(sis->bdev,
- offset);
- }
-}
-
static void end_swap_bio_read(struct bio *bio)
{
struct page *page = bio_first_page_all(bio);
@@ -114,7 +66,6 @@ static void end_swap_bio_read(struct bio *bio)
}
SetPageUptodate(page);
- swap_slot_free_notify(page);
out:
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
@@ -394,11 +345,6 @@ int swap_readpage(struct page *page, bool synchronous)
if (sis->flags & SWP_SYNCHRONOUS_IO) {
ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
if (!ret) {
- if (trylock_page(page)) {
- swap_slot_free_notify(page);
- unlock_page(page);
- }
-
count_vm_event(PSWPIN);
goto out;
}
This is the start of the stable review cycle for the 4.19.241 release.
There are 12 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sun, 01 May 2022 10:40:41 +0000.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.19.241-r…
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-4.19.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 4.19.241-rc1
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
lightnvm: disable the subsystem
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "net: ethernet: stmmac: fix altr_tse_pcs function when using a fixed-link"
Masami Hiramatsu <mhiramat(a)kernel.org>
ia64: kprobes: Fix to pass correct trampoline address to the handler
Masami Hiramatsu <mhiramat(a)kernel.org>
Revert "ia64: kprobes: Use generic kretprobe trampoline handler"
Masami Hiramatsu <mhiramat(a)kernel.org>
Revert "ia64: kprobes: Fix to pass correct trampoline address to the handler"
Michael Ellerman <mpe(a)ellerman.id.au>
powerpc/64s: Unmerge EX_LR and EX_DAR
Nicholas Piggin <npiggin(a)gmail.com>
powerpc/64/interrupt: Temporarily save PPR on stack to fix register corruption due to SLB miss
Eric Dumazet <edumazet(a)google.com>
net/sched: cls_u32: fix netns refcount changes in u32_change()
Lin Ma <linma(a)zju.edu.cn>
hamradio: remove needs_free_netdev to avoid UAF
Lin Ma <linma(a)zju.edu.cn>
hamradio: defer 6pack kfree after unregister_netdev
Willy Tarreau <w(a)1wt.eu>
floppy: disable FDRAWCMD by default
Dafna Hirschfeld <dafna3(a)gmail.com>
media: vicodec: upon release, call m2m release before freeing ctrl handler
-------------
Diffstat:
Makefile | 4 +-
arch/ia64/kernel/kprobes.c | 78 +++++++++++++++++++++-
arch/powerpc/include/asm/exception-64s.h | 37 +++++-----
drivers/block/Kconfig | 16 +++++
drivers/block/floppy.c | 43 +++++++++---
drivers/lightnvm/Kconfig | 2 +-
drivers/media/platform/vicodec/vicodec-core.c | 6 +-
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c | 8 +++
drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.h | 4 --
.../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 13 ++--
drivers/net/hamradio/6pack.c | 5 +-
net/sched/cls_u32.c | 18 +++--
12 files changed, 181 insertions(+), 53 deletions(-)
There are 3 places where the cpu and node masks of the top cpuset can
be initialized in the order they are executed:
1) start_kernel -> cpuset_init()
2) start_kernel -> cgroup_init() -> cpuset_bind()
3) kernel_init_freeable() -> do_basic_setup() -> cpuset_init_smp()
The first cpuset_init() call just sets all the bits in the masks.
The second cpuset_bind() call sets cpus_allowed and mems_allowed to the
default v2 values. The third cpuset_init_smp() call sets them back to
v1 values.
For systems with cgroup v2 setup, cpuset_bind() is called once. As a
result, cpu and memory node hot add may fail to update the cpu and node
masks of the top cpuset to include the newly added cpu or node in a
cgroup v2 environment.
For systems with cgroup v1 setup, cpuset_bind() is called again by
rebind_subsystem() when the v1 cpuset filesystem is mounted as shown
in the dmesg log below with an instrumented kernel.
[ 2.609781] cpuset_bind() called - v2 = 1
[ 3.079473] cpuset_init_smp() called
[ 7.103710] cpuset_bind() called - v2 = 0
smp_init() is called after the first two init functions. So we don't
have a complete list of active cpus and memory nodes until later in
cpuset_init_smp() which is the right time to set up effective_cpus
and effective_mems.
To fix this cgroup v2 mask setup problem, the potentially incorrect
cpus_allowed & mems_allowed setting in cpuset_init_smp() are removed.
For cgroup v2 systems, the initial cpuset_bind() call will set the masks
correctly. For cgroup v1 systems, the second call to cpuset_bind()
will do the right setup.
cc: stable(a)vger.kernel.org
Signed-off-by: Waiman Long <longman(a)redhat.com>
Tested-by: Feng Tang <feng.tang(a)intel.com>
Reviewed-by: Michal Koutný <mkoutny(a)suse.com>
---
kernel/cgroup/cpuset.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 9390bfd9f1cd..71a418858a5e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3390,8 +3390,11 @@ static struct notifier_block cpuset_track_online_nodes_nb = {
*/
void __init cpuset_init_smp(void)
{
- cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
- top_cpuset.mems_allowed = node_states[N_MEMORY];
+ /*
+ * cpus_allowd/mems_allowed set to v2 values in the initial
+ * cpuset_bind() call will be reset to v1 values in another
+ * cpuset_bind() call when v1 cpuset is mounted.
+ */
top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
--
2.27.0
The patch below does not apply to the 4.19-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable(a)vger.kernel.org>.
thanks,
greg k-h
------------------ original commit in Linus's tree ------------------
From e914d8f00391520ecc4495dd0ca0124538ab7119 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan(a)kernel.org>
Date: Thu, 14 Apr 2022 19:13:46 -0700
Subject: [PATCH] mm: fix unexpected zeroed page mapping with zram swap
Two processes under CLONE_VM cloning, user process can be corrupted by
seeing zeroed page unexpectedly.
CPU A CPU B
do_swap_page do_swap_page
SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path
swap_readpage valid data
swap_slot_free_notify
delete zram entry
swap_readpage zeroed(invalid) data
pte_lock
map the *zero data* to userspace
pte_unlock
pte_lock
if (!pte_same)
goto out_nomap;
pte_unlock
return and next refault will
read zeroed data
The swap_slot_free_notify is bogus for CLONE_VM case since it doesn't
increase the refcount of swap slot at copy_mm so it couldn't catch up
whether it's safe or not to discard data from backing device. In the
case, only the lock it could rely on to synchronize swap slot freeing is
page table lock. Thus, this patch gets rid of the swap_slot_free_notify
function. With this patch, CPU A will see correct data.
CPU A CPU B
do_swap_page do_swap_page
SWP_SYNCHRONOUS_IO path SWP_SYNCHRONOUS_IO path
swap_readpage original data
pte_lock
map the original data
swap_free
swap_range_free
bd_disk->fops->swap_slot_free_notify
swap_readpage read zeroed data
pte_unlock
pte_lock
if (!pte_same)
goto out_nomap;
pte_unlock
return
on next refault will see mapped data by CPU B
The concern of the patch would increase memory consumption since it
could keep wasted memory with compressed form in zram as well as
uncompressed form in address space. However, most of cases of zram uses
no readahead and do_swap_page is followed by swap_free so it will free
the compressed form from in zram quickly.
Link: https://lkml.kernel.org/r/YjTVVxIAsnKAXjTd@google.com
Fixes: 0bcac06f27d7 ("mm, swap: skip swapcache for swapin of synchronous device")
Reported-by: Ivan Babrou <ivan(a)cloudflare.com>
Tested-by: Ivan Babrou <ivan(a)cloudflare.com>
Signed-off-by: Minchan Kim <minchan(a)kernel.org>
Cc: Nitin Gupta <ngupta(a)vflare.org>
Cc: Sergey Senozhatsky <senozhatsky(a)chromium.org>
Cc: Jens Axboe <axboe(a)kernel.dk>
Cc: David Hildenbrand <david(a)redhat.com>
Cc: <stable(a)vger.kernel.org> [4.14+]
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
diff --git a/mm/page_io.c b/mm/page_io.c
index b417f000b49e..89fbf3cae30f 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -51,54 +51,6 @@ void end_swap_bio_write(struct bio *bio)
bio_put(bio);
}
-static void swap_slot_free_notify(struct page *page)
-{
- struct swap_info_struct *sis;
- struct gendisk *disk;
- swp_entry_t entry;
-
- /*
- * There is no guarantee that the page is in swap cache - the software
- * suspend code (at least) uses end_swap_bio_read() against a non-
- * swapcache page. So we must check PG_swapcache before proceeding with
- * this optimization.
- */
- if (unlikely(!PageSwapCache(page)))
- return;
-
- sis = page_swap_info(page);
- if (data_race(!(sis->flags & SWP_BLKDEV)))
- return;
-
- /*
- * The swap subsystem performs lazy swap slot freeing,
- * expecting that the page will be swapped out again.
- * So we can avoid an unnecessary write if the page
- * isn't redirtied.
- * This is good for real swap storage because we can
- * reduce unnecessary I/O and enhance wear-leveling
- * if an SSD is used as the as swap device.
- * But if in-memory swap device (eg zram) is used,
- * this causes a duplicated copy between uncompressed
- * data in VM-owned memory and compressed data in
- * zram-owned memory. So let's free zram-owned memory
- * and make the VM-owned decompressed page *dirty*,
- * so the page should be swapped out somewhere again if
- * we again wish to reclaim it.
- */
- disk = sis->bdev->bd_disk;
- entry.val = page_private(page);
- if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
- unsigned long offset;
-
- offset = swp_offset(entry);
-
- SetPageDirty(page);
- disk->fops->swap_slot_free_notify(sis->bdev,
- offset);
- }
-}
-
static void end_swap_bio_read(struct bio *bio)
{
struct page *page = bio_first_page_all(bio);
@@ -114,7 +66,6 @@ static void end_swap_bio_read(struct bio *bio)
}
SetPageUptodate(page);
- swap_slot_free_notify(page);
out:
unlock_page(page);
WRITE_ONCE(bio->bi_private, NULL);
@@ -394,11 +345,6 @@ int swap_readpage(struct page *page, bool synchronous)
if (sis->flags & SWP_SYNCHRONOUS_IO) {
ret = bdev_read_page(sis->bdev, swap_page_sector(page), page);
if (!ret) {
- if (trylock_page(page)) {
- swap_slot_free_notify(page);
- unlock_page(page);
- }
-
count_vm_event(PSWPIN);
goto out;
}