The iSCSI target login thread might get stuck in the following stack:
cat /proc/`pidof iscsi_np`/stack
[<0>] down_interruptible+0x42/0x50
[<0>] iscsit_access_np+0xe3/0x167
[<0>] iscsi_target_locate_portal+0x695/0x8ac
[<0>] __iscsi_target_login_thread+0x855/0xb82
[<0>] iscsi_target_login_thread+0x2f/0x5a
[<0>] kthread+0xfa/0x130
[<0>] ret_from_fork+0x1f/0x30
This can be reproduced by the following steps:
1. Initiator A tries to log in to iqn1-tpg1 on port 3260. After the
   PDU exchange in the login thread finishes, but before negotiation
   completes, the network link goes down. This can happen in a
   production environment; I emulated it by bringing the network card
   down on the initiator node with "ifconfig eth0 down".
   (Now A can never finish this login, and it holds
   tpg->np_login_sem.)
2. Initiator B tries to log in to iqn2-tpg1 on port 3260 and finishes
   the PDU exchange in the login thread. The target expects to process
   the remaining login PDUs in workqueue context.
3. Initiator A' tries to re-login to iqn1-tpg1 on port 3260 from a
   new socket. It waits on tpg->np_login_sem with np->np_login_timer
   loaded, so it should wait at most 15 seconds.
   (Because the semaphore is held by A, which never gets a chance to
   release it, A' should eventually time out.)
4. Before A' times out, initiator B fails negotiation and calls
   iscsi_target_login_drop()->iscsi_target_login_sess_out(), which
   cancels np->np_login_timer. Initiator A' now hangs there forever.
   Because A' occupies the login thread, no other login requests can
   be serviced.
Fix this by moving iscsi_stop_login_thread_timer() out of
iscsi_target_login_sess_out(). Also remove the iscsi_np parameter
from iscsi_target_login_sess_out().
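To make the interaction concrete, below is a minimal userspace sketch
of the hazard (a pthreads analogy, not the target code; the names
np_login_timer, np_login_sem and login_sess_out are borrowed only for
illustration): one waiter's sole wakeup is a watchdog that an
unrelated connection's failure path can silently disarm.

#include <pthread.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static sem_t np_login_sem;          /* stands in for tpg->np_login_sem  */
static volatile bool timer_armed;   /* stands in for np->np_login_timer */

/* Watchdog armed by waiter A': after the timeout, unblock the waiter. */
static void *np_login_timer(void *arg)
{
    (void)arg;
    sleep(2);                       /* 15 seconds in the real code */
    if (timer_armed) {
        printf("timer fired: unblocking stuck waiter\n");
        sem_post(&np_login_sem);
    }
    return NULL;
}

/* Failure path of the unrelated connection B. Before the fix, this
 * also disarmed the portal-wide timer that A' depends on. */
static void login_sess_out(bool buggy)
{
    if (buggy)
        timer_armed = false;        /* cancels the only wakeup for A' */
}

int main(void)
{
    pthread_t t;

    sem_init(&np_login_sem, 0, 0);  /* A already holds the semaphore */

    timer_armed = true;             /* A' arms the login timer...    */
    pthread_create(&t, NULL, np_login_timer, NULL);

    login_sess_out(true);           /* ...then B's failure path runs */

    sem_wait(&np_login_sem);        /* with buggy, A' blocks forever */
    printf("A' resumed\n");
    return 0;
}

With the buggy flag set, "A' resumed" never prints; with it clear, the
watchdog releases the waiter after the timeout. That is the behavior
the patch restores by cancelling the timer only in the login thread
itself.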
Cc: stable(a)vger.kernel.org
Signed-off-by: Hou Pu <houpu(a)bytedance.com>
---
drivers/target/iscsi/iscsi_target_login.c | 6 +++---
drivers/target/iscsi/iscsi_target_login.h | 3 +--
drivers/target/iscsi/iscsi_target_nego.c | 3 +--
3 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 85748e338858..893d1b406c29 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1149,7 +1149,7 @@ void iscsit_free_conn(struct iscsi_conn *conn)
}
void iscsi_target_login_sess_out(struct iscsi_conn *conn,
- struct iscsi_np *np, bool zero_tsih, bool new_sess)
+ bool zero_tsih, bool new_sess)
{
if (!new_sess)
goto old_sess_out;
@@ -1167,7 +1167,6 @@ void iscsi_target_login_sess_out(struct iscsi_conn *conn,
conn->sess = NULL;
old_sess_out:
- iscsi_stop_login_thread_timer(np);
/*
* If login negotiation fails check if the Time2Retain timer
* needs to be restarted.
@@ -1407,8 +1406,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
new_sess_out:
new_sess = true;
old_sess_out:
+ iscsi_stop_login_thread_timer(np);
tpg_np = conn->tpg_np;
- iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess);
+ iscsi_target_login_sess_out(conn, zero_tsih, new_sess);
new_sess = false;
if (tpg) {
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
index 3b8e3639ff5d..fc95e6150253 100644
--- a/drivers/target/iscsi/iscsi_target_login.h
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -22,8 +22,7 @@ extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32);
extern void iscsit_free_conn(struct iscsi_conn *);
extern int iscsit_start_kthreads(struct iscsi_conn *);
extern void iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8);
-extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *,
- bool, bool);
+extern void iscsi_target_login_sess_out(struct iscsi_conn *, bool, bool);
extern int iscsi_target_login_thread(void *);
extern void iscsi_handle_login_thread_timeout(struct timer_list *t);
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index 685d771b51d4..e32d93b92742 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -535,12 +535,11 @@ static bool iscsi_target_sk_check_and_clear(struct iscsi_conn *conn, unsigned in
static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login)
{
- struct iscsi_np *np = login->np;
bool zero_tsih = login->zero_tsih;
iscsi_remove_failed_auth_entry(conn);
iscsi_target_nego_release(conn);
- iscsi_target_login_sess_out(conn, np, zero_tsih, true);
+ iscsi_target_login_sess_out(conn, zero_tsih, true);
}
struct conn_timeout {
--
2.11.0
While moving Android kernels over to use LLVM=1, we observed a failure
when building in a hermetic Docker image:
HOSTCC scripts/basic/fixdep
clang: error: unable to execute command: Executable "ld" doesn't exist!
This is because the host utility fixdep is built in one step by
invoking the compiler as the driver, rather than with separate compile
and link steps.
When configured from source, clang defaults to using the system's
linker, not LLVM's own LLD, unless the CMake option
-DCLANG_DEFAULT_LINKER='lld' is set when configuring the build of
clang itself.
Don't rely on the compiler's implicit default linker; be explicit.
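The failure mode is easy to reproduce outside of Kbuild. Below is a
hypothetical stand-in for a host utility (hello.c is illustrative, not
part of the tree), with the two invocations shown as comments: the
one-step driver build fails on a host that has clang and LLD but no
binutils ld, until the linker is named explicitly.

/*
 * hello.c -- a stand-in for a host utility such as fixdep.
 *
 * One-step driver build, the way Kbuild invokes HOSTCC:
 *   clang -o hello hello.c               # execs the configured default
 *                                        # linker, typically "ld"
 *   clang -fuse-ld=lld -o hello hello.c  # names the linker explicitly,
 *                                        # matching this patch
 */
#include <stdio.h>

int main(void)
{
    puts("hello");
    return 0;
}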
Cc: stable(a)vger.kernel.org
Fixes: a0d1c951ef08 ("kbuild: support LLVM=1 to switch the default tools to Clang/LLVM")
Reported-by: Matthias Maennich <maennich(a)google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers(a)google.com>
---
Makefile | 1 +
1 file changed, 1 insertion(+)
diff --git a/Makefile b/Makefile
index def590b743a9..b4e93b228a26 100644
--- a/Makefile
+++ b/Makefile
@@ -436,6 +436,7 @@ OBJDUMP = llvm-objdump
READELF = llvm-readelf
OBJSIZE = llvm-size
STRIP = llvm-strip
+KBUILD_HOSTLDFLAGS += -fuse-ld=lld
else
CC = $(CROSS_COMPILE)gcc
LD = $(CROSS_COMPILE)ld
--
2.28.0.297.g1956fa8f8d-goog
On 2 Sep 2020, at 12:58, Ralph Campbell wrote:
> A migrating transparent huge page has to already be unmapped. Otherwise,
> the page could be modified while it is being copied to a new page and
> data could be lost. The function __split_huge_pmd() checks for a PMD
> migration entry before calling __split_huge_pmd_locked() leading one to
> think that __split_huge_pmd_locked() can handle splitting a migrating PMD.
> However, the code always increments the page->_mapcount and adjusts the
> memory control group accounting assuming the page is mapped.
> Also, if the PMD entry is a migration PMD entry, the call to
> is_huge_zero_pmd(*pmd) is incorrect because it calls pmd_pfn(pmd) instead
> of migration_entry_to_pfn(pmd_to_swp_entry(pmd)).
> Fix these problems by checking for a PMD migration entry.
>
> Signed-off-by: Ralph Campbell <rcampbell(a)nvidia.com>
Thanks for the fix. You can add Reviewed-by: Zi Yan <ziy(a)nvidia.com>.
I think you also want to add a Fixes tag and cc stable:
Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path")
Cc: stable(a)vger.kernel.org # 4.14+
> ---
> mm/huge_memory.c | 42 +++++++++++++++++++++++-------------------
> 1 file changed, 23 insertions(+), 19 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 2a468a4acb0a..606d712d9505 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2023,7 +2023,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> put_page(page);
> add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
> return;
> - } else if (is_huge_zero_pmd(*pmd)) {
> + } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
> /*
> * FIXME: Do we want to invalidate secondary mmu by calling
> * mmu_notifier_invalidate_range() see comments below inside
> @@ -2117,30 +2117,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
> pte = pte_offset_map(&_pmd, addr);
> BUG_ON(!pte_none(*pte));
> set_pte_at(mm, addr, pte, entry);
> - atomic_inc(&page[i]._mapcount);
> - pte_unmap(pte);
> - }
> -
> - /*
> - * Set PG_double_map before dropping compound_mapcount to avoid
> - * false-negative page_mapped().
> - */
> - if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
> - for (i = 0; i < HPAGE_PMD_NR; i++)
> + if (!pmd_migration)
> atomic_inc(&page[i]._mapcount);
> + pte_unmap(pte);
> }
>
> - lock_page_memcg(page);
> - if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> - /* Last compound_mapcount is gone. */
> - __dec_lruvec_page_state(page, NR_ANON_THPS);
> - if (TestClearPageDoubleMap(page)) {
> - /* No need in mapcount reference anymore */
> + if (!pmd_migration) {
> + /*
> + * Set PG_double_map before dropping compound_mapcount to avoid
> + * false-negative page_mapped().
> + */
> + if (compound_mapcount(page) > 1 &&
> + !TestSetPageDoubleMap(page)) {
> for (i = 0; i < HPAGE_PMD_NR; i++)
> - atomic_dec(&page[i]._mapcount);
> + atomic_inc(&page[i]._mapcount);
> + }
> +
> + lock_page_memcg(page);
> + if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
> + /* Last compound_mapcount is gone. */
> + __dec_lruvec_page_state(page, NR_ANON_THPS);
> + if (TestClearPageDoubleMap(page)) {
> + /* No need in mapcount reference anymore */
> + for (i = 0; i < HPAGE_PMD_NR; i++)
> + atomic_dec(&page[i]._mapcount);
> + }
> }
> + unlock_page_memcg(page);
> }
> - unlock_page_memcg(page);
>
> smp_wmb(); /* make pte visible before pmd */
> pmd_populate(mm, pmd, pgtable);
> --
> 2.20.1
--
Best Regards,
Yan Zi
From: Nadav Amit <namit(a)vmware.com>
The __force_order logic seems to be inverted. __force_order is
supposedly used to coax the compiler's memory-dependency analysis into
enforcing ordering between CR writes and reads. Therefore, the memory
should behave as the CR does: when the CR is read, the memory should be
"read" by the inline assembly, so __force_order should be an input
operand; when the CR is written, the memory should be "written", so
__force_order should be an output operand.
This change should allow the "volatile" qualifier to be removed from
CR reads in a later patch.
While at it, remove the extra newline from the inline assembly, as it
only confuses GCC when it estimates the cost of the inline assembly.
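As a self-contained sketch of the idiom (illustrative userspace code,
not the kernel's), the declared memory effects on a dummy variable are
what let the compiler's dependency analysis order otherwise-independent
asm statements:

/* force_order stands in for __force_order. A "write" declares the
 * dummy as an output ("=m"); a "read" declares it as an input ("m").
 * Because the read consumes memory the write produces, the compiler
 * may not hoist the read above the write. */
static unsigned long force_order;

static void cr_write_demo(unsigned long val)
{
    asm volatile("" : "=m" (force_order) : "r" (val));
}

static unsigned long cr_read_demo(void)
{
    unsigned long val = 0;

    asm volatile("" : "+r" (val) : "m" (force_order));
    return val;
}

int main(void)
{
    cr_write_demo(42);
    return (int)cr_read_demo(); /* stays ordered after the write */
}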
Cc: Thomas Gleixner <tglx(a)linutronix.de>
Cc: Ingo Molnar <mingo(a)redhat.com>
Cc: Borislav Petkov <bp(a)alien8.de>
Cc: x86(a)kernel.org
Cc: "H. Peter Anvin" <hpa(a)zytor.com>
Cc: Peter Zijlstra <peterz(a)infradead.org>
Cc: Andy Lutomirski <luto(a)kernel.org>
Cc: Kees Cook <keescook(a)chromium.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Nadav Amit <namit(a)vmware.com>
---
Unless I misunderstand the logic, __force_order should also be used by
rdpkru() and wrpkru(), which currently have no dependency on
__force_order. I also did not understand why native_write_cr0() has a
read/write dependency on __force_order, or why native_write_cr4() no
longer has any dependency on __force_order.
---
arch/x86/include/asm/special_insns.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 5999b0b3dd4a..dff5e5b01a3c 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -24,32 +24,32 @@ void native_write_cr0(unsigned long val);
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
- asm volatile("mov %%cr0,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr0,%0" : "=r" (val) : "m" (__force_order));
return val;
}
static __always_inline unsigned long native_read_cr2(void)
{
unsigned long val;
- asm volatile("mov %%cr2,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr2,%0" : "=r" (val) : "m" (__force_order));
return val;
}
static __always_inline void native_write_cr2(unsigned long val)
{
- asm volatile("mov %0,%%cr2": : "r" (val), "m" (__force_order));
+ asm volatile("mov %1,%%cr2" : "=m" (__force_order) : "r" (val));
}
static inline unsigned long __native_read_cr3(void)
{
unsigned long val;
- asm volatile("mov %%cr3,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr3,%0" : "=r" (val) : "m" (__force_order));
return val;
}
static inline void native_write_cr3(unsigned long val)
{
- asm volatile("mov %0,%%cr3": : "r" (val), "m" (__force_order));
+ asm volatile("mov %1,%%cr3" : "=m" (__force_order) : "r" (val));
}
static inline unsigned long native_read_cr4(void)
@@ -64,10 +64,10 @@ static inline unsigned long native_read_cr4(void)
asm volatile("1: mov %%cr4, %0\n"
"2:\n"
_ASM_EXTABLE(1b, 2b)
- : "=r" (val), "=m" (__force_order) : "0" (0));
+ : "=r" (val) : "m" (__force_order), "0" (0));
#else
/* CR4 always exists on x86_64. */
- asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
+ asm volatile("mov %%cr4,%0" : "=r" (val) : "m" (__force_order));
#endif
return val;
}
--
2.25.1
On Wed, Sep 2, 2020 at 4:49 PM Jens Axboe <axboe(a)kernel.dk> wrote:
> On 9/2/20 3:59 AM, Jiufei Xue wrote:
> > While io_sqe_file_register() failed in __io_sqe_files_update(),
> > table->files[i] still point to the original file which may freed
> > soon, and that will trigger use-after-free problems.
>
> Applied, thanks.
Shouldn't this have a Cc: stable tag and a Fixes tag on it? AFAICS
this is a fix for a UAF that has existed since
f3bd9dae3708a0ff6b067e766073ffeb853301f9 ("io_uring: fix memleak in
__io_sqe_files_update()"). That commit was marked for stable
backporting back to where c3a31e605620 landed, and c3a31e605620 was
introduced in Linux 5.5.
You can see at <https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/fs/io…>
that this security vulnerability currently exists in the stable 5.8
branch.