This is a note to let you know that I've just added the patch titled
misc: panel: properly restore atomic counter on error path
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
misc-panel-properly-restore-atomic-counter-on-error-path.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 7ddfddb3fc0afb2aa36e89b554f06c901c5fa36f Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w(a)1wt.eu>
Date: Thu, 7 Sep 2017 11:56:40 +0200
Subject: misc: panel: properly restore atomic counter on error path
commit 93dc1774d2a4c7a298d5cdf78cc8acdcb7b1428d upstream.
Commit f4757af ("staging: panel: Fix single-open policy race condition")
introduced in 3.19-rc1 attempted to fix a race condition on the open, but
failed to properly do it and used to exit without restoring the semaphore.
This results in -EBUSY being returned after the first open error until
the module is reloaded or the system restarted (ie: consecutive to a
dual open resulting in -EBUSY or to a permission error).
Fixes: f4757af85 # 3.19-rc1
Cc: Mariusz Gorski <marius.gorski(a)gmail.com>
Signed-off-by: Willy Tarreau <w(a)1wt.eu>
[wt: driver is in misc/panel in 4.9]
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/misc/panel.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
--- a/drivers/misc/panel.c
+++ b/drivers/misc/panel.c
@@ -1423,17 +1423,25 @@ static ssize_t lcd_write(struct file *fi
static int lcd_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = -EBUSY;
if (!atomic_dec_and_test(&lcd_available))
- return -EBUSY; /* open only once at a time */
+ goto fail; /* open only once at a time */
+ ret = -EPERM;
if (file->f_mode & FMODE_READ) /* device is write-only */
- return -EPERM;
+ goto fail;
if (lcd.must_clear) {
lcd_clear_display();
lcd.must_clear = false;
}
return nonseekable_open(inode, file);
+
+ fail:
+ atomic_inc(&lcd_available);
+ return ret;
}
static int lcd_release(struct inode *inode, struct file *file)
@@ -1696,14 +1704,21 @@ static ssize_t keypad_read(struct file *
static int keypad_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = -EBUSY;
if (!atomic_dec_and_test(&keypad_available))
- return -EBUSY; /* open only once at a time */
+ goto fail; /* open only once at a time */
+ ret = -EPERM;
if (file->f_mode & FMODE_WRITE) /* device is read-only */
- return -EPERM;
+ goto fail;
keypad_buflen = 0; /* flush the buffer on opening */
return 0;
+ fail:
+ atomic_inc(&keypad_available);
+ return ret;
}
static int keypad_release(struct inode *inode, struct file *file)
Patches currently in stable-queue which might be from w(a)1wt.eu are
queue-4.9/misc-panel-properly-restore-atomic-counter-on-error-path.patch
Hi Greg,
as promised a month ago, here finally come the tested backports of this
fix for 4.9 and 4.4 respectively. This backport is not needed for 3.18
as the regression was introduced in 3.19-rc1.
Cheers,
Willy
This is a note to let you know that I've just added the patch titled
misc: panel: properly restore atomic counter on error path
to the 4.4-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
misc-panel-properly-restore-atomic-counter-on-error-path.patch
and it can be found in the queue-4.4 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 29e391ca59d60b7902b09ed11edbc5c95c372f09 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w(a)1wt.eu>
Date: Thu, 7 Sep 2017 11:56:40 +0200
Subject: misc: panel: properly restore atomic counter on error path
commit 93dc1774d2a4c7a298d5cdf78cc8acdcb7b1428d upstream.
Commit f4757af ("staging: panel: Fix single-open policy race condition")
introduced in 3.19-rc1 attempted to fix a race condition on the open, but
failed to properly do it and used to exit without restoring the semaphore.
This results in -EBUSY being returned after the first open error until
the module is reloaded or the system restarted (ie: consecutive to a
dual open resulting in -EBUSY or to a permission error).
Fixes: f4757af85 # 3.19-rc1
Cc: Mariusz Gorski <marius.gorski(a)gmail.com>
Signed-off-by: Willy Tarreau <w(a)1wt.eu>
[wt: driver is in staging/panel in 4.4]
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/staging/panel/panel.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
--- a/drivers/staging/panel/panel.c
+++ b/drivers/staging/panel/panel.c
@@ -1431,17 +1431,25 @@ static ssize_t lcd_write(struct file *fi
static int lcd_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = -EBUSY;
if (!atomic_dec_and_test(&lcd_available))
- return -EBUSY; /* open only once at a time */
+ goto fail; /* open only once at a time */
+ ret = -EPERM;
if (file->f_mode & FMODE_READ) /* device is write-only */
- return -EPERM;
+ goto fail;
if (lcd.must_clear) {
lcd_clear_display();
lcd.must_clear = false;
}
return nonseekable_open(inode, file);
+
+ fail:
+ atomic_inc(&lcd_available);
+ return ret;
}
static int lcd_release(struct inode *inode, struct file *file)
@@ -1704,14 +1712,21 @@ static ssize_t keypad_read(struct file *
static int keypad_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = -EBUSY;
if (!atomic_dec_and_test(&keypad_available))
- return -EBUSY; /* open only once at a time */
+ goto fail; /* open only once at a time */
+ ret = -EPERM;
if (file->f_mode & FMODE_WRITE) /* device is read-only */
- return -EPERM;
+ goto fail;
keypad_buflen = 0; /* flush the buffer on opening */
return 0;
+ fail:
+ atomic_inc(&keypad_available);
+ return ret;
}
static int keypad_release(struct inode *inode, struct file *file)
Patches currently in stable-queue which might be from w(a)1wt.eu are
queue-4.4/misc-panel-properly-restore-atomic-counter-on-error-path.patch
This is the start of the stable review cycle for the 3.18.82 release.
There are 20 patches in this series, all will be posted as a response
to this one. If anyone has any issues with these being applied, please
let me know.
Responses should be made by Sat Nov 18 17:27:03 UTC 2017.
Anything received after that time might be too late.
The whole patch series can be found in one patch at:
kernel.org/pub/linux/kernel/v3.x/stable-review/patch-3.18.82-rc1.gz
or in the git tree and branch at:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git linux-3.18.y
and the diffstat can be found below.
thanks,
greg k-h
-------------
Pseudo-Shortlog of commits:
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Linux 3.18.82-rc1
Bart Van Assche <bart.vanassche(a)sandisk.com>
target/iscsi: Fix iSCSI task reassignment handling
Bilal Amarni <bilal.amarni(a)gmail.com>
security/keys: add CONFIG_KEYS_COMPAT to Kconfig
Xin Long <lucien.xin(a)gmail.com>
ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err
Xin Long <lucien.xin(a)gmail.com>
ipip: only increase err_count for some certain type icmp in ipip_err
Eric Dumazet <edumazet(a)google.com>
ipv6: flowlabel: do not leave opt->tot_len with garbage
Xin Long <lucien.xin(a)gmail.com>
sctp: reset owner sk for data chunks on out queues when migrating a sock
Julien Gomes <julien(a)arista.com>
tun: allow positive return values on dev_get_valid_name() call
Andrei Vagin <avagin(a)openvz.org>
net/unix: don't show information about sockets from other namespaces
Xin Long <lucien.xin(a)gmail.com>
sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect
Cong Wang <xiyou.wangcong(a)gmail.com>
tun: call dev_get_valid_name() before register_netdevice()
Guillaume Nault <g.nault(a)alphalink.fr>
l2tp: check ps->sock before running pppol2tp_session_ioctl()
Eric Dumazet <edumazet(a)google.com>
tcp: fix tcp_mtu_probe() vs highest_sack
Craig Gallek <kraig(a)google.com>
tun/tap: sanitize TUNSETSNDBUF input
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ARM: dts: imx53-qsb-common: fix FEC pinmux config"
Dmitry Torokhov <dmitry.torokhov(a)gmail.com>
Input: ims-psu - check if CDC union descriptor is sane
Alan Stern <stern(a)rowland.harvard.edu>
usb: usbtest: fix NULL pointer dereference
Johannes Berg <johannes.berg(a)intel.com>
mac80211: don't compare TKIP TX MIC key in reinstall prevention
Jason A. Donenfeld <Jason(a)zx2c4.com>
mac80211: use constant time comparison with keys
Johannes Berg <johannes.berg(a)intel.com>
mac80211: accept key reinstall without changing anything
Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Revert "ceph: unlock dangling spinlock in try_flush_caps()"
-------------
Diffstat:
Makefile | 4 +--
arch/arm/boot/dts/imx53-qsb-common.dtsi | 20 +++++------
arch/powerpc/Kconfig | 5 ---
arch/s390/Kconfig | 3 --
arch/sparc/Kconfig | 3 --
arch/x86/Kconfig | 4 ---
drivers/input/misc/ims-pcu.c | 16 +++++++--
drivers/net/macvtap.c | 2 ++
drivers/net/tun.c | 7 ++++
drivers/target/iscsi/iscsi_target.c | 19 ++++-------
drivers/usb/misc/usbtest.c | 5 +--
fs/ceph/caps.c | 5 +--
include/linux/netdevice.h | 3 ++
include/net/tcp.h | 6 ++--
include/target/target_core_base.h | 1 +
net/core/dev.c | 6 ++--
net/ipv4/ipip.c | 59 +++++++++++++++++++++++----------
net/ipv4/tcp_output.c | 3 +-
net/ipv6/ip6_flowlabel.c | 1 +
net/ipv6/ip6_gre.c | 11 +++---
net/ipv6/ip6_output.c | 4 +--
net/l2tp/l2tp_ppp.c | 3 ++
net/mac80211/key.c | 53 +++++++++++++++++++++++++++--
net/sctp/input.c | 2 +-
net/sctp/socket.c | 32 ++++++++++++++++++
net/unix/diag.c | 2 ++
security/keys/Kconfig | 4 +++
27 files changed, 202 insertions(+), 81 deletions(-)
This is a note to let you know that I've just added the patch titled
security/keys: add CONFIG_KEYS_COMPAT to Kconfig
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
security-keys-add-config_keys_compat-to-kconfig.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 47b2c3fff4932e6fc17ce13d51a43c6969714e20 Mon Sep 17 00:00:00 2001
From: Bilal Amarni <bilal.amarni(a)gmail.com>
Date: Thu, 8 Jun 2017 14:47:26 +0100
Subject: security/keys: add CONFIG_KEYS_COMPAT to Kconfig
From: Bilal Amarni <bilal.amarni(a)gmail.com>
commit 47b2c3fff4932e6fc17ce13d51a43c6969714e20 upstream.
CONFIG_KEYS_COMPAT is defined in arch-specific Kconfigs and is missing for
several 64-bit architectures : mips, parisc, tile.
At the moment and for those architectures, calling in 32-bit userspace the
keyctl syscall would return an ENOSYS error.
This patch moves the CONFIG_KEYS_COMPAT option to security/keys/Kconfig, to
make sure the compatibility wrapper is registered by default for any 64-bit
architecture as long as it is configured with CONFIG_COMPAT.
[DH: Modified to remove arm64 compat enablement also as requested by Eric
Biggers]
Signed-off-by: Bilal Amarni <bilal.amarni(a)gmail.com>
Signed-off-by: David Howells <dhowells(a)redhat.com>
Reviewed-by: Arnd Bergmann <arnd(a)arndb.de>
cc: Eric Biggers <ebiggers3(a)gmail.com>
Signed-off-by: James Morris <james.l.morris(a)oracle.com>
Cc: James Cowgill <james.cowgill(a)mips.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
arch/powerpc/Kconfig | 5 -----
arch/s390/Kconfig | 3 ---
arch/sparc/Kconfig | 3 ---
arch/x86/Kconfig | 4 ----
security/keys/Kconfig | 4 ++++
5 files changed, 4 insertions(+), 15 deletions(-)
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1087,11 +1087,6 @@ source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
-config KEYS_COMPAT
- bool
- depends on COMPAT && KEYS
- default y
-
source "crypto/Kconfig"
config PPC_LIB_RHEAP
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -359,9 +359,6 @@ config COMPAT
config SYSVIPC_COMPAT
def_bool y if COMPAT && SYSVIPC
-config KEYS_COMPAT
- def_bool y if COMPAT && KEYS
-
config SMP
def_bool y
prompt "Symmetric multi-processing support"
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -568,9 +568,6 @@ config SYSVIPC_COMPAT
depends on COMPAT && SYSVIPC
default y
-config KEYS_COMPAT
- def_bool y if COMPAT && KEYS
-
endmenu
source "net/Kconfig"
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2732,10 +2732,6 @@ config COMPAT_FOR_U64_ALIGNMENT
config SYSVIPC_COMPAT
def_bool y
depends on SYSVIPC
-
-config KEYS_COMPAT
- def_bool y
- depends on KEYS
endif
endmenu
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -20,6 +20,10 @@ config KEYS
If you are unsure as to whether this is required, answer N.
+config KEYS_COMPAT
+ def_bool y
+ depends on COMPAT && KEYS
+
config PERSISTENT_KEYRINGS
bool "Enable register of persistent per-UID keyrings"
depends on KEYS
Patches currently in stable-queue which might be from bilal.amarni(a)gmail.com are
queue-4.9/security-keys-add-config_keys_compat-to-kconfig.patch
This is a note to let you know that I've just added the patch titled
qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd use during TMR ABORT (v2)
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
qla2xxx-fix-incorrect-tcm_qla2xxx_free_cmd-use-during-tmr-abort-v2.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6bcbb3174caa5f1ccc894f8ae077631659d5a629 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab(a)linux-iscsi.org>
Date: Fri, 30 Jun 2017 00:08:13 -0700
Subject: qla2xxx: Fix incorrect tcm_qla2xxx_free_cmd use during TMR ABORT (v2)
From: Nicholas Bellinger <nab(a)linux-iscsi.org>
commit 6bcbb3174caa5f1ccc894f8ae077631659d5a629 upstream.
This patch drops two incorrect usages of tcm_qla2xxx_free_cmd()
during TMR ABORT within tcm_qla2xxx_handle_data_work() and
tcm_qla2xxx_aborted_task(), which where attempting to dispatch
into workqueue context to do tcm_qla2xxx_complete_free() and
subsequently invoke transport_generic_free_cmd().
This is incorrect because during TMR ABORT target-core will
drop the outstanding se_cmd->cmd_kref references once it has
quiesced the se_cmd via transport_wait_for_tasks(), and in
the case of qla2xxx it should not attempt to do it's own
transport_generic_free_cmd() once the abort has occured.
As reported by Pascal, this was originally manifesting as a
BUG_ON(cmd->cmd_in_wq) in qlt_free_cmd() during TMR ABORT,
with a LIO backend that had sufficently high enough WRITE
latency to trigger a host side TMR ABORT_TASK.
(v2: Drop the qla_tgt_cmd->write_pending_abort_comp changes,
as they will be addressed in a seperate series)
Reported-by: Pascal de Bruijn <p.debruijn(a)unilogic.nl>
Tested-by: Pascal de Bruijn <p.debruijn(a)unilogic.nl>
Cc: Pascal de Bruijn <p.debruijn(a)unilogic.nl>
Reported-by: Lukasz Engel <lukasz.engel(a)softax.pl>
Cc: Lukasz Engel <lukasz.engel(a)softax.pl>
Acked-by: Himanshu Madhani <himanshu.madhani(a)cavium.com>
Cc: Quinn Tran <quinn.tran(a)cavium.com>
Signed-off-by: Nicholas Bellinger <nab(a)linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
drivers/scsi/qla2xxx/tcm_qla2xxx.c | 33 ---------------------------------
1 file changed, 33 deletions(-)
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -484,7 +484,6 @@ static int tcm_qla2xxx_handle_cmd(scsi_q
static void tcm_qla2xxx_handle_data_work(struct work_struct *work)
{
struct qla_tgt_cmd *cmd = container_of(work, struct qla_tgt_cmd, work);
- unsigned long flags;
/*
* Ensure that the complete FCP WRITE payload has been received.
@@ -492,17 +491,6 @@ static void tcm_qla2xxx_handle_data_work
*/
cmd->cmd_in_wq = 0;
- spin_lock_irqsave(&cmd->cmd_lock, flags);
- cmd->cmd_flags |= CMD_FLAG_DATA_WORK;
- if (cmd->aborted) {
- cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
- tcm_qla2xxx_free_cmd(cmd);
- return;
- }
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
-
cmd->vha->tgt_counters.qla_core_ret_ctio++;
if (!cmd->write_data_transferred) {
/*
@@ -682,34 +670,13 @@ static void tcm_qla2xxx_queue_tm_rsp(str
qlt_xmit_tm_rsp(mcmd);
}
-
-#define DATA_WORK_NOT_FREE(_flags) \
- (( _flags & (CMD_FLAG_DATA_WORK|CMD_FLAG_DATA_WORK_FREE)) == \
- CMD_FLAG_DATA_WORK)
static void tcm_qla2xxx_aborted_task(struct se_cmd *se_cmd)
{
struct qla_tgt_cmd *cmd = container_of(se_cmd,
struct qla_tgt_cmd, se_cmd);
- unsigned long flags;
if (qlt_abort_cmd(cmd))
return;
-
- spin_lock_irqsave(&cmd->cmd_lock, flags);
- if ((cmd->state == QLA_TGT_STATE_NEW)||
- ((cmd->state == QLA_TGT_STATE_DATA_IN) &&
- DATA_WORK_NOT_FREE(cmd->cmd_flags)) ) {
-
- cmd->cmd_flags |= CMD_FLAG_DATA_WORK_FREE;
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
- /* Cmd have not reached firmware.
- * Use this trigger to free it. */
- tcm_qla2xxx_free_cmd(cmd);
- return;
- }
- spin_unlock_irqrestore(&cmd->cmd_lock, flags);
- return;
-
}
static void tcm_qla2xxx_clear_sess_lookup(struct tcm_qla2xxx_lport *,
Patches currently in stable-queue which might be from nab(a)linux-iscsi.org are
queue-4.9/qla2xxx-fix-incorrect-tcm_qla2xxx_free_cmd-use-during-tmr-abort-v2.patch
queue-4.9/target-iscsi-fix-iscsi-task-reassignment-handling.patch
This is a note to let you know that I've just added the patch titled
netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to rhashtable"
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
netfilter-nat-revert-netfilter-nat-convert-nat-bysrc-hash-to-rhashtable.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From e1bf1687740ce1a3598a1c5e452b852ff2190682 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw(a)strlen.de>
Date: Wed, 6 Sep 2017 14:39:51 +0200
Subject: netfilter: nat: Revert "netfilter: nat: convert nat bysrc hash to rhashtable"
From: Florian Westphal <fw(a)strlen.de>
commit e1bf1687740ce1a3598a1c5e452b852ff2190682 upstream.
This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1.
It was not a good idea. The custom hash table was a much better
fit for this purpose.
A fast lookup is not essential, in fact for most cases there is no lookup
at all because original tuple is not taken and can be used as-is.
What needs to be fast is insertion and deletion.
rhlist removal however requires a rhlist walk.
We can have thousands of entries in such a list if source port/addresses
are reused for multiple flows, if this happens removal requests are so
expensive that deletions of a few thousand flows can take several
seconds(!).
The advantages that we got from rhashtable are:
1) table auto-sizing
2) multiple locks
1) would be nice to have, but it is not essential as we have at
most one lookup per new flow, so even a million flows in the bysource
table are not a problem compared to current deletion cost.
2) is easy to add to custom hash table.
I tried to add hlist_node to rhlist to speed up rhltable_remove but this
isn't doable without changing semantics. rhltable_remove_fast will
check that the to-be-deleted object is part of the table and that
requires a list walk that we want to avoid.
Furthermore, using hlist_node increases size of struct rhlist_head, which
in turn increases nf_conn size.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=196821
Reported-by: Ivan Babrou <ibobrik(a)gmail.com>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
include/net/netfilter/nf_conntrack.h | 3
include/net/netfilter/nf_nat.h | 1
net/netfilter/nf_nat_core.c | 132 ++++++++++++++---------------------
3 files changed, 56 insertions(+), 80 deletions(-)
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,7 +17,6 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
-#include <linux/rhashtable.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_dccp.h>
@@ -101,7 +100,7 @@ struct nf_conn {
possible_net_t ct_net;
#if IS_ENABLED(CONFIG_NF_NAT)
- struct rhlist_head nat_bysource;
+ struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
u8 __nfct_init_offset[0];
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,6 +1,5 @@
#ifndef _NF_NAT_H
#define _NF_NAT_H
-#include <linux/rhashtable.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_tuple.h>
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static DEFINE_SPINLOCK(nf_nat_lock);
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, s
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -187,28 +187,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -219,26 +197,22 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl, *h;
-
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
- rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
- if (in_range(l3proto, l4proto, result, range))
- return 1;
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
}
-
return 0;
}
@@ -411,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -452,19 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_lock);
+ /* nf_conntrack_alter_reply might re-allocate extension aera */
+ nat = nfct_nat(ct);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(&nf_nat_lock);
}
/* It's done. */
@@ -572,9 +544,10 @@ static int nf_nat_proto_clean(struct nf_
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ spin_unlock_bh(&nf_nat_lock);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -699,9 +672,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregis
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- if (ct->status & IPS_SRC_NAT_DONE)
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_lock);
+ }
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -836,13 +811,16 @@ static int __init nf_nat_init(void)
{
int ret;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -866,7 +844,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -886,8 +864,8 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
Patches currently in stable-queue which might be from fw(a)strlen.de are
queue-4.9/netfilter-nat-revert-netfilter-nat-convert-nat-bysrc-hash-to-rhashtable.patch
queue-4.9/netfilter-nat-avoid-use-of-nf_conn_nat-extension.patch
This is a note to let you know that I've just added the patch titled
netfilter: nat: avoid use of nf_conn_nat extension
to the 4.9-stable tree which can be found at:
http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=sum…
The filename of the patch is:
netfilter-nat-avoid-use-of-nf_conn_nat-extension.patch
and it can be found in the queue-4.9 subdirectory.
If you, or anyone else, feels it should not be added to the stable tree,
please let <stable(a)vger.kernel.org> know about it.
>From 6e699867f84c0f358fed233fe6162173aca28e04 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw(a)strlen.de>
Date: Tue, 28 Mar 2017 10:31:03 +0200
Subject: netfilter: nat: avoid use of nf_conn_nat extension
From: Florian Westphal <fw(a)strlen.de>
commit 6e699867f84c0f358fed233fe6162173aca28e04 upstream.
successful insert into the bysource hash sets IPS_SRC_NAT_DONE status bit
so we can check that instead of presence of nat extension which requires
extra deref.
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/netfilter/nf_conntrack_core.c | 2 +-
net/netfilter/nf_nat_core.c | 18 ++++--------------
2 files changed, 5 insertions(+), 15 deletions(-)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -689,7 +689,7 @@ static int nf_ct_resolve_clash(struct ne
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- !nfct_nat(ct) &&
+ ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -550,10 +550,6 @@ struct nf_nat_proto_clean {
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
const struct nf_nat_proto_clean *clean = data;
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
(clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
@@ -564,12 +560,10 @@ static int nf_nat_proto_remove(struct nf
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
- struct nf_conn_nat *nat = nfct_nat(ct);
-
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat)
+ if ((ct->status & IPS_SRC_NAT_DONE) == 0)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -705,13 +699,9 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregis
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
- if (!nat)
- return;
-
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE)
+ rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
Patches currently in stable-queue which might be from fw(a)strlen.de are
queue-4.9/netfilter-nat-revert-netfilter-nat-convert-nat-bysrc-hash-to-rhashtable.patch
queue-4.9/netfilter-nat-avoid-use-of-nf_conn_nat-extension.patch