The Realtek PC Beep Hidden Register[1] is currently set by
patch_realtek.c in two different places:
In alc_fill_eapd_coef(), it's set to the value 0x5757, corresponding to
non-beep input on 1Ah and no 1Ah loopback to either headphones or
speakers. (Although, curiously, the loopback amp is still enabled.) This
write was added fairly recently by commit e3743f431143 ("ALSA:
hda/realtek - Dell headphone has noise on unmute for ALC236") and is a
safe default. However, it happens in the wrong place:
alc_fill_eapd_coef() runs on module load and cold boot but not on S3
resume, meaning the register loses its value after suspend.
Conversely, in alc256_init(), the register is updated to unset bit 13
(disable speaker loopback) and set bit 5 (set non-beep input on 1Ah).
Although this write does run on S3 resume, it's not quite enough to fix
up the register's default value of 0x3717. What's missing is a set of
bit 14 to disable headphone loopback. Without that, we end up with a
feedback loop where the headphone jack is being driven by amplified
samples of itself[2].
This change eliminates the write in alc_fill_eapd_coef() and replaces
the update in alc256_init() with a write of the fixed value 0x4727. This
value ought to have the same behavior as 0x5757--disabling all PC Beep
routing that isn't part of the HDA node graph--but it has two
differences:
1. To enable non-beep input on 1Ah, the `b` field is set to 1, like the
previous code in alc256_init() used, instead of 2, like the value
written by alc_fill_eapd_coef() used. My testing shows these two
values to behave identically, but, in case there is a difference,
a bitwise update of the register seems like a more reliable source
of truth than a magic number written without explanation.
2. Loopback amplification is disabled (unset L and R bits) because no
loopback path is in use.
Affects the ALC255, ALC256, ALC257, ALC235, and ALC236 codecs.
[1] Newly documented in Documentation/sound/hd-audio/realtek-pc-beep.rst
[2] Setting the "Headphone Mic Boost" control from userspace changes
this feedback loop and has been a widely-shared workaround for headphone
noise on laptops like the Dell XPS 13 9350. This commit eliminates the
feedback loop and makes the workaround unnecessary.
Fixes: e3743f431143 ("ALSA: hda/realtek - Dell headphone has noise on unmute for ALC236")
Cc: stable(a)vger.kernel.org
Signed-off-by: Thomas Hebb <tommyhebb(a)gmail.com>
---
sound/pci/hda/patch_realtek.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 63e1a56f705b..024dd61a788b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -367,7 +367,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0215:
case 0x10ec0233:
case 0x10ec0235:
+ case 0x10ec0236:
case 0x10ec0255:
+ case 0x10ec0256:
case 0x10ec0257:
case 0x10ec0282:
case 0x10ec0283:
@@ -379,11 +381,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0300:
alc_update_coef_idx(codec, 0x10, 1<<9, 0);
break;
- case 0x10ec0236:
- case 0x10ec0256:
- alc_write_coef_idx(codec, 0x36, 0x5757);
- alc_update_coef_idx(codec, 0x10, 1<<9, 0);
- break;
case 0x10ec0275:
alc_update_coef_idx(codec, 0xe, 0, 1<<0);
break;
@@ -3269,7 +3266,13 @@ static void alc256_init(struct hda_codec *codec)
alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */
alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 1 << 15); /* Clear bit */
alc_update_coefex_idx(codec, 0x53, 0x02, 0x8000, 0 << 15);
- alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
+ /*
+ * Expose headphone mic (or possibly Line In on some machines) instead
+ * of PC Beep on 1Ah, and disable 1Ah loopback for all outputs. See
+ * Documentation/sound/hd-audio/realtek-pc-beep.rst for details of
+ * this register.
+ */
+ alc_write_coef_idx(codec, 0x36, 0x4727);
}
static void alc256_shutup(struct hda_codec *codec)
--
2.25.2
From: Johannes Berg <johannes.berg(a)intel.com>
The original patch didn't copy the ieee80211_is_data() condition
because on most drivers the management frames don't go through
this path. However, they do on iwlwifi/mvm, so we do need to keep
the condition here.
Cc: stable(a)vger.kernel.org
Fixes: ce2e1ca70307 ("mac80211: Check port authorization in the ieee80211_tx_dequeue() case")
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
Dave, can you please apply this directly?
(sorry, I shall remember to use git commit --amend properly)
---
net/mac80211/tx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d9cca6dbd870..efe4c1fc68e5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3610,7 +3610,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
* Drop unicast frames to unauthorised stations unless they are
* EAPOL frames from the local station.
*/
- if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) &&
+ if (unlikely(ieee80211_is_data(hdr->frame_control) &&
+ !ieee80211_vif_is_mesh(&tx.sdata->vif) &&
tx.sdata->vif.type != NL80211_IFTYPE_OCB &&
!is_multicast_ether_addr(hdr->addr1) &&
!test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) &&
--
2.25.1
Hello friend,
This is Julian Smith and i am purchasing manager from Sinara Group Co.,LTD in Russia.
We are glad to know about your company from the web and we are interested in your products.
Could you kindly send us your Latest catalog and price list for our trial order.
Thanks and Best Regards,
Ms. Julian Smith
Purchasing Manager
Sinara Group Co.,LTD
The command ring and cursor ring use different notify port addresses
definition: QXL_IO_NOTIFY_CMD and QXL_IO_NOTIFY_CURSOR. However, in
qxl_device_init() we use QXL_IO_NOTIFY_CMD to create both command ring
and cursor ring. This doesn't cause any problems now, because QEMU's
behaviors on QXL_IO_NOTIFY_CMD and QXL_IO_NOTIFY_CURSOR are the same.
However, QEMU's behavior may be change in future, so let's fix it.
P.S.: In the X.org QXL driver, the notify port address of cursor ring
is correct.
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Huacai Chen <chenhc(a)lemote.com>
---
drivers/gpu/drm/qxl/qxl_kms.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index bfc1631..9bdbe0d 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -218,7 +218,7 @@ int qxl_device_init(struct qxl_device *qdev,
&(qdev->ram_header->cursor_ring_hdr),
sizeof(struct qxl_command),
QXL_CURSOR_RING_SIZE,
- qdev->io_base + QXL_IO_NOTIFY_CMD,
+ qdev->io_base + QXL_IO_NOTIFY_CURSOR,
false,
&qdev->cursor_event);
--
2.7.0
The Power Management Events (PMEs) the INT0002 driver listens for get
signalled by the Power Management Controller (PMC) using the same IRQ
as used for the ACPI SCI.
Since commit fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from
waking up the system") the SCI triggering without there being a wakeup
cause recognized by the ACPI sleep code will no longer wakeup the system.
This breaks PMEs / wakeups signalled to the INT0002 driver, the system
never leaves the s2idle_loop() now.
Use acpi_s2idle_register_wake_callback to register a function which
checks the GPE0a_STS register for a PME and trigger a wakeup when a
PME has been signalled.
With this new mechanism the pm_wakeup_hard_event() call is no longer
necessary, so remove it and also remove the matching device_init_wakeup()
calls.
Fixes: fdde0ff8590b ("ACPI: PM: s2idle: Prevent spurious SCIs from waking up the system")
Cc: 5.4+ <stable(a)vger.kernel.org> # 5.4+
Signed-off-by: Hans de Goede <hdegoede(a)redhat.com>
---
drivers/platform/x86/intel_int0002_vgpio.c | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index f14e2c5f9da5..3c70694188fe 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -122,11 +122,17 @@ static irqreturn_t int0002_irq(int irq, void *data)
generic_handle_irq(irq_find_mapping(chip->irq.domain,
GPE0A_PME_B0_VIRT_GPIO_PIN));
- pm_wakeup_hard_event(chip->parent);
-
return IRQ_HANDLED;
}
+static bool int0002_check_wake(void *data)
+{
+ u32 gpe_sts_reg;
+
+ gpe_sts_reg = inl(GPE0A_STS_PORT);
+ return (gpe_sts_reg & GPE0A_PME_B0_STS_BIT);
+}
+
static struct irq_chip int0002_byt_irqchip = {
.name = DRV_NAME,
.irq_ack = int0002_irq_ack,
@@ -220,13 +226,13 @@ static int int0002_probe(struct platform_device *pdev)
return ret;
}
- device_init_wakeup(dev, true);
+ acpi_s2idle_register_wake_callback(irq, int0002_check_wake, NULL);
return 0;
}
static int int0002_remove(struct platform_device *pdev)
{
- device_init_wakeup(&pdev->dev, false);
+ acpi_s2idle_unregister_wake_callback(int0002_check_wake, NULL);
return 0;
}
--
2.26.0
The following commit has been merged into the irq/core branch of tip:
Commit-ID: 6a214a28132f19ace3d835a6d8f6422ec80ad200
Gitweb: https://git.kernel.org/tip/6a214a28132f19ace3d835a6d8f6422ec80ad200
Author: Sungbo Eo <mans0n(a)gorani.run>
AuthorDate: Sat, 21 Mar 2020 22:38:42 +09:00
Committer: Marc Zyngier <maz(a)kernel.org>
CommitterDate: Sun, 22 Mar 2020 11:52:16
irqchip/versatile-fpga: Apply clear-mask earlier
Clear its own IRQs before the parent IRQ get enabled, so that the
remaining IRQs do not accidentally interrupt the parent IRQ controller.
This patch also fixes a reboot bug on OX820 SoC, where the remaining
rps-timer IRQ raises a GIC interrupt that is left pending. After that,
the rps-timer IRQ is cleared during driver initialization, and there's
no IRQ left in rps-irq when local_irq_enable() is called, which evokes
an error message "unexpected IRQ trap".
Fixes: bdd272cbb97a ("irqchip: versatile FPGA: support cascaded interrupts from DT")
Signed-off-by: Sungbo Eo <mans0n(a)gorani.run>
Signed-off-by: Marc Zyngier <maz(a)kernel.org>
Reviewed-by: Linus Walleij <linus.walleij(a)linaro.org>
Cc: stable(a)vger.kernel.org
Link: https://lore.kernel.org/r/20200321133842.2408823-1-mans0n@gorani.run
---
drivers/irqchip/irq-versatile-fpga.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 70e2cff..f138673 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -212,6 +212,9 @@ int __init fpga_irq_of_init(struct device_node *node,
if (of_property_read_u32(node, "valid-mask", &valid_mask))
valid_mask = 0;
+ writel(clear_mask, base + IRQ_ENABLE_CLEAR);
+ writel(clear_mask, base + FIQ_ENABLE_CLEAR);
+
/* Some chips are cascaded from a parent IRQ */
parent_irq = irq_of_parse_and_map(node, 0);
if (!parent_irq) {
@@ -221,9 +224,6 @@ int __init fpga_irq_of_init(struct device_node *node,
fpga_irq_init(base, node->name, 0, parent_irq, valid_mask, node);
- writel(clear_mask, base + IRQ_ENABLE_CLEAR);
- writel(clear_mask, base + FIQ_ENABLE_CLEAR);
-
/*
* On Versatile AB/PB, some secondary interrupts have a direct
* pass-thru to the primary controller for IRQs 20 and 22-31 which need
From: Johannes Berg <johannes.berg(a)intel.com>
The original patch didn't copy the ieee80211_is_data() condition
because on most drivers the management frames don't go through
this path. However, they do on iwlwifi/mvm, so we do need to keep
the condition here.
Cc: stable(a)vger.kernel.org
Fixes: ce2e1ca70307 ("mac80211: Check port authorization in the ieee80211_tx_dequeue() case")
Signed-off-by: Johannes Berg <johannes.berg(a)intel.com>
---
Dave, can you please apply this directly?
---
net/mac80211/tx.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index d9cca6dbd870..19a67f639471 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3610,7 +3610,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
* Drop unicast frames to unauthorised stations unless they are
* EAPOL frames from the local station.
*/
- if (unlikely(!ieee80211_vif_is_mesh(&tx.sdata->vif) &&
+ if (unlikely(ieee80211_is_data(hdr->fc) &&
+ !ieee80211_vif_is_mesh(&tx.sdata->vif) &&
tx.sdata->vif.type != NL80211_IFTYPE_OCB &&
!is_multicast_ether_addr(hdr->addr1) &&
!test_sta_flag(tx.sta, WLAN_STA_AUTHORIZED) &&
--
2.25.1
On Sun, Mar 29, 2020 at 11:04 AM David Hildenbrand <david(a)redhat.com> wrote:
>
>
> What I received via the mailing list (e.g., linux-mm(a)kvack.org)
>
> Message-Id: <20200128093542.6908-1-david(a)redhat.com>
> MIME-Version: 1.0
> X-Scanned-By: MIMEDefang 2.79 on 10.5.11.13
> X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4
> Sender: owner-linux-mm(a)kvack.org
> Precedence: bulk
> X-Loop: owner-majordomo(a)kvack.org
> List-ID: <linux-mm.kvack.org>
> [...]
> X-Mimecast-Spam-Score: 1
> Content-Type: text/plain; charset=US-ASCII
> Content-Transfer-Encoding: quoted-printable
> X-Scanned-By: MIMEDefang 2.78 on 10.11.54.4
> [...]
>
> And a lot of this MIME crap.
Well, that may still be a perfectly fine email.
Yes, it has the MIME crap, but it also has that
Content-Transfer-Encoding: quoted-printable
which should tell all users how to _handle_ that MIME crap.
It's sad that people in this day and age still don't just handle
Content-Transfer-Encoding: 8bit
and just send it on untouched, but SMTP certainly encourages that bad
behavior of "convert to 7-bit MIME crap", because in theory there
could be SMTP servers out there that can't handle anything 8-bit or
with longer lines.
Those SMTP servers should just be scrapped and people told not to use
them, but sadly that's not the approach email people have taken.
They've taken the approach that old garbage SMTP servers should be
allowed to exist and destroy email for the rest of us.
> I have no idea if such a conversion is expected to be done.
It is (sadly) expected to be done by a lot of mail software.
But the problem is that some part of your email handling code then
doesn't _undo_ the MIME conversion, and leaves the MIME turds alone,
while then that "Content-Transfer-Encoding: quoted-printable" got
lost.
Do you at any point end up using a raw mbox and cut-and-pasting stuff?
Reading email in a broken mail-reader that doesn't undo MIME? Because
that's the usual way that these kinds of turds get copied.. Using raw
emails without honoring or taking that "Content-Transfer-Encoding"
into account.
Linus
From: David Hildenbrand <david(a)redhat.com>
Subject: drivers/base/memory.c: indicate all memory blocks as removable
We see multiple issues with the implementation/interface to compute
whether a memory block can be offlined (exposed via
/sys/devices/system/memory/memoryX/removable) and would like to simplify
it (remove the implementation).
1. It runs basically lockless. While this might be good for performance,
we see possible races with memory offlining that will require at least
some sort of locking to fix.
2. Nowadays, more false positives are possible. No arch-specific checks
are performed that validate if memory offlining will not be denied
right away (and such check will require locking). For example, arm64
won't allow to offline any memory block that was added during boot -
which will imply a very high error rate. Other archs have other
constraints.
3. The interface is inherently racy. E.g., if a memory block is
detected to be removable (and was not a false positive at that time),
there is still no guarantee that offlining will actually succeed. So
any caller already has to deal with false positives.
4. It is unclear which performance benefit this interface actually
provides. The introducing commit 5c755e9fd813 ("memory-hotplug: add
sysfs removable attribute for hotplug memory remove") mentioned
"A user-level agent must be able to identify which sections of
memory are likely to be removable before attempting the
potentially expensive operation."
However, no actual performance comparison was included.
Known users:
- lsmem: Will group memory blocks based on the "removable" property. [1]
- chmem: Indirect user. It has a RANGE mode where one can specify
removable ranges identified via lsmem to be offlined. However, it
also has a "SIZE" mode, which allows a sysadmin to skip the manual
"identify removable blocks" step. [2]
- powerpc-utils: Uses the "removable" attribute to skip some memory
blocks right away when trying to find some to
offline+remove. However, with ballooning enabled, it
already skips this information completely (because it
once resulted in many false negatives). Therefore, the
implementation can deal with false positives properly
already. [3]
According to Nathan Fontenot, DLPAR on powerpc is nowadays no longer
driven from userspace via the drmgr command (powerpc-utils). Nowadays
it's managed in the kernel - including onlining/offlining of memory
blocks - triggered by drmgr writing to /sys/kernel/dlpar. So the
affected legacy userspace handling is only active on old kernels. Only ve=
ry
old versions of drmgr on a new kernel (unlikely) might execute slower -
totally acceptable.
With CONFIG_MEMORY_HOTREMOVE, always indicating "removable" should not
break any user space tool. We implement a very bad heuristic now. Withou=
t
CONFIG_MEMORY_HOTREMOVE we cannot offline anything, so report
"not removable" as before.
Original discussion can be found in [4] ("[PATCH RFC v1] mm:
is_mem_section_removable() overhaul").
Other users of is_mem_section_removable() will be removed next, so that
we can remove is_mem_section_removable() completely.
[1] http://man7.org/linux/man-pages/man1/lsmem.1.html
[2] http://man7.org/linux/man-pages/man8/chmem.8.html
[3] https://github.com/ibm-power-utilities/powerpc-utils
[4] https://lkml.kernel.org/r/20200117105759.27905-1-david@redhat.com
Also, this patch probably fixes a crash reported by Steve.
http://lkml.kernel.org/r/CAPcyv4jpdaNvJ67SkjyUJLBnBnXXQv686BiVW042g03FUmWLX…
Link: http://lkml.kernel.org/r/20200128093542.6908-1-david@redhat.com
Signed-off-by: David Hildenbrand <david(a)redhat.com>
Suggested-by: Michal Hocko <mhocko(a)kernel.org>
Acked-by: Michal Hocko <mhocko(a)suse.com>
Reviewed-by: Nathan Fontenot <ndfont(a)gmail.com>
Reported-by: "Scargall, Steve" <steve.scargall(a)intel.com>
Cc: Dan Williams <dan.j.williams(a)intel.com>
Cc: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael(a)kernel.org>
Cc: Badari Pulavarty <pbadari(a)us.ibm.com>
Cc: Robert Jennings <rcj(a)linux.vnet.ibm.com>
Cc: Heiko Carstens <heiko.carstens(a)de.ibm.com>
Cc: Karel Zak <kzak(a)redhat.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
drivers/base/memory.c | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)
--- a/drivers/base/memory.c~drivers-base-memoryc-indicate-all-memory-blocks-as-removable
+++ a/drivers/base/memory.c
@@ -97,30 +97,13 @@ static ssize_t phys_index_show(struct de
}
/*
- * Show whether the memory block is likely to be offlineable (or is already
- * offline). Once offline, the memory block could be removed. The return
- * value does, however, not indicate that there is a way to remove the
- * memory block.
+ * Legacy interface that we cannot remove. Always indicate "removable"
+ * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
*/
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- struct memory_block *mem = to_memory_block(dev);
- unsigned long pfn;
- int ret = 1, i;
-
- if (mem->state != MEM_ONLINE)
- goto out;
-
- for (i = 0; i < sections_per_block; i++) {
- if (!present_section_nr(mem->start_section_nr + i))
- continue;
- pfn = section_nr_to_pfn(mem->start_section_nr + i);
- ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
- }
-
-out:
- return sprintf(buf, "%d\n", ret);
+ return sprintf(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}
/*
_
From: Roman Gushchin <guro(a)fb.com>
Subject: mm: fork: fix kernel_stack memcg stats for various stack implementations
Depending on CONFIG_VMAP_STACK and the THREAD_SIZE / PAGE_SIZE ratio the
space for task stacks can be allocated using __vmalloc_node_range(),
alloc_pages_node() and kmem_cache_alloc_node(). In the first and the
second cases page->mem_cgroup pointer is set, but in the third it's not:
memcg membership of a slab page should be determined using the
memcg_from_slab_page() function, which looks at
page->slab_cache->memcg_params.memcg . In this case, using
mod_memcg_page_state() (as in account_kernel_stack()) is incorrect:
page->mem_cgroup pointer is NULL even for pages charged to a non-root
memory cgroup.
It can lead to kernel_stack per-memcg counters permanently showing 0 on
some architectures (depending on the configuration).
In order to fix it, let's introduce a mod_memcg_obj_state() helper, which
takes a pointer to a kernel object as a first argument, uses
mem_cgroup_from_obj() to get a RCU-protected memcg pointer and calls
mod_memcg_state(). It allows to handle all possible configurations
(CONFIG_VMAP_STACK and various THREAD_SIZE/PAGE_SIZE values) without
spilling any memcg/kmem specifics into fork.c .
Note: This is a special version of the patch created for stable
backports. It contains code from the following two patches:
- mm: memcg/slab: introduce mem_cgroup_from_obj()
- mm: fork: fix kernel_stack memcg stats for various stack implementations
[guro(a)fb.com: introduce mem_cgroup_from_obj()]
Link: http://lkml.kernel.org/r/20200324004221.GA36662@carbon.dhcp.thefacebook.com
Link: http://lkml.kernel.org/r/20200303233550.251375-1-guro@fb.com
Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages")
Signed-off-by: Roman Gushchin <guro(a)fb.com>
Reviewed-by: Shakeel Butt <shakeelb(a)google.com>
Acked-by: Johannes Weiner <hannes(a)cmpxchg.org>
Cc: Michal Hocko <mhocko(a)kernel.org>
Cc: Bharata B Rao <bharata(a)linux.ibm.com>
Cc: Shakeel Butt <shakeelb(a)google.com>
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org>
---
include/linux/memcontrol.h | 12 +++++++++++
kernel/fork.c | 4 +--
mm/memcontrol.c | 38 +++++++++++++++++++++++++++++++++++
3 files changed, 52 insertions(+), 2 deletions(-)
--- a/include/linux/memcontrol.h~mm-fork-fix-kernel_stack-memcg-stats-for-various-stack-implementations
+++ a/include/linux/memcontrol.h
@@ -695,6 +695,7 @@ static inline unsigned long lruvec_page_
void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
+void mod_memcg_obj_state(void *p, int idx, int val);
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
@@ -1123,6 +1124,10 @@ static inline void __mod_lruvec_slab_sta
__mod_node_page_state(page_pgdat(page), idx, val);
}
+static inline void mod_memcg_obj_state(void *p, int idx, int val)
+{
+}
+
static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1427,6 +1432,8 @@ static inline int memcg_cache_id(struct
return memcg ? memcg->kmemcg_id : -1;
}
+struct mem_cgroup *mem_cgroup_from_obj(void *p);
+
#else
static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1468,6 +1475,11 @@ static inline void memcg_put_cache_ids(v
{
}
+static inline struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ return NULL;
+}
+
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
--- a/kernel/fork.c~mm-fork-fix-kernel_stack-memcg-stats-for-various-stack-implementations
+++ a/kernel/fork.c
@@ -397,8 +397,8 @@ static void account_kernel_stack(struct
mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
THREAD_SIZE / 1024 * account);
- mod_memcg_page_state(first_page, MEMCG_KERNEL_STACK_KB,
- account * (THREAD_SIZE / 1024));
+ mod_memcg_obj_state(stack, MEMCG_KERNEL_STACK_KB,
+ account * (THREAD_SIZE / 1024));
}
}
--- a/mm/memcontrol.c~mm-fork-fix-kernel_stack-memcg-stats-for-various-stack-implementations
+++ a/mm/memcontrol.c
@@ -777,6 +777,17 @@ void __mod_lruvec_slab_state(void *p, en
rcu_read_unlock();
}
+void mod_memcg_obj_state(void *p, int idx, int val)
+{
+ struct mem_cgroup *memcg;
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_obj(p);
+ if (memcg)
+ mod_memcg_state(memcg, idx, val);
+ rcu_read_unlock();
+}
+
/**
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
@@ -2661,6 +2672,33 @@ static void commit_charge(struct page *p
}
#ifdef CONFIG_MEMCG_KMEM
+/*
+ * Returns a pointer to the memory cgroup to which the kernel object is charged.
+ *
+ * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(),
+ * cgroup_mutex, etc.
+ */
+struct mem_cgroup *mem_cgroup_from_obj(void *p)
+{
+ struct page *page;
+
+ if (mem_cgroup_disabled())
+ return NULL;
+
+ page = virt_to_head_page(p);
+
+ /*
+ * Slab pages don't have page->mem_cgroup set because corresponding
+ * kmem caches can be reparented during the lifetime. That's why
+ * memcg_from_slab_page() should be used instead.
+ */
+ if (PageSlab(page))
+ return memcg_from_slab_page(page);
+
+ /* All other pages use page->mem_cgroup */
+ return page->mem_cgroup;
+}
+
static int memcg_alloc_cache_id(void)
{
int id, size;
_