The current panthor_device_mmap_io() implementation has two issues:
1. For mapping DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET,
panthor_device_mmap_io() bails if VM_WRITE is set, but does not clear
VM_MAYWRITE. That means userspace can use mprotect() to make the mapping
writable later on. This is a classic Linux driver gotcha.
I don't think this actually has any impact in practice:
When the GPU is powered, writes to the FLUSH_ID seem to be ignored; and
when the GPU is not powered, the dummy_latest_flush page provided by the
driver is deliberately designed to not do any flushes, so the only thing
writing to the dummy_latest_flush could achieve would be to make *more*
flushes happen.
2. panthor_device_mmap_io() does not block MAP_PRIVATE mappings (which are
mappings without the VM_SHARED flag).
MAP_PRIVATE in combination with VM_MAYWRITE indicates that the VMA has
copy-on-write semantics, which for VM_PFNMAP are semi-supported but
fairly cursed.
In particular, in such a mapping, the driver can only install PTEs
during mmap() by calling remap_pfn_range() (because remap_pfn_range()
wants to **store the physical address of the mapped physical memory into
the vm_pgoff of the VMA**); installing PTEs later on with a fault
handler (as panthor does) is not supported in private mappings, and so
if you try to fault in such a mapping, vmf_insert_pfn_prot() splats when
it hits a BUG() check.
Fix it by clearing the VM_MAYWRITE flag (userspace writing to the FLUSH_ID
doesn't make sense) and requiring VM_SHARED (copy-on-write semantics for
the FLUSH_ID don't make sense).
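For context, this is the general shape of a hardened read-only MMIO mmap
handler (a sketch with illustrative names, not the exact panthor code):
```
static int my_mmap_ro_io(struct file *file, struct vm_area_struct *vma)
{
	/* MAP_PRIVATE (no VM_SHARED) means CoW semantics, which don't
	 * work with a VM_PFNMAP fault handler; refuse it. */
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	/* Refuse writable/executable mappings up front... */
	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
		return -EINVAL;

	/* ...and clear VM_MAYWRITE so a later mprotect(PROT_WRITE) fails. */
	vm_flags_clear(vma, VM_MAYWRITE);

	return 0;
}
```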
Reproducers for both scenarios are in the notes of my patch on the mailing
list; I tested that these bugs exist on a Rock 5B machine.
Note that I have only compile-tested the patch, not run it; I don't
have a working kernel build setup for the test machine yet. Please test
it before applying.
Cc: stable@vger.kernel.org
Fixes: 5fe909cae118 ("drm/panthor: Add the device logical block")
Signed-off-by: Jann Horn <jannh@google.com>
---
First testcase (can write to the FLUSH_ID):
```
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <drm/panthor_drm.h>

// assumed device node; adjust to the panthor DRM node on your system
#define GPU_PATH "/dev/dri/card0"

#define SYSCHK(x) ({ \
  typeof(x) __res = (x); \
  if (__res == (typeof(x))-1) \
    err(1, "SYSCHK(" #x ")"); \
  __res; \
})

int main(void) {
  int fd = SYSCHK(open(GPU_PATH, O_RDWR));

  // sanity-check that PROT_WRITE+MAP_SHARED fails
  void *mmap_write_res = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE,
      MAP_SHARED, fd, DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET);
  if (mmap_write_res == MAP_FAILED) {
    perror("mmap() with PROT_WRITE+MAP_SHARED failed as expected");
  } else {
    errx(1, "mmap() with PROT_WRITE+MAP_SHARED worked???");
  }

  // make a PROT_READ+MAP_SHARED mapping, and upgrade it to writable
  void *mmio_page = SYSCHK(mmap(NULL, 0x1000, PROT_READ, MAP_SHARED,
      fd, DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET));
  SYSCHK(mprotect(mmio_page, 0x1000, PROT_READ|PROT_WRITE));

  volatile uint32_t *flush_counter = (volatile uint32_t*)mmio_page;
  uint32_t last_old = -1;
  while (1) {
    uint32_t old_val = *flush_counter;
    *flush_counter = 1111;
    uint32_t new_val = *flush_counter;
    if (old_val != last_old)
      printf("flush counter: old=%u, new=%u\n", old_val, new_val);
    last_old = old_val;
  }
}
```
Second testcase (triggers BUG() splat):
```
#include <err.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <drm/panthor_drm.h>

// assumed device node; adjust to the panthor DRM node on your system
#define GPU_PATH "/dev/dri/card0"

#define SYSCHK(x) ({ \
  typeof(x) __res = (x); \
  if (__res == (typeof(x))-1) \
    err(1, "SYSCHK(" #x ")"); \
  __res; \
})

int main(void) {
  int fd = SYSCHK(open(GPU_PATH, O_RDWR));

  // make a PROT_READ+**MAP_PRIVATE** mapping
  void *ptr = SYSCHK(mmap(NULL, 0x1000, PROT_READ, MAP_PRIVATE,
      fd, DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET));

  // trigger a read fault
  *(volatile char *)ptr;
}
```
The second testcase splats like this:
```
[ 2918.411814] ------------[ cut here ]------------
[ 2918.411857] kernel BUG at mm/memory.c:2220!
[ 2918.411955] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP
[...]
[ 2918.416147] CPU: 3 PID: 2934 Comm: private_user_fl Tainted: G O 6.1.43-19-rk2312 #428a0a5e6
[ 2918.417043] Hardware name: Radxa ROCK 5B (DT)
[ 2918.417464] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2918.418119] pc : vmf_insert_pfn_prot+0x40/0xe4
[ 2918.418567] lr : panthor_mmio_vm_fault+0xb0/0x12c [panthor]
[...]
[ 2918.425746] Call trace:
[ 2918.425972] vmf_insert_pfn_prot+0x40/0xe4
[ 2918.426342] __do_fault+0x38/0x7c
[ 2918.426648] __handle_mm_fault+0x404/0x6dc
[ 2918.427018] handle_mm_fault+0x13c/0x18c
[ 2918.427374] do_page_fault+0x194/0x33c
[ 2918.427716] do_translation_fault+0x60/0x7c
[ 2918.428095] do_mem_abort+0x44/0x90
[ 2918.428410] el0_da+0x40/0x68
[ 2918.428685] el0t_64_sync_handler+0x9c/0xf8
[ 2918.429067] el0t_64_sync+0x174/0x178
```
---
drivers/gpu/drm/panthor/panthor_device.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
index 4082c8f2951dfdace7f73a24d6fe34e9e7f920eb..6fbff516c1c1f047fcb4dee17b87d8263616dc0c 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -390,11 +390,15 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *
{
u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
switch (offset) {
case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
(vma->vm_flags & (VM_WRITE | VM_EXEC)))
return -EINVAL;
+ vm_flags_clear(vma, VM_MAYWRITE);
break;
---
base-commit: d78f0ee0406803cda8801fd5201746ccf89e5e4a
change-id: 20241104-panthor-flush-page-fixes-fe4202bb18c0
--
Jann Horn <jannh@google.com>
From: Xu Yang <xu.yang_2@nxp.com>
When running perf stat on a sys metric, the perf tool currently outputs nothing:
$ perf stat -a -M imx95_ddr_read.all -I 1000
$
This command was run on an arm64 machine whose SoC has one DDR hw PMU
in addition to the armv8_cortex_a55 PMU. Their maps are as follows:
const struct pmu_events_map pmu_events_map[] = {
{
.arch = "arm64",
.cpuid = "0x00000000410fd050",
.event_table = {
.pmus = pmu_events__arm_cortex_a55,
.num_pmus = ARRAY_SIZE(pmu_events__arm_cortex_a55)
},
.metric_table = {
.pmus = NULL,
.num_pmus = 0
}
},
static const struct pmu_sys_events pmu_sys_event_tables[] = {
{
.event_table = {
.pmus = pmu_events__freescale_imx95_sys,
.num_pmus = ARRAY_SIZE(pmu_events__freescale_imx95_sys)
},
.metric_table = {
.pmus = pmu_metrics__freescale_imx95_sys,
.num_pmus = ARRAY_SIZE(pmu_metrics__freescale_imx95_sys)
},
.name = "pmu_events__freescale_imx95_sys",
},
Currently, pmu_metrics_table__find() returns NULL when perf stat is run
on a sys metric alone. parse_groups() is then never called to parse the
sys metric_name, and the perf tool exits directly. This should be a
common problem. To fix the issue, restore the logic from before commit
f20c15d13f01 ("perf pmu-events: Remember the perf_events_map for a PMU")
and return an empty metric table rather than a NULL pointer. This should
be fine since the removed code only checked whether the table matches the
provided metric_name; without it, the code in parse_groups() still checks
the validity of the metric_name.
Fixes: f20c15d13f01 ("perf pmu-events: Remember the perf_events_map for a PMU")
Cc: stable@vger.kernel.org
Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Ian Rogers <irogers@google.com>
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/pmu-events/empty-pmu-events.c | 12 +-----------
tools/perf/pmu-events/jevents.py | 12 +-----------
2 files changed, 2 insertions(+), 22 deletions(-)
diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c
index 2b7516946ded..b8719dab264d 100644
--- a/tools/perf/pmu-events/empty-pmu-events.c
+++ b/tools/perf/pmu-events/empty-pmu-events.c
@@ -585,17 +585,7 @@ const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pm
if (!map)
return NULL;
- if (!pmu)
- return &map->metric_table;
-
- for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
- const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
- const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
-
- if (pmu__name_match(pmu, pmu_name))
- return &map->metric_table;
- }
- return NULL;
+ return &map->metric_table;
}
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 6e71b09dbc2a..70f4fd5395fb 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -1101,17 +1101,7 @@ const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pm
if (!map)
return NULL;
- if (!pmu)
- return &map->metric_table;
-
- for (size_t i = 0; i < map->metric_table.num_pmus; i++) {
- const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i];
- const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset];
-
- if (pmu__name_match(pmu, pmu_name))
- return &map->metric_table;
- }
- return NULL;
+ return &map->metric_table;
}
const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid)
--
2.47.0.199.ga7371fff76-goog
Upgrading to kernel version 6.11 breaks the ability to switch between power modes on KDE Plasma 6.2.2 and NixOS 24.11 (unstable). I am using a ROG Zephyrus 14 GA401 with a 4060.
I thought it might be Nvidia acting up, so I've tried:
- Re-enabling power-profiles-daemon in my NixOS configuration (did not fix the issue)
- Disabling supergfxctl in my NixOS configuration (did not fix the issue)
- Switching between nvidia latest and beta (did not fix the issue)
Switching back to kernel version 6.10 fixes the issue. Since 6.10 is considered EOL upstream, I had to manually switch back to a specific commit hash to get 6.10 to work.
Hoping this gets fixed on 6.11.
Thank you,
soda
From: Uladzislau Rezki <urezki@gmail.com>
commit 3c5d61ae919cc377c71118ccc76fa6e8518023f8 upstream.
Add a kvfree_rcu_barrier() function. It waits until all
in-flight pointers are freed over the RCU machinery. It does
not wait for any GP completion and is within its rights to
return immediately if there are no outstanding pointers.
This function is useful when there is a need to guarantee
that memory is fully freed before destroying memory caches,
for example when unloading a kernel module.
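As an illustration of the intended usage (a minimal sketch, not part of
the patch; my_cache and the exit function are hypothetical):
```
/* Hypothetical module teardown: kvfree_rcu_barrier() guarantees that all
 * in-flight kvfree_rcu() callbacks freeing objects from my_cache have
 * completed before the backing cache is destroyed. */
static void __exit my_module_exit(void)
{
	kvfree_rcu_barrier();
	kmem_cache_destroy(my_cache);
}
module_exit(my_module_exit);
```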
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Changes since v1 [1]:
- Added SOB, per Greg KH
[1] https://lore.kernel.org/all/20241021171003.2907935-1-surenb@google.com/
include/linux/rcutiny.h | 5 ++
include/linux/rcutree.h | 1 +
kernel/rcu/tree.c | 109 +++++++++++++++++++++++++++++++++++++---
3 files changed, 107 insertions(+), 8 deletions(-)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d9ac7b136aea..522123050ff8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -111,6 +111,11 @@ static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
kvfree(ptr);
}
+static inline void kvfree_rcu_barrier(void)
+{
+ rcu_barrier();
+}
+
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 254244202ea9..58e7db80f3a8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,6 +35,7 @@ static inline void rcu_virt_note_context_switch(void)
void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
+void kvfree_rcu_barrier(void);
void rcu_barrier(void);
void rcu_momentary_dyntick_idle(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e641cc681901..be00aac5f4e7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3584,18 +3584,15 @@ kvfree_rcu_drain_ready(struct kfree_rcu_cpu *krcp)
}
/*
- * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ * Return: %true if a work is queued, %false otherwise.
*/
-static void kfree_rcu_monitor(struct work_struct *work)
+static bool
+kvfree_rcu_queue_batch(struct kfree_rcu_cpu *krcp)
{
- struct kfree_rcu_cpu *krcp = container_of(work,
- struct kfree_rcu_cpu, monitor_work.work);
unsigned long flags;
+ bool queued = false;
int i, j;
- // Drain ready for reclaim.
- kvfree_rcu_drain_ready(krcp);
-
raw_spin_lock_irqsave(&krcp->lock, flags);
// Attempt to start a new batch.
@@ -3634,11 +3631,27 @@ static void kfree_rcu_monitor(struct work_struct *work)
// be that the work is in the pending state when
// channels have been detached following by each
// other.
- queue_rcu_work(system_wq, &krwp->rcu_work);
+ queued = queue_rcu_work(system_wq, &krwp->rcu_work);
}
}
raw_spin_unlock_irqrestore(&krcp->lock, flags);
+ return queued;
+}
+
+/*
+ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+ */
+static void kfree_rcu_monitor(struct work_struct *work)
+{
+ struct kfree_rcu_cpu *krcp = container_of(work,
+ struct kfree_rcu_cpu, monitor_work.work);
+
+ // Drain ready for reclaim.
+ kvfree_rcu_drain_ready(krcp);
+
+ // Queue a batch for a rest.
+ kvfree_rcu_queue_batch(krcp);
// If there is nothing to detach, it means that our job is
// successfully done here. In case of having at least one
@@ -3859,6 +3872,86 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
+/**
+ * kvfree_rcu_barrier - Wait until all in-flight kvfree_rcu() complete.
+ *
+ * Note that a single argument of kvfree_rcu() call has a slow path that
+ * triggers synchronize_rcu() following by freeing a pointer. It is done
+ * before the return from the function. Therefore for any single-argument
+ * call that will result in a kfree() to a cache that is to be destroyed
+ * during module exit, it is developer's responsibility to ensure that all
+ * such calls have returned before the call to kmem_cache_destroy().
+ */
+void kvfree_rcu_barrier(void)
+{
+ struct kfree_rcu_cpu_work *krwp;
+ struct kfree_rcu_cpu *krcp;
+ bool queued;
+ int i, cpu;
+
+ /*
+ * Firstly we detach objects and queue them over an RCU-batch
+ * for all CPUs. Finally queued works are flushed for each CPU.
+ *
+ * Please note. If there are outstanding batches for a particular
+ * CPU, those have to be finished first following by queuing a new.
+ */
+ for_each_possible_cpu(cpu) {
+ krcp = per_cpu_ptr(&krc, cpu);
+
+ /*
+ * Check if this CPU has any objects which have been queued for a
+ * new GP completion. If not(means nothing to detach), we are done
+ * with it. If any batch is pending/running for this "krcp", below
+ * per-cpu flush_rcu_work() waits its completion(see last step).
+ */
+ if (!need_offload_krc(krcp))
+ continue;
+
+ while (1) {
+ /*
+ * If we are not able to queue a new RCU work it means:
+ * - batches for this CPU are still in flight which should
+ * be flushed first and then repeat;
+ * - no objects to detach, because of concurrency.
+ */
+ queued = kvfree_rcu_queue_batch(krcp);
+
+ /*
+ * Bail out, if there is no need to offload this "krcp"
+ * anymore. As noted earlier it can run concurrently.
+ */
+ if (queued || !need_offload_krc(krcp))
+ break;
+
+ /* There are ongoing batches. */
+ for (i = 0; i < KFREE_N_BATCHES; i++) {
+ krwp = &(krcp->krw_arr[i]);
+ flush_rcu_work(&krwp->rcu_work);
+ }
+ }
+ }
+
+ /*
+ * Now we guarantee that all objects are flushed.
+ */
+ for_each_possible_cpu(cpu) {
+ krcp = per_cpu_ptr(&krc, cpu);
+
+ /*
+ * A monitor work can drain ready to reclaim objects
+ * directly. Wait its completion if running or pending.
+ */
+ cancel_delayed_work_sync(&krcp->monitor_work);
+
+ for (i = 0; i < KFREE_N_BATCHES; i++) {
+ krwp = &(krcp->krw_arr[i]);
+ flush_rcu_work(&krwp->rcu_work);
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
+
static unsigned long
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
base-commit: 163b38476c50d64b89c1dfdf4fd57a368b6ebbec
--
2.47.0.199.ga7371fff76-goog
The link speed is downgraded to 2.5 GT/s when a Samsung NVMe device
is hotplugged into an Intel PCIe root port [8086:0db0].
```
+-[0000:3c]-+-00.0 Intel Corporation Ice Lake Memory Map/VT-d
| ...
| +02.0-[3d]----00.0 Samsung Electronics Co Ltd Device a80e
```
The following messages are logged when the issue occurs.
"Card present" is reported twice via external interrupts, due to
slight contact bounce when the Samsung NVMe device is plugged in.
The first link activation failure causes the root port's link speed
to be mistakenly downgraded to 2.5 GT/s.
```
[ 8223.419682] pcieport 0000:3d:02.0: pciehp: Slot(1): Card present
[ 8224.449714] pcieport 0000:3d:02.0: broken device, retraining non-functional downstream link at 2.5GT/s
[ 8225.518723] pcieport 0000:3d:02.0: pciehp: Slot(1): Card present
[ 8225.518726] pcieport 0000:3d:02.0: pciehp: Slot(1): Link up
```
To avoid wrongly capping the link speed at 2.5 GT/s, only allow the
specific PCIe devices in the quirk's ID table to perform the link retrain.
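For context (not visible in the diff below), the ids[] table that
pci_match_id() consults is, as I understand it, the quirk's existing
allow-list in quirks.c:
```
/* Allow-list from pcie_failed_link_retrain(); with this change it gates
 * the whole workaround instead of only the final speed unclamping. */
static const struct pci_device_id ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_ASMEDIA, 0x2824) }, /* ASMedia ASM2824 */
	{}
};
```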
Fixes: a89c82249c37 ("PCI: Work around PCIe link training failures")
Signed-off-by: Jinhui Guo <guojinhui.liam@bytedance.com>
---
drivers/pci/quirks.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index dccb60c1d9cc..59858156003b 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -91,7 +91,8 @@ int pcie_failed_link_retrain(struct pci_dev *dev)
int ret = -ENOTTY;
if (!pci_is_pcie(dev) || !pcie_downstream_port(dev) ||
- !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting)
+ !pcie_cap_has_lnkctl2(dev) || !dev->link_active_reporting ||
+ !pci_match_id(ids, dev))
return ret;
pcie_capability_read_word(dev, PCI_EXP_LNKCTL2, &lnkctl2);
@@ -119,8 +120,7 @@ int pcie_failed_link_retrain(struct pci_dev *dev)
}
if ((lnksta & PCI_EXP_LNKSTA_DLLLA) &&
- (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT &&
- pci_match_id(ids, dev)) {
+ (lnkctl2 & PCI_EXP_LNKCTL2_TLS) == PCI_EXP_LNKCTL2_TLS_2_5GT) {
u32 lnkcap;
pci_info(dev, "removing 2.5GT/s downstream link speed restriction\n");
--
2.20.1
In line 1854 of esas2r_ioctl.c, esas2r_process_vda_ioctl() is called
with the parameter vi set to a->vda_buffer. On line 1892, a->vda_buffer
is allocated from DMA-coherent memory with the statement
a->vda_buffer = dma_alloc_coherent(&a->pcid->dev, ..., so the vi buffer
passed to the function can be altered at any time by malicious hardware.
If vi->function changes after the first conditional check,
if (vi->function >= vercnt), an array out-of-bounds access can occur in
the subsequent check,
if (vi->version > esas2r_vdaioctl_versions[vi->function]), leading to
serious issues.
To fix this issue, store the value of vi->function in a local variable
so that the subsequent checks and uses operate on a stable value.
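The underlying pattern, as a standalone sketch (illustrative names only,
not the driver code; READ_ONCE() additionally keeps the compiler from
re-reading the device-writable field):
```
#include <linux/compiler.h>	/* READ_ONCE() */
#include <linux/types.h>

struct my_req { u8 function; u8 version; };	/* hypothetical layout */

/* Snapshot-then-validate: req lives in dma_alloc_coherent() memory the
 * device can rewrite at any time, so copy the field once and run every
 * check and array index against the stable local copy. */
static bool my_req_valid(const struct my_req *req, const u8 *vers, u8 vercnt)
{
	u8 func = READ_ONCE(req->function);

	if (func >= vercnt)
		return false;
	return req->version <= vers[func];	/* index uses the snapshot */
}
```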
Signed-off-by: Qiu-ji Chen <chenqiuji666@gmail.com>
Cc: stable@vger.kernel.org
Fixes: 26780d9e12ed ("[SCSI] esas2r: ATTO Technology ExpressSAS 6G SAS/SATA RAID Adapter Driver")
---
V2:
Changed the incorrect patch title
---
drivers/scsi/esas2r/esas2r_vda.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/esas2r/esas2r_vda.c b/drivers/scsi/esas2r/esas2r_vda.c
index 30028e56df63..48af8c05b01d 100644
--- a/drivers/scsi/esas2r/esas2r_vda.c
+++ b/drivers/scsi/esas2r/esas2r_vda.c
@@ -70,16 +70,17 @@ bool esas2r_process_vda_ioctl(struct esas2r_adapter *a,
u32 datalen = 0;
struct atto_vda_sge *firstsg = NULL;
u8 vercnt = (u8)ARRAY_SIZE(esas2r_vdaioctl_versions);
+ u8 vi_function = vi->function;
vi->status = ATTO_STS_SUCCESS;
vi->vda_status = RS_PENDING;
- if (vi->function >= vercnt) {
+ if (vi_function >= vercnt) {
vi->status = ATTO_STS_INV_FUNC;
return false;
}
- if (vi->version > esas2r_vdaioctl_versions[vi->function]) {
+ if (vi->version > esas2r_vdaioctl_versions[vi_function]) {
vi->status = ATTO_STS_INV_VERSION;
return false;
}
@@ -89,14 +90,14 @@ bool esas2r_process_vda_ioctl(struct esas2r_adapter *a,
return false;
}
- if (vi->function != VDA_FUNC_SCSI)
+ if (vi_function != VDA_FUNC_SCSI)
clear_vda_request(rq);
- rq->vrq->scsi.function = vi->function;
+ rq->vrq->scsi.function = vi_function;
rq->interrupt_cb = esas2r_complete_vda_ioctl;
rq->interrupt_cx = vi;
- switch (vi->function) {
+ switch (vi_function) {
case VDA_FUNC_FLASH:
if (vi->cmd.flash.sub_func != VDA_FLASH_FREAD
--
2.34.1
alloc_percpu() returns a pointer to the allocated percpu memory, but its
return value is not checked here. Our static analysis tool indicates that
a NULL pointer dereference may occur through zone->per_cpu_pageset. It is
always safer to check for a NULL pointer before use.
Signed-off-by: Qiu-ji Chen <chenqiuji666@gmail.com>
Cc: stable@vger.kernel.org
Fixes: 9420f89db2dd ("mm: move most of core MM initialization to mm/mm_init.c")
---
V2:
Fixed the incorrect code logic.
Thanks David Hildenbrand for helpful suggestion.
---
mm/page_alloc.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8afab64814dc..7c8a74fd02d6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5703,8 +5703,14 @@ void __meminit setup_zone_pageset(struct zone *zone)
/* Size may be 0 on !SMP && !NUMA */
if (sizeof(struct per_cpu_zonestat) > 0)
zone->per_cpu_zonestats = alloc_percpu(struct per_cpu_zonestat);
+ if (!zone->per_cpu_zonestats)
+ return;
zone->per_cpu_pageset = alloc_percpu(struct per_cpu_pages);
+ if (!zone->per_cpu_pageset) {
+ free_percpu(zone->per_cpu_zonestats);
+ return;
+ }
for_each_possible_cpu(cpu) {
struct per_cpu_pages *pcp;
struct per_cpu_zonestat *pzstats;
--
2.34.1