If the full path to be built by ceph_mdsc_build_path() happens to be
longer than PATH_MAX, then this function will enter an endless (retry)
loop, effectively blocking the whole task. Most of the machine
becomes unusable, making this a very simple and effective DoS
vulnerability.
I cannot imagine why this retry was ever implemented, but it seems
rather useless and harmful to me. Let's remove it and fail with
ENAMETOOLONG instead.
Cc: stable(a)vger.kernel.org
Reported-by: Dario Weißer <dario(a)cure53.de>
Signed-off-by: Max Kellermann <max.kellermann(a)ionos.com>
---
fs/ceph/mds_client.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index c4a5fd94bbbb..4f6ac015edcd 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2808,12 +2808,11 @@ char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, struct dentry *dentry,
if (pos < 0) {
/*
- * A rename didn't occur, but somehow we didn't end up where
- * we thought we would. Throw a warning and try again.
+ * The path is longer than PATH_MAX and this function
+ * cannot ever succeed. Creating paths that long is
+ * possible with Ceph, but Linux cannot use them.
*/
- pr_warn_client(cl, "did not end path lookup where expected (pos = %d)\n",
- pos);
- goto retry;
+ return ERR_PTR(-ENAMETOOLONG);
}
*pbase = base;
--
2.45.2
Netpoll explicitly invokes the polling call with a budget of 0 to
indicate that it is clearing the Tx path only. gve_rx_poll and
gve_xdp_poll mistakenly took a budget of 0 as an indication to do all
the work. Add a check so that the rx path and the xdp path are not
called when the budget is 0, and also avoid calling napi_complete_done
when the budget is 0 for netpoll.
The original fix was merged here:
https://lore.kernel.org/r/20231114004144.2022268-1-ziweixiao@google.com
Resending it because the original did not apply cleanly to the 5.15 kernel.
Fixes: f5cedc84a30d ("gve: Add transmit and receive support")
Signed-off-by: Ziwei Xiao <ziweixiao(a)google.com>
Reviewed-by: Praveen Kaligineedi <pkaligineedi(a)google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi(a)google.com>
---
drivers/net/ethernet/google/gve/gve_main.c | 7 +++++++
drivers/net/ethernet/google/gve/gve_rx.c | 4 ----
drivers/net/ethernet/google/gve/gve_tx.c | 4 ----
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index bf8a4a7c43f7..c3f1959533a8 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -198,6 +198,10 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
if (block->tx)
reschedule |= gve_tx_poll(block, budget);
+
+ if (!budget)
+ return 0;
+
if (block->rx)
reschedule |= gve_rx_poll(block, budget);
@@ -246,6 +250,9 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
if (block->tx)
reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+ if (!budget)
+ return 0;
+
if (block->rx) {
work_done = gve_rx_poll_dqo(block, budget);
reschedule |= work_done == budget;
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 94941d4e4744..368e0e770178 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -599,10 +599,6 @@ bool gve_rx_poll(struct gve_notify_block *block, int budget)
feat = block->napi.dev->features;
- /* If budget is 0, do all the work */
- if (budget == 0)
- budget = INT_MAX;
-
if (budget > 0)
repoll |= gve_clean_rx_done(rx, budget, feat);
else
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 665ac795a1ad..d56b8356f1f3 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -691,10 +691,6 @@ bool gve_tx_poll(struct gve_notify_block *block, int budget)
u32 nic_done;
u32 to_do;
- /* If budget is 0, do all the work */
- if (budget == 0)
- budget = INT_MAX;
-
/* Find out how much work there is to be done */
tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
nic_done = be32_to_cpu(tx->last_nic_done);
--
2.47.0.338.g60cca15819-goog
This patchset implements UVC v1.5 region of interest using V4L2
control API.
The ROI control consists of two UVC-specific controls.
1. A rectangle control with a newly added type V4L2_CTRL_TYPE_RECT.
2. An auto control with type bitmask.
V4L2_CTRL_WHICH_MIN/MAX_VAL is added to support the rectangle control.
The corresponding v4l-utils series can be found at
https://patchwork.linuxtv.org/project/linux-media/list/?series=11069 .
Tested with v4l2-compliance, v4l2-ctl, calling ioctls on usb cameras and
VIVID with a newly added V4L2_CTRL_TYPE_RECT control.
This set includes also the patch:
media: uvcvideo: Fix event flags in uvc_ctrl_send_events
It is not technically part of this change, but we conflict with it.
I am continuing the work that Yunke did.
Changes in v15:
- Modify mapping set/get to support any size
- Remove v4l2_size field. It is not needed, we can use the v4l2_type to
infer it.
- Improve documentation.
- Lots of refactoring, now adding compound and roi are very small
patches.
- Remove rectangle clamping, not supported by some firmware.
- Remove init, we can add it later.
- Move uvc_cid to USER_BASE
- Link to v14: https://lore.kernel.org/linux-media/20231201071907.3080126-1-yunkec@google.…
Signed-off-by: Ricardo Ribalda <ribalda(a)chromium.org>
---
Hans Verkuil (1):
media: v4l2-ctrls: add support for V4L2_CTRL_WHICH_MIN/MAX_VAL
Ricardo Ribalda (12):
media: uvcvideo: Fix event flags in uvc_ctrl_send_events
media: uvcvideo: Handle uvc menu translation inside uvc_get_le_value
media: uvcvideo: Handle uvc menu translation inside uvc_set_le_value
media: uvcvideo: refactor uvc_ioctl_g_ext_ctrls
media: uvcvideo: uvc_ioctl_(g|s)_ext_ctrls: handle NoP case
media: uvcvideo: Support any size for mapping get/set
media: uvcvideo: Factor out clamping from uvc_ctrl_set
media: uvcvideo: Factor out query_boundaries from query_ctrl
media: uvcvideo: Use the camera to clamp compound controls
media: uvcvideo: let v4l2_query_v4l2_ctrl() work with v4l2_query_ext_ctrl
media: uvcvideo: Introduce uvc_mapping_v4l2_size
media: uvcvideo: Add sanity check to uvc_ioctl_xu_ctrl_map
Yunke Cao (6):
media: v4l2_ctrl: Add V4L2_CTRL_TYPE_RECT
media: vivid: Add a rectangle control
media: uvcvideo: add support for compound controls
media: uvcvideo: support V4L2_CTRL_WHICH_MIN/MAX_VAL
media: uvcvideo: implement UVC v1.5 ROI
media: uvcvideo: document UVC v1.5 ROI
.../userspace-api/media/drivers/uvcvideo.rst | 64 ++
.../userspace-api/media/v4l/vidioc-g-ext-ctrls.rst | 26 +-
.../userspace-api/media/v4l/vidioc-queryctrl.rst | 14 +
.../userspace-api/media/videodev2.h.rst.exceptions | 4 +
drivers/media/i2c/imx214.c | 4 +-
drivers/media/platform/qcom/venus/venc_ctrls.c | 9 +-
drivers/media/test-drivers/vivid/vivid-ctrls.c | 34 +
drivers/media/usb/uvc/uvc_ctrl.c | 805 ++++++++++++++++-----
drivers/media/usb/uvc/uvc_v4l2.c | 77 +-
drivers/media/usb/uvc/uvcvideo.h | 25 +-
drivers/media/v4l2-core/v4l2-ctrls-api.c | 54 +-
drivers/media/v4l2-core/v4l2-ctrls-core.c | 167 ++++-
drivers/media/v4l2-core/v4l2-ioctl.c | 4 +-
include/media/v4l2-ctrls.h | 38 +-
include/uapi/linux/usb/video.h | 1 +
include/uapi/linux/uvcvideo.h | 13 +
include/uapi/linux/v4l2-controls.h | 9 +
include/uapi/linux/videodev2.h | 5 +
18 files changed, 1062 insertions(+), 291 deletions(-)
---
base-commit: 5516200c466f92954551406ea641376963c43a92
change-id: 20241113-uvc-roi-66bd6cfa1e64
Best regards,
--
Ricardo Ribalda <ribalda(a)chromium.org>
This issue was found after attempting to make the same mistake for
a driver I maintain, which was fortunately spotted by Jonathan [1].
Keeping old sensor values if the channel configuration changes is known
and not considered an issue, which is also mentioned in [1], so it has
not been addressed by this series. That keeps most of the drivers out
of the way because they store the scan element in iio private data,
which is kzalloc() allocated.
This series only addresses cases where uninitialized, i.e. unknown, data
is pushed to userspace, either due to holes in structs or
uninitialized struct members/array elements.
While analyzing the functions involved, I found and fixed a trivial issue
(wrong function name) in the documentation of iio_dev_opaque.
Link: https://lore.kernel.org/linux-iio/20241123151634.303aa860@jic23-huawei/ [1]
Signed-off-by: Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
---
Javier Carrasco (11):
iio: temperature: tmp006: fix information leak in triggered buffer
iio: adc: ti-ads1119: fix information leak in triggered buffer
iio: pressure: zpa2326: fix information leak in triggered buffer
iio: adc: rockchip_saradc: fix information leak in triggered buffer
iio: imu: kmx61: fix information leak in triggered buffer
iio: light: vcnl4035: fix information leak in triggered buffer
iio: light: bh1745: fix information leak in triggered buffer
iio: adc: ti-ads8688: fix information leak in triggered buffer
iio: dummy: iio_simply_dummy_buffer: fix information leak in triggered buffer
iio: light: as73211: fix information leak in triggered buffer
iio: core: fix doc reference to iio_push_to_buffers_with_ts_unaligned
drivers/iio/adc/rockchip_saradc.c | 2 ++
drivers/iio/adc/ti-ads1119.c | 2 ++
drivers/iio/adc/ti-ads8688.c | 2 +-
drivers/iio/dummy/iio_simple_dummy_buffer.c | 2 +-
drivers/iio/imu/kmx61.c | 2 +-
drivers/iio/light/as73211.c | 3 +++
drivers/iio/light/bh1745.c | 2 ++
drivers/iio/light/vcnl4035.c | 2 +-
drivers/iio/pressure/zpa2326.c | 2 ++
drivers/iio/temperature/tmp006.c | 2 ++
include/linux/iio/iio-opaque.h | 2 +-
11 files changed, 18 insertions(+), 5 deletions(-)
---
base-commit: ab376e4d674037f45d5758c1dc391bd4e11c5dc4
change-id: 20241123-iio_memset_scan_holes-a673833ef932
Best regards,
--
Javier Carrasco <javier.carrasco.cruz(a)gmail.com>
read_hv_sched_clock_tsc() assumes that the Hyper-V clock counter is
bigger than the variable hv_sched_clock_offset, which is cached during
early boot, but depending on the timing this assumption may be false
when a hibernated VM starts again (the clock counter starts from 0
again) and is resuming back (Note: hv_init_tsc_clocksource() is not
called during hibernation/resume); consequently,
read_hv_sched_clock_tsc() may return a negative integer (which is
interpreted as a huge positive integer since the return type is u64)
and new kernel messages are prefixed with huge timestamps before
read_hv_sched_clock_tsc() grows big enough (which typically takes
several seconds).
Fix the issue by saving the Hyper-V clock counter just before the
suspend, and using it to correct the hv_sched_clock_offset in
resume. This makes hv tsc page based sched_clock continuous and ensures
that post resume, it starts from where it left off during suspend.
Override x86_platform.save_sched_clock_state and
x86_platform.restore_sched_clock_state routines to correct this as soon
as possible.
Note: if Invariant TSC is available, the issue doesn't happen because
1) we don't register read_hv_sched_clock_tsc() for sched clock:
See commit e5313f1c5404 ("clocksource/drivers/hyper-v: Rework
clocksource and sched clock setup");
2) the common x86 code adjusts TSC similarly: see
__restore_processor_state() -> tsc_verify_tsc_adjust(true) and
x86_platform.restore_sched_clock_state().
Cc: stable(a)vger.kernel.org
Fixes: 1349401ff1aa ("clocksource/drivers/hyper-v: Suspend/resume Hyper-V clocksource for hibernation")
Co-developed-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Dexuan Cui <decui(a)microsoft.com>
Signed-off-by: Naman Jain <namjain(a)linux.microsoft.com>
---
Changes from v2:
https://lore.kernel.org/all/20240911045632.3757-1-namjain@linux.microsoft.c…
Addressed Michael's comments:
* Changed commit msg to include information on making timestamps
continuous
* Changed subject to reflect the new file being changed
* Changed variable name for saving offset/counters
* Moved comment on new function introduced from header file to function
definition.
* Removed the equations in comments
* Rebased to latest linux-next tip
Changes from v1:
https://lore.kernel.org/all/20240909053923.8512-1-namjain@linux.microsoft.c…
* Reorganized code as per Michael's comment, and moved the logic to x86
specific files, to keep hyperv_timer.c arch independent.
---
arch/x86/kernel/cpu/mshyperv.c | 58 ++++++++++++++++++++++++++++++
drivers/clocksource/hyperv_timer.c | 14 +++++++-
include/clocksource/hyperv_timer.h | 2 ++
3 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index ead967479fa6..e8e25d6e64cd 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -224,6 +224,63 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hyperv_cleanup();
}
#endif /* CONFIG_CRASH_DUMP */
+
+static u64 hv_ref_counter_at_suspend;
+static void (*old_save_sched_clock_state)(void);
+static void (*old_restore_sched_clock_state)(void);
+
+/*
+ * Hyper-V clock counter resets during hibernation. Save and restore clock
+ * offset during suspend/resume, while also considering the time passed
+ * before suspend. This is to make sure that sched_clock using hv tsc page
+ * based clocksource, proceeds from where it left off during suspend and
+ * it shows correct time for the timestamps of kernel messages after resume.
+ */
+static void save_hv_clock_tsc_state(void)
+{
+ hv_ref_counter_at_suspend = hv_read_reference_counter();
+}
+
+static void restore_hv_clock_tsc_state(void)
+{
+ /*
+ * Adjust the offsets used by hv tsc clocksource to
+ * account for the time spent before hibernation.
+ * adjusted value = reference counter (time) at suspend
+ * - reference counter (time) now.
+ */
+ hv_adj_sched_clock_offset(hv_ref_counter_at_suspend - hv_read_reference_counter());
+}
+
+/*
+ * Functions to override save_sched_clock_state and restore_sched_clock_state
+ * functions of x86_platform. The Hyper-V clock counter is reset during
+ * suspend-resume and the offset used to measure time needs to be
+ * corrected, post resume.
+ */
+static void hv_save_sched_clock_state(void)
+{
+ old_save_sched_clock_state();
+ save_hv_clock_tsc_state();
+}
+
+static void hv_restore_sched_clock_state(void)
+{
+ restore_hv_clock_tsc_state();
+ old_restore_sched_clock_state();
+}
+
+static void __init x86_setup_ops_for_tsc_pg_clock(void)
+{
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return;
+
+ old_save_sched_clock_state = x86_platform.save_sched_clock_state;
+ x86_platform.save_sched_clock_state = hv_save_sched_clock_state;
+
+ old_restore_sched_clock_state = x86_platform.restore_sched_clock_state;
+ x86_platform.restore_sched_clock_state = hv_restore_sched_clock_state;
+}
#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
@@ -590,6 +647,7 @@ static void __init ms_hyperv_init_platform(void)
/* Register Hyper-V specific clocksource */
hv_init_clocksource();
+ x86_setup_ops_for_tsc_pg_clock();
hv_vtl_init_platform();
#endif
/*
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 99177835cade..b39dee7b93af 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -27,7 +27,8 @@
#include <asm/mshyperv.h>
static struct clock_event_device __percpu *hv_clock_event;
-static u64 hv_sched_clock_offset __ro_after_init;
+/* Note: offset can hold negative values after hibernation. */
+static u64 hv_sched_clock_offset __read_mostly;
/*
* If false, we're using the old mechanism for stimer0 interrupts
@@ -470,6 +471,17 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
hv_set_msr(HV_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
}
+/*
+ * Called during resume from hibernation, from overridden
+ * x86_platform.restore_sched_clock_state routine. This is to adjust offsets
+ * used to calculate time for hv tsc page based sched_clock, to account for
+ * time spent before hibernation.
+ */
+void hv_adj_sched_clock_offset(u64 offset)
+{
+ hv_sched_clock_offset -= offset;
+}
+
#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
static int hv_cs_enable(struct clocksource *cs)
{
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 6cdc873ac907..aa5233b1eba9 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -38,6 +38,8 @@ extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
+extern void hv_adj_sched_clock_offset(u64 offset);
+
static __always_inline bool
hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
u64 *cur_tsc, u64 *time)
base-commit: a430d95c5efa2b545d26a094eb5f624e36732af0
--
2.34.1
Since 5.16 and prior to 6.13, KVM can't be used with FSDAX
guest memory (PMD pages). To reproduce the issue, reserve
guest memory with the `memmap=` cmdline option, then create and mount a
filesystem in DAX mode (tested with both XFS and ext4); see the doc link
below. The ndctl command used for the test:
ndctl create-namespace -v -e namespace1.0 --map=dev --mode=fsdax -a 2M
Then pass memory object to qemu like:
-m 8G -object memory-backend-file,id=ram0,size=8G,\
mem-path=/mnt/pmem/guestmem,share=on,prealloc=on,dump=off,align=2097152 \
-numa node,memdev=ram0,cpus=0-1
QEMU fails to run guest with error: kvm run failed Bad address
and there are two warnings in dmesg:
WARN_ON_ONCE(!page_count(page)) in kvm_is_zone_device_page() and
WARN_ON_ONCE(folio_ref_count(folio) <= 0) in try_grab_folio() (v6.6.63)
It looks like in the past the assumption was made that the pfn won't change
from faultin_pfn() to release_pfn_clean(), e.g. see
commit 4cd071d13c5c ("KVM: x86/mmu: Move calls to thp_adjust() down a level").
But the kvm_page_fault structure made the pfn part of mutable state, so
now release_pfn_clean() can be passed a hugepage-adjusted pfn.
And it works for all cases (/dev/shm, hugetlb, devdax) except fsdax.
Apparently in fsdax mode faultin-pfn and adjusted-pfn may refer to
different folios, so we're getting get_page/put_page imbalance.
To solve this, preserve the faultin pfn in a separate kvm_page_fault
field and pass it to kvm_release_pfn_clean(). The patch was tested with all
mentioned guest memory backends with tdp_mmu={0,1}.
No bug in upstream as it was solved fundamentally by
commit 8dd861cc07e2 ("KVM: x86/mmu: Put refcounted pages instead of blindly releasing pfns")
and related patch series.
Link: https://nvdimm.docs.kernel.org/2mib_fs_dax.html
Fixes: 2f6305dd5676 ("KVM: MMU: change kvm_tdp_mmu_map() arguments to kvm_page_fault")
Signed-off-by: Nikolay Kuratov <kniv(a)yandex-team.ru>
---
arch/x86/kvm/mmu/mmu.c | 5 +++--
arch/x86/kvm/mmu/mmu_internal.h | 2 ++
arch/x86/kvm/mmu/paging_tmpl.h | 2 +-
3 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 294775b7383b..2105f3bc2e59 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4321,6 +4321,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
smp_rmb();
ret = __kvm_faultin_pfn(vcpu, fault);
+ fault->faultin_pfn = fault->pfn;
if (ret != RET_PF_CONTINUE)
return ret;
@@ -4398,7 +4399,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
@@ -4474,7 +4475,7 @@ static int kvm_tdp_mmu_page_fault(struct kvm_vcpu *vcpu,
out_unlock:
read_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
#endif
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index decc1f153669..a016b51f9c62 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -236,6 +236,8 @@ struct kvm_page_fault {
/* Outputs of kvm_faultin_pfn. */
unsigned long mmu_seq;
kvm_pfn_t pfn;
+ /* pfn copy for kvm_release_pfn_clean(), constant after kvm_faultin_pfn() */
+ kvm_pfn_t faultin_pfn;
hva_t hva;
bool map_writable;
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index c85255073f67..b945dde6e3be 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -848,7 +848,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
out_unlock:
write_unlock(&vcpu->kvm->mmu_lock);
- kvm_release_pfn_clean(fault->pfn);
+ kvm_release_pfn_clean(fault->faultin_pfn);
return r;
}
--
2.34.1