Passing the host topology to the guest is almost certainly wrong
and will confuse the scheduler. In addition, several fields of
these CPUID leaves vary on each processor; it is simply impossible to
return the right values from KVM_GET_SUPPORTED_CPUID in such a way that
they can be passed to KVM_SET_CPUID2.
The values that will most likely prevent confusion are all zeroes.
Userspace will have to override it anyway if it wishes to present a
specific topology to the guest.
Cc: stable(a)vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini(a)redhat.com>
---
Documentation/virt/kvm/api.rst | 14 ++++++++++++++
arch/x86/kvm/cpuid.c | 32 ++++++++++++++++----------------
2 files changed, 30 insertions(+), 16 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986..20f4f6b302ff 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8249,6 +8249,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID``
It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel
has enabled in-kernel emulation of the local APIC.
+CPU topology
+~~~~~~~~~~~~
+
+Several CPUID values include topology information for the host CPU:
+0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different
+versions of KVM return different values for this information and userspace
+should not rely on it. Currently they return all zeroes.
+
+If userspace wishes to set up a guest topology, it should be careful that
+the values of these three leaves differ for each CPU. In particular,
+the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX
+for 0x8000001e; the latter also encodes the core id and node id in bits
+7:0 of EBX and ECX respectively.
+
Obsolete ioctls and capabilities
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0810e93cbedc..164bfb7e7a16 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -759,16 +759,22 @@ struct kvm_cpuid_array {
int nent;
};
+static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array)
+{
+ if (array->nent >= array->maxnent)
+ return NULL;
+
+ return &array->entries[array->nent++];
+}
+
static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid_entry2 *entry = get_next_cpuid(array);
- if (array->nent >= array->maxnent)
+ if (!entry)
return NULL;
- entry = &array->entries[array->nent++];
-
memset(entry, 0, sizeof(*entry));
entry->function = function;
entry->index = index;
@@ -945,22 +951,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->edx = edx.full;
break;
}
- /*
- * Per Intel's SDM, the 0x1f is a superset of 0xb,
- * thus they can be handled by common code.
- */
case 0x1f:
case 0xb:
/*
- * Populate entries until the level type (ECX[15:8]) of the
- * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is
- * the starting entry, filled by the primary do_host_cpuid().
+ * No topology; a valid topology is indicated by the presence
+ * of subleaf 1.
*/
- for (i = 1; entry->ecx & 0xff00; ++i) {
- entry = do_host_cpuid(array, function, i);
- if (!entry)
- goto out;
- }
+ entry->eax = entry->ebx = entry->ecx = 0;
break;
case 0xd: {
u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm();
@@ -1193,6 +1190,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
entry->ebx = entry->ecx = entry->edx = 0;
break;
case 0x8000001e:
+ /* Do not return host topology information. */
+ entry->eax = entry->ebx = entry->ecx = 0;
+ entry->edx = 0; /* reserved */
break;
case 0x8000001F:
if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) {
--
2.31.1
The "duty > cycle" trick to force the pin level of a disabled TCU2
channel would only work when the channel had been enabled previously.
Address this issue by enabling the PWM mode in jz4740_pwm_disable
(I know, right) so that the "duty > cycle" trick works before disabling
the PWM channel right after.
This issue went unnoticed, as the PWM pins on the majority of the boards
tested would default to the inactive level once the corresponding TCU
clock was enabled, so the first call to jz4740_pwm_disable() would not
actually change the pin levels.
On the GCW Zero however, the PWM pin for the backlight (PWM1, which is
a TCU2 channel) goes active as soon as the timer1 clock is enabled.
Since the jz4740_pwm_disable() function did not work on channels not
previously enabled, the backlight would shine at full brightness from
the moment the backlight driver would probe, until the backlight driver
tried to *enable* the PWM output.
With this fix, the PWM pins will be forced inactive as soon as
jz4740_pwm_apply() is called (and might be reconfigured to active if
dictated by the pwm_state). This means that there is still a tiny time
frame between the .request() and .apply() callbacks where the PWM pin
might be active. Sadly, there is no way to fix this issue: it is
impossible to write a PWM channel's registers if the corresponding clock
is not enabled, and enabling the clock is what causes the PWM pin to go
active.
There is a workaround, though, which complements this fix: simply
starting the backlight driver (or any PWM client driver) with a "init"
pinctrl state that sets the pin as an inactive GPIO. Once the driver is
probed and the pinctrl state switches to "default", the regular PWM pin
configuration can be used as it will be properly driven.
Fixes: c2693514a0a1 ("pwm: jz4740: Obtain regmap from parent node")
Signed-off-by: Paul Cercueil <paul(a)crapouillou.net>
Cc: stable(a)vger.kernel.org
---
drivers/pwm/pwm-jz4740.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index a5fdf97c0d2e..228eb104bf1e 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -102,11 +102,14 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
struct jz4740_pwm_chip *jz = to_jz4740(chip);
/*
- * Set duty > period. This trick allows the TCU channels in TCU2 mode to
- * properly return to their init level.
+ * Set duty > period, then enable PWM mode and start the counter.
+ * This trick allows to force the inactive pin level for the TCU2
+ * channels.
*/
regmap_write(jz->map, TCU_REG_TDHRc(pwm->hwpwm), 0xffff);
regmap_write(jz->map, TCU_REG_TDFRc(pwm->hwpwm), 0x0);
+ regmap_set_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm), TCU_TCSR_PWM_EN);
+ regmap_write(jz->map, TCU_REG_TESR, BIT(pwm->hwpwm));
/*
* Disable PWM output.
--
2.35.1
From: Takashi Iwai <tiwai(a)suse.de>
commit 8423f0b6d513b259fdab9c9bf4aaa6188d054c2d upstream.
There is a small race window at snd_pcm_oss_sync() that is called from
OSS PCM SNDCTL_DSP_SYNC ioctl; namely the function calls
snd_pcm_oss_make_ready() at first, then takes the params_lock mutex
for the rest. When the stream is set up again by another thread
between them, it leads to inconsistency, and may result in unexpected
results such as NULL dereference of OSS buffer as a fuzzer spotted
recently.
The fix is simply to cover snd_pcm_oss_make_ready() call into the same
params_lock mutex with snd_pcm_oss_make_ready_locked() variant.
Reported-and-tested-by: butt3rflyh4ck <butterflyhuangxx(a)gmail.com>
Reviewed-by: Jaroslav Kysela <perex(a)perex.cz>
Cc: <stable(a)vger.kernel.org>
Link: https://lore.kernel.org/r/CAFcO6XN7JDM4xSXGhtusQfS2mSBcx50VJKwQpCq=WeLt57aa…
Link: https://lore.kernel.org/r/20220905060714.22549-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai(a)suse.de>
Signed-off-by: Zubin Mithra <zsm(a)google.com>
---
Note:
* 8423f0b6d513 is present in linux-5.15.y and linux-5.4.y; missing in
linux-5.10.y.
* Backport addresses conflict due to surrounding context.
* Tests run: build and boot.
sound/core/oss/pcm_oss.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c
index f88de74da1eb..de6f94bee50b 100644
--- a/sound/core/oss/pcm_oss.c
+++ b/sound/core/oss/pcm_oss.c
@@ -1662,13 +1662,14 @@ static int snd_pcm_oss_sync(struct snd_pcm_oss_file *pcm_oss_file)
runtime = substream->runtime;
if (atomic_read(&substream->mmap_count))
goto __direct;
- if ((err = snd_pcm_oss_make_ready(substream)) < 0)
- return err;
atomic_inc(&runtime->oss.rw_ref);
if (mutex_lock_interruptible(&runtime->oss.params_lock)) {
atomic_dec(&runtime->oss.rw_ref);
return -ERESTARTSYS;
}
+ err = snd_pcm_oss_make_ready_locked(substream);
+ if (err < 0)
+ goto unlock;
format = snd_pcm_oss_format_from(runtime->oss.format);
width = snd_pcm_format_physical_width(format);
if (runtime->oss.buffer_used > 0) {
--
2.38.0.rc1.362.ged0d419d3c-goog
The coreboot_table driver registers a coreboot bus while probing a
"coreboot_table" device representing the coreboot table memory region.
Probing this device (i.e., registering the bus) is a dependency for the
module_init() functions of any driver for this bus (e.g.,
memconsole-coreboot.c / memconsole_driver_init()).
With synchronous probe, this dependency works OK, as the link order in
the Makefile ensures coreboot_table_driver_init() (and thus,
coreboot_table_probe()) completes before a coreboot device driver tries
to add itself to the bus.
With asynchronous probe, however, coreboot_table_probe() may race with
memconsole_driver_init(), and so we're liable to hit one of these two:
1. coreboot_driver_register() eventually hits "[...] the bus was not
initialized.", and the memconsole driver fails to register; or
2. coreboot_driver_register() gets past #1, but still races with
bus_register() and hits some other undefined/crashing behavior (e.g.,
in driver_find() [1])
We can resolve this by registering the bus in our initcall, and only
deferring "device" work (scanning the coreboot memory region and
creating sub-devices) to probe().
[1] Example failure, using 'driver_async_probe=*' kernel command line:
[ 0.114217] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010
...
[ 0.114307] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc1 #63
[ 0.114316] Hardware name: Google Scarlet (DT)
...
[ 0.114488] Call trace:
[ 0.114494] _raw_spin_lock+0x34/0x60
[ 0.114502] kset_find_obj+0x28/0x84
[ 0.114511] driver_find+0x30/0x50
[ 0.114520] driver_register+0x64/0x10c
[ 0.114528] coreboot_driver_register+0x30/0x3c
[ 0.114540] memconsole_driver_init+0x24/0x30
[ 0.114550] do_one_initcall+0x154/0x2e0
[ 0.114560] do_initcall_level+0x134/0x160
[ 0.114571] do_initcalls+0x60/0xa0
[ 0.114579] do_basic_setup+0x28/0x34
[ 0.114588] kernel_init_freeable+0xf8/0x150
[ 0.114596] kernel_init+0x2c/0x12c
[ 0.114607] ret_from_fork+0x10/0x20
[ 0.114624] Code: 5280002b 1100054a b900092a f9800011 (885ffc01)
[ 0.114631] ---[ end trace 0000000000000000 ]---
Fixes: b81e3140e412 ("firmware: coreboot: Make bus registration symmetric")
Cc: <stable(a)vger.kernel.org>
Signed-off-by: Brian Norris <briannorris(a)chromium.org>
---
Currently, get_maintainers.pl tells me Greg should pick this up. But I
CC the chrome-platform list too, since it seems reasonable for Google
folks (probably ChromeOS folks are most active here?) to maintain
Google/Chrome drivers.
Let me know if y'all would like this official, and I'll push out a
MAINTAINERS patch.
drivers/firmware/google/coreboot_table.c | 37 +++++++++++++++++++-----
1 file changed, 29 insertions(+), 8 deletions(-)
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index c52bcaa9def6..9ca21feb9d45 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -149,12 +149,8 @@ static int coreboot_table_probe(struct platform_device *pdev)
if (!ptr)
return -ENOMEM;
- ret = bus_register(&coreboot_bus_type);
- if (!ret) {
- ret = coreboot_table_populate(dev, ptr);
- if (ret)
- bus_unregister(&coreboot_bus_type);
- }
+ ret = coreboot_table_populate(dev, ptr);
+
memunmap(ptr);
return ret;
@@ -169,7 +165,6 @@ static int __cb_dev_unregister(struct device *dev, void *dummy)
static int coreboot_table_remove(struct platform_device *pdev)
{
bus_for_each_dev(&coreboot_bus_type, NULL, NULL, __cb_dev_unregister);
- bus_unregister(&coreboot_bus_type);
return 0;
}
@@ -199,6 +194,32 @@ static struct platform_driver coreboot_table_driver = {
.of_match_table = of_match_ptr(coreboot_of_match),
},
};
-module_platform_driver(coreboot_table_driver);
+
+static int __init coreboot_table_driver_init(void)
+{
+ int ret;
+
+ ret = bus_register(&coreboot_bus_type);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&coreboot_table_driver);
+ if (ret) {
+ bus_unregister(&coreboot_bus_type);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit coreboot_table_driver_exit(void)
+{
+ platform_driver_unregister(&coreboot_table_driver);
+ bus_unregister(&coreboot_bus_type);
+}
+
+module_init(coreboot_table_driver_init);
+module_exit(coreboot_table_driver_exit);
+
MODULE_AUTHOR("Google, Inc.");
MODULE_LICENSE("GPL");
--
2.38.0.413.g74048e4d9e-goog
When bfqq is shared by multiple processes it can happen that one of the
processes gets moved to a different cgroup (or just starts submitting IO
for different cgroup). In case that happens we need to split the merged
bfqq as otherwise we will have IO for multiple cgroups in one bfqq and
we will just account IO time to wrong entities etc.
Similarly if the bfqq is scheduled to merge with another bfqq but the
merge didn't happen yet, cancel the merge as it need not be valid
anymore.
CC: stable(a)vger.kernel.org
Fixes: e21b7a0b9887 ("block, bfq: add full hierarchical scheduling and cgroups support")
Signed-off-by: Jan Kara <jack(a)suse.cz>
---
block/bfq-cgroup.c | 36 +++++++++++++++++++++++++++++++++---
block/bfq-iosched.c | 2 +-
block/bfq-iosched.h | 1 +
3 files changed, 35 insertions(+), 4 deletions(-)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 420eda2589c0..9352f3cc2377 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -743,9 +743,39 @@ static struct bfq_group *__bfq_bic_change_cgroup(struct bfq_data *bfqd,
}
if (sync_bfqq) {
- entity = &sync_bfqq->entity;
- if (entity->sched_data != &bfqg->sched_data)
- bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) {
+ /* We are the only user of this bfqq, just move it */
+ if (sync_bfqq->entity.sched_data != &bfqg->sched_data)
+ bfq_bfqq_move(bfqd, sync_bfqq, bfqg);
+ } else {
+ struct bfq_queue *bfqq;
+
+ /*
+ * The queue was merged to a different queue. Check
+ * that the merge chain still belongs to the same
+ * cgroup.
+ */
+ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq)
+ if (bfqq->entity.sched_data !=
+ &bfqg->sched_data)
+ break;
+ if (bfqq) {
+ /*
+ * Some queue changed cgroup so the merge is
+ * not valid anymore. We cannot easily just
+ * cancel the merge (by clearing new_bfqq) as
+ * there may be other processes using this
+ * queue and holding refs to all queues below
+ * sync_bfqq->new_bfqq. Similarly if the merge
+ * already happened, we need to detach from
+ * bfqq now so that we cannot merge bio to a
+ * request from the old cgroup.
+ */
+ bfq_put_cooperator(sync_bfqq);
+ bfq_release_process_ref(bfqd, sync_bfqq);
+ bic_set_bfqq(bic, NULL, 1);
+ }
+ }
}
return bfqg;
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7d00b21ebe5d..89fe3f85eb3c 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5315,7 +5315,7 @@ static void bfq_put_stable_ref(struct bfq_queue *bfqq)
bfq_put_queue(bfqq);
}
-static void bfq_put_cooperator(struct bfq_queue *bfqq)
+void bfq_put_cooperator(struct bfq_queue *bfqq)
{
struct bfq_queue *__bfqq, *next;
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 3b83e3d1c2e5..a56763045d19 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -979,6 +979,7 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
void bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq,
bool compensate, enum bfqq_expiration reason);
void bfq_put_queue(struct bfq_queue *bfqq);
+void bfq_put_cooperator(struct bfq_queue *bfqq);
void bfq_end_wr_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq);
void bfq_schedule_dispatch(struct bfq_data *bfqd);
--
2.34.1