- Linux-stable-mirror - lists.linaro.org

[PATCH v2 2/2] i2c: microchip-core: fix "ghost" detections

by Conor Dooley

From: Conor Dooley <conor.dooley(a)microchip.com> Running i2cdetect currently produces an output akin to: 0 1 2 3 4 5 6 7 8 9 a b c d e f 00: 08 -- 0a -- 0c -- 0e -- 10: 10 -- 12 -- 14 -- 16 -- UU 19 -- 1b -- 1d -- 1f 20: -- 21 -- 23 -- 25 -- 27 -- 29 -- 2b -- 2d -- 2f 30: -- -- -- -- -- -- -- -- 38 -- 3a -- 3c -- 3e -- 40: 40 -- 42 -- 44 -- 46 -- 48 -- 4a -- 4c -- 4e -- 50: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 60: 60 -- 62 -- 64 -- 66 -- 68 -- 6a -- 6c -- 6e -- 70: 70 -- 72 -- 74 -- 76 -- This happens because for an i2c_msg with a len of 0 the driver will mark the transmission of the message as a success once the START has been sent, without waiting for the devices on the bus to respond with an ACK/NAK. Since i2cdetect seems to run in a tight loop over all addresses the NAK is treated as part of the next test for the next address. Delete the fast path that marks a message as complete when idev->msg_len is zero after sending a START/RESTART since this isn't a valid scenario. CC: stable(a)vger.kernel.org Fixes: 64a6f1c4987e ("i2c: add support for microchip fpga i2c controllers") Signed-off-by: Conor Dooley <conor.dooley(a)microchip.com> --- My original tests with KASAN/UBSAN/PREEMPT_RT enabled saw far fewer of these "ghost" detections and the skip caused by the occupied address at 0x18 on this bus is part of my attribution of the problem. Unless I'm mistaken, there's no scenario in which you would consider a message complete after sending a START/RESTART without waiting for the ACK/NAK, and this code path I deleted is useless? Looking out of tree, it predates my involvement with the code so I don't know where it came from, nor is there anything like it in the bare-metal driver the linux one was based on. 
--- drivers/i2c/busses/i2c-microchip-corei2c.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index e5af38dfaa81..b0a51695138a 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -287,8 +287,6 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) ctrl &= ~CTRL_STA; writeb(idev->addr, idev->base + CORE_I2C_DATA); writeb(ctrl, idev->base + CORE_I2C_CTRL); - if (idev->msg_len == 0) - finished = true; break; case STATUS_M_ARB_LOST: idev->msg_err = -EAGAIN; -- 2.45.2

11 months, 1 week

1
0
0 0

[PATCH v2 1/2] i2c: microchip-core: actually use repeated sends

by Conor Dooley

From: Conor Dooley <conor.dooley(a)microchip.com> At present, where repeated sends are intended to be used, the i2c-microchip-core driver sends a stop followed by a start. Lots of i2c devices must not malfunction in the face of this behaviour, because the driver has operated like this for years! Try to keep track of whether or not a repeated send is required, and suppress sending a stop in these cases. CC: stable(a)vger.kernel.org Fixes: 64a6f1c4987e ("i2c: add support for microchip fpga i2c controllers") Signed-off-by: Conor Dooley <conor.dooley(a)microchip.com> --- drivers/i2c/busses/i2c-microchip-corei2c.c | 124 ++++++++++++++++----- 1 file changed, 96 insertions(+), 28 deletions(-) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index 0b0a1c4d17ca..e5af38dfaa81 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -93,27 +93,35 @@ * @base: pointer to register struct * @dev: device reference * @i2c_clk: clock reference for i2c input clock + * @msg_queue: pointer to the messages requiring sending * @buf: pointer to msg buffer for easier use * @msg_complete: xfer completion object * @adapter: core i2c abstraction * @msg_err: error code for completed message * @bus_clk_rate: current i2c bus clock rate * @isr_status: cached copy of local ISR status + * @total_num: total number of messages to be sent/received + * @current_num: index of the current message being sent/received * @msg_len: number of bytes transferred in msg * @addr: address of the current slave + * @restart_needed: whether or not a repeated start is required after current message */ struct mchp_corei2c_dev { void __iomem *base; struct device *dev; struct clk *i2c_clk; + struct i2c_msg *msg_queue; u8 *buf; struct completion msg_complete; struct i2c_adapter adapter; int msg_err; + int total_num; + int current_num; u32 bus_clk_rate; u32 isr_status; u16 msg_len; u8 addr; + bool restart_needed; }; 
static void mchp_corei2c_core_disable(struct mchp_corei2c_dev *idev) @@ -222,6 +230,47 @@ static int mchp_corei2c_fill_tx(struct mchp_corei2c_dev *idev) return 0; } +static void mchp_corei2c_next_msg(struct mchp_corei2c_dev *idev) +{ + struct i2c_msg *this_msg; + u8 ctrl; + + if (idev->current_num >= idev->total_num) { + complete(&idev->msg_complete); + return; + } + + /* + * If there's been an error, the isr needs to return control + * to the "main" part of the driver, so as not to keep sending + * messages once it completes and clears the SI bit. + */ + if (idev->msg_err) { + complete(&idev->msg_complete); + return; + } + + this_msg = idev->msg_queue++; + + if (idev->current_num < (idev->total_num - 1)) { + struct i2c_msg *next_msg = idev->msg_queue; + + idev->restart_needed = next_msg->flags & I2C_M_RD; + } else { + idev->restart_needed = false; + } + + idev->addr = i2c_8bit_addr_from_msg(this_msg); + idev->msg_len = this_msg->len; + idev->buf = this_msg->buf; + + ctrl = readb(idev->base + CORE_I2C_CTRL); + ctrl |= CTRL_STA; + writeb(ctrl, idev->base + CORE_I2C_CTRL); + + idev->current_num++; +} + static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) { u32 status = idev->isr_status; @@ -247,10 +296,14 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) break; case STATUS_M_SLAW_ACK: case STATUS_M_TX_DATA_ACK: - if (idev->msg_len > 0) + if (idev->msg_len > 0) { mchp_corei2c_fill_tx(idev); - else - last_byte = true; + } else { + if (idev->restart_needed) + finished = true; + else + last_byte = true; + } break; case STATUS_M_TX_DATA_NACK: case STATUS_M_SLAR_NACK: @@ -287,7 +340,7 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) mchp_corei2c_stop(idev); if (last_byte || finished) - complete(&idev->msg_complete); + mchp_corei2c_next_msg(idev); return IRQ_HANDLED; } @@ -311,21 +364,48 @@ static irqreturn_t mchp_corei2c_isr(int irq, void *_dev) return ret; } -static int 
mchp_corei2c_xfer_msg(struct mchp_corei2c_dev *idev, - struct i2c_msg *msg) +static int mchp_corei2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num) { - u8 ctrl; + struct mchp_corei2c_dev *idev = i2c_get_adapdata(adap); + struct i2c_msg *this_msg = msgs; unsigned long time_left; - - idev->addr = i2c_8bit_addr_from_msg(msg); - idev->msg_len = msg->len; - idev->buf = msg->buf; - idev->msg_err = 0; - - reinit_completion(&idev->msg_complete); + u8 ctrl; mchp_corei2c_core_enable(idev); + /* + * The isr controls the flow of a transfer, this info needs to be saved + * to a location that it can access the queue information from. + */ + idev->restart_needed = false; + idev->msg_queue = msgs; + idev->total_num = num; + idev->current_num = 0; + + /* + * But the first entry to the isr is triggered by the start in this + * function, so the first message needs to be "dequeued". + */ + idev->addr = i2c_8bit_addr_from_msg(this_msg); + idev->msg_len = this_msg->len; + idev->buf = this_msg->buf; + idev->msg_err = 0; + + if (idev->total_num > 1) { + struct i2c_msg *next_msg = msgs + 1; + + idev->restart_needed = next_msg->flags & I2C_M_RD; + } + + idev->current_num++; + idev->msg_queue++; + + reinit_completion(&idev->msg_complete); + + /* + * Send the first start to pass control to the isr + */ ctrl = readb(idev->base + CORE_I2C_CTRL); ctrl |= CTRL_STA; writeb(ctrl, idev->base + CORE_I2C_CTRL); @@ -335,20 +415,8 @@ static int mchp_corei2c_xfer_msg(struct mchp_corei2c_dev *idev, if (!time_left) return -ETIMEDOUT; - return idev->msg_err; -} - -static int mchp_corei2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num) -{ - struct mchp_corei2c_dev *idev = i2c_get_adapdata(adap); - int i, ret; - - for (i = 0; i < num; i++) { - ret = mchp_corei2c_xfer_msg(idev, msgs++); - if (ret) - return ret; - } + if (idev->msg_err) + return idev->msg_err; return num; } -- 2.45.2

11 months, 1 week

1
0
0 0

[PATCH v2] virtio: fix reference leak in register_virtio_device()

by Ma Ke

Once device_add(&dev->dev) has failed, call put_device() to explicitly release dev->dev; otherwise it could cause a double-free problem. As the comment of device_add() says, 'if device_add() succeeds, you should call device_del() when you want to get rid of it. If device_add() has not succeeded, use only put_device() to drop the reference count'. Found by code review. Cc: stable(a)vger.kernel.org Fixes: f2b44cde7e16 ("virtio: split device_register into device_initialize and device_add") Signed-off-by: Ma Ke <make_ruc2021(a)163.com> --- Changes in v2: - modified the bug description to make it more clear; - changed the Fixes tag. --- drivers/virtio/virtio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index b9095751e43b..ac721b5597e8 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -503,6 +503,7 @@ int register_virtio_device(struct virtio_device *dev) out_of_node_put: of_node_put(dev->dev.of_node); + put_device(&dev->dev); out_ida_remove: ida_free(&virtio_index_ida, dev->index); out: -- 2.25.1

11 months, 1 week

3
2
0 0

[PATCH] objtool: add bch2_trans_unlocked_error to bcachefs noreturns.

by chenchangcheng

fs/bcachefs/btree_trans_commit.o: warning: objtool: bch2_trans_commit_write_locked.isra.0() falls through to next function do_bch2_trans_commit.isra.0() fs/bcachefs/btree_trans_commit.o: warning: objtool: .text: unexpected end of section ...... fs/bcachefs/btree_update.o: warning: objtool: bch2_trans_update_get_key_cache() falls through to next function flush_new_cached_update() fs/bcachefs/btree_update.o: warning: objtool: flush_new_cached_update() falls through to next function bch2_trans_update_by_path() Signed-off-by: chenchangcheng <ccc194101(a)163.com> --- tools/objtool/noreturns.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index f37614cc2c1b..88a0fa8807be 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -49,3 +49,4 @@ NORETURN(x86_64_start_kernel) NORETURN(x86_64_start_reservations) NORETURN(xen_cpu_bringup_again) NORETURN(xen_start_kernel) +NORETURN(bch2_trans_unlocked_error) -- 2.25.1

11 months, 1 week

1
0
0 0

[PATCH V7] mm, compaction: don't use ALLOC_CMA for unmovable allocations

by yangge1116＠126.com

From: yangge <yangge1116(a)126.com> Since commit 984fdba6a32e ("mm, compaction: use proper alloc_flags in __compaction_suitable()") allowed compaction to proceed when free pages required for compaction reside in the CMA pageblocks, it's possible that __compaction_suitable() always returns true, and in some cases, it's not acceptable. There are 4 NUMA nodes on my machine, and each NUMA node has 32GB of memory. I have configured 16GB of CMA memory on each NUMA node, and starting a 32GB virtual machine with device passthrough is extremely slow, taking almost an hour. During the start-up of the virtual machine, it will call pin_user_pages_remote(..., FOLL_LONGTERM, ...) to allocate memory. Long term GUP cannot allocate memory from the CMA area, so a maximum of 16 GB of non-CMA memory on a NUMA node can be used as virtual machine memory. Since there is 16G of free CMA memory on the NUMA node, the watermark for order-0 is always met for compaction, so __compaction_suitable() always returns true, even if the node is unable to allocate non-CMA memory for the virtual machine. For costly allocations, because __compaction_suitable() always returns true, __alloc_pages_slowpath() can't exit at the appropriate place, resulting in excessively long virtual machine startup times. Call trace: __alloc_pages_slowpath if (compact_result == COMPACT_SKIPPED || compact_result == COMPACT_DEFERRED) goto nopage; // should exit __alloc_pages_slowpath() from here Other unmovable allocations, like dma_buf, which can be large in a Linux system, are also unable to allocate memory from CMA, and these allocations suffer from the same problems described above. In order to quickly fall back to a remote node, we should remove ALLOC_CMA both in __compaction_suitable() and __isolate_free_page() for unmovable allocations. After this fix, starting a 32GB virtual machine with device passthrough takes only a few seconds. 
Fixes: 984fdba6a32e ("mm, compaction: use proper alloc_flags in __compaction_suitable()") Cc: <stable(a)vger.kernel.org> Signed-off-by: yangge <yangge1116(a)126.com> Reviewed-by: Baolin Wang <baolin.wang(a)linux.alibaba.com> --- V7: -- fix the changelog and code documentation V6: -- update cc->alloc_flags to keep the original loginc V5: - add 'alloc_flags' parameter for __isolate_free_page() - remove 'usa_cma' variable V4: - rich the commit log description V3: - fix build errors - add ALLOC_CMA both in should_continue_reclaim() and compaction_ready() V2: - using the 'cc->alloc_flags' to determin if 'ALLOC_CMA' is needed - rich the commit log description include/linux/compaction.h | 6 ++++-- mm/compaction.c | 26 +++++++++++++++----------- mm/internal.h | 3 ++- mm/page_alloc.c | 7 +++++-- mm/page_isolation.c | 3 ++- mm/page_reporting.c | 2 +- mm/vmscan.c | 4 ++-- 7 files changed, 31 insertions(+), 20 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e947764..b4c3ac3 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -90,7 +90,8 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, struct page **page); extern void reset_isolation_suitable(pg_data_t *pgdat); extern bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx); + int highest_zoneidx, + unsigned int alloc_flags); extern void compaction_defer_reset(struct zone *zone, int order, bool alloc_success); @@ -108,7 +109,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat) } static inline bool compaction_suitable(struct zone *zone, int order, - int highest_zoneidx) + int highest_zoneidx, + unsigned int alloc_flags) { return false; } diff --git a/mm/compaction.c b/mm/compaction.c index 07bd227..223f2da 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -655,7 +655,7 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, /* Found a free page, will break it into order-0 pages */ order = 
buddy_order(page); - isolated = __isolate_free_page(page, order); + isolated = __isolate_free_page(page, order, cc->alloc_flags); if (!isolated) break; set_page_private(page, order); @@ -1634,7 +1634,7 @@ static void fast_isolate_freepages(struct compact_control *cc) /* Isolate the page if available */ if (page) { - if (__isolate_free_page(page, order)) { + if (__isolate_free_page(page, order, cc->alloc_flags)) { set_page_private(page, order); nr_isolated = 1 << order; nr_scanned += nr_isolated - 1; @@ -2381,6 +2381,7 @@ static enum compact_result compact_finished(struct compact_control *cc) static bool __compaction_suitable(struct zone *zone, int order, int highest_zoneidx, + unsigned int alloc_flags, unsigned long wmark_target) { unsigned long watermark; @@ -2395,25 +2396,26 @@ static bool __compaction_suitable(struct zone *zone, int order, * even if compaction succeeds. * For costly orders, we require low watermark instead of min for * compaction to proceed to increase its chances. - * ALLOC_CMA is used, as pages in CMA pageblocks are considered - * suitable migration targets + * In addition to unmovable allocations, ALLOC_CMA is used, as pages in + * CMA pageblocks are considered suitable migration targets */ watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? low_wmark_pages(zone) : min_wmark_pages(zone); watermark += compact_gap(order); return __zone_watermark_ok(zone, 0, watermark, highest_zoneidx, - ALLOC_CMA, wmark_target); + alloc_flags & ALLOC_CMA, wmark_target); } /* * compaction_suitable: Is this suitable to run compaction on this zone now? 
*/ -bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx) +bool compaction_suitable(struct zone *zone, int order, int highest_zoneidx, + unsigned int alloc_flags) { enum compact_result compact_result; bool suitable; - suitable = __compaction_suitable(zone, order, highest_zoneidx, + suitable = __compaction_suitable(zone, order, highest_zoneidx, alloc_flags, zone_page_state(zone, NR_FREE_PAGES)); /* * fragmentation index determines if allocation failures are due to @@ -2474,7 +2476,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, available = zone_reclaimable_pages(zone) / order; available += zone_page_state_snapshot(zone, NR_FREE_PAGES); if (__compaction_suitable(zone, order, ac->highest_zoneidx, - available)) + alloc_flags, available)) return true; } @@ -2499,7 +2501,7 @@ compaction_suit_allocation_order(struct zone *zone, unsigned int order, alloc_flags)) return COMPACT_SUCCESS; - if (!compaction_suitable(zone, order, highest_zoneidx)) + if (!compaction_suitable(zone, order, highest_zoneidx, alloc_flags)) return COMPACT_SKIPPED; return COMPACT_CONTINUE; @@ -2893,6 +2895,7 @@ static int compact_node(pg_data_t *pgdat, bool proactive) struct compact_control cc = { .order = -1, .mode = proactive ? 
MIGRATE_SYNC_LIGHT : MIGRATE_SYNC, + .alloc_flags = ALLOC_CMA, .ignore_skip_hint = true, .whole_zone = true, .gfp_mask = GFP_KERNEL, @@ -3037,7 +3040,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat) ret = compaction_suit_allocation_order(zone, pgdat->kcompactd_max_order, - highest_zoneidx, ALLOC_WMARK_MIN); + highest_zoneidx, ALLOC_CMA | ALLOC_WMARK_MIN); if (ret == COMPACT_CONTINUE) return true; } @@ -3058,6 +3061,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) .search_order = pgdat->kcompactd_max_order, .highest_zoneidx = pgdat->kcompactd_highest_zoneidx, .mode = MIGRATE_SYNC_LIGHT, + .alloc_flags = ALLOC_CMA | ALLOC_WMARK_MIN, .ignore_skip_hint = false, .gfp_mask = GFP_KERNEL, }; @@ -3078,7 +3082,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) continue; ret = compaction_suit_allocation_order(zone, - cc.order, zoneid, ALLOC_WMARK_MIN); + cc.order, zoneid, cc.alloc_flags); if (ret != COMPACT_CONTINUE) continue; diff --git a/mm/internal.h b/mm/internal.h index 3922788..6d257c8 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -662,7 +662,8 @@ static inline void clear_zone_contiguous(struct zone *zone) zone->contiguous = false; } -extern int __isolate_free_page(struct page *page, unsigned int order); +extern int __isolate_free_page(struct page *page, unsigned int order, + unsigned int alloc_flags); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void memblock_free_pages(struct page *page, unsigned long pfn, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dde19db..1bfdca3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2809,7 +2809,8 @@ void split_page(struct page *page, unsigned int order) } EXPORT_SYMBOL_GPL(split_page); -int __isolate_free_page(struct page *page, unsigned int order) +int __isolate_free_page(struct page *page, unsigned int order, + unsigned int alloc_flags) { struct zone *zone = page_zone(page); int mt = get_pageblock_migratetype(page); @@ -2823,7 +2824,8 @@ int 
__isolate_free_page(struct page *page, unsigned int order) * exists. */ watermark = zone->_watermark[WMARK_MIN] + (1UL << order); - if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) + if (!zone_watermark_ok(zone, 0, watermark, 0, + alloc_flags & ALLOC_CMA)) return 0; } @@ -6454,6 +6456,7 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end, .order = -1, .zone = page_zone(pfn_to_page(start)), .mode = MIGRATE_SYNC, + .alloc_flags = ALLOC_CMA, .ignore_skip_hint = true, .no_set_skip_hint = true, .alloc_contig = true, diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c608e9d..a1f2c79 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -229,7 +229,8 @@ static void unset_migratetype_isolate(struct page *page, int migratetype) buddy = find_buddy_page_pfn(page, page_to_pfn(page), order, NULL); if (buddy && !is_migrate_isolate_page(buddy)) { - isolated_page = !!__isolate_free_page(page, order); + isolated_page = !!__isolate_free_page(page, order, + ALLOC_CMA); /* * Isolating a free page in an isolated pageblock * is expected to always work as watermarks don't diff --git a/mm/page_reporting.c b/mm/page_reporting.c index e4c428e..fd3813b 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -198,7 +198,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone, /* Attempt to pull page from list and place in scatterlist */ if (*offset) { - if (!__isolate_free_page(page, order)) { + if (!__isolate_free_page(page, order, ALLOC_CMA)) { next = page; break; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 5e03a61..33f5b46 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5815,7 +5815,7 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, sc->reclaim_idx, 0)) return false; - if (compaction_suitable(zone, sc->order, sc->reclaim_idx)) + if (compaction_suitable(zone, sc->order, sc->reclaim_idx, ALLOC_CMA)) return false; } @@ -6043,7 +6043,7 @@ static inline bool compaction_ready(struct zone 
*zone, struct scan_control *sc) return true; /* Compaction cannot yet proceed. Do reclaim. */ - if (!compaction_suitable(zone, sc->order, sc->reclaim_idx)) + if (!compaction_suitable(zone, sc->order, sc->reclaim_idx, ALLOC_CMA)) return false; /* -- 2.7.4

11 months, 1 week

4
6
0 0

[BUG stable 6.6] kernel crash in BPF selftests dummy_st_ops

by Shung-Hsi Yu

(Maybe a good first-timer bug if anyone wants to try contributing during the holiday season) The stable v6.6 kernel currently runs into a kernel panic when running the test_progs tests from BPF selftests. Judging by the log it is failing in one of the dummy_st_ops tests (which comes after deny_namespace tests if you look at the output of `test_progs -l`). My guess is that it is related to "check bpf_dummy_struct_ops program params for test runs"[1], perhaps we're missing a commit or two. Some notes for anyone tackling this for the first time: 1. You'll need to use the stable/linux-6.6.y branch from https://github.com/shunghsiyu/bpf. The current v6.6.66 one fails when compiling the BPF selftests[2] 2. The easiest way to run BPF selftests is to get the relevant dependencies[3] installed, and run tools/testing/selftests/bpf/vmtest.sh (need to give it `-i` to download the root image first, and also might need to specify clang and llvm-strip by setting the environment variables CLANG=clang-17 and LLVM_STRIP=llvm-strip-17, respectively). For a more solid setup, see materials[4][5] from Manu Bretelle 3. 
Patch(es) should be send to stable(a)vger.kernel.org, following the stable process[6], see [2] as an example Below is the output from vmtest.sh: #68/1 deny_namespace/unpriv_userns_create_no_bpf:OK #68/2 deny_namespace/userns_create_bpf:OK #68 deny_namespace:OK [ 26.829153] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 26.831136] #PF: supervisor read access in kernel mode [ 26.832635] #PF: error_code(0x0000) - not-present page [ 26.833999] PGD 0 P4D 0 [ 26.834771] Oops: 0000 [#1] PREEMPT SMP PTI [ 26.835997] CPU: 2 PID: 119 Comm: test_progs Tainted: G OE 6.6.66-00003-gd80551078e71 #3 [ 26.838774] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 26.841152] RIP: 0010:bpf_prog_8ee9cbe7c9b5a50f_test_1+0x17/0x24 [ 26.842877] Code: 00 00 00 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 0f 1e fa 48 8b 7f 00 <8b> 47 00 be 5a 00 00 00 89 77 00 c9 c3 cc cc cc cc cc cc cc cc c0 [ 26.847953] RSP: 0018:ffff9e6b803b7d88 EFLAGS: 00010202 [ 26.849425] RAX: 0000000000000001 RBX: 0000000000000001 RCX: 2845e103d7dffb60 [ 26.851483] RDX: 0000000000000000 RSI: 0000000084d09025 RDI: 0000000000000000 [ 26.853508] RBP: ffff9e6b803b7d88 R08: 0000000000000001 R09: 0000000000000000 [ 26.855670] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9754c0b5f700 [ 26.857824] R13: ffff9754c09cc800 R14: ffff9754c0b5f680 R15: ffff9754c0b5f760 [ 26.859741] FS: 00007f77dee12740(0000) GS:ffff9754fbc80000(0000) knlGS:0000000000000000 [ 26.862087] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 26.863705] CR2: 0000000000000000 CR3: 00000001020e6003 CR4: 0000000000170ee0 [ 26.865689] Call Trace: [ 26.866407] <TASK> [ 26.866982] ? __die+0x24/0x70 [ 26.867774] ? page_fault_oops+0x15b/0x450 [ 26.868882] ? search_bpf_extables+0xb0/0x160 [ 26.870076] ? fixup_exception+0x26/0x330 [ 26.871214] ? exc_page_fault+0x64/0x190 [ 26.872293] ? asm_exc_page_fault+0x26/0x30 [ 26.873352] ? 
bpf_prog_8ee9cbe7c9b5a50f_test_1+0x17/0x24 [ 26.874705] ? __bpf_prog_enter+0x3f/0xc0 [ 26.875718] ? bpf_struct_ops_test_run+0x1b8/0x2c0 [ 26.876942] ? __sys_bpf+0xc4e/0x2c30 [ 26.877898] ? __x64_sys_bpf+0x20/0x30 [ 26.878812] ? do_syscall_64+0x37/0x90 [ 26.879704] ? entry_SYSCALL_64_after_hwframe+0x78/0xe2 [ 26.880918] </TASK> [ 26.881409] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod(OE)] [ 26.883095] CR2: 0000000000000000 [ 26.883934] ---[ end trace 0000000000000000 ]--- [ 26.885099] RIP: 0010:bpf_prog_8ee9cbe7c9b5a50f_test_1+0x17/0x24 [ 26.886452] Code: 00 00 00 cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc f3 0f 1e fa 0f 1f 44 00 00 66 90 55 48 89 e5 f3 0f 1e fa 48 8b 7f 00 <8b> 47 00 be 5a 00 00 00 89 77 00 c9 c3 cc cc cc cc cc cc cc cc c0 [ 26.890379] RSP: 0018:ffff9e6b803b7d88 EFLAGS: 00010202 [ 26.891450] RAX: 0000000000000001 RBX: 0000000000000001 RCX: 2845e103d7dffb60 [ 26.892779] RDX: 0000000000000000 RSI: 0000000084d09025 RDI: 0000000000000000 [ 26.894254] RBP: ffff9e6b803b7d88 R08: 0000000000000001 R09: 0000000000000000 [ 26.895630] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9754c0b5f700 [ 26.897008] R13: ffff9754c09cc800 R14: ffff9754c0b5f680 R15: ffff9754c0b5f760 [ 26.898337] FS: 00007f77dee12740(0000) GS:ffff9754fbc80000(0000) knlGS:0000000000000000 [ 26.899972] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 26.901076] CR2: 0000000000000000 CR3: 00000001020e6003 CR4: 0000000000170ee0 [ 26.902336] Kernel panic - not syncing: Fatal exception [ 26.903639] Kernel Offset: 0x36000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 26.905693] ---[ end Kernel panic - not syncing: Fatal exception ]--- 1: https://lore.kernel.org/all/20240424012821.595216-1-eddyz87@gmail.com/t/#u 2: https://lore.kernel.org/all/20241217080240.46699-1-shung-hsi.yu@suse.com/t/… 3: https://gist.github.com/shunghsiyu/1bd4189654cce5b3e55c2ab8da7dd33d#file-vm… 4: 
https://chantra.github.io/bpfcitools/bpf-local-development.html 5: http://oldvger.kernel.org/bpfconf2024_material/BPF-dev-hacks.pdf 6: https://www.kernel.org/doc/html/latest/process/stable-kernel-rules.html

11 months, 1 week

2
2
0 0

[PATCH v3] RDMA/srp: Fix error handling in srp_add_port

by Ma Ke

If device_add() fails, call only put_device() to decrement the reference count for cleanup. Do not call device_del() before put_device(). As the comment of device_add() says, 'if device_add() succeeds, you should call device_del() when you want to get rid of it. If device_add() has not succeeded, use only put_device() to drop the reference count'. Found by code review. Cc: stable(a)vger.kernel.org Fixes: c8e4c2397655 ("RDMA/srp: Rework the srp_add_port() error path") Signed-off-by: Ma Ke <make_ruc2021(a)163.com> --- Changes in v3: - modified the bug description as suggested; - added a blank line to separate the description and the tags. Changes in v2: - modified the bug description as suggested. --- drivers/infiniband/ulp/srp/ib_srp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 2916e77f589b..7289ae0b83ac 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3978,7 +3978,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u32 port) return host; put_host: - device_del(&host->dev); put_device(&host->dev); return NULL; } -- 2.25.1

11 months, 1 week

2
1
0 0

request to backport this patch to v6.6 stable tree

by Hardik Gohil

ethtool: fail closed if we can't get max channel used in indirection tables [ Upstream commit 2899d58462ba868287d6ff3acad3675e7adf934f ]

11 months, 1 week

2
3
0 0

[PATCH v2] usb: fix reference leak in usb_new_device()

by Ma Ke

When device_add(&udev->dev) fails, call put_device() to explicitly release udev->dev. The routine which calls usb_new_device() does not call put_device() when an error occurs. As the comment of device_add() says, 'if device_add() succeeds, you should call device_del() when you want to get rid of it. If device_add() has not succeeded, use only put_device() to drop the reference count'. Found by code review. Cc: stable(a)vger.kernel.org Fixes: 9f8b17e643fe ("USB: make usbdevices export their device nodes instead of using a separate class") Signed-off-by: Ma Ke <make_ruc2021(a)163.com> --- Changes in v2: - modified the bug description to make it more clear; - added the missed part of the patch. --- drivers/usb/core/hub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 4b93c0bd1d4b..ddd572312296 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -2651,6 +2651,7 @@ int usb_new_device(struct usb_device *udev) err = device_add(&udev->dev); if (err) { dev_err(&udev->dev, "can't device_add, error %d\n", err); + put_device(&udev->dev); goto fail; } @@ -2663,13 +2664,13 @@ int usb_new_device(struct usb_device *udev) err = sysfs_create_link(&udev->dev.kobj, &port_dev->dev.kobj, "port"); if (err) - goto fail; + goto out_del_dev; err = sysfs_create_link(&port_dev->dev.kobj, &udev->dev.kobj, "device"); if (err) { sysfs_remove_link(&udev->dev.kobj, "port"); - goto fail; + goto out_del_dev; } if (!test_and_set_bit(port1, hub->child_usage_bits)) @@ -2683,6 +2684,9 @@ int usb_new_device(struct usb_device *udev) pm_runtime_put_sync_autosuspend(&udev->dev); return err; +out_del_dev: + device_del(&udev->dev); + put_device(&udev->dev); fail: usb_set_device_state(udev, USB_STATE_NOTATTACHED); pm_runtime_disable(&udev->dev); -- 2.25.1

11 months, 1 week

2
2
0 0

[PATCH 1/3] ring-buffer: Add uname to match criteria for persistent ring buffer

by Steven Rostedt

From: Steven Rostedt <rostedt(a)goodmis.org> The persistent ring buffer can live across boots. It is expected that the content in the buffer can be translated to the current kernel with delta offsets even with KASLR enabled. But it can only guarantee this if the content of the ring buffer came from the same kernel as the one that is currently running. Add uname into the meta data and if the uname in the meta data from the previous boot does not match the uname of the current boot, then clear the buffer and re-initialize it. This only handles the case of kernel versions. It does not clear the buffer for development. There are several mechanisms to keep bad data from crashing the kernel. The worst that can happen is some corrupt data may be displayed. Cc: stable(a)vger.kernel.org Fixes: 8f3e6659656e6 ("ring-buffer: Save text and data locations in mapped meta data") Signed-off-by: Steven Rostedt (Google) <rostedt(a)goodmis.org> --- kernel/trace/ring_buffer.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7e257e855dd1..3c94c59d000c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -17,6 +17,7 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kthread.h> /* for self test */ +#include <linux/utsname.h> #include <linux/module.h> #include <linux/percpu.h> #include <linux/mutex.h> @@ -45,10 +46,13 @@ static void update_pages_handler(struct work_struct *work); #define RING_BUFFER_META_MAGIC 0xBADFEED +#define UNAME_SZ 64 struct ring_buffer_meta { int magic; int struct_size; + char uname[UNAME_SZ]; + unsigned long text_addr; unsigned long data_addr; unsigned long first_buffer; @@ -1687,6 +1691,11 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, return false; } + if (strncmp(init_utsname()->release, meta->uname, UNAME_SZ - 1)) { + pr_info("Ring buffer boot meta[%d] mismatch of uname\n", cpu); + return false; + } + /* The subbuffer's 
size and number of subbuffers must match */ if (meta->subbuf_size != subbuf_size || meta->nr_subbufs != nr_pages + 1) { @@ -1920,6 +1929,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) meta->magic = RING_BUFFER_META_MAGIC; meta->struct_size = sizeof(*meta); + strscpy(meta->uname, init_utsname()->release, UNAME_SZ); meta->nr_subbufs = nr_pages + 1; meta->subbuf_size = PAGE_SIZE; -- 2.45.2

11 months, 1 week

4
24
0 0

2025

2024

2023

2022

2021

2020

2019

2018

2017

Linux-stable-mirror