From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 22457c047ed971f2f2e33be593ddfabd9639a409 ]
Unfortunately reparse attribute is used for many purposes (several dozens). It is not possible here to know is this name symlink or not. To get exactly the type of name we should to open inode (read mft). getattr for opened file (fstat) correctly returns symlink.
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/dir.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index d4d9f4ffb6d9..c2fb76bb28f4 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -309,11 +309,31 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, return 0; }
- /* NTFS: symlinks are "dir + reparse" or "file + reparse" */ - if (fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) - dt_type = DT_LNK; - else - dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + /* + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * Unfortunately reparse attribute is used for many purposes (several dozens). + * It is not possible here to know is this name symlink or not. + * To get exactly the type of name we should to open inode (read mft). + * getattr for opened file (fstat) correctly returns symlink. + */ + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + /* + * It is not reliable to detect the type of name using duplicated information + * stored in parent directory. + * The only correct way to get the type of name - read MFT record and find ATTR_STD. + * The code below is not good idea. + * It does additional locks/reads just to get the type of name. + * Should we use additional mount option to enable branch below? + */ + if ((fname->dup.fa & FILE_ATTRIBUTE_REPARSE_POINT) && + ino != ni->mi.rno) { + struct inode *inode = ntfs_iget5(sbi->sb, &e->ref, NULL); + if (!IS_ERR_OR_NULL(inode)) { + dt_type = fs_umode_to_dtype(inode->i_mode); + iput(inode); + } + }
return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); }
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 6a799c928b78b14999b7705c4cca0f88e297fe96 ]
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index c2fb76bb28f4..72cdfa8727d3 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -515,11 +515,9 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, struct INDEX_HDR *hdr; const struct ATTR_FILE_NAME *fname; u32 e_size, off, end; - u64 vbo = 0; size_t drs = 0, fles = 0, bit = 0; - loff_t i_size = ni->vfs_inode.i_size; struct indx_node *node = NULL; - u8 index_bits = ni->dir.index_bits; + size_t max_indx = ni->vfs_inode.i_size >> ni->dir.index_bits;
if (is_empty) *is_empty = true; @@ -563,7 +561,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, fles += 1; }
- if (vbo >= i_size) + if (bit >= max_indx) goto out;
err = indx_used_bit(&ni->dir, ni, &bit); @@ -573,8 +571,7 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, if (bit == MINUS_ONE_T) goto out;
- vbo = (u64)bit << index_bits; - if (vbo >= i_size) + if (bit >= max_indx) goto out;
err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, @@ -584,7 +581,6 @@ static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs,
hdr = &node->index->ihdr; bit += 1; - vbo = (u64)bit << ni->dir.idx2vbn_bits; }
out:
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 1918c10e137eae266b8eb0ab1cc14421dcb0e3e2 ]
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/record.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 938fc286963f..ac43e4a6d57d 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -509,8 +509,20 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, return false;
if (ni && is_attr_indexed(attr)) { - le16_add_cpu(&ni->mi.mrec->hard_links, -1); - ni->mi.dirty = true; + u16 links = le16_to_cpu(ni->mi.mrec->hard_links); + struct ATTR_FILE_NAME *fname = + attr->type != ATTR_NAME ? + NULL : + resident_data_ex(attr, + SIZEOF_ATTRIBUTE_FILENAME); + if (fname && fname->type == FILE_NAME_DOS) { + /* Do not decrease links count deleting DOS name. */ + } else if (!links) { + /* minor error. Not critical. */ + } else { + ni->mi.mrec->hard_links = cpu_to_le16(links - 1); + ni->mi.dirty = true; + } }
used -= asize;
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 85ba2a75faee759809a7e43b4c103ac59bac1026 ]
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 176b04a5d1ad..0ff673bb4b2b 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -402,7 +402,6 @@ static struct inode *ntfs_read_mft(struct inode *inode, goto out;
if (!is_match && name) { - /* Reuse rec as buffer for ascii name. */ err = -ENOENT; goto out; } @@ -417,6 +416,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
if (names != le16_to_cpu(rec->hard_links)) { /* Correct minor error on the fly. Do not mark inode as dirty. */ + ntfs_inode_warn(inode, "Correct links count -> %u.", names); rec->hard_links = cpu_to_le16(names); ni->mi.dirty = true; }
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit d155617006ebc172a80d3eb013c4b867f9a8ada4 ]
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/ntfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 0f38d558169a..8b580515b1d6 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -517,7 +517,7 @@ struct ATTR_LIST_ENTRY { __le64 vcn; // 0x08: Starting VCN of this attribute. struct MFT_REF ref; // 0x10: MFT record number with attribute. __le16 id; // 0x18: struct ATTRIB ID. - __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset. + __le16 name[]; // 0x1A: Just to align. To get real name can use name_off.
}; // sizeof(0x20)
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit aaab47f204aaf47838241d57bf8662c8840de60a ]
It is preferable to exit through the out: label because internal debugging functions are located there.
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/attrib.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 1d5ac2164d94..64a6d255c468 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -1583,8 +1583,10 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); - if (!attr_b) - return -ENOENT; + if (!attr_b) { + err = -ENOENT; + goto out; + }
attr = attr_b; le = le_b; @@ -1665,13 +1667,15 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, ok: run_truncate_around(run, vcn); out: - if (new_valid > data_size) - new_valid = data_size; + if (attr_b) { + if (new_valid > data_size) + new_valid = data_size;
- valid_size = le64_to_cpu(attr_b->nres.valid_size); - if (new_valid != valid_size) { - attr_b->nres.valid_size = cpu_to_le64(valid_size); - mi_b->dirty = true; + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } }
return err;
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 4cdfb6e7bc9c80142d33bf1d4653a73fa678ba56 ]
The use of sizeof(struct ATTR_LIST_ENTRY) has been replaced with le_size(0) due to alignment peculiarities on different platforms.
Reported-by: kernel test robot lkp@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202312071005.g6YrbaIe-lkp@intel.com/ Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/attrlist.c | 8 ++++---- fs/ntfs3/ntfs.h | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index 0c6a68e71e7d..723e49ec83ce 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -127,12 +127,13 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, { size_t off; u16 sz; + const unsigned le_min_size = le_size(0);
if (!le) { le = ni->attr_list.le; } else { sz = le16_to_cpu(le->size); - if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + if (sz < le_min_size) { /* Impossible 'cause we should not return such le. */ return NULL; } @@ -141,7 +142,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni,
/* Check boundary. */ off = PtrOffset(ni->attr_list.le, le); - if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + if (off + le_min_size > ni->attr_list.size) { /* The regular end of list. */ return NULL; } @@ -149,8 +150,7 @@ struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, sz = le16_to_cpu(le->size);
/* Check le for errors. */ - if (sz < sizeof(struct ATTR_LIST_ENTRY) || - off + sz > ni->attr_list.size || + if (sz < le_min_size || off + sz > ni->attr_list.size || sz < le->name_off + le->name_len * sizeof(short)) { return NULL; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 8b580515b1d6..ba26a465b309 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -521,8 +521,6 @@ struct ATTR_LIST_ENTRY {
}; // sizeof(0x20)
-static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); - static inline u32 le_size(u8 name_len) { return ALIGN(offsetof(struct ATTR_LIST_ENTRY, name) +
From: Ism Hong ism.hong@gmail.com
[ Upstream commit d6d33f03baa43d763fe094ca926eeae7d3421d07 ]
Since the buffer cache for ntfs3 metadata is not released until the file system is unmounted, allocating from the movable zone may result in cma allocation failures. This is due to the page still being used by ntfs3, leading to migration failures.
To address this, this commit use sb_bread_umovable() instead of sb_bread(). This change prevents allocation from the movable zone, ensuring compatibility with scenarios where the buffer head is not released until unmount. This patch is inspired by commit a8ac900b8163("ext4: use non-movable memory for the ext4 superblock").
The issue is found when playing video files stored in NTFS on the Android TV platform. During this process, the media parser reads the video file, causing ntfs3 to allocate buffer cache from the CMA area. Subsequently, the hardware decoder attempts to allocate memory from the same CMA area. However, the page is still in use by ntfs3, resulting in a migrate failure in alloc_contig_range().
The pinned page and allocating stacktrace reported by page owner shows below:
page:ffffffff00b68880 refcount:3 mapcount:0 mapping:ffffff80046aa828 index:0xc0040 pfn:0x20fa4 aops:def_blk_aops ino:0 flags: 0x2020(active|private) page dumped because: migration failure page last allocated via order 0, migratetype Movable, gfp_mask 0x108c48 (GFP_NOFS|__GFP_NOFAIL|__GFP_HARDWALL|__GFP_MOVABLE), page_owner tracks the page as allocated prep_new_page get_page_from_freelist __alloc_pages_nodemask pagecache_get_page __getblk_gfp __bread_gfp ntfs_read_run_nb ntfs_read_bh mi_read ntfs_iget5 dir_search_u ntfs_lookup __lookup_slow lookup_slow walk_component path_lookupat
Signed-off-by: Ism Hong ism.hong@gmail.com Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/ntfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 510ed2ea1c48..713b7e3888bc 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -1014,7 +1014,7 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) static inline struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) { - struct buffer_head *bh = sb_bread(sb, block); + struct buffer_head *bh = sb_bread_unmovable(sb, block);
if (bh) return bh;
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 5ca87d01eba7bdfe9536a157ca33c1455bb8d16c ]
It used in test environment.
Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/fsntfs.c | 24 ++++++++++++++++++++++++ fs/ntfs3/ntfs_fs.h | 14 +------------- 2 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index 4413f6da68e6..ff6f1f462b21 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -981,6 +981,30 @@ static inline __le32 security_hash(const void *sd, size_t bytes) return cpu_to_le32(hash); }
+/* + * simple wrapper for sb_bread_unmovable. + */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct buffer_head *bh; + + if (unlikely(block >= sbi->volume.blocks)) { + /* prevent generic message "attempt to access beyond end of device" */ + ntfs_err(sb, "try to read out of volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; + } + + bh = sb_bread_unmovable(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) { struct block_device *bdev = sb->s_bdev; diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 713b7e3888bc..f618d63ce797 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -581,6 +581,7 @@ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); int log_replay(struct ntfs_inode *ni, bool *initialized);
/* Globals from fsntfs.c */ +struct buffer_head *ntfs_bread(struct super_block *sb, sector_t block); bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, bool simple); @@ -1011,19 +1012,6 @@ static inline u64 bytes_to_block(const struct super_block *sb, u64 size) return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; }
-static inline struct buffer_head *ntfs_bread(struct super_block *sb, - sector_t block) -{ - struct buffer_head *bh = sb_bread_unmovable(sb, block); - - if (bh) - return bh; - - ntfs_err(sb, "failed to read volume at offset 0x%llx", - (u64)block << sb->s_blocksize_bits); - return NULL; -} - static inline struct ntfs_inode *ntfs_i(struct inode *inode) { return container_of(inode, struct ntfs_inode, vfs_inode);
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit 1b7dd28e14c4728ae1a815605ca33ffb4ce1b309 ]
Reported-by: Robert Morris rtm@csail.mit.edu Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/fslog.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 8b95c06e5a4c..6ba1357f3ed4 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -465,7 +465,7 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) { const struct RESTART_AREA *ra; u16 cl, fl, ul; - u32 off, l_size, file_dat_bits, file_size_round; + u32 off, l_size, seq_bits; u16 ro = le16_to_cpu(rhdr->ra_off); u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
@@ -511,13 +511,15 @@ static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) /* Make sure the sequence number bits match the log file size. */ l_size = le64_to_cpu(ra->l_size);
- file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); - file_size_round = 1u << (file_dat_bits + 3); - if (file_size_round != l_size && - (file_size_round < l_size || (file_size_round / 2) > l_size)) { - return false; + seq_bits = sizeof(u64) * 8 + 3; + while (l_size) { + l_size >>= 1; + seq_bits -= 1; }
+ if (seq_bits != ra->seq_num_bits) + return false; + /* The log page data offset and record header length must be quad-aligned. */ if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) || !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
From: Konstantin Komarov almaz.alexandrovich@paragon-software.com
[ Upstream commit d68968440b1a75dee05cfac7f368f1aa139e1911 ]
Reported-by: Giovanni Santini giovannisantini93@yahoo.it Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/file.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index c526e0427f2b..6d4f3431bc75 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -1090,6 +1090,8 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) iocb->ki_pos += written; if (iocb->ki_pos > ni->i_valid) ni->i_valid = iocb->ki_pos; + if (iocb->ki_pos > i_size) + i_size_write(inode, iocb->ki_pos);
return written; }
From: Edward Adam Davis eadavis@qq.com
[ Upstream commit 731ab1f9828800df871c5a7ab9ffe965317d3f15 ]
The length of name cannot exceed the space occupied by ea.
Reported-and-tested-by: syzbot+65e940cfb8f99a97aca7@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis eadavis@qq.com Signed-off-by: Konstantin Komarov almaz.alexandrovich@paragon-software.com Signed-off-by: Sasha Levin sashal@kernel.org --- fs/ntfs3/xattr.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 8e739023e305..d0b75d7f58a7 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -217,6 +217,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, if (!ea->name_len) break;
+ if (ea->name_len > ea_size) + break; + if (buffer) { /* Check if we can use field ea->name */ if (off + ea_size > size)
From: Johannes Berg johannes.berg@intel.com
[ Upstream commit c042600c17d8c490279f0ae2baee29475fe8047d ]
There's a call to drv_mgd_prepare_tx() and so there should be one to drv_mgd_complete_tx(), but on this path it's not. Add it.
Link: https://msgid.link/20240131164824.2f0922a514e1.I5aac89b93bcead88c374187d70ca... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cc6d38a2e6d5..5da0c2a2e293 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5923,6 +5923,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true, req->reason_code, false); + drv_mgd_complete_tx(sdata->local, sdata, &info); return 0; }
From: Andrew Bresticker abrestic@rivosinc.com
[ Upstream commit de1034b38a346ef6be25fe8792f5d1e0684d5ff4 ]
md_size will have been narrowed if we have >= 4GB worth of pages in a soft-reserved region.
Signed-off-by: Andrew Bresticker abrestic@rivosinc.com Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firmware/efi/arm-runtime.c | 2 +- drivers/firmware/efi/riscv-runtime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 3359ae2adf24..9054c2852580 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -107,7 +107,7 @@ static int __init arm_enable_runtime_services(void) efi_memory_desc_t *md;
for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP)) diff --git a/drivers/firmware/efi/riscv-runtime.c b/drivers/firmware/efi/riscv-runtime.c index d28e715d2bcc..6711e64eb0b1 100644 --- a/drivers/firmware/efi/riscv-runtime.c +++ b/drivers/firmware/efi/riscv-runtime.c @@ -85,7 +85,7 @@ static int __init riscv_enable_runtime_services(void) efi_memory_desc_t *md;
for_each_efi_memory_desc(md) { - int md_size = md->num_pages << EFI_PAGE_SHIFT; + u64 md_size = md->num_pages << EFI_PAGE_SHIFT; struct resource *res;
if (!(md->attribute & EFI_MEMORY_SP))
From: Andrew Bresticker abrestic@rivosinc.com
[ Upstream commit 0bcff59ef7a652fcdc6d535554b63278c2406c8f ]
Adding memblocks for soft-reserved regions prevents them from later being hotplugged in by dax_kmem.
Signed-off-by: Andrew Bresticker abrestic@rivosinc.com Signed-off-by: Ard Biesheuvel ardb@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firmware/efi/efi-init.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index b2c829e95bd1..4639ac6e4f9a 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -141,15 +141,6 @@ static __init int is_usable_memory(efi_memory_desc_t *md) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - /* - * Special purpose memory is 'soft reserved', which means it - * is set aside initially, but can be hotplugged back in or - * be assigned to the dax driver after boot. - */ - if (efi_soft_reserve_enabled() && - (md->attribute & EFI_MEMORY_SP)) - return false; - /* * According to the spec, these regions are no longer reserved * after calling ExitBootServices(). However, we can only use @@ -194,6 +185,16 @@ static __init void reserve_regions(void) size = npages << PAGE_SHIFT;
if (is_memory(md)) { + /* + * Special purpose memory is 'soft reserved', which + * means it is set aside initially. Don't add a memblock + * for it now so that it can be hotplugged back in or + * be assigned to the dax driver after boot. + */ + if (efi_soft_reserve_enabled() && + (md->attribute & EFI_MEMORY_SP)) + continue; + early_init_dt_add_memory_arch(paddr, size);
if (!is_usable_memory(md))
From: Zhang Rui rui.zhang@intel.com
[ Upstream commit 34cf8c657cf0365791cdc658ddbca9cc907726ce ]
Currently, coretemp driver supports only 128 cores per package. This loses some core temperature information on systems that have more than 128 cores per package. [ 58.685033] coretemp coretemp.0: Adding Core 128 failed [ 58.692009] coretemp coretemp.0: Adding Core 129 failed ...
Enlarge the limitation to 512 because there are platforms with more than 256 cores per package.
Signed-off-by: Zhang Rui rui.zhang@intel.com Link: https://lore.kernel.org/r/20240202092144.71180-4-rui.zhang@intel.com Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/hwmon/coretemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 5b2057ce5a59..23b1c4c0452c 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -40,7 +40,7 @@ MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
#define PKG_SYSFS_ATTR_NO 1 /* Sysfs attribute for package temp */ #define BASE_SYSFS_ATTR_NO 2 /* Sysfs Base attr no for coretemp */ -#define NUM_REAL_CORES 128 /* Number of Real cores per cpu */ +#define NUM_REAL_CORES 512 /* Number of Real cores per cpu */ #define CORETEMP_NAME_LENGTH 28 /* String Length of attrs */ #define MAX_CORE_ATTRS 4 /* Maximum no of basic attrs */ #define TOTAL_ATTRS (MAX_CORE_ATTRS + 1)
From: Hannes Reinecke hare@suse.de
[ Upstream commit d6c1b19153f92e95e5e1801d540e98771053afae ]
LUNs going into "failed ready running" state observed on >1T and on even numbers of size (2T, 4T, 6T, 8T and 10T). The issue occurs when DIF is enabled at the host.
The kernel logs:
Cannot setup S/G List for HBAIO segs 1/1 SGL 512 SCSI 256: 3 0
The host lpfc driver is failing to setup scatter/gather list (protection data) for the I/Os.
The return type lpfc_bg_setup_sgl()/lpfc_bg_setup_sgl_prot() causes the compiler to remove the most significant bit. Use an unsigned type instead.
Signed-off-by: Hannes Reinecke hare@suse.de [dwagner: added commit message] Signed-off-by: Daniel Wagner dwagner@suse.de Link: https://lore.kernel.org/r/20231220162658.12392-1-dwagner@suse.de Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/scsi/lpfc/lpfc_scsi.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4813adec0301..6d1a3cbd6b3c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1983,7 +1983,7 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datasegcnt, struct lpfc_io_buf *lpfc_cmd) @@ -1991,8 +1991,8 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct scatterlist *sgde = NULL; /* s/g data entry */ struct sli4_sge_diseed *diseed = NULL; dma_addr_t physaddr; - int i = 0, num_sge = 0, status; - uint32_t reftag; + int i = 0, status; + uint32_t reftag, num_sge = 0; uint8_t txop, rxop; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint32_t rc; @@ -2164,7 +2164,7 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc, * * Returns the number of SGEs added to the SGL. **/ -static int +static uint32_t lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, struct sli4_sge *sgl, int datacnt, int protcnt, struct lpfc_io_buf *lpfc_cmd) @@ -2188,8 +2188,8 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc, uint32_t rc; #endif uint32_t checking = 1; - uint32_t dma_offset = 0; - int num_sge = 0, j = 2; + uint32_t dma_offset = 0, num_sge = 0; + int j = 2; struct sli4_hybrid_sgl *sgl_xtra = NULL;
sgpe = scsi_prot_sglist(sc);
From: Takashi Sakamoto o-takashi@sakamocchi.jp
[ Upstream commit 7ed4380009e96d9e9c605e12822e987b35b05648 ]
If we are bus manager and the bus has inconsistent gap counts, send a bus reset immediately instead of trying to read the root node's config ROM first. Otherwise, we could spend a lot of time trying to read the config ROM but never succeeding.
This eliminates a 50+ second delay before the FireWire bus is usable after a newly connected device is powered on in certain circumstances.
The delay occurs if a gap count inconsistency occurs, we are not the root node, and we become bus manager. One scenario that causes this is with a TI XIO2213B OHCI, the first time a Sony DSR-25 is powered on after being connected to the FireWire cable. In this configuration, the Linux box will not receive the initial PHY configuration packet sent by the DSR-25 as IRM, resulting in the DSR-25 having a gap count of 44 while the Linux box has a gap count of 63.
FireWire devices have a gap count parameter, which is set to 63 on power-up and can be changed with a PHY configuration packet. This determines the duration of the subaction and arbitration gaps. For reliable communication, all nodes on a FireWire bus must have the same gap count.
A node may have zero or more of the following roles: root node, bus manager (BM), isochronous resource manager (IRM), and cycle master. Unless a root node was forced with a PHY configuration packet, any node might become root node after a bus reset. Only the root node can become cycle master. If the root node is not cycle master capable, the BM or IRM should force a change of root node.
After a bus reset, each node sends a self-ID packet, which contains its current gap count. A single bus reset does not change the gap count, but two bus resets in a row will set the gap count to 63. Because a consistent gap count is required for reliable communication, IEEE 1394a-2000 requires that the bus manager generate a bus reset if it detects that the gap count is inconsistent.
When the gap count is inconsistent, build_tree() will notice this after the self identification process. It will set card->gap_count to the invalid value 0. If we become bus master, this will force bm_work() to send a bus reset when it performs gap count optimization.
After a bus reset, there is no bus manager. We will almost always try to become bus manager. Once we become bus manager, we will first determine whether the root node is cycle master capable. Then, we will determine if the gap count should be changed. If either the root node or the gap count should be changed, we will generate a bus reset.
To determine if the root node is cycle master capable, we read its configuration ROM. bm_work() will wait until we have finished trying to read the configuration ROM.
However, an inconsistent gap count can make this take a long time. read_config_rom() will read the first few quadlets from the config ROM. Due to the gap count inconsistency, eventually one of the reads will time out. When read_config_rom() fails, fw_device_init() calls it again until MAX_RETRIES is reached. This takes 50+ seconds.
Once we give up trying to read the configuration ROM, bm_work() will wake up, assume that the root node is not cycle master capable, and do a bus reset. Hopefully, this will resolve the gap count inconsistency.
This change makes bm_work() check for an inconsistent gap count before waiting for the root node's configuration ROM. If the gap count is inconsistent, bm_work() will immediately do a bus reset. This eliminates the 50+ second delay and rapidly brings the bus to a working state.
I considered that if the gap count is inconsistent, a PHY configuration packet might not be successful, so it could be desirable to skip the PHY configuration packet before the bus reset in this case. However, IEEE 1394a-2000 and IEEE 1394-2008 say that the bus manager may transmit a PHY configuration packet before a bus reset when correcting a gap count error. Since the standard endorses this, I decided it's safe to retain the PHY configuration packet transmission.
Normally, after a topology change, we will reset the bus a maximum of 5 times to change the root node and perform gap count optimization. However, if there is a gap count inconsistency, we must always generate a bus reset. Otherwise the gap count inconsistency will persist and communication will be unreliable. For that reason, if there is a gap count inconstency, we generate a bus reset even if we already reached the 5 reset limit.
Signed-off-by: Adam Goldman adamg@pobox.com Reference: https://sourceforge.net/p/linux1394/mailman/message/58727806/ Signed-off-by: Takashi Sakamoto o-takashi@sakamocchi.jp Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/firewire/core-card.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index f3b3953cac83..be195ba83463 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -429,7 +429,23 @@ static void bm_work(struct work_struct *work) */ card->bm_generation = generation;
- if (root_device == NULL) { + if (card->gap_count == 0) { + /* + * If self IDs have inconsistent gap counts, do a + * bus reset ASAP. The config rom read might never + * complete, so don't wait for it. However, still + * send a PHY configuration packet prior to the + * bus reset. The PHY configuration packet might + * fail, but 1394-2008 8.4.5.2 explicitly permits + * it in this case, so it should be safe to try. + */ + new_root_id = local_id; + /* + * We must always send a bus reset if the gap count + * is inconsistent, so bypass the 5-reset limit. + */ + card->bm_retries = 0; + } else if (root_device == NULL) { /* * Either link_on is false, or we failed to read the * config rom. In either case, pick another root.
From: Dan Carpenter dan.carpenter@linaro.org
[ Upstream commit 67057f48df79a3d73683385f521215146861684b ]
I recently changed the alignment code in dw_pcie_ep_raise_msix_irq(). The code in dw_pcie_ep_raise_msi_irq() is similar, so update it to match, just for consistency. (No effect on runtime, just a cleanup).
Link: https://lore.kernel.org/r/184097e0-c728-42c7-9e8a-556bd33fb612@moroto.mounta... Signed-off-by: Dan Carpenter dan.carpenter@linaro.org Signed-off-by: Bjorn Helgaas bhelgaas@google.com Reviewed-by: Niklas Cassel cassel@kernel.org Reviewed-by: Ilpo Järvinen ilpo.jarvinen@linux.intel.com Reviewed-by: Manivannan Sadhasivam manivannan.sadhasivam@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/pci/controller/dwc/pcie-designware-ep.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 61a0f33c59cf..a88dd254a714 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -518,9 +518,10 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no, reg = ep_func->msi_cap + func_offset + PCI_MSI_DATA_32; msg_data = dw_pcie_readw_dbi(pci, reg); } - aligned_offset = msg_addr_lower & (epc->mem->window.page_size - 1); - msg_addr = ((u64)msg_addr_upper) << 32 | - (msg_addr_lower & ~aligned_offset); + msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower; + + aligned_offset = msg_addr & (epc->mem->window.page_size - 1); + msg_addr = ALIGN_DOWN(msg_addr, epc->mem->window.page_size); ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr, epc->mem->window.page_size); if (ret)
From: Prike Liang Prike.Liang@amd.com
[ Upstream commit 93bafa32a6918154aa0caf9f66679a32c2431357 ]
In the suspend abort cases, the gfx power rail doesn't turn off so some GFXDEC registers/CSB can't reset to default value and at this moment reinitialize GFXDEC/CSB will result in an unexpected error. So let skip those program sequence for the suspend abort case.
Signed-off-by: Prike Liang Prike.Liang@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++++++ 3 files changed, 12 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 1f1e7966beb5..dbef22f56482 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1045,6 +1045,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete;
atomic_t in_gpu_reset; enum pp_mp1_state mp1_state; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index deae92fde3b8..57943e900871 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2252,6 +2252,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else @@ -2264,6 +2265,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev);
+ adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index de1fab165041..fb37c0d4b35b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3268,6 +3268,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v9_0_cp_gfx_enable(adev, true);
+ /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
From: Prike Liang Prike.Liang@amd.com
[ Upstream commit 6ef82ac664bb9568ca3956e0d9c9c478e25077ff ]
In the s3 suspend abort case some type of gfx9 power rail not turn off from FCH side and this will put the GPU in an unknown power status, so let's reset the gpu to a known good power state before reinitialize gpu device.
Signed-off-by: Prike Liang Prike.Liang@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e8c0e77e1b01..6a3486f52d69 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1490,10 +1490,32 @@ static int soc15_common_suspend(void *handle) return soc15_common_hw_fini(adev); }
+static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); }
From: Yi Sun yi.sun@unisoc.com
[ Upstream commit 4ce6e2db00de8103a0687fb0f65fd17124a51aaa ]
Ensure no remaining requests in virtqueues before resetting vdev and deleting virtqueues. Otherwise these requests will never be completed. It may cause the system to become unresponsive.
Function blk_mq_quiesce_queue() can ensure that requests have become in_flight status, but it cannot guarantee that requests have been processed by the device. Virtqueues should never be deleted before all requests become complete status.
Function blk_mq_freeze_queue() ensure that all requests in virtqueues become complete status. And no requests can enter in virtqueues.
Signed-off-by: Yi Sun yi.sun@unisoc.com Reviewed-by: Stefan Hajnoczi stefanha@redhat.com Link: https://lore.kernel.org/r/20240129085250.1550594-1-yi.sun@unisoc.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Sasha Levin sashal@kernel.org --- drivers/block/virtio_blk.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index affeca0dbc7e..7f73e7447ecb 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -989,14 +989,15 @@ static int virtblk_freeze(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv;
+ /* Ensure no requests in virtqueues before deleting vqs. */ + blk_mq_freeze_queue(vblk->disk->queue); + /* Ensure we don't receive any more interrupts */ vdev->config->reset(vdev);
/* Make sure no work handler is accessing the device. */ flush_work(&vblk->config_work);
- blk_mq_quiesce_queue(vblk->disk->queue); - vdev->config->del_vqs(vdev); kfree(vblk->vqs);
@@ -1014,7 +1015,7 @@ static int virtblk_restore(struct virtio_device *vdev)
virtio_device_ready(vdev);
- blk_mq_unquiesce_queue(vblk->disk->queue); + blk_mq_unfreeze_queue(vblk->disk->queue); return 0; } #endif
linux-stable-mirror@lists.linaro.org