FYI, I really want SGL support before this gets merged, but ignoring that for now:
+struct nvme_dmabuf_map {
+	struct io_dmabuf_map base;
+	dma_addr_t *dma_list;
+	struct sg_table *sgt;
+	unsigned nr_entries;
+};
I'd make dma_list a variable-sized array at the end of the structure to avoid an extra allocation and pointer dereference.
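Completely untested, but something like this (with the allocation site switched over to struct_size(); GFP flags as in the existing code):

struct nvme_dmabuf_map {
	struct io_dmabuf_map base;
	struct sg_table *sgt;
	unsigned int nr_entries;
	dma_addr_t dma_list[];	/* one entry per NVME_CTRL_PAGE_SIZE chunk */
};

	map = kmalloc(struct_size(map, dma_list, nr_entries), GFP_KERNEL);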
+static void nvme_dmabuf_map_sync(struct nvme_dev *nvme_dev, struct request *req,
+		bool for_cpu)
+{
+	int length = blk_rq_payload_bytes(req);
+	struct device *dev = nvme_dev->dev;
+	enum dma_data_direction dma_dir;
+	struct bio *bio = req->bio;
+	struct nvme_dmabuf_map *map;
+	dma_addr_t *dma_list;
+	int offset, map_idx;
+
+	dma_dir = rq_data_dir(req) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	map = container_of(bio->dmabuf_map, struct nvme_dmabuf_map, base);
+	dma_list = map->dma_list;
+	offset = bio->bi_iter.bi_bvec_done;
+	map_idx = offset / NVME_CTRL_PAGE_SIZE;
+	length += offset & (NVME_CTRL_PAGE_SIZE - 1);
Please initialize the variables at declaration time and use (or add) proper helpers to simplify this:
static inline struct nvme_dmabuf_map *
to_nvme_dmabuf_map(struct io_dmabuf_map *map)
{
	return container_of(map, struct nvme_dmabuf_map, base);
}
....
	enum dma_data_direction dma_dir = rq_dma_dir(req);
	struct device *dev = nvme_dev->dev;
	struct bio *bio = req->bio;
	struct nvme_dmabuf_map *map = to_nvme_dmabuf_map(bio->dmabuf_map);
	dma_addr_t *dma_list = map->dma_list;
	int offset = bio->bi_iter.bi_bvec_done;
	int map_idx = offset / NVME_CTRL_PAGE_SIZE;
	int length = blk_rq_payload_bytes(req) +
			(offset & (NVME_CTRL_PAGE_SIZE - 1));
Also a lot of these ints sound like they should be unsigned.
+	while (length > 0) {
+		u64 dma_addr = dma_list[map_idx++];
+
+		if (for_cpu)
+			__dma_sync_single_for_cpu(dev, dma_addr,
+					NVME_CTRL_PAGE_SIZE, dma_dir);
+		else
+			__dma_sync_single_for_device(dev, dma_addr,
+					NVME_CTRL_PAGE_SIZE, dma_dir);
+		length -= NVME_CTRL_PAGE_SIZE;
+	}
+}
Nothing should be using these __dma_sync helpers; they are internal implementation details. Using them means you call into sync code that should be skipped on most common server-class systems.
Also the for_cpu argument is a bit ugly. I'd rather have separate routines as in the core dma-mapping code, even if that means a little bit of code duplication.
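Completely untested sketch of the for_cpu half, using the public dma_sync_single_for_cpu() API (a no-op for coherent devices) and the to_nvme_dmabuf_map() helper suggested above; nvme_dmabuf_map_sync_for_device() would be its mirror image:

static void nvme_dmabuf_map_sync_for_cpu(struct nvme_dev *nvme_dev,
		struct request *req)
{
	struct bio *bio = req->bio;
	struct nvme_dmabuf_map *map = to_nvme_dmabuf_map(bio->dmabuf_map);
	enum dma_data_direction dma_dir = rq_dma_dir(req);
	unsigned int offset = bio->bi_iter.bi_bvec_done;
	unsigned int map_idx = offset / NVME_CTRL_PAGE_SIZE;
	int length = blk_rq_payload_bytes(req) +
			(offset & (NVME_CTRL_PAGE_SIZE - 1));

	while (length > 0) {
		dma_sync_single_for_cpu(nvme_dev->dev,
				map->dma_list[map_idx++],
				NVME_CTRL_PAGE_SIZE, dma_dir);
		length -= NVME_CTRL_PAGE_SIZE;
	}
}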
+static blk_status_t nvme_rq_setup_dmabuf_map(struct request *req,
+		struct nvme_queue *nvmeq)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	int length = blk_rq_payload_bytes(req);
+	u64 dma_addr, prp1_dma, prp2_dma;
+	struct bio *bio = req->bio;
+	struct nvme_dmabuf_map *map;
+	dma_addr_t *dma_list;
+	dma_addr_t prp_dma;
+	__le64 *prp_list;
+	int i, map_idx;
+	int offset;
+
+	nvme_dmabuf_map_sync(nvmeq->dev, req, false);
+	map = container_of(bio->dmabuf_map, struct nvme_dmabuf_map, base);
+	dma_list = map->dma_list;
+	offset = bio->bi_iter.bi_bvec_done;
+	map_idx = offset / NVME_CTRL_PAGE_SIZE;
+	offset &= (NVME_CTRL_PAGE_SIZE - 1);
+	prp1_dma = dma_list[map_idx++] + offset;
Same comments as for the sync helper above.
+	length -= (NVME_CTRL_PAGE_SIZE - offset);
+	if (length <= 0) {
+		prp2_dma = 0;
+		goto done;
+	}
+
+	if (length <= NVME_CTRL_PAGE_SIZE) {
+		prp2_dma = dma_list[map_idx];
+		goto done;
+	}
+
+	if (DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE) <=
+	    NVME_SMALL_POOL_SIZE / sizeof(__le64))
+		iod->flags |= IOD_SMALL_DESCRIPTOR;
+
+	prp_list = dma_pool_alloc(nvme_dma_pool(nvmeq, iod), GFP_ATOMIC,
+			&prp_dma);
+	if (!prp_list)
+		return BLK_STS_RESOURCE;
+
+	iod->descriptors[iod->nr_descriptors++] = prp_list;
+	prp2_dma = prp_dma;
And I really hate how this duplicates all the nasty PRP building logic, although right now I don't have a good answer to that.
+static inline bool nvme_rq_is_dmabuf_attached(struct request *req)
+{
+	if (!IS_ENABLED(CONFIG_DMABUF_TOKEN))
+		return false;
+	return req->bio && bio_flagged(req->bio, BIO_DMABUF_MAP);
+}
This is something that should go into the block layer.
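E.g. (untested; blk_rq_is_dmabuf_attached() is just a strawman name for a helper in include/linux/blk-mq.h or similar):

static inline bool blk_rq_is_dmabuf_attached(struct request *req)
{
	if (!IS_ENABLED(CONFIG_DMABUF_TOKEN))
		return false;
	return req->bio && bio_flagged(req->bio, BIO_DMABUF_MAP);
}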