  */
 #define NVME_MAX_KB_SZ 8192
 #define NVME_MAX_SEGS 128
+#define NVME_MAX_META_SEGS 15
 #define NVME_MAX_NR_ALLOCATIONS 5
 
 static int use_threaded_interrupts;
@@ -144,6 +145,7 @@ struct nvme_dev {
 	struct sg_table *hmb_sgt;
 
 	mempool_t *iod_mempool;
+	mempool_t *iod_meta_mempool;
 
 	/* shadow doorbell buffer support: */
 	__le32 *dbbuf_dbs;
@@ -239,6 +241,8 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
 	struct sg_table sgt;
+	struct sg_table meta_sgt;
+	union nvme_descriptor meta_list;
 	union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
 };
 
@@ -506,6 +510,14 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
 	spin_unlock(&nvmeq->sq_lock);
 }
 
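+/*
+ * Metadata is mapped via SGL only when the controller reports metadata SGL
+ * support and the request carries more than one integrity segment;
+ * otherwise the single metadata pointer (MPTR) path is used.
+ */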
+static inline bool nvme_pci_metadata_use_sgls(struct nvme_dev *dev,
+					      struct request *req)
+{
+	if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		return false;
+	return req->nr_integrity_segments > 1;
+}
+
 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 		int nseg)
 {
@@ -518,6 +530,8 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 		return false;
 	if (!nvmeq->qid)
 		return false;
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return true;
 	if (!sgl_threshold || avg_seg_size < sgl_threshold)
 		return false;
 	return true;
@@ -780,7 +794,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+			if (!nvme_pci_metadata_use_sgls(dev, req) &&
+			    (bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
 			    bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
@@ -824,11 +839,69 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	return ret;
 }
 
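+/*
+ * Map the request's integrity segments and describe them with an NVMe SGL:
+ * a single data block descriptor when the mapping collapses to one DMA
+ * entry, otherwise a segment descriptor followed by one data block
+ * descriptor per entry.
+ */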
-static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
-		struct nvme_command *cmnd)
+static blk_status_t nvme_pci_setup_meta_sgls(struct nvme_dev *dev,
+					     struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_rw_command *cmnd = &iod->cmd.rw;
+	struct nvme_sgl_desc *sg_list;
+	struct scatterlist *sgl, *sg;
+	unsigned int entries;
+	dma_addr_t sgl_dma;
+	int rc, i;
+
+	iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
+	if (!iod->meta_sgt.sgl)
+		return BLK_STS_RESOURCE;
+
+	sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
+	iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
+							   iod->meta_sgt.sgl);
+	if (!iod->meta_sgt.orig_nents)
+		goto out_free_sg;
+
+	rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
+			     DMA_ATTR_NO_WARN);
+	if (rc)
+		goto out_free_sg;
+
+	sg_list = dma_pool_alloc(dev->prp_small_pool, GFP_ATOMIC, &sgl_dma);
+	if (!sg_list)
+		goto out_unmap_sg;
+
+	entries = iod->meta_sgt.nents;
+	iod->meta_list.sg_list = sg_list;
+	iod->meta_dma = sgl_dma;
+
+	cmnd->flags = NVME_CMD_SGL_METASEG;
+	cmnd->metadata = cpu_to_le64(sgl_dma);
+
+	sgl = iod->meta_sgt.sgl;
+	if (entries == 1) {
+		nvme_pci_sgl_set_data(sg_list, sgl);
+		return BLK_STS_OK;
+	}
+
+	sgl_dma += sizeof(*sg_list);
+	nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
+	for_each_sg(sgl, sg, entries, i)
+		nvme_pci_sgl_set_data(&sg_list[i + 1], sg);
+
+	return BLK_STS_OK;
+
+out_unmap_sg:
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+out_free_sg:
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+	return BLK_STS_RESOURCE;
+}
+
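+/* Single-segment metadata: map the one integrity bvec and use MPTR. */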
+static blk_status_t nvme_pci_setup_meta_mptr(struct nvme_dev *dev,
+					     struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct bio_vec bv = rq_integrity_vec(req);
+	struct nvme_command *cmnd = &iod->cmd;
 
 	iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
@@ -837,6 +910,13 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 	return BLK_STS_OK;
 }
 
+static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req)
+{
+	if (nvme_pci_metadata_use_sgls(dev, req))
+		return nvme_pci_setup_meta_sgls(dev, req);
+	return nvme_pci_setup_meta_mptr(dev, req);
+}
+
 static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -845,6 +925,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	iod->aborted = false;
 	iod->nr_allocations = -1;
 	iod->sgt.nents = 0;
+	iod->meta_sgt.nents = 0;
 
 	ret = nvme_setup_cmd(req->q->queuedata, req);
 	if (ret)
@@ -857,7 +938,7 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
 	}
 
 	if (blk_integrity_rq(req)) {
-		ret = nvme_map_metadata(dev, req, &iod->cmd);
+		ret = nvme_map_metadata(dev, req);
 		if (ret)
 			goto out_unmap_data;
 	}
@@ -955,17 +1036,31 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
 	*rqlist = requeue_list;
 }
 
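+/*
+ * Undo nvme_map_metadata(): MPTR mappings (meta_sgt.nents == 0) just unmap
+ * the single bvec; SGL mappings also free the descriptor list and return
+ * the scatterlist to the mempool.
+ */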
+static __always_inline void nvme_unmap_metadata(struct nvme_dev *dev,
+						struct request *req)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	if (!iod->meta_sgt.nents) {
+		dma_unmap_page(dev->dev, iod->meta_dma,
+			       rq_integrity_vec(req).bv_len,
+			       rq_dma_dir(req));
+		return;
+	}
+
+	dma_pool_free(dev->prp_small_pool, iod->meta_list.sg_list,
+		      iod->meta_dma);
+	dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
+	mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
+}
+
 static __always_inline void nvme_pci_unmap_rq(struct request *req)
 {
 	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 
-	if (blk_integrity_rq(req)) {
-		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-
-		dma_unmap_page(dev->dev, iod->meta_dma,
-			rq_integrity_vec(req).bv_len, rq_dma_dir(req));
-	}
+	if (blk_integrity_rq(req))
+		nvme_unmap_metadata(dev, req);
 
 	if (blk_rq_nr_phys_segments(req))
 		nvme_unmap_data(dev, req);
@@ -2761,6 +2856,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
 
 static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 {
+	size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
 	size_t alloc_size = sizeof(struct scatterlist) * NVME_MAX_SEGS;
 
 	dev->iod_mempool = mempool_create_node(1,
@@ -2769,7 +2865,18 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
 			dev_to_node(dev->dev));
 	if (!dev->iod_mempool)
 		return -ENOMEM;
+
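+	/* Scatterlist pool for per-request metadata SGL mappings */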
+	dev->iod_meta_mempool = mempool_create_node(1,
+			mempool_kmalloc, mempool_kfree,
+			(void *)meta_size, GFP_KERNEL,
+			dev_to_node(dev->dev));
+	if (!dev->iod_meta_mempool)
+		goto free;
+
 	return 0;
+free:
+	mempool_destroy(dev->iod_mempool);
+	return -ENOMEM;
 }
 
 static void nvme_free_tagset(struct nvme_dev *dev)
@@ -2834,6 +2941,11 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
+	if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
+	else
+		dev->ctrl.max_integrity_segments = 1;
+
 	nvme_dbbuf_dma_alloc(dev);
 
 	result = nvme_setup_host_mem(dev);
@@ -3101,11 +3213,6 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
 	dev->ctrl.max_hw_sectors = min_t(u32,
 		NVME_MAX_KB_SZ << 1, dma_opt_mapping_size(&pdev->dev) >> 9);
 	dev->ctrl.max_segments = NVME_MAX_SEGS;
-
-	/*
-	 * There is no support for SGLs for metadata (yet), so we are limited to
-	 * a single integrity segment for the separate metadata pointer.
-	 */
 	dev->ctrl.max_integrity_segments = 1;
 	return dev;
 
@@ -3168,6 +3275,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto out_disable;
 
+	if (nvme_ctrl_meta_sgl_supported(&dev->ctrl))
+		dev->ctrl.max_integrity_segments = NVME_MAX_META_SEGS;
+	else
+		dev->ctrl.max_integrity_segments = 1;
+
 	nvme_dbbuf_dma_alloc(dev);
 
 	result = nvme_setup_host_mem(dev);
@@ -3210,6 +3322,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_free_queues(dev, 0);
 out_release_iod_mempool:
 	mempool_destroy(dev->iod_mempool);
+	mempool_destroy(dev->iod_meta_mempool);
 out_release_prp_pools:
 	nvme_release_prp_pools(dev);
 out_dev_unmap:
@@ -3275,6 +3388,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_dbbuf_dma_free(dev);
 	nvme_free_queues(dev, 0);
 	mempool_destroy(dev->iod_mempool);
+	mempool_destroy(dev->iod_meta_mempool);
 	nvme_release_prp_pools(dev);
 	nvme_dev_unmap(dev);
 	nvme_uninit_ctrl(&dev->ctrl);