19
19
#include <linux/pr.h>
20
20
#include <linux/ptrace.h>
21
21
#include <linux/nvme_ioctl.h>
22
- #include <linux/t10-pi.h>
23
22
#include <linux/pm_qos.h>
24
23
#include <asm/unaligned.h>
25
24
@@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
204
203
nvme_put_ctrl (ctrl );
205
204
}
206
205
207
- static inline bool nvme_ns_has_pi (struct nvme_ns * ns )
208
- {
209
- return ns -> pi_type && ns -> ms == sizeof (struct t10_pi_tuple );
210
- }
211
-
212
206
static blk_status_t nvme_error_status (u16 status )
213
207
{
214
208
switch (status & 0x7ff ) {
@@ -552,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
552
546
553
547
ret = nvme_get_stream_params (ctrl , & s , NVME_NSID_ALL );
554
548
if (ret )
555
- return ret ;
549
+ goto out_disable_stream ;
556
550
557
551
ctrl -> nssa = le16_to_cpu (s .nssa );
558
552
if (ctrl -> nssa < BLK_MAX_WRITE_HINTS - 1 ) {
559
553
dev_info (ctrl -> device , "too few streams (%u) available\n" ,
560
554
ctrl -> nssa );
561
- nvme_disable_streams (ctrl );
562
- return 0 ;
555
+ goto out_disable_stream ;
563
556
}
564
557
565
558
ctrl -> nr_streams = min_t (unsigned , ctrl -> nssa , BLK_MAX_WRITE_HINTS - 1 );
566
559
dev_info (ctrl -> device , "Using %u streams\n" , ctrl -> nr_streams );
567
560
return 0 ;
561
+
562
+ out_disable_stream :
563
+ nvme_disable_streams (ctrl );
564
+ return ret ;
568
565
}
569
566
570
567
/*
@@ -1302,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
1302
1299
meta_len = (io .nblocks + 1 ) * ns -> ms ;
1303
1300
metadata = nvme_to_user_ptr (io .metadata );
1304
1301
1305
- if (ns -> ext ) {
1302
+ if (ns -> features & NVME_NS_EXT_LBAS ) {
1306
1303
length += meta_len ;
1307
1304
meta_len = 0 ;
1308
1305
} else if (meta_len ) {
@@ -1696,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1696
1693
}
1697
1694
1698
1695
#ifdef CONFIG_BLK_DEV_INTEGRITY
1699
- static void nvme_init_integrity (struct gendisk * disk , u16 ms , u8 pi_type )
1696
+ static void nvme_init_integrity (struct gendisk * disk , u16 ms , u8 pi_type ,
1697
+ u32 max_integrity_segments )
1700
1698
{
1701
1699
struct blk_integrity integrity ;
1702
1700
@@ -1719,10 +1717,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
1719
1717
}
1720
1718
integrity .tuple_size = ms ;
1721
1719
blk_integrity_register (disk , & integrity );
1722
- blk_queue_max_integrity_segments (disk -> queue , 1 );
1720
+ blk_queue_max_integrity_segments (disk -> queue , max_integrity_segments );
1723
1721
}
1724
1722
#else
1725
- static void nvme_init_integrity (struct gendisk * disk , u16 ms , u8 pi_type )
1723
+ static void nvme_init_integrity (struct gendisk * disk , u16 ms , u8 pi_type ,
1724
+ u32 max_integrity_segments )
1726
1725
{
1727
1726
}
1728
1727
#endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1842,7 +1841,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
1842
1841
{
1843
1842
sector_t capacity = nvme_lba_to_sect (ns , le64_to_cpu (id -> nsze ));
1844
1843
unsigned short bs = 1 << ns -> lba_shift ;
1845
- u32 atomic_bs , phys_bs , io_opt ;
1844
+ u32 atomic_bs , phys_bs , io_opt = 0 ;
1846
1845
1847
1846
if (ns -> lba_shift > PAGE_SHIFT ) {
1848
1847
/* unsupported block size, set capacity to 0 later */
@@ -1851,7 +1850,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
1851
1850
blk_mq_freeze_queue (disk -> queue );
1852
1851
blk_integrity_unregister (disk );
1853
1852
1854
- atomic_bs = phys_bs = io_opt = bs ;
1853
+ atomic_bs = phys_bs = bs ;
1855
1854
nvme_setup_streams_ns (ns -> ctrl , ns , & phys_bs , & io_opt );
1856
1855
if (id -> nabo == 0 ) {
1857
1856
/*
@@ -1882,13 +1881,28 @@ static void nvme_update_disk_info(struct gendisk *disk,
1882
1881
blk_queue_io_min (disk -> queue , phys_bs );
1883
1882
blk_queue_io_opt (disk -> queue , io_opt );
1884
1883
1885
- if ( ns -> ms && ! ns -> ext &&
1886
- ( ns -> ctrl -> ops -> flags & NVME_F_METADATA_SUPPORTED ))
1887
- nvme_init_integrity ( disk , ns -> ms , ns -> pi_type );
1888
- if (( ns -> ms && ! nvme_ns_has_pi ( ns ) && ! blk_get_integrity ( disk )) ||
1889
- ns -> lba_shift > PAGE_SHIFT )
1884
+ /*
1885
+ * The block layer can't support LBA sizes larger than the page size
1886
+ * yet, so catch this early and don't allow block I/O.
1887
+ */
1888
+ if ( ns -> lba_shift > PAGE_SHIFT )
1890
1889
capacity = 0 ;
1891
1890
1891
+ /*
1892
+ * Register a metadata profile for PI, or the plain non-integrity NVMe
1893
+ * metadata masquerading as Type 0 if supported, otherwise reject block
1894
+ * I/O to namespaces with metadata except when the namespace supports
1895
+ * PI, as it can strip/insert in that case.
1896
+ */
1897
+ if (ns -> ms ) {
1898
+ if (IS_ENABLED (CONFIG_BLK_DEV_INTEGRITY ) &&
1899
+ (ns -> features & NVME_NS_METADATA_SUPPORTED ))
1900
+ nvme_init_integrity (disk , ns -> ms , ns -> pi_type ,
1901
+ ns -> ctrl -> max_integrity_segments );
1902
+ else if (!nvme_ns_has_pi (ns ))
1903
+ capacity = 0 ;
1904
+ }
1905
+
1892
1906
set_capacity_revalidate_and_notify (disk , capacity , false);
1893
1907
1894
1908
nvme_config_discard (disk , ns );
@@ -1902,9 +1916,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
1902
1916
blk_mq_unfreeze_queue (disk -> queue );
1903
1917
}
1904
1918
1905
- static void __nvme_revalidate_disk (struct gendisk * disk , struct nvme_id_ns * id )
1919
+ static int __nvme_revalidate_disk (struct gendisk * disk , struct nvme_id_ns * id )
1906
1920
{
1907
1921
struct nvme_ns * ns = disk -> private_data ;
1922
+ struct nvme_ctrl * ctrl = ns -> ctrl ;
1908
1923
u32 iob ;
1909
1924
1910
1925
/*
@@ -1915,20 +1930,43 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1915
1930
if (ns -> lba_shift == 0 )
1916
1931
ns -> lba_shift = 9 ;
1917
1932
1918
- if ((ns -> ctrl -> quirks & NVME_QUIRK_STRIPE_SIZE ) &&
1919
- is_power_of_2 (ns -> ctrl -> max_hw_sectors ))
1920
- iob = ns -> ctrl -> max_hw_sectors ;
1933
+ if ((ctrl -> quirks & NVME_QUIRK_STRIPE_SIZE ) &&
1934
+ is_power_of_2 (ctrl -> max_hw_sectors ))
1935
+ iob = ctrl -> max_hw_sectors ;
1921
1936
else
1922
1937
iob = nvme_lba_to_sect (ns , le16_to_cpu (id -> noiob ));
1923
1938
1939
+ ns -> features = 0 ;
1924
1940
ns -> ms = le16_to_cpu (id -> lbaf [id -> flbas & NVME_NS_FLBAS_LBA_MASK ].ms );
1925
- ns -> ext = ns -> ms && (id -> flbas & NVME_NS_FLBAS_META_EXT );
1926
1941
/* the PI implementation requires metadata equal t10 pi tuple size */
1927
1942
if (ns -> ms == sizeof (struct t10_pi_tuple ))
1928
1943
ns -> pi_type = id -> dps & NVME_NS_DPS_PI_MASK ;
1929
1944
else
1930
1945
ns -> pi_type = 0 ;
1931
1946
1947
+ if (ns -> ms ) {
1948
+ /*
1949
+ * For PCIe only the separate metadata pointer is supported,
1950
+ * as the block layer supplies metadata in a separate bio_vec
1951
+ * chain. For Fabrics, only metadata as part of extended data
1952
+ * LBA is supported on the wire per the Fabrics specification,
1953
+ * but the HBA/HCA will do the remapping from the separate
1954
+ * metadata buffers for us.
1955
+ */
1956
+ if (id -> flbas & NVME_NS_FLBAS_META_EXT ) {
1957
+ ns -> features |= NVME_NS_EXT_LBAS ;
1958
+ if ((ctrl -> ops -> flags & NVME_F_FABRICS ) &&
1959
+ (ctrl -> ops -> flags & NVME_F_METADATA_SUPPORTED ) &&
1960
+ ctrl -> max_integrity_segments )
1961
+ ns -> features |= NVME_NS_METADATA_SUPPORTED ;
1962
+ } else {
1963
+ if (WARN_ON_ONCE (ctrl -> ops -> flags & NVME_F_FABRICS ))
1964
+ return - EINVAL ;
1965
+ if (ctrl -> ops -> flags & NVME_F_METADATA_SUPPORTED )
1966
+ ns -> features |= NVME_NS_METADATA_SUPPORTED ;
1967
+ }
1968
+ }
1969
+
1932
1970
if (iob )
1933
1971
blk_queue_chunk_sectors (ns -> queue , rounddown_pow_of_two (iob ));
1934
1972
nvme_update_disk_info (disk , ns , id );
@@ -1939,6 +1977,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
1939
1977
revalidate_disk (ns -> head -> disk );
1940
1978
}
1941
1979
#endif
1980
+ return 0 ;
1942
1981
}
1943
1982
1944
1983
static int nvme_revalidate_disk (struct gendisk * disk )
@@ -1974,7 +2013,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
1974
2013
goto free_id ;
1975
2014
}
1976
2015
1977
- __nvme_revalidate_disk (disk , id );
2016
+ ret = __nvme_revalidate_disk (disk , id );
1978
2017
free_id :
1979
2018
kfree (id );
1980
2019
out :
@@ -2283,6 +2322,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
2283
2322
blk_queue_max_segments (q , min_t (u32 , max_segments , USHRT_MAX ));
2284
2323
}
2285
2324
blk_queue_virt_boundary (q , ctrl -> page_size - 1 );
2325
+ blk_queue_dma_alignment (q , 7 );
2286
2326
if (ctrl -> vwc & NVME_CTRL_VWC_PRESENT )
2287
2327
vwc = true;
2288
2328
blk_queue_write_cache (q , vwc , vwc );
@@ -3628,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
3628
3668
memcpy (disk -> disk_name , disk_name , DISK_NAME_LEN );
3629
3669
ns -> disk = disk ;
3630
3670
3631
- __nvme_revalidate_disk (disk , id );
3671
+ if (__nvme_revalidate_disk (disk , id ))
3672
+ goto out_free_disk ;
3632
3673
3633
3674
if ((ctrl -> quirks & NVME_QUIRK_LIGHTNVM ) && id -> vs [0 ] == 0x1 ) {
3634
3675
ret = nvme_nvm_register (ns , disk_name , node );
@@ -3655,6 +3696,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
3655
3696
/* prevent double queue cleanup */
3656
3697
ns -> disk -> queue = NULL ;
3657
3698
put_disk (ns -> disk );
3699
+ out_free_disk :
3700
+ del_gendisk (ns -> disk );
3658
3701
out_unlink_ns :
3659
3702
mutex_lock (& ctrl -> subsys -> lock );
3660
3703
list_del_rcu (& ns -> siblings );
0 commit comments