@@ -120,6 +120,7 @@ static void svm_range_remove_notifier(struct svm_range *prange)
 
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
+		       unsigned long offset, unsigned long npages,
 		       unsigned long *hmm_pfns, uint32_t gpuidx)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -136,7 +137,8 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
-	for (i = 0; i < prange->npages; i++) {
+	addr += offset;
+	for (i = 0; i < npages; i++) {
 		if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
 			      "leaking dma mapping\n"))
 			dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
@@ -167,6 +169,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+		  unsigned long offset, unsigned long npages,
 		  unsigned long *hmm_pfns)
 {
 	struct kfd_process *p;
@@ -187,7 +190,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 		adev = (struct amdgpu_device *)pdd->dev->kgd;
 
-		r = svm_range_dma_map_dev(adev, prange, hmm_pfns, gpuidx);
+		r = svm_range_dma_map_dev(adev, prange, offset, npages,
+					  hmm_pfns, gpuidx);
 		if (r)
 			break;
 	}
@@ -1088,11 +1092,6 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,
 	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
 
 	pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags);
-
-	pr_debug("svms 0x%p [0x%lx 0x%lx] vram %d PTE 0x%llx mapping 0x%x\n",
-		 prange->svms, prange->start, prange->last,
-		 (domain == SVM_RANGE_VRAM_DOMAIN) ? 1:0, pte_flags, mapping_flags);
-
 	return pte_flags;
 }
 
@@ -1156,7 +1155,8 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
 
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		     struct svm_range *prange, dma_addr_t *dma_addr,
+		     struct svm_range *prange, unsigned long offset,
+		     unsigned long npages, bool readonly, dma_addr_t *dma_addr,
 		     struct amdgpu_device *bo_adev, struct dma_fence **fence)
 {
 	struct amdgpu_bo_va bo_va;
@@ -1167,14 +1167,15 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r = 0;
 	int64_t i;
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-		 prange->last);
+	last_start = prange->start + offset;
+
+	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,
+		 last_start, last_start + npages - 1, readonly);
 
 	if (prange->svm_bo && prange->ttm_res)
 		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
 
-	last_start = prange->start;
-	for (i = 0; i < prange->npages; i++) {
+	for (i = offset; i < offset + npages; i++) {
 		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;
 		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN;
 		if ((prange->start + i) < prange->last &&
@@ -1183,13 +1184,21 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 		pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n",
 			 last_start, prange->start + i, last_domain ? "GPU" : "CPU");
+
 		pte_flags = svm_range_get_pte_flags(adev, prange, last_domain);
-		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
-						last_start,
+		if (readonly)
+			pte_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+		pr_debug("svms 0x%p map [0x%lx 0x%llx] vram %d PTE 0x%llx\n",
+			 prange->svms, last_start, prange->start + i,
+			 (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0,
+			 pte_flags);
+
+		r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+						NULL, last_start,
 						prange->start + i, pte_flags,
 						last_start - prange->start,
-						NULL,
-						dma_addr,
+						NULL, dma_addr,
 						&vm->last_update,
 						&table_freed);
 		if (r) {
@@ -1220,8 +1229,10 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return r;
 }
 
-static int svm_range_map_to_gpus(struct svm_range *prange,
-				 unsigned long *bitmap, bool wait)
+static int
+svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
+		      unsigned long npages, bool readonly,
+		      unsigned long *bitmap, bool wait)
 {
 	struct kfd_process_device *pdd;
 	struct amdgpu_device *bo_adev;
@@ -1257,7 +1268,8 @@ static int svm_range_map_to_gpus(struct svm_range *prange,
 		}
 
 		r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv),
-					 prange, prange->dma_addr[gpuidx],
+					 prange, offset, npages, readonly,
+					 prange->dma_addr[gpuidx],
 					 bo_adev, wait ? &fence : NULL);
 		if (r)
 			break;
@@ -1390,7 +1402,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 				      int32_t gpuidx, bool intr, bool wait)
 {
 	struct svm_validate_context ctx;
-	struct hmm_range *hmm_range;
+	unsigned long start, end, addr;
 	struct kfd_process *p;
 	void *owner;
 	int32_t idx;
@@ -1448,40 +1460,66 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 			break;
 		}
 	}
-	r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
-				       prange->start << PAGE_SHIFT,
-				       prange->npages, &hmm_range,
-				       false, true, owner);
-	if (r) {
-		pr_debug("failed %d to get svm range pages\n", r);
-		goto unreserve_out;
-	}
 
-	r = svm_range_dma_map(prange, ctx.bitmap,
-			      hmm_range->hmm_pfns);
-	if (r) {
-		pr_debug("failed %d to dma map range\n", r);
-		goto unreserve_out;
-	}
+	start = prange->start << PAGE_SHIFT;
+	end = (prange->last + 1) << PAGE_SHIFT;
+	for (addr = start; addr < end && !r; ) {
+		struct hmm_range *hmm_range;
+		struct vm_area_struct *vma;
+		unsigned long next;
+		unsigned long offset;
+		unsigned long npages;
+		bool readonly;
 
-	prange->validated_once = true;
+		vma = find_vma(mm, addr);
+		if (!vma || addr < vma->vm_start) {
+			r = -EFAULT;
+			goto unreserve_out;
+		}
+		readonly = !(vma->vm_flags & VM_WRITE);
 
-	svm_range_lock(prange);
-	if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
-		pr_debug("hmm update the range, need validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
-	if (!list_empty(&prange->child_list)) {
-		pr_debug("range split by unmap in parallel, validate again\n");
-		r = -EAGAIN;
-		goto unlock_out;
-	}
+		next = min(vma->vm_end, end);
+		npages = (next - addr) >> PAGE_SHIFT;
+		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
+					       addr, npages, &hmm_range,
+					       readonly, true, owner);
+		if (r) {
+			pr_debug("failed %d to get svm range pages\n", r);
+			goto unreserve_out;
+		}
 
-	r = svm_range_map_to_gpus(prange, ctx.bitmap, wait);
+		offset = (addr - start) >> PAGE_SHIFT;
+		r = svm_range_dma_map(prange, ctx.bitmap, offset, npages,
+				      hmm_range->hmm_pfns);
+		if (r) {
+			pr_debug("failed %d to dma map range\n", r);
+			goto unreserve_out;
+		}
+
+		svm_range_lock(prange);
+		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+			pr_debug("hmm update the range, need validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+		if (!list_empty(&prange->child_list)) {
+			pr_debug("range split by unmap in parallel, validate again\n");
+			r = -EAGAIN;
+			goto unlock_out;
+		}
+
+		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
+					  ctx.bitmap, wait);
 
 unlock_out:
-	svm_range_unlock(prange);
+		svm_range_unlock(prange);
+
+		addr = next;
+	}
+
+	if (addr == end)
+		prange->validated_once = true;
+
 
 unreserve_out:
 	svm_range_unreserve_bos(&ctx);