@@ -23,18 +23,25 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
2323static bool tid_rb_invalidate (struct mmu_interval_notifier * mni ,
2424 const struct mmu_notifier_range * range ,
2525 unsigned long cur_seq );
26+ static bool tid_cover_invalidate (struct mmu_interval_notifier * mni ,
27+ const struct mmu_notifier_range * range ,
28+ unsigned long cur_seq );
2629static int program_rcvarray (struct hfi1_filedata * fd , struct tid_user_buf * ,
2730 struct tid_group * grp ,
2831 unsigned int start , u16 count ,
2932 u32 * tidlist , unsigned int * tididx ,
3033 unsigned int * pmapped );
31- static int unprogram_rcvarray (struct hfi1_filedata * fd , u32 tidinfo ,
32- struct tid_group * * grp );
34+ static int unprogram_rcvarray (struct hfi1_filedata * fd , u32 tidinfo );
35+ static void __clear_tid_node (struct hfi1_filedata * fd ,
36+ struct tid_rb_node * node );
3337static void clear_tid_node (struct hfi1_filedata * fd , struct tid_rb_node * node );
3438
/*
 * Interval-notifier ops whose invalidate callback is tid_rb_invalidate().
 * Referenced as &tid_mn_ops from set_rcvarray_entry(); the callback operates
 * on a struct tid_rb_node.
 * NOTE(review): the registering call is only partly visible in this excerpt --
 * confirm it is the per-node notifier insert.
 */
3539static const struct mmu_interval_notifier_ops tid_mn_ops = {
3640 .invalidate = tid_rb_invalidate ,
3741};
/*
 * Interval-notifier ops covering the whole user buffer (struct tid_user_buf)
 * while hfi1_user_exp_rcv_setup() runs; invalidations are delivered to
 * tid_cover_invalidate(). Setup inserts this notifier before pinning pages
 * and removes it again before returning, both on the success path and on the
 * error paths that run after the insert.
 *
 * NOTE(review): the insert passes tidbuf->npages * PAGE_SIZE as the range
 * length, but in the visible part of setup tidbuf comes from kzalloc() and no
 * store to ->npages appears before the insert -- confirm npages is assigned
 * by then, otherwise the registered length is zero.
 */
42+ static const struct mmu_interval_notifier_ops tid_cover_ops = {
43+ .invalidate = tid_cover_invalidate ,
44+ };
3845
3946/*
4047 * Initialize context and file private data needed for Expected
@@ -253,53 +260,65 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
253260 tididx = 0 , mapped , mapped_pages = 0 ;
254261 u32 * tidlist = NULL ;
255262 struct tid_user_buf * tidbuf ;
263+ unsigned long mmu_seq = 0 ;
256264
257265 if (!PAGE_ALIGNED (tinfo -> vaddr ))
258266 return - EINVAL ;
267+ if (tinfo -> length == 0 )
268+ return - EINVAL ;
259269
260270 tidbuf = kzalloc (sizeof (* tidbuf ), GFP_KERNEL );
261271 if (!tidbuf )
262272 return - ENOMEM ;
263273
274+ mutex_init (& tidbuf -> cover_mutex );
264275 tidbuf -> vaddr = tinfo -> vaddr ;
265276 tidbuf -> length = tinfo -> length ;
266277 tidbuf -> psets = kcalloc (uctxt -> expected_count , sizeof (* tidbuf -> psets ),
267278 GFP_KERNEL );
268279 if (!tidbuf -> psets ) {
269- kfree (tidbuf );
270- return - ENOMEM ;
280+ ret = - ENOMEM ;
281+ goto fail_release_mem ;
282+ }
283+
284+ if (fd -> use_mn ) {
285+ ret = mmu_interval_notifier_insert (
286+ & tidbuf -> notifier , current -> mm ,
287+ tidbuf -> vaddr , tidbuf -> npages * PAGE_SIZE ,
288+ & tid_cover_ops );
289+ if (ret )
290+ goto fail_release_mem ;
291+ mmu_seq = mmu_interval_read_begin (& tidbuf -> notifier );
271292 }
272293
273294 pinned = pin_rcv_pages (fd , tidbuf );
274295 if (pinned <= 0 ) {
275- kfree (tidbuf -> psets );
276- kfree (tidbuf );
277- return pinned ;
296+ ret = (pinned < 0 ) ? pinned : - ENOSPC ;
297+ goto fail_unpin ;
278298 }
279299
280300 /* Find sets of physically contiguous pages */
281301 tidbuf -> n_psets = find_phys_blocks (tidbuf , pinned );
282302
283- /*
284- * We don't need to access this under a lock since tid_used is per
285- * process and the same process cannot be in hfi1_user_exp_rcv_clear()
286- * and hfi1_user_exp_rcv_setup() at the same time.
287- */
303+ /* Reserve the number of expected tids to be used. */
288304 spin_lock (& fd -> tid_lock );
289305 if (fd -> tid_used + tidbuf -> n_psets > fd -> tid_limit )
290306 pageset_count = fd -> tid_limit - fd -> tid_used ;
291307 else
292308 pageset_count = tidbuf -> n_psets ;
309+ fd -> tid_used += pageset_count ;
293310 spin_unlock (& fd -> tid_lock );
294311
295- if (!pageset_count )
296- goto bail ;
312+ if (!pageset_count ) {
313+ ret = - ENOSPC ;
314+ goto fail_unreserve ;
315+ }
297316
298317 ngroups = pageset_count / dd -> rcv_entries .group_size ;
299318 tidlist = kcalloc (pageset_count , sizeof (* tidlist ), GFP_KERNEL );
300319 if (!tidlist ) {
301320 ret = - ENOMEM ;
302- goto nomem ;
321+ goto fail_unreserve ;
303322 }
304323
305324 tididx = 0 ;
@@ -395,43 +414,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
395414 }
396415unlock :
397416 mutex_unlock (& uctxt -> exp_mutex );
398- nomem :
399417 hfi1_cdbg (TID , "total mapped: tidpairs:%u pages:%u (%d)" , tididx ,
400418 mapped_pages , ret );
401- if (tididx ) {
402- spin_lock (& fd -> tid_lock );
403- fd -> tid_used += tididx ;
404- spin_unlock (& fd -> tid_lock );
405- tinfo -> tidcnt = tididx ;
406- tinfo -> length = mapped_pages * PAGE_SIZE ;
407-
408- if (copy_to_user (u64_to_user_ptr (tinfo -> tidlist ),
409- tidlist , sizeof (tidlist [0 ]) * tididx )) {
410- /*
411- * On failure to copy to the user level, we need to undo
412- * everything done so far so we don't leak resources.
413- */
414- tinfo -> tidlist = (unsigned long )& tidlist ;
415- hfi1_user_exp_rcv_clear (fd , tinfo );
416- tinfo -> tidlist = 0 ;
417- ret = - EFAULT ;
418- goto bail ;
419+
420+ /* fail if nothing was programmed, set error if none provided */
421+ if (tididx == 0 ) {
422+ if (ret >= 0 )
423+ ret = - ENOSPC ;
424+ goto fail_unreserve ;
425+ }
426+
427+ /* adjust reserved tid_used to actual count */
428+ spin_lock (& fd -> tid_lock );
429+ fd -> tid_used -= pageset_count - tididx ;
430+ spin_unlock (& fd -> tid_lock );
431+
432+ /* unpin all pages not covered by a TID */
433+ unpin_rcv_pages (fd , tidbuf , NULL , mapped_pages , pinned - mapped_pages ,
434+ false);
435+
436+ if (fd -> use_mn ) {
437+ /* check for an invalidate during setup */
438+ bool fail = false;
439+
440+ mutex_lock (& tidbuf -> cover_mutex );
441+ fail = mmu_interval_read_retry (& tidbuf -> notifier , mmu_seq );
442+ mutex_unlock (& tidbuf -> cover_mutex );
443+
444+ if (fail ) {
445+ ret = - EBUSY ;
446+ goto fail_unprogram ;
419447 }
420448 }
421449
422- /*
423- * If not everything was mapped (due to insufficient RcvArray entries,
424- * for example), unpin all unmapped pages so we can pin them nex time.
425- */
426- if (mapped_pages != pinned )
427- unpin_rcv_pages (fd , tidbuf , NULL , mapped_pages ,
428- (pinned - mapped_pages ), false);
429- bail :
450+ tinfo -> tidcnt = tididx ;
451+ tinfo -> length = mapped_pages * PAGE_SIZE ;
452+
453+ if (copy_to_user (u64_to_user_ptr (tinfo -> tidlist ),
454+ tidlist , sizeof (tidlist [0 ]) * tididx )) {
455+ ret = - EFAULT ;
456+ goto fail_unprogram ;
457+ }
458+
459+ if (fd -> use_mn )
460+ mmu_interval_notifier_remove (& tidbuf -> notifier );
461+ kfree (tidbuf -> pages );
430462 kfree (tidbuf -> psets );
463+ kfree (tidbuf );
431464 kfree (tidlist );
465+ return 0 ;
466+
467+ fail_unprogram :
468+ /* unprogram, unmap, and unpin all allocated TIDs */
469+ tinfo -> tidlist = (unsigned long )tidlist ;
470+ hfi1_user_exp_rcv_clear (fd , tinfo );
471+ tinfo -> tidlist = 0 ;
472+ pinned = 0 ; /* nothing left to unpin */
473+ pageset_count = 0 ; /* nothing left reserved */
474+ fail_unreserve :
475+ spin_lock (& fd -> tid_lock );
476+ fd -> tid_used -= pageset_count ;
477+ spin_unlock (& fd -> tid_lock );
478+ fail_unpin :
479+ if (fd -> use_mn )
480+ mmu_interval_notifier_remove (& tidbuf -> notifier );
481+ if (pinned > 0 )
482+ unpin_rcv_pages (fd , tidbuf , NULL , 0 , pinned , false);
483+ fail_release_mem :
432484 kfree (tidbuf -> pages );
485+ kfree (tidbuf -> psets );
433486 kfree (tidbuf );
434- return ret > 0 ? 0 : ret ;
487+ kfree (tidlist );
488+ return ret ;
435489}
436490
437491int hfi1_user_exp_rcv_clear (struct hfi1_filedata * fd ,
@@ -452,7 +506,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
452506
453507 mutex_lock (& uctxt -> exp_mutex );
454508 for (tididx = 0 ; tididx < tinfo -> tidcnt ; tididx ++ ) {
455- ret = unprogram_rcvarray (fd , tidinfo [tididx ], NULL );
509+ ret = unprogram_rcvarray (fd , tidinfo [tididx ]);
456510 if (ret ) {
457511 hfi1_cdbg (TID , "Failed to unprogram rcv array %d" ,
458512 ret );
@@ -706,6 +760,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
706760 }
707761
708762 node -> fdata = fd ;
763+ mutex_init (& node -> invalidate_mutex );
709764 node -> phys = page_to_phys (pages [0 ]);
710765 node -> npages = npages ;
711766 node -> rcventry = rcventry ;
@@ -721,11 +776,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
721776 & tid_mn_ops );
722777 if (ret )
723778 goto out_unmap ;
724- /*
725- * FIXME: This is in the wrong order, the notifier should be
726- * established before the pages are pinned by pin_rcv_pages.
727- */
728- mmu_interval_read_begin (& node -> notifier );
729779 }
730780 fd -> entry_to_rb [node -> rcventry - uctxt -> expected_base ] = node ;
731781
@@ -745,8 +795,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
745795 return - EFAULT ;
746796}
747797
748- static int unprogram_rcvarray (struct hfi1_filedata * fd , u32 tidinfo ,
749- struct tid_group * * grp )
798+ static int unprogram_rcvarray (struct hfi1_filedata * fd , u32 tidinfo )
750799{
751800 struct hfi1_ctxtdata * uctxt = fd -> uctxt ;
752801 struct hfi1_devdata * dd = uctxt -> dd ;
@@ -769,33 +818,41 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
769818 if (!node || node -> rcventry != (uctxt -> expected_base + rcventry ))
770819 return - EBADF ;
771820
772- if (grp )
773- * grp = node -> grp ;
774-
775821 if (fd -> use_mn )
776822 mmu_interval_notifier_remove (& node -> notifier );
777823 cacheless_tid_rb_remove (fd , node );
778824
779825 return 0 ;
780826}
781827
782- static void clear_tid_node (struct hfi1_filedata * fd , struct tid_rb_node * node )
/*
 * __clear_tid_node - invalidate one node's RcvArray entry and unpin its pages.
 * @fd:   file data owning the node; supplies the context (uctxt) and device (dd)
 * @node: TID node to tear down
 *
 * Runs under node->invalidate_mutex and is guarded by node->freed: the first
 * call marks the node freed, emits the unreg trace, writes the entry with
 * PT_INVALID_FLUSH and unpins node->npages pages; any later call on the same
 * node returns without doing that work again.
 *
 * Called from clear_tid_node() and from tid_rb_invalidate(). Group
 * bookkeeping (node->grp) is not touched here; clear_tid_node() does that
 * after this returns.
 */
828+ static void __clear_tid_node (struct hfi1_filedata * fd , struct tid_rb_node * node )
783829{
784830 struct hfi1_ctxtdata * uctxt = fd -> uctxt ;
785831 struct hfi1_devdata * dd = uctxt -> dd ;
786832
/* Both callers funnel through here; the mutex plus the freed flag make the teardown below run once per node. */
833+ mutex_lock (& node -> invalidate_mutex );
834+ if (node -> freed )
835+ goto done ;
836+ node -> freed = true;
837+
787838 trace_hfi1_exp_tid_unreg (uctxt -> ctxt , fd -> subctxt , node -> rcventry ,
788839 node -> npages ,
789840 node -> notifier .interval_tree .start , node -> phys ,
790841 node -> dma_addr );
791842
792- /*
793- * Make sure device has seen the write before we unpin the
794- * pages.
795- */
843+ /* Make sure device has seen the write before pages are unpinned */
796844 hfi1_put_tid (dd , node -> rcventry , PT_INVALID_FLUSH , 0 , 0 );
797845
/* Unpin every page of this node, starting at index 0. */
798846 unpin_rcv_pages (fd , NULL , node , 0 , node -> npages , true);
847+ done :
848+ mutex_unlock (& node -> invalidate_mutex );
849+ }
850+
851+ static void clear_tid_node (struct hfi1_filedata * fd , struct tid_rb_node * node )
852+ {
853+ struct hfi1_ctxtdata * uctxt = fd -> uctxt ;
854+
855+ __clear_tid_node (fd , node );
799856
800857 node -> grp -> used -- ;
801858 node -> grp -> map &= ~(1 << (node -> rcventry - node -> grp -> base ));
@@ -854,10 +911,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
854911 if (node -> freed )
855912 return true;
856913
914+ /* take action only if unmapping */
915+ if (range -> event != MMU_NOTIFY_UNMAP )
916+ return true;
917+
857918 trace_hfi1_exp_tid_inval (uctxt -> ctxt , fdata -> subctxt ,
858919 node -> notifier .interval_tree .start ,
859920 node -> rcventry , node -> npages , node -> dma_addr );
860- node -> freed = true;
921+
922+ /* clear the hardware rcvarray entry */
923+ __clear_tid_node (fdata , node );
861924
862925 spin_lock (& fdata -> invalid_lock );
863926 if (fdata -> invalid_tid_idx < uctxt -> expected_count ) {
@@ -887,6 +950,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
887950 return true;
888951}
889952
/*
 * tid_cover_invalidate - invalidate callback for the setup-time cover notifier.
 * @mni:     notifier embedded in a struct tid_user_buf (recovered via container_of)
 * @range:   range being invalidated; only range->event is examined
 * @cur_seq: sequence value to record with mmu_interval_set_seq()
 *
 * For MMU_NOTIFY_UNMAP events the new sequence is stored while holding
 * tidbuf->cover_mutex. hfi1_user_exp_rcv_setup() takes the same mutex around
 * mmu_interval_read_retry() and fails the setup with -EBUSY when that check
 * reports a change.
 *
 * Return: always true.
 */
953+ static bool tid_cover_invalidate (struct mmu_interval_notifier * mni ,
954+ const struct mmu_notifier_range * range ,
955+ unsigned long cur_seq )
956+ {
957+ struct tid_user_buf * tidbuf =
958+ container_of (mni , struct tid_user_buf , notifier );
959+
960+ /* take action only if unmapping; other event types leave the recorded sequence untouched */
961+ if (range -> event == MMU_NOTIFY_UNMAP ) {
962+ mutex_lock (& tidbuf -> cover_mutex );
963+ mmu_interval_set_seq (mni , cur_seq );
964+ mutex_unlock (& tidbuf -> cover_mutex );
965+ }
966+
967+ return true;
968+ }
969+
890970static void cacheless_tid_rb_remove (struct hfi1_filedata * fdata ,
891971 struct tid_rb_node * tnode )
892972{
0 commit comments