@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
 }
 
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+	cancel_delayed_work_sync(&p->eviction_work);
+	cancel_delayed_work_sync(&p->restore_work);
+
+	/* Indicate to other users that MM is no longer valid */
+	p->mm = NULL;
+
+	mmu_notifier_put(&p->mmu_notifier);
+}
+
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
 					struct mm_struct *mm)
 {
@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 		return;
 
 	mutex_lock(&kfd_processes_mutex);
+	/*
+	 * Do early return if table is empty.
+	 *
+	 * This could potentially happen if this function is called concurrently
+	 * by mmu_notifier and by kfd_cleanup_processes.
+	 *
+	 */
+	if (hash_empty(kfd_processes_table)) {
+		mutex_unlock(&kfd_processes_mutex);
+		return;
+	}
 	hash_del_rcu(&p->kfd_processes);
 	mutex_unlock(&kfd_processes_mutex);
 	synchronize_srcu(&kfd_processes_srcu);
 
-	cancel_delayed_work_sync(&p->eviction_work);
-	cancel_delayed_work_sync(&p->restore_work);
-
-	/* Indicate to other users that MM is no longer valid */
-	p->mm = NULL;
-
-	mmu_notifier_put(&p->mmu_notifier);
+	kfd_process_notifier_release_internal(p);
 }
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.free_notifier = kfd_process_free_notifier,
 };
 
+/*
+ * This code handles the case when driver is being unloaded before all
+ * mm_struct are released. We need to safely free the kfd_process and
+ * avoid race conditions with mmu_notifier that might try to free them.
+ *
+ */
+void kfd_cleanup_processes(void)
+{
+	struct kfd_process *p;
+	struct hlist_node *p_temp;
+	unsigned int temp;
+	HLIST_HEAD(cleanup_list);
+
+	/*
+	 * Move all remaining kfd_process from the process table to a
+	 * temp list for processing. Once done, callback from mmu_notifier
+	 * release will not see the kfd_process in the table and do early return,
+	 * avoiding double free issues.
+	 */
+	mutex_lock(&kfd_processes_mutex);
+	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+		hash_del_rcu(&p->kfd_processes);
+		synchronize_srcu(&kfd_processes_srcu);
+		hlist_add_head(&p->kfd_processes, &cleanup_list);
+	}
+	mutex_unlock(&kfd_processes_mutex);
+
+	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+		kfd_process_notifier_release_internal(p);
+
+	/*
+	 * Ensures that all outstanding free_notifier get called, triggering
+	 * the release of the kfd_process struct.
+	 */
+	mmu_notifier_synchronize();
+}
+
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
 	unsigned long offset;
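
For context, a minimal sketch of how kfd_cleanup_processes() would be wired into the driver's module-exit path; the exact body of kfd_exit() in kfd_module.c is an assumption and is not part of this diff. The point it illustrates is the ordering constraint: the process table must be drained and all outstanding free_notifier callbacks flushed before kfd_process_destroy_wq() tears down the work queues that the cancelled eviction/restore work would otherwise still target.

/*
 * Sketch only -- the real kfd_exit() contains additional teardown
 * steps that are omitted here.
 */
static void kfd_exit(void)
{
	/*
	 * Drain the process table and wait for all outstanding
	 * free_notifier callbacks via mmu_notifier_synchronize().
	 */
	kfd_cleanup_processes();

	/*
	 * Safe only after the cleanup above: no kfd_process is left
	 * that could requeue eviction/restore work on these queues.
	 */
	kfd_process_destroy_wq();

	/* ... remaining driver teardown omitted ... */
}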