@@ -1002,6 +1002,7 @@ struct wait_page_queue {
1002
1002
1003
1003
static int wake_page_function (wait_queue_entry_t * wait , unsigned mode , int sync , void * arg )
1004
1004
{
1005
+ int ret ;
1005
1006
struct wait_page_key * key = arg ;
1006
1007
struct wait_page_queue * wait_page
1007
1008
= container_of (wait , struct wait_page_queue , wait );
@@ -1014,17 +1015,40 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
1014
1015
return 0 ;
1015
1016
1016
1017
/*
1017
- * Stop walking if it's locked.
1018
- * Is this safe if put_and_wait_on_page_locked() is in use?
1019
- * Yes: the waker must hold a reference to this page, and if PG_locked
1020
- * has now already been set by another task, that task must also hold
1021
- * a reference to the *same usage* of this page; so there is no need
1022
- * to walk on to wake even the put_and_wait_on_page_locked() callers.
1018
+ * If it's an exclusive wait, we get the bit for it, and
1019
+ * stop walking if we can't.
1020
+ *
1021
+ * If it's a non-exclusive wait, then the fact that this
1022
+ * wake function was called means that the bit already
1023
+ * was cleared, and we don't care if somebody then
1024
+ * re-took it.
1023
1025
*/
1024
- if (test_bit (key -> bit_nr , & key -> page -> flags ))
1025
- return -1 ;
1026
+ ret = 0 ;
1027
+ if (wait -> flags & WQ_FLAG_EXCLUSIVE ) {
1028
+ if (test_and_set_bit (key -> bit_nr , & key -> page -> flags ))
1029
+ return -1 ;
1030
+ ret = 1 ;
1031
+ }
1032
+ wait -> flags |= WQ_FLAG_WOKEN ;
1026
1033
1027
- return autoremove_wake_function (wait , mode , sync , key );
1034
+ wake_up_state (wait -> private , mode );
1035
+
1036
+ /*
1037
+ * Ok, we have successfully done what we're waiting for,
1038
+ * and we can unconditionally remove the wait entry.
1039
+ *
1040
+ * Note that this has to be the absolute last thing we do,
1041
+ * since after list_del_init(&wait->entry) the wait entry
1042
+ * might be de-allocated and the process might even have
1043
+ * exited.
1044
+ *
1045
+ * We _really_ should have a "list_del_init_careful()" to
1046
+ * properly pair with the unlocked "list_empty_careful()"
1047
+ * in finish_wait().
1048
+ */
1049
+ smp_mb ();
1050
+ list_del_init (& wait -> entry );
1051
+ return ret ;
1028
1052
}
1029
1053
1030
1054
static void wake_up_page_bit (struct page * page , int bit_nr )
@@ -1103,16 +1127,31 @@ enum behavior {
1103
1127
*/
1104
1128
};
1105
1129
1130
+ /*
1131
+ * Attempt to check (or get) the page bit, and mark the
1132
+ * waiter woken if successful.
1133
+ */
1134
+ static inline bool trylock_page_bit_common (struct page * page , int bit_nr ,
1135
+ struct wait_queue_entry * wait )
1136
+ {
1137
+ if (wait -> flags & WQ_FLAG_EXCLUSIVE ) {
1138
+ if (test_and_set_bit (bit_nr , & page -> flags ))
1139
+ return false;
1140
+ } else if (test_bit (bit_nr , & page -> flags ))
1141
+ return false;
1142
+
1143
+ wait -> flags |= WQ_FLAG_WOKEN ;
1144
+ return true;
1145
+ }
1146
+
1106
1147
static inline int wait_on_page_bit_common (wait_queue_head_t * q ,
1107
1148
struct page * page , int bit_nr , int state , enum behavior behavior )
1108
1149
{
1109
1150
struct wait_page_queue wait_page ;
1110
1151
wait_queue_entry_t * wait = & wait_page .wait ;
1111
- bool bit_is_set ;
1112
1152
bool thrashing = false;
1113
1153
bool delayacct = false;
1114
1154
unsigned long pflags ;
1115
- int ret = 0 ;
1116
1155
1117
1156
if (bit_nr == PG_locked &&
1118
1157
!PageUptodate (page ) && PageWorkingset (page )) {
@@ -1130,48 +1169,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1130
1169
wait_page .page = page ;
1131
1170
wait_page .bit_nr = bit_nr ;
1132
1171
1133
- for (;;) {
1134
- spin_lock_irq (& q -> lock );
1172
+ /*
1173
+ * Do one last check whether we can get the
1174
+ * page bit synchronously.
1175
+ *
1176
+ * Do the SetPageWaiters() marking before that
1177
+ * to let any waker we _just_ missed know they
1178
+ * need to wake us up (otherwise they'll never
1179
+ * even go to the slow case that looks at the
1180
+ * page queue), and add ourselves to the wait
1181
+ * queue if we need to sleep.
1182
+ *
1183
+ * This part needs to be done under the queue
1184
+ * lock to avoid races.
1185
+ */
1186
+ spin_lock_irq (& q -> lock );
1187
+ SetPageWaiters (page );
1188
+ if (!trylock_page_bit_common (page , bit_nr , wait ))
1189
+ __add_wait_queue_entry_tail (q , wait );
1190
+ spin_unlock_irq (& q -> lock );
1135
1191
1136
- if (likely (list_empty (& wait -> entry ))) {
1137
- __add_wait_queue_entry_tail (q , wait );
1138
- SetPageWaiters (page );
1139
- }
1192
+ /*
1193
+ * From now on, all the logic will be based on
1194
+ * the WQ_FLAG_WOKEN flag, and the and the page
1195
+ * bit testing (and setting) will be - or has
1196
+ * already been - done by the wake function.
1197
+ *
1198
+ * We can drop our reference to the page.
1199
+ */
1200
+ if (behavior == DROP )
1201
+ put_page (page );
1140
1202
1203
+ for (;;) {
1141
1204
set_current_state (state );
1142
1205
1143
- spin_unlock_irq (& q -> lock );
1144
-
1145
- bit_is_set = test_bit (bit_nr , & page -> flags );
1146
- if (behavior == DROP )
1147
- put_page (page );
1148
-
1149
- if (likely (bit_is_set ))
1150
- io_schedule ();
1151
-
1152
- if (behavior == EXCLUSIVE ) {
1153
- if (!test_and_set_bit_lock (bit_nr , & page -> flags ))
1154
- break ;
1155
- } else if (behavior == SHARED ) {
1156
- if (!test_bit (bit_nr , & page -> flags ))
1157
- break ;
1158
- }
1159
-
1160
- if (signal_pending_state (state , current )) {
1161
- ret = - EINTR ;
1206
+ if (signal_pending_state (state , current ))
1162
1207
break ;
1163
- }
1164
1208
1165
- if (behavior == DROP ) {
1166
- /*
1167
- * We can no longer safely access page->flags:
1168
- * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
1169
- * there is a risk of waiting forever on a page reused
1170
- * for something that keeps it locked indefinitely.
1171
- * But best check for -EINTR above before breaking.
1172
- */
1209
+ if (wait -> flags & WQ_FLAG_WOKEN )
1173
1210
break ;
1174
- }
1211
+
1212
+ io_schedule ();
1175
1213
}
1176
1214
1177
1215
finish_wait (q , wait );
@@ -1190,7 +1228,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
1190
1228
* bother with signals either.
1191
1229
*/
1192
1230
1193
- return ret ;
1231
+ return wait -> flags & WQ_FLAG_WOKEN ? 0 : - EINTR ;
1194
1232
}
1195
1233
1196
1234
void wait_on_page_bit (struct page * page , int bit_nr )
0 commit comments