@@ -58,6 +58,7 @@ struct gfs2_glock_iter {
5858typedef void (* glock_examiner ) (struct gfs2_glock * gl );
5959
6060static void do_xmote (struct gfs2_glock * gl , struct gfs2_holder * gh , unsigned int target );
61+ static void __gfs2_glock_dq (struct gfs2_holder * gh );
6162
6263static struct dentry * gfs2_root ;
6364static struct workqueue_struct * glock_workqueue ;
@@ -197,6 +198,12 @@ static int demote_ok(const struct gfs2_glock *gl)
197198
198199 if (gl -> gl_state == LM_ST_UNLOCKED )
199200 return 0 ;
201+ /*
202+ * Note that demote_ok is used for the lru process of disposing of
203+ * glocks. For this purpose, we don't care if the glock's holders
204+ * have the HIF_MAY_DEMOTE flag set or not. If someone is using
205+ * them, don't demote.
206+ */
200207 if (!list_empty (& gl -> gl_holders ))
201208 return 0 ;
202209 if (glops -> go_demote_ok )
@@ -379,7 +386,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
379386 struct gfs2_holder * gh , * tmp ;
380387
381388 list_for_each_entry_safe (gh , tmp , & gl -> gl_holders , gh_list ) {
382- if (test_bit (HIF_HOLDER , & gh -> gh_iflags ))
389+ if (! test_bit (HIF_WAIT , & gh -> gh_iflags ))
383390 continue ;
384391 if (ret & LM_OUT_ERROR )
385392 gh -> gh_error = - EIO ;
@@ -393,6 +400,40 @@ static void do_error(struct gfs2_glock *gl, const int ret)
393400 }
394401}
395402
403+ /**
404+ * demote_incompat_holders - demote incompatible demoteable holders
405+ * @gl: the glock we want to promote
406+ * @new_gh: the new holder to be promoted
407+ */
408+ static void demote_incompat_holders (struct gfs2_glock * gl ,
409+ struct gfs2_holder * new_gh )
410+ {
411+ struct gfs2_holder * gh ;
412+
413+ /*
414+ * Demote incompatible holders before we make ourselves eligible.
415+ * (This holder may or may not allow auto-demoting, but we don't want
416+ * to demote the new holder before it's even granted.)
417+ */
418+ list_for_each_entry (gh , & gl -> gl_holders , gh_list ) {
419+ /*
420+ * Since holders are at the front of the list, we stop when we
421+ * find the first non-holder.
422+ */
423+ if (!test_bit (HIF_HOLDER , & gh -> gh_iflags ))
424+ return ;
425+ if (test_bit (HIF_MAY_DEMOTE , & gh -> gh_iflags ) &&
426+ !may_grant (gl , new_gh , gh )) {
427+ /*
428+ * We should not recurse into do_promote because
429+ * __gfs2_glock_dq only calls handle_callback,
430+ * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
431+ */
432+ __gfs2_glock_dq (gh );
433+ }
434+ }
435+ }
436+
396437/**
397438 * find_first_holder - find the first "holder" gh
398439 * @gl: the glock
@@ -411,6 +452,26 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
411452 return NULL ;
412453}
413454
455+ /**
456+ * find_first_strong_holder - find the first non-demoteable holder
457+ * @gl: the glock
458+ *
459+ * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
460+ */
461+ static inline struct gfs2_holder *
462+ find_first_strong_holder (struct gfs2_glock * gl )
463+ {
464+ struct gfs2_holder * gh ;
465+
466+ list_for_each_entry (gh , & gl -> gl_holders , gh_list ) {
467+ if (!test_bit (HIF_HOLDER , & gh -> gh_iflags ))
468+ return NULL ;
469+ if (!test_bit (HIF_MAY_DEMOTE , & gh -> gh_iflags ))
470+ return gh ;
471+ }
472+ return NULL ;
473+ }
474+
414475/**
415476 * do_promote - promote as many requests as possible on the current queue
416477 * @gl: The glock
@@ -425,14 +486,20 @@ __acquires(&gl->gl_lockref.lock)
425486{
426487 const struct gfs2_glock_operations * glops = gl -> gl_ops ;
427488 struct gfs2_holder * gh , * tmp , * first_gh ;
489+ bool incompat_holders_demoted = false;
428490 int ret ;
429491
430492restart :
431- first_gh = find_first_holder (gl );
493+ first_gh = find_first_strong_holder (gl );
432494 list_for_each_entry_safe (gh , tmp , & gl -> gl_holders , gh_list ) {
433- if (test_bit (HIF_HOLDER , & gh -> gh_iflags ))
495+ if (! test_bit (HIF_WAIT , & gh -> gh_iflags ))
434496 continue ;
435497 if (may_grant (gl , first_gh , gh )) {
498+ if (!incompat_holders_demoted ) {
499+ demote_incompat_holders (gl , first_gh );
500+ incompat_holders_demoted = true;
501+ first_gh = gh ;
502+ }
436503 if (gh -> gh_list .prev == & gl -> gl_holders &&
437504 glops -> go_lock ) {
438505 spin_unlock (& gl -> gl_lockref .lock );
@@ -458,6 +525,11 @@ __acquires(&gl->gl_lockref.lock)
458525 gfs2_holder_wake (gh );
459526 continue ;
460527 }
528+ /*
529+ * If we get here, it means we may not grant this holder for
530+ * some reason. If this holder is the head of the list, it
531+ * means we have a blocked holder at the head, so return 1.
532+ */
461533 if (gh -> gh_list .prev == & gl -> gl_holders )
462534 return 1 ;
463535 do_error (gl , 0 );
@@ -1372,7 +1444,7 @@ __acquires(&gl->gl_lockref.lock)
13721444 if (test_bit (GLF_LOCK , & gl -> gl_flags )) {
13731445 struct gfs2_holder * first_gh ;
13741446
1375- first_gh = find_first_holder (gl );
1447+ first_gh = find_first_strong_holder (gl );
13761448 try_futile = !may_grant (gl , first_gh , gh );
13771449 }
13781450 if (test_bit (GLF_INVALIDATE_IN_PROGRESS , & gl -> gl_flags ))
@@ -1381,7 +1453,8 @@ __acquires(&gl->gl_lockref.lock)
13811453
13821454 list_for_each_entry (gh2 , & gl -> gl_holders , gh_list ) {
13831455 if (unlikely (gh2 -> gh_owner_pid == gh -> gh_owner_pid &&
1384- (gh -> gh_gl -> gl_ops -> go_type != LM_TYPE_FLOCK )))
1456+ (gh -> gh_gl -> gl_ops -> go_type != LM_TYPE_FLOCK ) &&
1457+ !test_bit (HIF_MAY_DEMOTE , & gh2 -> gh_iflags )))
13851458 goto trap_recursive ;
13861459 if (try_futile &&
13871460 !(gh2 -> gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB ))) {
@@ -1477,51 +1550,83 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
14771550 return test_bit (HIF_WAIT , & gh -> gh_iflags ) ? 0 : 1 ;
14781551}
14791552
1480- /**
1481- * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1482- * @gh: the glock holder
1483- *
1484- */
1553+ static inline bool needs_demote ( struct gfs2_glock * gl )
1554+ {
1555+ return ( test_bit ( GLF_DEMOTE , & gl -> gl_flags ) ||
1556+ test_bit ( GLF_PENDING_DEMOTE , & gl -> gl_flags ));
1557+ }
14851558
1486- void gfs2_glock_dq (struct gfs2_holder * gh )
1559+ static void __gfs2_glock_dq (struct gfs2_holder * gh )
14871560{
14881561 struct gfs2_glock * gl = gh -> gh_gl ;
14891562 struct gfs2_sbd * sdp = gl -> gl_name .ln_sbd ;
14901563 unsigned delay = 0 ;
14911564 int fast_path = 0 ;
14921565
1493- spin_lock (& gl -> gl_lockref .lock );
14941566 /*
1495- * If we're in the process of file system withdraw, we cannot just
1496- * dequeue any glocks until our journal is recovered, lest we
1497- * introduce file system corruption. We need two exceptions to this
1498- * rule: We need to allow unlocking of nondisk glocks and the glock
1499- * for our own journal that needs recovery.
1567+ * This while loop is similar to function demote_incompat_holders:
1568+ * If the glock is due to be demoted (which may be from another node
1569+ * or even if this holder is GL_NOCACHE), the weak holders are
1570+ * demoted as well, allowing the glock to be demoted.
15001571 */
1501- if (test_bit (SDF_WITHDRAW_RECOVERY , & sdp -> sd_flags ) &&
1502- glock_blocked_by_withdraw (gl ) &&
1503- gh -> gh_gl != sdp -> sd_jinode_gl ) {
1504- sdp -> sd_glock_dqs_held ++ ;
1505- spin_unlock (& gl -> gl_lockref .lock );
1506- might_sleep ();
1507- wait_on_bit (& sdp -> sd_flags , SDF_WITHDRAW_RECOVERY ,
1508- TASK_UNINTERRUPTIBLE );
1509- spin_lock (& gl -> gl_lockref .lock );
1510- }
1511- if (gh -> gh_flags & GL_NOCACHE )
1512- handle_callback (gl , LM_ST_UNLOCKED , 0 , false);
1572+ while (gh ) {
1573+ /*
1574+ * If we're in the process of file system withdraw, we cannot
1575+ * just dequeue any glocks until our journal is recovered, lest
1576+ * we introduce file system corruption. We need two exceptions
1577+ * to this rule: We need to allow unlocking of nondisk glocks
1578+ * and the glock for our own journal that needs recovery.
1579+ */
1580+ if (test_bit (SDF_WITHDRAW_RECOVERY , & sdp -> sd_flags ) &&
1581+ glock_blocked_by_withdraw (gl ) &&
1582+ gh -> gh_gl != sdp -> sd_jinode_gl ) {
1583+ sdp -> sd_glock_dqs_held ++ ;
1584+ spin_unlock (& gl -> gl_lockref .lock );
1585+ might_sleep ();
1586+ wait_on_bit (& sdp -> sd_flags , SDF_WITHDRAW_RECOVERY ,
1587+ TASK_UNINTERRUPTIBLE );
1588+ spin_lock (& gl -> gl_lockref .lock );
1589+ }
1590+
1591+ /*
1592+ * This holder should not be cached, so mark it for demote.
1593+ * Note: this should be done before the check for needs_demote
1594+ * below.
1595+ */
1596+ if (gh -> gh_flags & GL_NOCACHE )
1597+ handle_callback (gl , LM_ST_UNLOCKED , 0 , false);
1598+
1599+ list_del_init (& gh -> gh_list );
1600+ clear_bit (HIF_HOLDER , & gh -> gh_iflags );
1601+ trace_gfs2_glock_queue (gh , 0 );
1602+
1603+ /*
1604+ * If there hasn't been a demote request we are done.
1605+ * (Let the remaining holders, if any, keep holding it.)
1606+ */
1607+ if (!needs_demote (gl )) {
1608+ if (list_empty (& gl -> gl_holders ))
1609+ fast_path = 1 ;
1610+ break ;
1611+ }
1612+ /*
1613+ * If we have another strong holder (we cannot auto-demote)
1614+ * we are done. It keeps holding it until it is done.
1615+ */
1616+ if (find_first_strong_holder (gl ))
1617+ break ;
15131618
1514- list_del_init (& gh -> gh_list );
1515- clear_bit (HIF_HOLDER , & gh -> gh_iflags );
1516- if (list_empty (& gl -> gl_holders ) &&
1517- !test_bit (GLF_PENDING_DEMOTE , & gl -> gl_flags ) &&
1518- !test_bit (GLF_DEMOTE , & gl -> gl_flags ))
1519- fast_path = 1 ;
1619+ /*
1620+ * If we have a weak holder at the head of the list, it
1621+ * (and all others like it) must be auto-demoted. If there
1622+ * are no more weak holders, we exit the while loop.
1623+ */
1624+ gh = find_first_holder (gl );
1625+ }
15201626
15211627 if (!test_bit (GLF_LFLUSH , & gl -> gl_flags ) && demote_ok (gl ))
15221628 gfs2_glock_add_to_lru (gl );
15231629
1524- trace_gfs2_glock_queue (gh , 0 );
15251630 if (unlikely (!fast_path )) {
15261631 gl -> gl_lockref .count ++ ;
15271632 if (test_bit (GLF_PENDING_DEMOTE , & gl -> gl_flags ) &&
@@ -1530,6 +1635,19 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
15301635 delay = gl -> gl_hold_time ;
15311636 __gfs2_glock_queue_work (gl , delay );
15321637 }
1638+ }
1639+
1640+ /**
1641+ * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
1642+ * @gh: the glock holder
1643+ *
1644+ */
1645+ void gfs2_glock_dq (struct gfs2_holder * gh )
1646+ {
1647+ struct gfs2_glock * gl = gh -> gh_gl ;
1648+
1649+ spin_lock (& gl -> gl_lockref .lock );
1650+ __gfs2_glock_dq (gh );
15331651 spin_unlock (& gl -> gl_lockref .lock );
15341652}
15351653
@@ -1692,6 +1810,7 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
16921810
16931811void gfs2_glock_cb (struct gfs2_glock * gl , unsigned int state )
16941812{
1813+ struct gfs2_holder mock_gh = { .gh_gl = gl , .gh_state = state , };
16951814 unsigned long delay = 0 ;
16961815 unsigned long holdtime ;
16971816 unsigned long now = jiffies ;
@@ -1706,6 +1825,28 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
17061825 if (test_bit (GLF_REPLY_PENDING , & gl -> gl_flags ))
17071826 delay = gl -> gl_hold_time ;
17081827 }
1828+ /*
1829+ * Note 1: We cannot call demote_incompat_holders from handle_callback
1830+ * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
1831+ * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
1832+ * Plus, we only want to demote the holders if the request comes from
1833+ * a remote cluster node because local holder conflicts are resolved
1834+ * elsewhere.
1835+ *
1836+ * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
1837+ * request that we set our state to UNLOCKED. Here we mock up a holder
1838+ * to make it look like someone wants the lock EX locally. Any SH
1839+ * and DF requests should be able to share the lock without demoting.
1840+ *
1841+ * Note 3: We only want to demote the demoteable holders when there
1842+ * are no more strong holders. The demoteable holders might as well
1843+ * keep the glock until the last strong holder is done with it.
1844+ */
1845+ if (!find_first_strong_holder (gl )) {
1846+ if (state == LM_ST_UNLOCKED )
1847+ mock_gh .gh_state = LM_ST_EXCLUSIVE ;
1848+ demote_incompat_holders (gl , & mock_gh );
1849+ }
17091850 handle_callback (gl , state , delay , true);
17101851 __gfs2_glock_queue_work (gl , delay );
17111852 spin_unlock (& gl -> gl_lockref .lock );
@@ -2095,6 +2236,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
20952236 * p ++ = 'H' ;
20962237 if (test_bit (HIF_WAIT , & iflags ))
20972238 * p ++ = 'W' ;
2239+ if (test_bit (HIF_MAY_DEMOTE , & iflags ))
2240+ * p ++ = 'D' ;
20982241 * p = 0 ;
20992242 return buf ;
21002243}
0 commit comments