@@ -242,16 +242,22 @@ void bch2_accounting_swab(struct bkey_s k)
242242 * p = swab64 (* p );
243243}
244244
245+ static inline void __accounting_to_replicas (struct bch_replicas_entry_v1 * r ,
246+ struct disk_accounting_pos acc )
247+ {
248+ unsafe_memcpy (r , & acc .replicas ,
249+ replicas_entry_bytes (& acc .replicas ),
250+ "variable length struct" );
251+ }
252+
245253static inline bool accounting_to_replicas (struct bch_replicas_entry_v1 * r , struct bpos p )
246254{
247255 struct disk_accounting_pos acc_k ;
248256 bpos_to_disk_accounting_pos (& acc_k , p );
249257
250258 switch (acc_k .type ) {
251259 case BCH_DISK_ACCOUNTING_replicas :
252- unsafe_memcpy (r , & acc_k .replicas ,
253- replicas_entry_bytes (& acc_k .replicas ),
254- "variable length struct" );
260+ __accounting_to_replicas (r , acc_k );
255261 return true;
256262 default :
257263 return false;
@@ -608,6 +614,81 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
608614 return ret ;
609615}
610616
617+ static int bch2_disk_accounting_validate_late (struct btree_trans * trans ,
618+ struct disk_accounting_pos acc ,
619+ u64 * v , unsigned nr )
620+ {
621+ struct bch_fs * c = trans -> c ;
622+ struct printbuf buf = PRINTBUF ;
623+ int ret = 0 , invalid_dev = -1 ;
624+
625+ switch (acc .type ) {
626+ case BCH_DISK_ACCOUNTING_replicas : {
627+ struct bch_replicas_padded r ;
628+ __accounting_to_replicas (& r .e , acc );
629+
630+ for (unsigned i = 0 ; i < r .e .nr_devs ; i ++ )
631+ if (r .e .devs [i ] != BCH_SB_MEMBER_INVALID &&
632+ !bch2_dev_exists (c , r .e .devs [i ])) {
633+ invalid_dev = r .e .devs [i ];
634+ goto invalid_device ;
635+ }
636+
637+ /*
638+ * All replicas entry checks except for invalid device are done
639+ * in bch2_accounting_validate
640+ */
641+ BUG_ON (bch2_replicas_entry_validate (& r .e , c , & buf ));
642+
643+ if (fsck_err_on (!bch2_replicas_marked_locked (c , & r .e ),
644+ trans , accounting_replicas_not_marked ,
645+ "accounting not marked in superblock replicas\n %s" ,
646+ (printbuf_reset (& buf ),
647+ bch2_accounting_key_to_text (& buf , & acc ),
648+ buf .buf ))) {
649+ /*
650+ * We're not RW yet and still single threaded, dropping
651+ * and retaking lock is ok:
652+ */
653+ percpu_up_write (& c -> mark_lock );
654+ ret = bch2_mark_replicas (c , & r .e );
655+ if (ret )
656+ goto fsck_err ;
657+ percpu_down_write (& c -> mark_lock );
658+ }
659+ break ;
660+ }
661+
662+ case BCH_DISK_ACCOUNTING_dev_data_type :
663+ if (!bch2_dev_exists (c , acc .dev_data_type .dev )) {
664+ invalid_dev = acc .dev_data_type .dev ;
665+ goto invalid_device ;
666+ }
667+ break ;
668+ }
669+
670+ fsck_err :
671+ printbuf_exit (& buf );
672+ return ret ;
673+ invalid_device :
674+ if (fsck_err (trans , accounting_to_invalid_device ,
675+ "accounting entry points to invalid device %i\n %s" ,
676+ invalid_dev ,
677+ (printbuf_reset (& buf ),
678+ bch2_accounting_key_to_text (& buf , & acc ),
679+ buf .buf ))) {
680+ for (unsigned i = 0 ; i < nr ; i ++ )
681+ v [i ] = - v [i ];
682+
683+ ret = commit_do (trans , NULL , NULL , 0 ,
684+ bch2_disk_accounting_mod (trans , & acc , v , nr , false)) ?:
685+ - BCH_ERR_remove_disk_accounting_entry ;
686+ } else {
687+ ret = - BCH_ERR_remove_disk_accounting_entry ;
688+ }
689+ goto fsck_err ;
690+ }
691+
611692/*
612693 * At startup time, initialize the in memory accounting from the btree (and
613694 * journal)
@@ -666,44 +747,42 @@ int bch2_accounting_read(struct bch_fs *c)
666747 }
667748 keys -> gap = keys -> nr = dst - keys -> data ;
668749
669- percpu_down_read (& c -> mark_lock );
670- for ( unsigned i = 0 ; i < acc -> k . nr ; i ++ ) {
671- u64 v [ BCH_ACCOUNTING_MAX_COUNTERS ];
672- bch2_accounting_mem_read_counters ( acc , i , v , ARRAY_SIZE ( v ), false );
750+ percpu_down_write (& c -> mark_lock );
751+ unsigned i = 0 ;
752+ while ( i < acc -> k . nr ) {
753+ unsigned idx = inorder_to_eytzinger0 ( i , acc -> k . nr );
673754
674- if ( bch2_is_zero ( v , sizeof ( v [ 0 ]) * acc -> k . data [ i ]. nr_counters ))
675- continue ;
755+ struct disk_accounting_pos acc_k ;
756+ bpos_to_disk_accounting_pos ( & acc_k , acc -> k . data [ idx ]. pos ) ;
676757
677- struct bch_replicas_padded r ;
678- if (!accounting_to_replicas (& r .e , acc -> k .data [i ].pos ))
679- continue ;
758+ u64 v [BCH_ACCOUNTING_MAX_COUNTERS ];
759+ bch2_accounting_mem_read_counters (acc , idx , v , ARRAY_SIZE (v ), false);
680760
681761 /*
682- * If the replicas entry is invalid it'll get cleaned up by
683- * check_allocations:
762+ * If the entry counters are zeroed, it should be treated as
763+ * nonexistent - it might point to an invalid device.
764+ *
765+ * Remove it, so that if it's re-added it gets re-marked in the
766+ * superblock:
684767 */
685- if (bch2_replicas_entry_validate (& r .e , c , & buf ))
768+ ret = bch2_is_zero (v , sizeof (v [0 ]) * acc -> k .data [idx ].nr_counters )
769+ ? - BCH_ERR_remove_disk_accounting_entry
770+ : bch2_disk_accounting_validate_late (trans , acc_k ,
771+ v , acc -> k .data [idx ].nr_counters );
772+
773+ if (ret == - BCH_ERR_remove_disk_accounting_entry ) {
774+ free_percpu (acc -> k .data [idx ].v [0 ]);
775+ free_percpu (acc -> k .data [idx ].v [1 ]);
776+ darray_remove_item (& acc -> k , & acc -> k .data [idx ]);
777+ eytzinger0_sort (acc -> k .data , acc -> k .nr , sizeof (acc -> k .data [0 ]),
778+ accounting_pos_cmp , NULL );
779+ ret = 0 ;
686780 continue ;
687-
688- struct disk_accounting_pos k ;
689- bpos_to_disk_accounting_pos (& k , acc -> k .data [i ].pos );
690-
691- if (fsck_err_on (!bch2_replicas_marked_locked (c , & r .e ),
692- trans , accounting_replicas_not_marked ,
693- "accounting not marked in superblock replicas\n %s" ,
694- (printbuf_reset (& buf ),
695- bch2_accounting_key_to_text (& buf , & k ),
696- buf .buf ))) {
697- /*
698- * We're not RW yet and still single threaded, dropping
699- * and retaking lock is ok:
700- */
701- percpu_up_read (& c -> mark_lock );
702- ret = bch2_mark_replicas (c , & r .e );
703- if (ret )
704- goto fsck_err ;
705- percpu_down_read (& c -> mark_lock );
706781 }
782+
783+ if (ret )
784+ goto fsck_err ;
785+ i ++ ;
707786 }
708787
709788 preempt_disable ();
@@ -742,7 +821,7 @@ int bch2_accounting_read(struct bch_fs *c)
742821 }
743822 preempt_enable ();
744823fsck_err :
745- percpu_up_read (& c -> mark_lock );
824+ percpu_up_write (& c -> mark_lock );
746825err :
747826 printbuf_exit (& buf );
748827 bch2_trans_put (trans );
0 commit comments