@@ -1724,68 +1724,18 @@ static void bch2_journal_entries_postprocess(struct bch_fs *c, struct jset *jset
1724
1724
jset -> u64s = cpu_to_le32 ((u64 * ) prev - jset -> _data );
1725
1725
}
1726
1726
1727
- void bch2_journal_write (struct closure * cl )
1727
+ static int bch2_journal_write_prep (struct journal * j , struct journal_buf * w )
1728
1728
{
1729
- struct journal * j = container_of (cl , struct journal , io );
1730
1729
struct bch_fs * c = container_of (j , struct bch_fs , journal );
1731
- struct bch_dev * ca ;
1732
- struct journal_buf * w = journal_last_unwritten_buf (j );
1733
- struct bch_replicas_padded replicas ;
1734
1730
struct jset_entry * start , * end ;
1735
1731
struct jset * jset ;
1736
- struct bio * bio ;
1737
- struct printbuf journal_debug_buf = PRINTBUF ;
1732
+ unsigned sectors , bytes , u64s ;
1738
1733
bool validate_before_checksum = false;
1739
- unsigned i , sectors , bytes , u64s , nr_rw_members = 0 ;
1740
1734
int ret ;
1741
1735
1742
- BUG_ON (BCH_SB_CLEAN (c -> disk_sb .sb ));
1743
-
1744
1736
journal_buf_realloc (j , w );
1745
1737
jset = w -> data ;
1746
1738
1747
- j -> write_start_time = local_clock ();
1748
-
1749
- spin_lock (& j -> lock );
1750
-
1751
- /*
1752
- * If the journal is in an error state - we did an emergency shutdown -
1753
- * we prefer to continue doing journal writes. We just mark them as
1754
- * noflush so they'll never be used, but they'll still be visible by the
1755
- * list_journal tool - this helps in debugging.
1756
- *
1757
- * There's a caveat: the first journal write after marking the
1758
- * superblock dirty must always be a flush write, because on startup
1759
- * from a clean shutdown we didn't necessarily read the journal and the
1760
- * new journal write might overwrite whatever was in the journal
1761
- * previously - we can't leave the journal without any flush writes in
1762
- * it.
1763
- *
1764
- * So if we're in an error state, and we're still starting up, we don't
1765
- * write anything at all.
1766
- */
1767
- if (!test_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags ) &&
1768
- (bch2_journal_error (j ) ||
1769
- w -> noflush ||
1770
- (!w -> must_flush &&
1771
- (jiffies - j -> last_flush_write ) < msecs_to_jiffies (c -> opts .journal_flush_delay ) &&
1772
- test_bit (JOURNAL_MAY_SKIP_FLUSH , & j -> flags )))) {
1773
- w -> noflush = true;
1774
- SET_JSET_NO_FLUSH (jset , true);
1775
- jset -> last_seq = 0 ;
1776
- w -> last_seq = 0 ;
1777
-
1778
- j -> nr_noflush_writes ++ ;
1779
- } else if (!bch2_journal_error (j )) {
1780
- j -> last_flush_write = jiffies ;
1781
- j -> nr_flush_writes ++ ;
1782
- clear_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags );
1783
- } else {
1784
- spin_unlock (& j -> lock );
1785
- goto err ;
1786
- }
1787
- spin_unlock (& j -> lock );
1788
-
1789
1739
/*
1790
1740
* New btree roots are set by journalling them; when the journal entry
1791
1741
* gets written we have to propagate them to c->btree_roots
@@ -1816,7 +1766,7 @@ void bch2_journal_write(struct closure *cl)
1816
1766
bch2_fs_fatal_error (c , "aieeee! journal write overran available space, %zu > %u (extra %u reserved %u/%u)" ,
1817
1767
vstruct_bytes (jset ), w -> sectors << 9 ,
1818
1768
u64s , w -> u64s_reserved , j -> entry_u64s_reserved );
1819
- goto err ;
1769
+ return - EINVAL ;
1820
1770
}
1821
1771
1822
1772
jset -> magic = cpu_to_le64 (jset_magic (c ));
@@ -1835,37 +1785,115 @@ void bch2_journal_write(struct closure *cl)
1835
1785
validate_before_checksum = true;
1836
1786
1837
1787
if (validate_before_checksum &&
1838
- jset_validate (c , NULL , jset , 0 , WRITE ))
1839
- goto err ;
1788
+ ( ret = jset_validate (c , NULL , jset , 0 , WRITE ) ))
1789
+ return ret ;
1840
1790
1841
1791
ret = bch2_encrypt (c , JSET_CSUM_TYPE (jset ), journal_nonce (jset ),
1842
1792
jset -> encrypted_start ,
1843
1793
vstruct_end (jset ) - (void * ) jset -> encrypted_start );
1844
1794
if (bch2_fs_fatal_err_on (ret , c ,
1845
1795
"error decrypting journal entry: %i" , ret ))
1846
- goto err ;
1796
+ return ret ;
1847
1797
1848
1798
jset -> csum = csum_vstruct (c , JSET_CSUM_TYPE (jset ),
1849
1799
journal_nonce (jset ), jset );
1850
1800
1851
1801
if (!validate_before_checksum &&
1852
- jset_validate (c , NULL , jset , 0 , WRITE ))
1853
- goto err ;
1802
+ ( ret = jset_validate (c , NULL , jset , 0 , WRITE ) ))
1803
+ return ret ;
1854
1804
1855
1805
memset ((void * ) jset + bytes , 0 , (sectors << 9 ) - bytes );
1806
+ return 0 ;
1807
+ }
1808
+
1809
+ static int bch2_journal_write_pick_flush (struct journal * j , struct journal_buf * w ) /* Decide whether journal buffer w is written as a flush or a noflush write and update j's counters/flags to match. Returns 0 once w is marked, or -EIO when nothing may be written. The visible caller holds j->lock around this call. */
1810
+ {
1811
+ struct bch_fs * c = container_of (j , struct bch_fs , journal );
1812
+ int error = bch2_journal_error (j );
1813
+
1814
+ /*
1815
+ * If the journal is in an error state - we did an emergency shutdown -
1816
+ * we prefer to continue doing journal writes. We just mark them as
1817
+ * noflush so they'll never be used, but they'll still be visible to the
1818
+ * list_journal tool - this helps in debugging.
1819
+ *
1820
+ * There's a caveat: the first journal write after marking the
1821
+ * superblock dirty must always be a flush write, because on startup
1822
+ * from a clean shutdown we didn't necessarily read the journal and the
1823
+ * new journal write might overwrite whatever was in the journal
1824
+ * previously - we can't leave the journal without any flush writes in
1825
+ * it.
1826
+ *
1827
+ * So if we're in an error state, and we're still starting up, we don't
1828
+ * write anything at all.
1829
+ */
1830
+ if (error && test_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags ))
1831
+ return - EIO ; /* error state while a flush write is still required: refuse to write */
1832
+
1833
+ if (error ||
1834
+ w -> noflush || /* NOTE(review): the removed inline version only took the noflush path when JOURNAL_NEED_FLUSH_WRITE was clear; here an already-set w->noflush (or the delay heuristic below) yields a noflush write even with that bit set - confirm this is intended */
1835
+ (!w -> must_flush &&
1836
+ (jiffies - j -> last_flush_write ) < msecs_to_jiffies (c -> opts .journal_flush_delay ) && /* less than journal_flush_delay ms since the last flush write */
1837
+ test_bit (JOURNAL_MAY_SKIP_FLUSH , & j -> flags ))) {
1838
+ w -> noflush = true;
1839
+ SET_JSET_NO_FLUSH (w -> data , true);
1840
+ w -> data -> last_seq = 0 ; /* noflush entries carry last_seq 0 both on disk and in the in-memory buf */
1841
+ w -> last_seq = 0 ;
1842
+
1843
+ j -> nr_noflush_writes ++ ;
1844
+ } else { /* flush write: record when it happened and clear the pending flush requirement */
1845
+ j -> last_flush_write = jiffies ;
1846
+ j -> nr_flush_writes ++ ;
1847
+ clear_bit (JOURNAL_NEED_FLUSH_WRITE , & j -> flags );
1848
+ }
1849
+
1850
+ return 0 ;
1851
+ }
1852
+
1853
+ void bch2_journal_write (struct closure * cl )
1854
+ {
1855
+ struct journal * j = container_of (cl , struct journal , io );
1856
+ struct bch_fs * c = container_of (j , struct bch_fs , journal );
1857
+ struct bch_dev * ca ;
1858
+ struct journal_buf * w = journal_last_unwritten_buf (j );
1859
+ struct bch_replicas_padded replicas ;
1860
+ struct bio * bio ;
1861
+ struct printbuf journal_debug_buf = PRINTBUF ;
1862
+ unsigned i , nr_rw_members = 0 ;
1863
+ int ret ;
1864
+
1865
+ BUG_ON (BCH_SB_CLEAN (c -> disk_sb .sb ));
1866
+
1867
+ j -> write_start_time = local_clock ();
1856
1868
1857
- retry_alloc :
1858
1869
spin_lock (& j -> lock );
1859
- ret = journal_write_alloc (j , w );
1870
+ ret = bch2_journal_write_pick_flush (j , w );
1871
+ spin_unlock (& j -> lock );
1872
+ if (ret )
1873
+ goto err ;
1874
+
1875
+ ret = bch2_journal_write_prep (j , w );
1876
+ if (ret )
1877
+ goto err ;
1878
+
1879
+ while (1 ) {
1880
+ spin_lock (& j -> lock );
1881
+ ret = journal_write_alloc (j , w );
1882
+ if (!ret || !j -> can_discard )
1883
+ break ;
1860
1884
1861
- if (ret && j -> can_discard ) {
1862
1885
spin_unlock (& j -> lock );
1863
1886
bch2_journal_do_discards (j );
1864
- goto retry_alloc ;
1865
1887
}
1866
1888
1867
- if (ret )
1889
+ if (ret ) {
1868
1890
__bch2_journal_debug_to_text (& journal_debug_buf , j );
1891
+ spin_unlock (& j -> lock );
1892
+ bch_err (c , "Unable to allocate journal write:\n%s" ,
1893
+ journal_debug_buf .buf );
1894
+ printbuf_exit (& journal_debug_buf );
1895
+ goto err ;
1896
+ }
1869
1897
1870
1898
/*
1871
1899
* write is allocated, no longer need to account for it in
@@ -1880,13 +1908,6 @@ void bch2_journal_write(struct closure *cl)
1880
1908
bch2_journal_space_available (j );
1881
1909
spin_unlock (& j -> lock );
1882
1910
1883
- if (ret ) {
1884
- bch_err (c , "Unable to allocate journal write:\n%s" ,
1885
- journal_debug_buf .buf );
1886
- printbuf_exit (& journal_debug_buf );
1887
- goto err ;
1888
- }
1889
-
1890
1911
w -> devs_written = bch2_bkey_devs (bkey_i_to_s_c (& w -> key ));
1891
1912
1892
1913
if (c -> opts .nochanges )
@@ -1908,7 +1929,7 @@ void bch2_journal_write(struct closure *cl)
1908
1929
if (ret )
1909
1930
goto err ;
1910
1931
1911
- if (!JSET_NO_FLUSH (jset ) && w -> separate_flush ) {
1932
+ if (!JSET_NO_FLUSH (w -> data ) && w -> separate_flush ) {
1912
1933
for_each_rw_member (ca , c , i ) {
1913
1934
percpu_ref_get (& ca -> io_ref );
1914
1935
0 commit comments