3232#include "md.h"
3333#include "md-bitmap.h"
3434
/*
 * Bitmap major version numbers:
 *   version 3 stores the bitmap in host-endian order, which is non-portable;
 *   version 4 insists that the bitmap is in little-endian order;
 *   version 5 is currently set only for clustered devices.
 */
#define BITMAP_MAJOR_LO		3
#define BITMAP_MAJOR_HI		4
#define BITMAP_MAJOR_CLUSTERED	5
#define BITMAP_MAJOR_HOSTENDIAN	3
43+
44+ /*
45+ * in-memory bitmap:
46+ *
47+ * Use 16 bit block counters to track pending writes to each "chunk".
48+ * The 2 high order bits are special-purpose, the first is a flag indicating
49+ * whether a resync is needed. The second is a flag indicating whether a
50+ * resync is active.
51+ * This means that the counter is actually 14 bits:
52+ *
53+ * +--------+--------+------------------------------------------------+
54+ * | resync | resync |               counter                          |
55+ * | needed | active |                                                |
56+ * |  (0-1) |  (0-1) |              (0-16383)                         |
57+ * +--------+--------+------------------------------------------------+
58+ *
59+ * The "resync needed" bit is set when:
60+ * a '1' bit is read from storage at startup.
61+ * a write request fails on some drives
62+ * a resync is aborted on a chunk with 'resync active' set
63+ * It is cleared (and resync-active set) when a resync starts across all drives
64+ * of the chunk.
65+ *
66+ *
67+ * The "resync active" bit is set when:
68+ * a resync is started on all drives, and resync_needed is set.
69+ * resync_needed will be cleared (as long as resync_active wasn't already set).
70+ * It is cleared when a resync completes.
71+ *
72+ * The counter counts pending write requests, plus the on-disk bit.
73+ * When the counter is '1' and the resync bits are clear, the on-disk
74+ * bit can be cleared as well, thus setting the counter to 0.
75+ * When we set a bit, or in the counter (to start a write), if the field is
76+ * 0, we first set the disk bit and set the counter to 1.
77+ *
78+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
79+ * Anything that dirties the stripe pushes the counter to 2 (at least)
80+ * and sets the on-disk bit (lazily).
81+ * If a periodic sweep finds the counter at 2, it is decremented to 1.
82+ * If the sweep finds the counter at 1, the on-disk bit is cleared and the
83+ * counter goes to zero.
84+ *
85+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
86+ * counters as a fallback when "page" memory cannot be allocated:
87+ *
88+ * Normal case (page memory allocated):
89+ *
90+ *     page pointer (32-bit)
91+ *
92+ *     [ ] ------+
93+ *               |
94+ *               +-------> [   ][   ]..[   ] (4096 byte page == 2048 counters)
95+ *                          c1   c2    c2048
96+ *
97+ * Hijacked case (page memory allocation failed):
98+ *
99+ *     hijacked page pointer (32-bit)
100+ *
101+ *     [                   ][                   ] (no page memory allocated)
102+ *      counter #1 (16-bit)  counter #2 (16-bit)
103+ *
104+ */
105+
/* number of bits in one page, and the corresponding shift (8 bits per byte) */
#define PAGE_BITS (PAGE_SIZE << 3)
#define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)

/*
 * Extract one field from a counter word.
 *
 * These must be function-like macros: there is no whitespace between the
 * macro name and its parameter list.  The argument is parenthesised so that
 * an expression argument is converted to bitmap_counter_t as a whole rather
 * than having the cast bind to its first operand only.
 */
#define NEEDED(x) (((bitmap_counter_t)(x)) & NEEDED_MASK)
#define RESYNC(x) (((bitmap_counter_t)(x)) & RESYNC_MASK)
#define COUNTER(x) (((bitmap_counter_t)(x)) & COUNTER_MAX)

/* how many counters per page? */
#define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
/* same, except a shift value for more efficient bitops */
#define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
/* same, except a mask value for more efficient bitops */
#define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)

#define BITMAP_BLOCK_SHIFT 9
121+
122+ /*
123+ * bitmap structures:
124+ */
125+
126+ /* the in-memory bitmap is represented by bitmap_pages */
struct bitmap_page {
	/* pointer to the actual memory page */
	char *map;
	/*
	 * set in emergencies, when map cannot be allocated: the map
	 * pointer is hijacked and used as two counters itself
	 */
	unsigned int hijacked:1;
	/*
	 * the page is marked 'pending' if any counter in it is '1' or '2'
	 * and so could be cleared
	 */
	unsigned int pending:1;
	/* count of dirty bits on the page */
	unsigned int count:30;
};
147+
148+ /* the main bitmap structure - one per mddev */
149+ struct bitmap {
150+
151+ struct bitmap_counts {
152+ spinlock_t lock ;
153+ struct bitmap_page * bp ;
154+ /* total number of pages in the bitmap */
155+ unsigned long pages ;
156+ /* number of pages not yet allocated */
157+ unsigned long missing_pages ;
158+ /* chunksize = 2^chunkshift (for bitops) */
159+ unsigned long chunkshift ;
160+ /* total number of data chunks for the array */
161+ unsigned long chunks ;
162+ } counts ;
163+
164+ struct mddev * mddev ; /* the md device that the bitmap is for */
165+
166+ __u64 events_cleared ;
167+ int need_sync ;
168+
169+ struct bitmap_storage {
170+ /* backing disk file */
171+ struct file * file ;
172+ /* cached copy of the bitmap file superblock */
173+ struct page * sb_page ;
174+ unsigned long sb_index ;
175+ /* list of cache pages for the file */
176+ struct page * * filemap ;
177+ /* attributes associated filemap pages */
178+ unsigned long * filemap_attr ;
179+ /* number of pages in the file */
180+ unsigned long file_pages ;
181+ /* total bytes in the bitmap */
182+ unsigned long bytes ;
183+ } storage ;
184+
185+ unsigned long flags ;
186+
187+ int allclean ;
188+
189+ atomic_t behind_writes ;
190+ /* highest actual value at runtime */
191+ unsigned long behind_writes_used ;
192+
193+ /*
194+ * the bitmap daemon - periodically wakes up and sweeps the bitmap
195+ * file, cleaning up bits and flushing out pages to disk as necessary
196+ */
197+ unsigned long daemon_lastrun ; /* jiffies of last run */
198+ /*
199+ * when we lasted called end_sync to update bitmap with resync
200+ * progress.
201+ */
202+ unsigned long last_end_sync ;
203+
204+ /* pending writes to the bitmap file */
205+ atomic_t pending_writes ;
206+ wait_queue_head_t write_wait ;
207+ wait_queue_head_t overflow_wait ;
208+ wait_queue_head_t behind_wait ;
209+
210+ struct kernfs_node * sysfs_can_clear ;
211+ /* slot offset for clustered env */
212+ int cluster_slot ;
213+ };
214+
35215static int __bitmap_resize (struct bitmap * bitmap , sector_t blocks ,
36216 int chunksize , bool init );
37217
@@ -491,9 +671,10 @@ static void md_bitmap_wait_writes(struct bitmap *bitmap)
491671
492672
493673/* update the event counter and sync the superblock to disk */
494- static void bitmap_update_sb (struct bitmap * bitmap )
674+ static void bitmap_update_sb (void * data )
495675{
496676 bitmap_super_t * sb ;
677+ struct bitmap * bitmap = data ;
497678
498679 if (!bitmap || !bitmap -> mddev ) /* no bitmap for this array */
499680 return ;
@@ -1844,10 +2025,11 @@ static void bitmap_flush(struct mddev *mddev)
18442025 bitmap_update_sb (bitmap );
18452026}
18462027
1847- static void md_bitmap_free (struct bitmap * bitmap )
2028+ static void md_bitmap_free (void * data )
18482029{
18492030 unsigned long k , pages ;
18502031 struct bitmap_page * bp ;
2032+ struct bitmap * bitmap = data ;
18512033
18522034 if (!bitmap ) /* there was no bitmap */
18532035 return ;
@@ -2076,7 +2258,7 @@ static int bitmap_load(struct mddev *mddev)
20762258}
20772259
20782260/* caller needs to free the returned bitmap with md_bitmap_free() */
2079- static struct bitmap * bitmap_get_from_slot (struct mddev * mddev , int slot )
2261+ static void * bitmap_get_from_slot (struct mddev * mddev , int slot )
20802262{
20812263 int rv = 0 ;
20822264 struct bitmap * bitmap ;
@@ -2143,15 +2325,18 @@ static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
21432325 return rv ;
21442326}
21452327
2146- static void bitmap_set_pages (struct bitmap * bitmap , unsigned long pages )
2328+ static void bitmap_set_pages (void * data , unsigned long pages )
21472329{
2330+ struct bitmap * bitmap = data ;
2331+
21482332 bitmap -> counts .pages = pages ;
21492333}
21502334
2151- static int bitmap_get_stats (struct bitmap * bitmap , struct md_bitmap_stats * stats )
2335+ static int bitmap_get_stats (void * data , struct md_bitmap_stats * stats )
21522336{
21532337 struct bitmap_storage * storage ;
21542338 struct bitmap_counts * counts ;
2339+ struct bitmap * bitmap = data ;
21552340 bitmap_super_t * sb ;
21562341
21572342 if (!bitmap )
@@ -2510,6 +2695,7 @@ space_show(struct mddev *mddev, char *page)
25102695static ssize_t
25112696space_store (struct mddev * mddev , const char * buf , size_t len )
25122697{
2698+ struct bitmap * bitmap ;
25132699 unsigned long sectors ;
25142700 int rv ;
25152701
@@ -2520,8 +2706,8 @@ space_store(struct mddev *mddev, const char *buf, size_t len)
25202706 if (sectors == 0 )
25212707 return - EINVAL ;
25222708
2523- if ( mddev -> bitmap &&
2524- sectors < (mddev -> bitmap -> storage .bytes + 511 ) >> 9 )
2709+ bitmap = mddev -> bitmap ;
2710+ if ( bitmap && sectors < (bitmap -> storage .bytes + 511 ) >> 9 )
25252711 return - EFBIG ; /* Bitmap is too big for this small space */
25262712
25272713 /* could make sure it isn't too big, but that isn't really
@@ -2698,10 +2884,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
26982884static ssize_t can_clear_show (struct mddev * mddev , char * page )
26992885{
27002886 int len ;
2887+ struct bitmap * bitmap ;
2888+
27012889 spin_lock (& mddev -> lock );
2702- if (mddev -> bitmap )
2703- len = sprintf (page , "%s\n" , (mddev -> bitmap -> need_sync ?
2704- "false" : "true" ));
2890+ bitmap = mddev -> bitmap ;
2891+ if (bitmap )
2892+ len = sprintf (page , "%s\n" , (bitmap -> need_sync ? "false" :
2893+ "true" ));
27052894 else
27062895 len = sprintf (page , "\n" );
27072896 spin_unlock (& mddev -> lock );
@@ -2710,17 +2899,24 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page)
27102899
27112900static ssize_t can_clear_store (struct mddev * mddev , const char * buf , size_t len )
27122901{
2713- if (mddev -> bitmap == NULL )
2902+ struct bitmap * bitmap = mddev -> bitmap ;
2903+
2904+ if (!bitmap )
27142905 return - ENOENT ;
2715- if (strncmp (buf , "false" , 5 ) == 0 )
2716- mddev -> bitmap -> need_sync = 1 ;
2717- else if (strncmp (buf , "true" , 4 ) == 0 ) {
2906+
2907+ if (strncmp (buf , "false" , 5 ) == 0 ) {
2908+ bitmap -> need_sync = 1 ;
2909+ return len ;
2910+ }
2911+
2912+ if (strncmp (buf , "true" , 4 ) == 0 ) {
27182913 if (mddev -> degraded )
27192914 return - EBUSY ;
2720- mddev -> bitmap -> need_sync = 0 ;
2721- } else
2722- return - EINVAL ;
2723- return len ;
2915+ bitmap -> need_sync = 0 ;
2916+ return len ;
2917+ }
2918+
2919+ return - EINVAL ;
27242920}
27252921
27262922static struct md_sysfs_entry bitmap_can_clear =
@@ -2730,21 +2926,26 @@ static ssize_t
27302926behind_writes_used_show (struct mddev * mddev , char * page )
27312927{
27322928 ssize_t ret ;
2929+ struct bitmap * bitmap ;
2930+
27332931 spin_lock (& mddev -> lock );
2734- if (mddev -> bitmap == NULL )
2932+ bitmap = mddev -> bitmap ;
2933+ if (!bitmap )
27352934 ret = sprintf (page , "0\n" );
27362935 else
2737- ret = sprintf (page , "%lu\n" ,
2738- mddev -> bitmap -> behind_writes_used );
2936+ ret = sprintf (page , "%lu\n" , bitmap -> behind_writes_used );
27392937 spin_unlock (& mddev -> lock );
2938+
27402939 return ret ;
27412940}
27422941
27432942static ssize_t
27442943behind_writes_used_reset (struct mddev * mddev , const char * buf , size_t len )
27452944{
2746- if (mddev -> bitmap )
2747- mddev -> bitmap -> behind_writes_used = 0 ;
2945+ struct bitmap * bitmap = mddev -> bitmap ;
2946+
2947+ if (bitmap )
2948+ bitmap -> behind_writes_used = 0 ;
27482949 return len ;
27492950}
27502951
0 commit comments