32
32
#include "md.h"
33
33
#include "md-bitmap.h"
34
34
35
+ #define BITMAP_MAJOR_LO 3
36
+ /* version 4 insists the bitmap is in little-endian order
37
+ * with version 3, it is host-endian which is non-portable
38
+ * Version 5 is currently set only for clustered devices
39
+ */
40
+ #define BITMAP_MAJOR_HI 4
41
+ #define BITMAP_MAJOR_CLUSTERED 5
42
+ #define BITMAP_MAJOR_HOSTENDIAN 3
43
+
44
+ /*
45
+ * in-memory bitmap:
46
+ *
47
+ * Use 16 bit block counters to track pending writes to each "chunk".
48
+ * The 2 high order bits are special-purpose, the first is a flag indicating
49
+ * whether a resync is needed. The second is a flag indicating whether a
50
+ * resync is active.
51
+ * This means that the counter is actually 14 bits:
52
+ *
53
+ * +--------+--------+------------------------------------------------+
54
+ * | resync | resync | counter |
55
+ * | needed | active | |
56
+ * | (0-1) | (0-1) | (0-16383) |
57
+ * +--------+--------+------------------------------------------------+
58
+ *
59
+ * The "resync needed" bit is set when:
60
+ * a '1' bit is read from storage at startup.
61
+ * a write request fails on some drives
62
+ * a resync is aborted on a chunk with 'resync active' set
63
+ * It is cleared (and resync-active set) when a resync starts across all drives
64
+ * of the chunk.
65
+ *
66
+ *
67
+ * The "resync active" bit is set when:
68
+ * a resync is started on all drives, and resync_needed is set.
69
+ * resync_needed will be cleared (as long as resync_active wasn't already set).
70
+ * It is cleared when a resync completes.
71
+ *
72
+ * The counter counts pending write requests, plus the on-disk bit.
73
+ * When the counter is '1' and the resync bits are clear, the on-disk
74
+ * bit can be cleared as well, thus setting the counter to 0.
75
+ * When we set a bit, or increment the counter (to start a write), if the field is
76
+ * 0, we first set the disk bit and set the counter to 1.
77
+ *
78
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
79
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
80
+ * and sets the on-disk bit (lazily).
81
+ * If a periodic sweep finds the counter at 2, it is decremented to 1.
82
+ * If the sweep finds the counter at 1, the on-disk bit is cleared and the
83
+ * counter goes to zero.
84
+ *
85
+ * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block
86
+ * counters as a fallback when "page" memory cannot be allocated:
87
+ *
88
+ * Normal case (page memory allocated):
89
+ *
90
+ * page pointer (32-bit)
91
+ *
92
+ * [ ] ------+
93
+ * |
94
+ * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters)
95
+ * c1 c2 c2048
96
+ *
97
+ * Hijacked case (page memory allocation failed):
98
+ *
99
+ * hijacked page pointer (32-bit)
100
+ *
101
+ * [ ][ ] (no page memory allocated)
102
+ * counter #1 (16-bit) counter #2 (16-bit)
103
+ *
104
+ */
105
+
106
+ #define PAGE_BITS (PAGE_SIZE << 3)
107
+ #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3)
108
+
109
+ #define NEEDED (x ) (((bitmap_counter_t) x) & NEEDED_MASK)
110
+ #define RESYNC (x ) (((bitmap_counter_t) x) & RESYNC_MASK)
111
+ #define COUNTER (x ) (((bitmap_counter_t) x) & COUNTER_MAX)
112
+
113
+ /* how many counters per page? */
114
+ #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS)
115
+ /* same, except a shift value for more efficient bitops */
116
+ #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT)
117
+ /* same, except a mask value for more efficient bitops */
118
+ #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1)
119
+
120
+ #define BITMAP_BLOCK_SHIFT 9
121
+
122
+ /*
123
+ * bitmap structures:
124
+ */
125
+
126
+ /* the in-memory bitmap is represented by bitmap_pages */
127
+ struct bitmap_page {
128
+ /*
129
+ * map points to the actual memory page
130
+ */
131
+ char * map ;
132
+ /*
133
+ * in emergencies (when map cannot be allocated), hijack the map
134
+ * pointer and use it as two counters itself
135
+ */
136
+ unsigned int hijacked :1 ;
137
+ /*
138
+ * If any counter in this page is '1' or '2' - and so could be
139
+ * cleared then that page is marked as 'pending'
140
+ */
141
+ unsigned int pending :1 ;
142
+ /*
143
+ * count of dirty bits on the page
144
+ */
145
+ unsigned int count :30 ;
146
+ };
147
+
148
+ /* the main bitmap structure - one per mddev */
149
+ struct bitmap {
150
+
151
+ struct bitmap_counts {
152
+ spinlock_t lock ;
153
+ struct bitmap_page * bp ;
154
+ /* total number of pages in the bitmap */
155
+ unsigned long pages ;
156
+ /* number of pages not yet allocated */
157
+ unsigned long missing_pages ;
158
+ /* chunksize = 2^chunkshift (for bitops) */
159
+ unsigned long chunkshift ;
160
+ /* total number of data chunks for the array */
161
+ unsigned long chunks ;
162
+ } counts ;
163
+
164
+ struct mddev * mddev ; /* the md device that the bitmap is for */
165
+
166
+ __u64 events_cleared ;
167
+ int need_sync ;
168
+
169
+ struct bitmap_storage {
170
+ /* backing disk file */
171
+ struct file * file ;
172
+ /* cached copy of the bitmap file superblock */
173
+ struct page * sb_page ;
174
+ unsigned long sb_index ;
175
+ /* list of cache pages for the file */
176
+ struct page * * filemap ;
177
+ /* attributes associated with filemap pages */
178
+ unsigned long * filemap_attr ;
179
+ /* number of pages in the file */
180
+ unsigned long file_pages ;
181
+ /* total bytes in the bitmap */
182
+ unsigned long bytes ;
183
+ } storage ;
184
+
185
+ unsigned long flags ;
186
+
187
+ int allclean ;
188
+
189
+ atomic_t behind_writes ;
190
+ /* highest actual value at runtime */
191
+ unsigned long behind_writes_used ;
192
+
193
+ /*
194
+ * the bitmap daemon - periodically wakes up and sweeps the bitmap
195
+ * file, cleaning up bits and flushing out pages to disk as necessary
196
+ */
197
+ unsigned long daemon_lastrun ; /* jiffies of last run */
198
+ /*
199
+ * when we last called end_sync to update bitmap with resync
200
+ * progress.
201
+ */
202
+ unsigned long last_end_sync ;
203
+
204
+ /* pending writes to the bitmap file */
205
+ atomic_t pending_writes ;
206
+ wait_queue_head_t write_wait ;
207
+ wait_queue_head_t overflow_wait ;
208
+ wait_queue_head_t behind_wait ;
209
+
210
+ struct kernfs_node * sysfs_can_clear ;
211
+ /* slot offset for clustered env */
212
+ int cluster_slot ;
213
+ };
214
+
35
215
static int __bitmap_resize (struct bitmap * bitmap , sector_t blocks ,
36
216
int chunksize , bool init );
37
217
@@ -491,9 +671,10 @@ static void md_bitmap_wait_writes(struct bitmap *bitmap)
491
671
492
672
493
673
/* update the event counter and sync the superblock to disk */
494
- static void bitmap_update_sb (struct bitmap * bitmap )
674
+ static void bitmap_update_sb (void * data )
495
675
{
496
676
bitmap_super_t * sb ;
677
+ struct bitmap * bitmap = data ;
497
678
498
679
if (!bitmap || !bitmap -> mddev ) /* no bitmap for this array */
499
680
return ;
@@ -1844,10 +2025,11 @@ static void bitmap_flush(struct mddev *mddev)
1844
2025
bitmap_update_sb (bitmap );
1845
2026
}
1846
2027
1847
- static void md_bitmap_free (struct bitmap * bitmap )
2028
+ static void md_bitmap_free (void * data )
1848
2029
{
1849
2030
unsigned long k , pages ;
1850
2031
struct bitmap_page * bp ;
2032
+ struct bitmap * bitmap = data ;
1851
2033
1852
2034
if (!bitmap ) /* there was no bitmap */
1853
2035
return ;
@@ -2076,7 +2258,7 @@ static int bitmap_load(struct mddev *mddev)
2076
2258
}
2077
2259
2078
2260
/* caller needs to free the returned bitmap with md_bitmap_free() */
2079
- static struct bitmap * bitmap_get_from_slot (struct mddev * mddev , int slot )
2261
+ static void * bitmap_get_from_slot (struct mddev * mddev , int slot )
2080
2262
{
2081
2263
int rv = 0 ;
2082
2264
struct bitmap * bitmap ;
@@ -2143,15 +2325,18 @@ static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low,
2143
2325
return rv ;
2144
2326
}
2145
2327
2146
- static void bitmap_set_pages (struct bitmap * bitmap , unsigned long pages )
2328
+ static void bitmap_set_pages (void * data , unsigned long pages )
2147
2329
{
2330
+ struct bitmap * bitmap = data ;
2331
+
2148
2332
bitmap -> counts .pages = pages ;
2149
2333
}
2150
2334
2151
- static int bitmap_get_stats (struct bitmap * bitmap , struct md_bitmap_stats * stats )
2335
+ static int bitmap_get_stats (void * data , struct md_bitmap_stats * stats )
2152
2336
{
2153
2337
struct bitmap_storage * storage ;
2154
2338
struct bitmap_counts * counts ;
2339
+ struct bitmap * bitmap = data ;
2155
2340
bitmap_super_t * sb ;
2156
2341
2157
2342
if (!bitmap )
@@ -2510,6 +2695,7 @@ space_show(struct mddev *mddev, char *page)
2510
2695
static ssize_t
2511
2696
space_store (struct mddev * mddev , const char * buf , size_t len )
2512
2697
{
2698
+ struct bitmap * bitmap ;
2513
2699
unsigned long sectors ;
2514
2700
int rv ;
2515
2701
@@ -2520,8 +2706,8 @@ space_store(struct mddev *mddev, const char *buf, size_t len)
2520
2706
if (sectors == 0 )
2521
2707
return - EINVAL ;
2522
2708
2523
- if ( mddev -> bitmap &&
2524
- sectors < (mddev -> bitmap -> storage .bytes + 511 ) >> 9 )
2709
+ bitmap = mddev -> bitmap ;
2710
+ if ( bitmap && sectors < (bitmap -> storage .bytes + 511 ) >> 9 )
2525
2711
return - EFBIG ; /* Bitmap is too big for this small space */
2526
2712
2527
2713
/* could make sure it isn't too big, but that isn't really
@@ -2698,10 +2884,13 @@ __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2698
2884
static ssize_t can_clear_show (struct mddev * mddev , char * page )
2699
2885
{
2700
2886
int len ;
2887
+ struct bitmap * bitmap ;
2888
+
2701
2889
spin_lock (& mddev -> lock );
2702
- if (mddev -> bitmap )
2703
- len = sprintf (page , "%s\n" , (mddev -> bitmap -> need_sync ?
2704
- "false" : "true" ));
2890
+ bitmap = mddev -> bitmap ;
2891
+ if (bitmap )
2892
+ len = sprintf (page , "%s\n" , (bitmap -> need_sync ? "false" :
2893
+ "true" ));
2705
2894
else
2706
2895
len = sprintf (page , "\n" );
2707
2896
spin_unlock (& mddev -> lock );
@@ -2710,17 +2899,24 @@ static ssize_t can_clear_show(struct mddev *mddev, char *page)
2710
2899
2711
2900
static ssize_t can_clear_store (struct mddev * mddev , const char * buf , size_t len )
2712
2901
{
2713
- if (mddev -> bitmap == NULL )
2902
+ struct bitmap * bitmap = mddev -> bitmap ;
2903
+
2904
+ if (!bitmap )
2714
2905
return - ENOENT ;
2715
- if (strncmp (buf , "false" , 5 ) == 0 )
2716
- mddev -> bitmap -> need_sync = 1 ;
2717
- else if (strncmp (buf , "true" , 4 ) == 0 ) {
2906
+
2907
+ if (strncmp (buf , "false" , 5 ) == 0 ) {
2908
+ bitmap -> need_sync = 1 ;
2909
+ return len ;
2910
+ }
2911
+
2912
+ if (strncmp (buf , "true" , 4 ) == 0 ) {
2718
2913
if (mddev -> degraded )
2719
2914
return - EBUSY ;
2720
- mddev -> bitmap -> need_sync = 0 ;
2721
- } else
2722
- return - EINVAL ;
2723
- return len ;
2915
+ bitmap -> need_sync = 0 ;
2916
+ return len ;
2917
+ }
2918
+
2919
+ return - EINVAL ;
2724
2920
}
2725
2921
2726
2922
static struct md_sysfs_entry bitmap_can_clear =
@@ -2730,21 +2926,26 @@ static ssize_t
2730
2926
behind_writes_used_show (struct mddev * mddev , char * page )
2731
2927
{
2732
2928
ssize_t ret ;
2929
+ struct bitmap * bitmap ;
2930
+
2733
2931
spin_lock (& mddev -> lock );
2734
- if (mddev -> bitmap == NULL )
2932
+ bitmap = mddev -> bitmap ;
2933
+ if (!bitmap )
2735
2934
ret = sprintf (page , "0\n" );
2736
2935
else
2737
- ret = sprintf (page , "%lu\n" ,
2738
- mddev -> bitmap -> behind_writes_used );
2936
+ ret = sprintf (page , "%lu\n" , bitmap -> behind_writes_used );
2739
2937
spin_unlock (& mddev -> lock );
2938
+
2740
2939
return ret ;
2741
2940
}
2742
2941
2743
2942
static ssize_t
2744
2943
behind_writes_used_reset (struct mddev * mddev , const char * buf , size_t len )
2745
2944
{
2746
- if (mddev -> bitmap )
2747
- mddev -> bitmap -> behind_writes_used = 0 ;
2945
+ struct bitmap * bitmap = mddev -> bitmap ;
2946
+
2947
+ if (bitmap )
2948
+ bitmap -> behind_writes_used = 0 ;
2748
2949
return len ;
2749
2950
}
2750
2951
0 commit comments