Skip to content

Commit bec486b

Browse files
jeffhostetlergitster
authored andcommitted
fsmonitor--daemon: create token-based changed path cache
Teach fsmonitor--daemon to build a list of changed paths and associate them with a token-id. This will be used by the platform-specific backends to accumulate changed paths in response to filesystem events. The platform-specific file system listener thread receives file system events containing one or more changed pathnames (with whatever bucketing or grouping that is convenient for the file system). These paths are accumulated (without locking) by the file system layer into a `fsmonitor_batch`. When the file system layer has drained the kernel event queue, it will "publish" them to our token queue and make them visible to concurrent client worker threads. The token layer is free to combine and/or de-dup paths within these batches for efficient presentation to clients. Signed-off-by: Jeff Hostetler <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent aeef767 commit bec486b

File tree

2 files changed

+268
-2
lines changed

2 files changed

+268
-2
lines changed

builtin/fsmonitor--daemon.c

Lines changed: 228 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,17 +181,27 @@ struct fsmonitor_token_data {
181181
uint64_t client_ref_count;
182182
};
183183

184+
/*
 * One batch of changed paths.  Batches are chained into a singly-linked
 * list hanging off a token; newer batches are prepended, so `next`
 * leads towards older batches.
 */
struct fsmonitor_batch {
	/* The batch that was at the head of the list before this one. */
	struct fsmonitor_batch *next;

	/* Position of this batch within its token's series. */
	uint64_t batch_seq_nr;

	/*
	 * Array of interned path strings.  The batch owns the array
	 * itself but not the strings it points at.
	 */
	const char **interned_paths;

	/* Number of used slots in `interned_paths`. */
	size_t nr;

	/* Number of allocated slots in `interned_paths`. */
	size_t alloc;

	/*
	 * Zero while the batch is unpinned; set to a wall-clock time
	 * once the batch has been pinned.
	 */
	time_t pinned_time;
};
191+
184192
static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
185193
{
186194
static int test_env_value = -1;
187195
static uint64_t flush_count = 0;
188196
struct fsmonitor_token_data *token;
197+
struct fsmonitor_batch *batch;
189198

190199
CALLOC_ARRAY(token, 1);
200+
batch = fsmonitor_batch__new();
191201

192202
strbuf_init(&token->token_id, 0);
193-
token->batch_head = NULL;
194-
token->batch_tail = NULL;
203+
token->batch_head = batch;
204+
token->batch_tail = batch;
195205
token->client_ref_count = 0;
196206

197207
if (test_env_value < 0)
@@ -217,9 +227,143 @@ static struct fsmonitor_token_data *fsmonitor_new_token_data(void)
217227
strbuf_addf(&token->token_id, "test_%08x", test_env_value++);
218228
}
219229

230+
/*
231+
* We created a new <token_id> and are starting a new series
232+
* of tokens with a zero <seq_nr>.
233+
*
234+
* Since clients cannot guess our new (non test) <token_id>
235+
* they will always receive a trivial response (because of the
236+
* mismatch on the <token_id>). The trivial response will
237+
* tell them our new <token_id> so that subsequent requests
238+
* will be relative to our new series. (And when sending that
239+
* response, we pin the current head of the batch list.)
240+
*
241+
* Even if the client correctly guesses the <token_id>, their
242+
* request of "builtin:<token_id>:0" asks for all changes MORE
243+
* RECENT than batch/bin 0.
244+
*
245+
* This implies that it is a waste to accumulate paths in the
246+
* initial batch/bin (because they will never be transmitted).
247+
*
248+
* So the daemon could be running for days and watching the
249+
* file system, but doesn't need to actually accumulate any
250+
* paths UNTIL we need to set a reference point for a later
251+
* relative request.
252+
*
253+
* However, it is very useful for testing to always have a
254+
* reference point set. Pin batch 0 to force early file system
255+
* events to accumulate.
256+
*/
257+
if (test_env_value)
258+
batch->pinned_time = time(NULL);
259+
220260
return token;
221261
}
222262

263+
/*
 * Allocate a single, empty batch and hand it to the caller.  The
 * batch is not linked into any list here.
 *
 * NOTE(review): relies on CALLOC_ARRAY returning zero-filled memory so
 * that `next`, `nr`, `alloc` and `pinned_time` start out as 0/NULL --
 * confirm against the macro's definition.
 */
struct fsmonitor_batch *fsmonitor_batch__new(void)
{
	struct fsmonitor_batch *fresh;

	CALLOC_ARRAY(fresh, 1);
	return fresh;
}
271+
272+
void fsmonitor_batch__free_list(struct fsmonitor_batch *batch)
273+
{
274+
while (batch) {
275+
struct fsmonitor_batch *next = batch->next;
276+
277+
/*
278+
* The actual strings within the array of this batch
279+
* are interned, so we don't own them. We only own
280+
* the array.
281+
*/
282+
free(batch->interned_paths);
283+
free(batch);
284+
285+
batch = next;
286+
}
287+
}
288+
289+
void fsmonitor_batch__add_path(struct fsmonitor_batch *batch,
290+
const char *path)
291+
{
292+
const char *interned_path = strintern(path);
293+
294+
trace_printf_key(&trace_fsmonitor, "event: %s", interned_path);
295+
296+
ALLOC_GROW(batch->interned_paths, batch->nr + 1, batch->alloc);
297+
batch->interned_paths[batch->nr++] = interned_path;
298+
}
299+
300+
static void fsmonitor_batch__combine(struct fsmonitor_batch *batch_dest,
301+
const struct fsmonitor_batch *batch_src)
302+
{
303+
size_t k;
304+
305+
ALLOC_GROW(batch_dest->interned_paths,
306+
batch_dest->nr + batch_src->nr + 1,
307+
batch_dest->alloc);
308+
309+
for (k = 0; k < batch_src->nr; k++)
310+
batch_dest->interned_paths[batch_dest->nr++] =
311+
batch_src->interned_paths[k];
312+
}
313+
314+
static void fsmonitor_free_token_data(struct fsmonitor_token_data *token)
315+
{
316+
if (!token)
317+
return;
318+
319+
assert(token->client_ref_count == 0);
320+
321+
strbuf_release(&token->token_id);
322+
323+
fsmonitor_batch__free_list(token->batch_head);
324+
325+
free(token);
326+
}
327+
328+
/*
329+
* Flush all of our cached data about the filesystem. Call this if we
330+
* lose sync with the filesystem and miss some notification events.
331+
*
332+
* [1] If we are missing events, then we no longer have a complete
333+
* history of the directory (relative to our current start token).
334+
* We should create a new token and start fresh (as if we just
335+
* booted up).
336+
*
337+
* If there are no concurrent threads reading the current token data
338+
* series, we can free it now. Otherwise, let the last reader free
339+
* it.
340+
*
341+
* Either way, the old token data series is no longer associated with
342+
* our state data.
343+
*/
344+
static void with_lock__do_force_resync(struct fsmonitor_daemon_state *state)
345+
{
346+
/* assert current thread holding state->main_lock */
347+
348+
struct fsmonitor_token_data *free_me = NULL;
349+
struct fsmonitor_token_data *new_one = NULL;
350+
351+
new_one = fsmonitor_new_token_data();
352+
353+
if (state->current_token_data->client_ref_count == 0)
354+
free_me = state->current_token_data;
355+
state->current_token_data = new_one;
356+
357+
fsmonitor_free_token_data(free_me);
358+
}
359+
360+
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
361+
{
362+
pthread_mutex_lock(&state->main_lock);
363+
with_lock__do_force_resync(state);
364+
pthread_mutex_unlock(&state->main_lock);
365+
}
366+
223367
static ipc_server_application_cb handle_client;
224368

225369
static int handle_client(void *data,
@@ -329,6 +473,81 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute(
329473
return fsmonitor_classify_path_gitdir_relative(rel);
330474
}
331475

476+
/*
477+
* We try to combine small batches at the front of the batch-list to avoid
478+
* having a long list. This hopefully makes it a little easier when we want
479+
* to truncate and maintain the list. However, we don't want the paths array
480+
* to just keep growing and growing with realloc, so we insert an arbitrary
481+
* limit.
482+
*/
483+
#define MY_COMBINE_LIMIT (1024)
484+
485+
void fsmonitor_publish(struct fsmonitor_daemon_state *state,
486+
struct fsmonitor_batch *batch,
487+
const struct string_list *cookie_names)
488+
{
489+
if (!batch && !cookie_names->nr)
490+
return;
491+
492+
pthread_mutex_lock(&state->main_lock);
493+
494+
if (batch) {
495+
struct fsmonitor_batch *head;
496+
497+
head = state->current_token_data->batch_head;
498+
if (!head) {
499+
BUG("token does not have batch");
500+
} else if (head->pinned_time) {
501+
/*
502+
* We cannot alter the current batch list
503+
* because:
504+
*
505+
* [a] it is being transmitted to at least one
506+
* client and the handle_client() thread has a
507+
* ref-count, but not a lock on the batch list
508+
* starting with this item.
509+
*
510+
* [b] it has been transmitted in the past to
511+
* at least one client such that future
512+
* requests are relative to this head batch.
513+
*
514+
* So, we can only prepend a new batch onto
515+
* the front of the list.
516+
*/
517+
batch->batch_seq_nr = head->batch_seq_nr + 1;
518+
batch->next = head;
519+
state->current_token_data->batch_head = batch;
520+
} else if (!head->batch_seq_nr) {
521+
/*
522+
* Batch 0 is unpinned. See the note in
523+
* `fsmonitor_new_token_data()` about why we
524+
* don't need to accumulate these paths.
525+
*/
526+
fsmonitor_batch__free_list(batch);
527+
} else if (head->nr + batch->nr > MY_COMBINE_LIMIT) {
528+
/*
529+
* The head batch in the list has never been
530+
* transmitted to a client, but folding the
531+
* contents of the new batch onto it would
532+
* exceed our arbitrary limit, so just prepend
533+
* the new batch onto the list.
534+
*/
535+
batch->batch_seq_nr = head->batch_seq_nr + 1;
536+
batch->next = head;
537+
state->current_token_data->batch_head = batch;
538+
} else {
539+
/*
540+
* We are free to add the paths in the given
541+
* batch onto the end of the current head batch.
542+
*/
543+
fsmonitor_batch__combine(head, batch);
544+
fsmonitor_batch__free_list(batch);
545+
}
546+
}
547+
548+
pthread_mutex_unlock(&state->main_lock);
549+
}
550+
332551
static void *fsm_listen__thread_proc(void *_state)
333552
{
334553
struct fsmonitor_daemon_state *state = _state;
@@ -343,6 +562,13 @@ static void *fsm_listen__thread_proc(void *_state)
343562

344563
fsm_listen__loop(state);
345564

565+
pthread_mutex_lock(&state->main_lock);
566+
if (state->current_token_data &&
567+
state->current_token_data->client_ref_count == 0)
568+
fsmonitor_free_token_data(state->current_token_data);
569+
state->current_token_data = NULL;
570+
pthread_mutex_unlock(&state->main_lock);
571+
346572
trace2_thread_exit();
347573
return NULL;
348574
}

fsmonitor--daemon.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,27 @@
1212
struct fsmonitor_batch;
1313
struct fsmonitor_token_data;
1414

15+
/*
16+
* Create a new batch of path(s). The returned batch is considered
17+
* private and not linked into the fsmonitor daemon state. The caller
18+
* should fill this batch with one or more paths and then publish it.
19+
*/
20+
struct fsmonitor_batch *fsmonitor_batch__new(void);
21+
22+
/*
23+
* Free the list of batches starting with this one.
24+
*/
25+
void fsmonitor_batch__free_list(struct fsmonitor_batch *batch);
26+
27+
/*
28+
* Add this path to this batch of modified files.
29+
*
30+
* The batch should be private and NOT (yet) linked into the fsmonitor
31+
* daemon state and therefore not yet visible to worker threads and so
32+
* no locking is required.
33+
*/
34+
void fsmonitor_batch__add_path(struct fsmonitor_batch *batch, const char *path);
35+
1536
struct fsmonitor_daemon_backend_data; /* opaque platform-specific data */
1637

1738
struct fsmonitor_daemon_state {
@@ -117,5 +138,24 @@ enum fsmonitor_path_type fsmonitor_classify_path_absolute(
117138
struct fsmonitor_daemon_state *state,
118139
const char *path);
119140

141+
/*
142+
* Prepend this batch of path(s) onto the list of batches associated
143+
* with the current token. This makes the batch visible to worker threads.
144+
*
145+
* The caller no longer owns the batch and must not free it.
146+
*
147+
* Wake up the client threads waiting on these cookies.
148+
*/
149+
void fsmonitor_publish(struct fsmonitor_daemon_state *state,
150+
struct fsmonitor_batch *batch,
151+
const struct string_list *cookie_names);
152+
153+
/*
154+
* If the platform-specific layer loses sync with the filesystem,
155+
* it should call this to invalidate cached data and abort waiting
156+
* threads.
157+
*/
158+
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state);
159+
120160
#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
121161
#endif /* FSMONITOR_DAEMON_H */

0 commit comments

Comments
 (0)