Skip to content

Commit f3fffb9

Browse files
authored
Lock metadata on first event. (#142)
When multiple requests reach event processing too quickly, many threads race to register the first event. This is problematic because the server may drop many requests after erroring/panicking on the local writer. This PR fixes the issue by acquiring a write lock on the metadata beforehand in the case of a first event. This prevents other threads from reading the metadata and thus blocks other requests until the first event has been processed.
1 parent dd24e09 commit f3fffb9

File tree

6 files changed

+95
-24
lines changed

6 files changed

+95
-24
lines changed

server/src/event.rs

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
*
1717
*
1818
*/
19+
use actix_web::rt::spawn;
1920
use datafusion::arrow;
2021
use datafusion::arrow::datatypes::Schema;
2122
use datafusion::arrow::error::ArrowError;
@@ -24,17 +25,19 @@ use datafusion::arrow::json;
2425
use datafusion::arrow::json::reader::infer_json_schema;
2526
use datafusion::arrow::record_batch::RecordBatch;
2627
use lazy_static::lazy_static;
27-
use log::error;
2828
use std::collections::HashMap;
2929
use std::fs::OpenOptions;
3030
use std::io::BufReader;
31+
use std::ops::{Deref, DerefMut};
3132
use std::sync::Arc;
3233
use std::sync::Mutex;
3334
use std::sync::MutexGuard;
3435
use std::sync::RwLock;
3536

3637
use crate::metadata;
38+
use crate::metadata::LOCK_EXPECT;
3739
use crate::option::CONFIG;
40+
use crate::s3;
3841
use crate::storage::ObjectStorage;
3942

4043
use self::error::EventError;
@@ -190,7 +193,7 @@ pub struct Event {
190193
// Events holds the schema related to a each event for a single log stream
191194

192195
impl Event {
193-
pub async fn process(&self, storage: &impl ObjectStorage) -> Result<(), EventError> {
196+
pub async fn process(&self) -> Result<(), EventError> {
194197
let inferred_schema = self.infer_schema()?;
195198

196199
let event = self.get_reader(inferred_schema.clone());
@@ -207,12 +210,11 @@ impl Event {
207210
} else {
208211
// if stream schema is none then it is first event,
209212
// process first event and store schema in obect store
210-
self.process_first_event(event, inferred_schema, storage)
211-
.await?
213+
self.process_first_event::<s3::S3, _>(event, inferred_schema)?
212214
};
213215

214216
if let Err(e) = metadata::STREAM_INFO.check_alerts(self).await {
215-
error!("Error checking for alerts. {:?}", e);
217+
log::error!("Error checking for alerts. {:?}", e);
216218
}
217219

218220
Ok(())
@@ -221,25 +223,67 @@ impl Event {
221223
// This is called when the first event of a log stream is received. The first event is
222224
// special because we parse this event to generate the schema for the log stream. This
223225
// schema is then enforced on rest of the events sent to this log stream.
224-
async fn process_first_event<R: std::io::Read>(
226+
fn process_first_event<S: ObjectStorage, R: std::io::Read>(
225227
&self,
226228
mut event: json::Reader<R>,
227229
schema: Schema,
228-
storage: &impl ObjectStorage,
229230
) -> Result<u64, EventError> {
230-
let rb = event.next()?.ok_or(EventError::MissingRecord)?;
231-
let stream_name = &self.stream_name;
232-
233-
// Store record batch on local cache
234-
STREAM_WRITERS::create_entry(stream_name.clone(), &rb).unwrap();
231+
// note for functions _schema_with_map and _set_schema_with_map,
232+
// these are to be called while holding a write lock specifically.
233+
// this guarantees two things
234+
// - no other metadata operation can happen inbetween
235+
// - map always have an entry for this stream
235236

236-
// Put the inferred schema to object store
237-
storage.put_schema(stream_name.clone(), &schema).await?;
237+
let stream_name = &self.stream_name;
238238

239-
// set the schema in memory for this stream
240-
metadata::STREAM_INFO.set_schema(stream_name, schema)?;
239+
let mut stream_metadata = metadata::STREAM_INFO.write().expect(LOCK_EXPECT);
240+
// if the metadata is not none after acquiring lock
241+
// then some other thread has already completed this function.
242+
if _schema_with_map(stream_name, &stream_metadata).is_some() {
243+
// drop the lock
244+
drop(stream_metadata);
245+
// Try to post event usual way
246+
log::info!("first event is redirected to process_event");
247+
self.process_event(event)
248+
} else {
249+
// stream metadata is still none,
250+
// this means this execution should be considered as first event.
251+
252+
// Store record batch on local cache
253+
log::info!("creating local writer for this first event");
254+
let rb = event.next()?.ok_or(EventError::MissingRecord)?;
255+
STREAM_WRITERS::append_to_local(stream_name, &rb)?;
256+
257+
log::info!("schema is set in memory map for logstream {}", stream_name);
258+
_set_schema_with_map(stream_name, schema.clone(), &mut stream_metadata);
259+
// drop mutex before going across await point
260+
drop(stream_metadata);
261+
262+
log::info!(
263+
"setting schema on objectstore for logstream {}",
264+
stream_name
265+
);
266+
let storage = S::new();
267+
268+
let stream_name = stream_name.clone();
269+
spawn(async move {
270+
if let Err(e) = storage.put_schema(stream_name.clone(), &schema).await {
271+
// If this call has failed then currently there is no right way to make local state consistent
272+
// this needs a fix after more constraints are safety guarentee is provided by localwriter and s3_sync.
273+
// Reasoning -
274+
// - After dropping lock many events may process through
275+
// - Processed events may sync before metadata deletion
276+
log::error!(
277+
"Parseable failed to upload schema to objectstore due to error {}",
278+
e
279+
);
280+
log::error!("Please manually delete this logstream and create a new one.");
281+
metadata::STREAM_INFO.delete_stream(&stream_name);
282+
}
283+
});
241284

242-
Ok(0)
285+
Ok(0)
286+
}
243287
}
244288

245289
// event process all events after the 1st event. Concatenates record batches
@@ -273,6 +317,31 @@ impl Event {
273317
}
274318
}
275319

320+
// Special functions which reads from metadata map while holding the lock
321+
#[inline]
322+
pub fn _schema_with_map(
323+
stream_name: &str,
324+
map: &impl Deref<Target = HashMap<String, metadata::LogStreamMetadata>>,
325+
) -> Option<Schema> {
326+
map.get(stream_name)
327+
.expect("map has entry for this stream name")
328+
.schema
329+
.to_owned()
330+
}
331+
332+
#[inline]
333+
// Special functions which writes to metadata map while holding the lock
334+
pub fn _set_schema_with_map(
335+
stream_name: &str,
336+
schema: Schema,
337+
map: &mut impl DerefMut<Target = HashMap<String, metadata::LogStreamMetadata>>,
338+
) {
339+
map.get_mut(stream_name)
340+
.expect("map has entry for this stream name")
341+
.schema
342+
.replace(schema);
343+
}
344+
276345
pub mod error {
277346
use crate::metadata::error::stream_info::MetadataError;
278347
use crate::storage::ObjectStorageError;

server/src/handlers/event.rs

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,20 +64,18 @@ pub async fn post_event(
6464
collect_labelled_headers(&req, PREFIX_META, SEPARATOR)?,
6565
)]);
6666

67-
let s3 = S3::new();
68-
6967
if let Some(array) = body.as_array() {
7068
for body in array {
7169
let body = merge(body.clone(), metadata.clone());
7270
let body = merge(body, tags.clone());
7371
let body = flatten_json_body(web::Json(body)).unwrap();
7472

75-
let e = event::Event {
73+
let event = event::Event {
7674
body,
7775
stream_name: stream_name.clone(),
7876
};
7977

80-
e.process(&s3).await?;
78+
event.process().await?;
8179
}
8280
} else {
8381
let body = merge(body.clone(), metadata);
@@ -88,7 +86,7 @@ pub async fn post_event(
8886
stream_name,
8987
};
9088

91-
event.process(&s3).await?;
89+
event.process().await?;
9290
}
9391

9492
Ok(HttpResponse::Ok().finish())

server/src/handlers/logstream.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,6 @@ pub async fn put(req: HttpRequest) -> HttpResponse {
168168
if s3.get_schema(&stream_name).await.is_err() {
169169
// Fail if unable to create log stream on object store backend
170170
if let Err(e) = s3.create_stream(&stream_name).await {
171-
// delete the stream from metadata because we couldn't create it on object store backend
172-
metadata::STREAM_INFO.delete_stream(&stream_name);
173171
return response::ServerResponse {
174172
msg: format!(
175173
"failed to create log stream {} due to err: {}",

server/src/metadata.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ impl STREAM_INFO {
9090
Ok(())
9191
}
9292

93+
#[allow(dead_code)]
9394
pub fn set_schema(&self, stream_name: &str, schema: Schema) -> Result<(), MetadataError> {
9495
let mut map = self.write().expect(LOCK_EXPECT);
9596
map.get_mut(stream_name)

server/src/s3.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,10 @@ impl S3 {
365365

366366
#[async_trait]
367367
impl ObjectStorage for S3 {
368+
fn new() -> Self {
369+
Self::new()
370+
}
371+
368372
async fn check(&self) -> Result<(), ObjectStorageError> {
369373
self.client
370374
.head_bucket()

server/src/storage.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub const OBJECT_STORE_DATA_GRANULARITY: u32 = (LOCAL_SYNC_INTERVAL as u32) / 60
5252

5353
#[async_trait]
5454
pub trait ObjectStorage: Sync + 'static {
55+
fn new() -> Self;
5556
async fn check(&self) -> Result<(), ObjectStorageError>;
5657
async fn put_schema(
5758
&self,

0 commit comments

Comments
 (0)