24 changes: 0 additions & 24 deletions ARCHITECTURE.md
@@ -106,7 +106,6 @@ The schema defines all of the fields that the indexes [`Document`](src/schema/do

Depending on the type of the field, you can decide to

- put it in the docstore
- store it as a fast field
- index it
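
A minimal sketch of these choices with tantivy's schema builder (field names are illustrative; only the fast-field and indexing options are shown, since this PR removes the docstore):

```rust
use tantivy::schema::{Schema, FAST, TEXT};

// `TEXT` indexes the field for full-text search; `FAST` stores it as a
// column-oriented fast field. Field names are illustrative.
fn build_schema() -> Schema {
    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", TEXT);
    schema_builder.add_u64_field("popularity", FAST);
    schema_builder.build()
}
```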

@@ -135,29 +134,6 @@ This conversion is done by the serializer.
Finally, the reader is in charge of offering an API to read on this on-disk read-only representation.
In tantivy, readers are designed to require very little anonymous memory. The data is read straight from an mmapped file, and loading an index is as fast as mmapping its files.
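
As a concrete illustration (the path and helper name are illustrative), opening an existing index through the default mmap-backed directory is a single call:

```rust
use std::path::Path;

use tantivy::Index;

// The segment files are memory-mapped by the default MmapDirectory rather
// than loaded into anonymous memory up front.
fn open_index(path: &Path) -> tantivy::Result<Index> {
    Index::open_in_dir(path)
}
```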

## [store/](src/store): Here is my DocId, Gimme my document

The docstore is a row-oriented storage that, for each document, stores a subset of the fields
that are marked as stored in the schema. The docstore is compressed using a general-purpose algorithm
like LZ4.

**Useful for**

In search engines, it is often used to display search results.
Once the top 10 documents have been identified, we fetch them from the store, and display them or their snippet on the search result page (aka SERP).

**Not useful for**

Fetching a document from the store is typically a "slow" operation. It usually consists of

- searching into a compact tree-like data structure to find the position of the right block.
- decompressing a small block
- returning the document from this block.

It is NOT meant to be called for every document matching a query.

As a rule of thumb, if you hit the docstore more than 100 times per search query, you are probably misusing tantivy.
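
For reference, the access pattern this section prescribes looks roughly like the following in pre-removal tantivy. This is a sketch rather than the project's code: the `top_hits` helper and its arguments are illustrative, and the exact `Searcher::doc` signature (and `TantivyDocument` versus the older `Document` type) varies across tantivy versions.

```rust
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::{Index, TantivyDocument};

// One docstore lookup per displayed hit, never one per matching document.
fn top_hits(
    index: &Index,
    default_field: Field,
    user_query: &str,
) -> tantivy::Result<Vec<TantivyDocument>> {
    let reader = index.reader()?;
    let searcher = reader.searcher();
    let query = QueryParser::for_index(index, vec![default_field]).parse_query(user_query)?;
    let top_docs = searcher.search(&query, &TopDocs::with_limit(10))?;
    top_docs
        .into_iter()
        // Block lookup and decompression happen here, at most ten times.
        .map(|(_score, addr)| searcher.doc(addr))
        .collect()
}
```

The store is hit ten times here, once per displayed result, no matter how many documents matched the query.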

## [fastfield/](src/fastfield): Here is my DocId, Gimme my value

Fast fields are stored in a column-oriented storage that allows for random access.
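
For contrast with the docstore, here is a sketch of per-document random access to a numeric fast field. The field name is illustrative and the column API shown follows recent tantivy versions:

```rust
use tantivy::{DocId, SegmentReader};

// Column-oriented lookup: one value read per document, no row-block
// decompression involved.
fn popularity_of(segment_reader: &SegmentReader, doc: DocId) -> tantivy::Result<Option<u64>> {
    let column = segment_reader.fast_fields().u64("popularity")?;
    Ok(column.first(doc))
}
```

In a real collector the column would be fetched once per segment and reused for every document rather than re-opened on each call.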
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -14,6 +14,7 @@ have been removed to keep the changelog focused on Yeehaw's history.
- handle unknown column codes gracefully in `ColumnarReader::iter_columns`.

## Features/Improvements
- drop docstore module and references in preparation for trible.space rewrite.
- remove `quickwit` feature flag and related async code.
- add docs/example and Vec<u32> values to sstable [#2660](https://github.com/quickwit-oss/yeehaw/pull/2660)(@PSeitz)
- Add string fast field support to `TopDocs`. [#2642](https://github.com/quickwit-oss/yeehaw/pull/2642)(@stuhood)
12 changes: 4 additions & 8 deletions INVENTORY.md
@@ -20,23 +20,19 @@ This document outlines the long term plan to rewrite this project so that it rel
- Replace the `Directory` abstraction with a backend that reads and writes blobs via the Trible Space `BlobStore`.
- Index writers and readers operate on blob handles instead of filesystem paths.

3. **Drop the docstore module**
- Primary documents are kept in Trible Space; segments no longer store their own row oriented docstore.
- Search results fetch documents via blob handles.

4. **Remove `Opstamp` and use commit handles**
3. **Remove `Opstamp` and use commit handles**
- Commits record the segments they include.
- Merges rely on commit ancestry instead of monotonic operation stamps.

5. **Introduce 128-bit IDs with `Universe` mapping**
4. **Introduce 128-bit IDs with `Universe` mapping**
- Map external `u128` identifiers to compact `DocId` values.
- Persist the mapping so search results can translate back (a minimal sketch follows this list).

6. **Typed DSL for fuzzy search**
5. **Typed DSL for fuzzy search**
- Generate search filters from Trible namespaces.
- Provide macros that participate in both `find!` queries and full text search.

7. **Index update merge workflow**
6. **Index update merge workflow**
- Wrap indexing operations in workspace commits.
- Use Trible's compare-and-swap push mechanism so multiple writers merge gracefully.
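
As referenced in step 4, a minimal in-memory sketch of the planned `Universe` mapping follows. Persistence and the Trible Space integration are omitted, and everything apart from the `Universe` name and the `DocId` concept is illustrative:

```rust
use std::collections::HashMap;

type DocId = u32; // stand-in for tantivy's DocId

/// Assigns compact, dense DocIds to external 128-bit identifiers and keeps a
/// reverse table so search results can be translated back.
#[derive(Default)]
struct Universe {
    forward: HashMap<u128, DocId>,
    reverse: Vec<u128>, // reverse[doc_id as usize] == external id
}

impl Universe {
    fn get_or_assign(&mut self, external: u128) -> DocId {
        if let Some(&doc_id) = self.forward.get(&external) {
            return doc_id;
        }
        let doc_id = self.reverse.len() as DocId;
        self.reverse.push(external);
        self.forward.insert(external, doc_id);
        doc_id
    }

    fn external_id(&self, doc_id: DocId) -> Option<u128> {
        self.reverse.get(doc_id as usize).copied()
    }
}
```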

6 changes: 1 addition & 5 deletions src/index/index.rs
@@ -95,11 +95,7 @@ fn save_new_metas(
/// );
///
/// let schema = schema_builder.build();
/// let settings = IndexSettings{
/// docstore_blocksize: 100_000,
/// ..Default::default()
/// };
/// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
/// let index = Index::builder().schema(schema).create_in_ram();
/// ```
pub struct IndexBuilder {
schema: Option<Schema>,
85 changes: 5 additions & 80 deletions src/index/index_meta.rs
@@ -1,15 +1,12 @@
use std::collections::HashSet;
use std::fmt;
use std::path::PathBuf;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;

use serde::{Deserialize, Serialize};

use super::SegmentComponent;
use crate::index::SegmentId;
use crate::schema::Schema;
use crate::store::Compressor;
use crate::{Inventory, Opstamp, TrackedObject};

#[derive(Clone, Debug, Serialize, Deserialize)]
@@ -37,7 +34,6 @@ impl SegmentMetaInventory {
let inner = InnerSegmentMeta {
segment_id,
max_doc,
include_temp_doc_store: Arc::new(AtomicBool::new(true)),
deletes: None,
};
SegmentMeta::from(self.inventory.track(inner))
@@ -85,15 +81,6 @@ impl SegmentMeta {
self.tracked.segment_id
}

/// Removes the Component::TempStore from the alive list and
/// therefore marks the temp docstore file to be deleted by
/// the garbage collection.
pub fn untrack_temp_docstore(&self) {
self.tracked
.include_temp_doc_store
.store(false, std::sync::atomic::Ordering::Relaxed);
}

/// Returns the number of deleted documents.
pub fn num_deleted_docs(&self) -> u32 {
self.tracked
@@ -111,20 +98,9 @@ impl SegmentMeta {
/// is by removing all files that have been created by tantivy
/// and are not used by any segment anymore.
pub fn list_files(&self) -> HashSet<PathBuf> {
if self
.tracked
.include_temp_doc_store
.load(std::sync::atomic::Ordering::Relaxed)
{
SegmentComponent::iterator()
.map(|component| self.relative_path(*component))
.collect::<HashSet<PathBuf>>()
} else {
SegmentComponent::iterator()
.filter(|comp| *comp != &SegmentComponent::TempStore)
.map(|component| self.relative_path(*component))
.collect::<HashSet<PathBuf>>()
}
SegmentComponent::iterator()
.map(|component| self.relative_path(*component))
.collect::<HashSet<PathBuf>>()
}

/// Returns the relative path of a component of our segment.
@@ -137,8 +113,6 @@ impl SegmentMeta {
SegmentComponent::Postings => ".idx".to_string(),
SegmentComponent::Positions => ".pos".to_string(),
SegmentComponent::Terms => ".term".to_string(),
SegmentComponent::Store => ".store".to_string(),
SegmentComponent::TempStore => ".store.temp".to_string(),
SegmentComponent::FastFields => ".fast".to_string(),
SegmentComponent::FieldNorms => ".fieldnorm".to_string(),
SegmentComponent::Delete => format!(".{}.del", self.delete_opstamp().unwrap_or(0)),
@@ -183,7 +157,6 @@ impl SegmentMeta {
segment_id: inner_meta.segment_id,
max_doc,
deletes: None,
include_temp_doc_store: Arc::new(AtomicBool::new(true)),
});
SegmentMeta { tracked }
}
@@ -202,7 +175,6 @@ impl SegmentMeta {
let tracked = self.tracked.map(move |inner_meta| InnerSegmentMeta {
segment_id: inner_meta.segment_id,
max_doc: inner_meta.max_doc,
include_temp_doc_store: Arc::new(AtomicBool::new(true)),
deletes: Some(delete_meta),
});
SegmentMeta { tracked }
@@ -214,14 +186,6 @@ struct InnerSegmentMeta {
segment_id: SegmentId,
max_doc: u32,
deletes: Option<DeleteMeta>,
/// If you want to avoid the SegmentComponent::TempStore file to be covered by
/// garbage collection and deleted, set this to true. This is used during merge.
#[serde(skip)]
#[serde(default = "default_temp_store")]
pub(crate) include_temp_doc_store: Arc<AtomicBool>,
}
fn default_temp_store() -> Arc<AtomicBool> {
Arc::new(AtomicBool::new(false))
}

impl InnerSegmentMeta {
@@ -232,48 +196,9 @@ impl InnerSegmentMeta {
}
}

fn return_true() -> bool {
true
}

fn is_true(val: &bool) -> bool {
*val
}

/// Search Index Settings.
///
/// Contains settings which are applied on the whole
/// index, like presort documents.
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
pub struct IndexSettings {
/// The `Compressor` used to compress the doc store.
#[serde(default)]
pub docstore_compression: Compressor,
/// If set to true, docstore compression will happen on a dedicated thread.
/// (defaults: true)
#[doc(hidden)]
#[serde(default = "return_true")]
#[serde(skip_serializing_if = "is_true")]
pub docstore_compress_dedicated_thread: bool,
#[serde(default = "default_docstore_blocksize")]
/// The size of each block that will be compressed and written to disk
pub docstore_blocksize: usize,
}

/// Must be a function to be compatible with serde defaults
fn default_docstore_blocksize() -> usize {
16_384
}

impl Default for IndexSettings {
fn default() -> Self {
Self {
docstore_compression: Compressor::default(),
docstore_blocksize: default_docstore_blocksize(),
docstore_compress_dedicated_thread: true,
}
}
}
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq, Default)]
pub struct IndexSettings {}

/// The order to sort by
#[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
22 changes: 1 addition & 21 deletions src/indexer/segment_serializer.rs
@@ -4,13 +4,11 @@ use crate::directory::WritePtr;
use crate::fieldnorm::FieldNormsSerializer;
use crate::index::{Segment, SegmentComponent};
use crate::postings::InvertedIndexSerializer;
use crate::store::StoreWriter;

/// Segment serializer is in charge of laying out on disk
/// the data accumulated and sorted by the `SegmentWriter`.
pub struct SegmentSerializer {
segment: Segment,
pub(crate) store_writer: StoreWriter,
fast_field_write: WritePtr,
fieldnorms_serializer: Option<FieldNormsSerializer>,
postings_serializer: InvertedIndexSerializer,
@@ -19,17 +17,6 @@ pub struct SegmentSerializer {
impl SegmentSerializer {
/// Creates a new `SegmentSerializer`.
pub fn for_segment(mut segment: Segment) -> crate::Result<SegmentSerializer> {
let settings = segment.index().settings().clone();
let store_writer = {
let store_write = segment.open_write(SegmentComponent::Store)?;
StoreWriter::new(
store_write,
settings.docstore_compression,
settings.docstore_blocksize,
settings.docstore_compress_dedicated_thread,
)?
};

let fast_field_write = segment.open_write(SegmentComponent::FastFields)?;

let fieldnorms_write = segment.open_write(SegmentComponent::FieldNorms)?;
@@ -38,7 +25,6 @@ impl SegmentSerializer {
let postings_serializer = InvertedIndexSerializer::open(&mut segment)?;
Ok(SegmentSerializer {
segment,
store_writer,
fast_field_write,
fieldnorms_serializer: Some(fieldnorms_serializer),
postings_serializer,
@@ -47,7 +33,7 @@

/// The memory used (including children)
pub fn mem_usage(&self) -> usize {
self.store_writer.mem_usage()
0
}

pub fn segment(&self) -> &Segment {
@@ -71,19 +57,13 @@ impl SegmentSerializer {
self.fieldnorms_serializer.take()
}

/// Accessor to the `StoreWriter`.
pub fn get_store_writer(&mut self) -> &mut StoreWriter {
&mut self.store_writer
}

/// Finalize the segment serialization.
pub fn close(mut self) -> crate::Result<()> {
if let Some(fieldnorms_serializer) = self.extract_fieldnorms_serializer() {
fieldnorms_serializer.close()?;
}
self.fast_field_write.terminate()?;
self.postings_serializer.close()?;
self.store_writer.close()?;
Ok(())
}
}
5 changes: 2 additions & 3 deletions src/lib.rs
@@ -196,7 +196,6 @@ pub mod postings;
pub mod query;
pub mod schema;
pub mod space_usage;
pub mod store;
pub mod termdict;

mod docset;
@@ -267,13 +266,13 @@ impl fmt::Display for Version {
static VERSION_STRING: Lazy<String> = Lazy::new(|| VERSION.to_string());

/// Expose the current version of tantivy as found in Cargo.toml during compilation.
/// eg. "0.11.0" as well as the compression scheme used in the docstore.
/// eg. "0.11.0".
pub fn version() -> &'static Version {
&VERSION
}

/// Exposes the complete version of tantivy as found in Cargo.toml during compilation as a string.
/// eg. "tantivy v0.11.0, index_format v1, store_compression: lz4".
/// eg. "tantivy v0.11.0, index_format v1".
pub fn version_string() -> &'static str {
VERSION_STRING.as_str()
}
49 changes: 0 additions & 49 deletions src/store/compression_lz4_block.rs

This file was deleted.
