File tree Expand file tree Collapse file tree 2 files changed +32
-3
lines changed
Expand file tree Collapse file tree 2 files changed +32
-3
lines changed Original file line number Diff line number Diff line change 11"""Generic data types for information retrieval"""
22
33from abc import ABC , abstractmethod
4+ from enum import Enum
45from functools import cached_property
56import logging
67from pathlib import Path
@@ -88,6 +89,19 @@ def document_recordtype(self) -> Type[DocumentRecord]:
8889 ...
8990
9091
class FileAccess(Enum):
    """Strategy used to access on-disk files (e.g. by a document store)

    The enumeration only names the available strategies; whether a strategy
    is honored is up to the component that receives it.
    """

    FILE = 0
    """Direct file access"""

    MMAP = 1
    """Access through mmap"""

    MEMORY = 2
    """Use memory"""
103+
104+
91105class DocumentStore (Documents ):
92106 """A document store
93107
@@ -97,6 +111,10 @@ class DocumentStore(Documents):
97111 - return the number of documents
98112 """
99113
114+ file_access : Meta [FileAccess ] = FileAccess .MMAP
115+ """How to access the file collection (might not have any impact, depends on
116+ the docstore)"""
117+
100118 def docid_internal2external (self , docid : int ):
101119 """Converts an internal collection ID (integer) to an external ID"""
102120 raise NotImplementedError (f"For class { self .__class__ } " )
@@ -327,5 +345,4 @@ class PairwiseSampleDataset(Base, ABC):
327345 """Datasets where each record is a query with positive and negative samples"""
328346
@abstractmethod
def iter(self) -> Iterator[PairwiseSample]:
    """Iterate over the pairwise samples of this dataset (to be implemented
    by subclasses)"""
Original file line number Diff line number Diff line change @@ -215,7 +215,19 @@ def documentcount(self):
215215
@cached_property
def store(self):
    """Document store of the wrapped ir-datasets dataset

    When the installed ir-datasets exposes docstore options, the value of
    ``self.file_access`` is translated into the matching ir-datasets file
    access mode and passed to ``docs_store``. Otherwise a warning is logged
    and ``docs_store`` is called without any option.

    Raises:
        KeyError: if ``self.file_access`` is not one of the known
            ``ir.FileAccess`` members
    """
    # Probe for docstore option support. Only the probe is guarded so that
    # unrelated errors (e.g. an invalid `self.file_access`) are not hidden.
    try:
        import ir_datasets.indices as ir_indices

        # NOTE(review): older releases presumably ship `ir_datasets.indices`
        # without these names, which raises AttributeError rather than
        # ImportError -- both are treated as "options not supported"
        ir_file_access = ir_indices.FileAccess
        docstore_options = ir_indices.DocstoreOptions
    except (ImportError, AttributeError):
        logging.warning("This version of ir-datasets cannot handle docstore options")
        return self.dataset.docs_store()

    # Translate to ir datasets docstore options
    file_access = {
        ir.FileAccess.MMAP: ir_file_access.MMAP,
        ir.FileAccess.FILE: ir_file_access.FILE,
        ir.FileAccess.MEMORY: ir_file_access.MEMORY,
    }[self.file_access]
    return self.dataset.docs_store(options=docstore_options(file_access=file_access))
219231
220232 @cached_property
221233 def _docs (self ):
You can’t perform that action at this time.
0 commit comments