Skip to content
This repository was archived by the owner on Oct 15, 2025. It is now read-only.

Commit 95c501d

Browse files
authored
feat: optional import of decord, fix for sentence feature extractor (#852)
👋 Thanks for submitting a Pull Request to EvaDB! 🙌 We want to make contributing to EvaDB as easy and transparent as possible. Here are a few tips to get you started: - 🔍 Search existing EvaDB [PRs](https://github.com/georgia-tech-db/eva/pulls) to see if a similar PR already exists. - 🔗 Link this PR to an EvaDB [issue](https://github.com/georgia-tech-db/eva/issues) to help us understand what bug fix or feature is being implemented. - 📈 Provide before and after profiling results to help us quantify the improvement your PR provides (if applicable). 👉 Please see our ✅ [Contributing Guide](https://evadb.readthedocs.io/en/stable/source/contribute/index.html) for more details.
1 parent 0c463d3 commit 95c501d

File tree

12 files changed

+110
-146
lines changed

12 files changed

+110
-146
lines changed

.circleci/config.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,8 @@ workflows:
5959
mode: "COV"
6060
- Windows:
6161
name: "Windows | v3.10"
62-
# test_create_index_doesn't work on MacOS
63-
# - MacOS:
64-
# name: "MacOS | v3.10"
62+
- MacOS:
63+
name: "MacOS | v3.10"
6564
# missing Torchvision
6665
#- Linux:
6766
# name: "Linux - v3.11"

docs/source/overview/faq.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ Where does EvaDB store all the data?
1717

1818
By default, EvaDB stores all the data in a local folder named ``evadb_data``.
1919

20+
pip install ray fails because of grpcio
21+
=======================================
22+
23+
Follow these instructions to install ``ray``:
24+
https://github.com/ray-project/ray/issues/33039
2025

2126
Why does the EvaDB server not start?
2227
====================================

evadb/readers/decord_reader.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,9 @@
2121
from evadb.expression.abstract_expression import AbstractExpression
2222
from evadb.expression.expression_utils import extract_range_list_from_predicate
2323
from evadb.readers.abstract_reader import AbstractReader
24+
from evadb.utils.generic_utils import try_import_decord
2425
from evadb.utils.logging_manager import logger
2526

26-
# Lazy import to avoid torch init failures
27-
_decord = None
28-
29-
30-
def _lazy_import_decord():
31-
global _decord
32-
if _decord is None:
33-
import decord
34-
35-
_decord = decord
36-
return _decord
37-
3827

3928
class DecordReader(AbstractReader):
4029
def __init__(
@@ -106,7 +95,9 @@ def _read(self) -> Iterator[Dict]:
10695
yield self._get_frame(frame_id)
10796

10897
def initialize_reader(self):
109-
decord = _lazy_import_decord()
98+
try_import_decord()
99+
import decord
100+
110101
if self._read_audio:
111102
assert (
112103
self._sampling_type != IFRAMES

evadb/third_party/huggingface/model.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@
1414
# limitations under the License.
1515
from typing import Any
1616

17-
import decord
1817
import numpy as np
1918
from PIL import Image
2019

2120
from evadb.udfs.abstract.hf_abstract_udf import AbstractHFUdf
22-
from evadb.utils.generic_utils import EvaDBEnum
21+
from evadb.utils.generic_utils import EvaDBEnum, try_import_decord
2322

2423

2524
class HFInputTypes(EvaDBEnum):
@@ -64,6 +63,10 @@ def input_formatter(self, inputs: Any):
6463
# else expect that the user passed an array of video file paths, get audio as numpy array
6564
audio = []
6665
files = inputs.iloc[:, 0].tolist()
66+
67+
try_import_decord()
68+
import decord
69+
6770
for file in files:
6871
# must read audio at 16000Hz because most models were trained at this sampling rate
6972
reader = decord.AudioReader(file, mono=True, sample_rate=16000)

evadb/udfs/sentence_feature_extractor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def setup(self):
3232
"sentence-transformers/all-MiniLM-L6-v2"
3333
)
3434
self.model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
35+
self.model_device = None
3536

3637
def to_device(self, device: str) -> GPUCompatible:
3738
self.model_device = device
@@ -65,7 +66,8 @@ def _forward(row: pd.Series) -> np.ndarray:
6566
encoded_input = self.tokenizer(
6667
[sentence], padding=True, truncation=True, return_tensors="pt"
6768
)
68-
encoded_input.to(self.model_device)
69+
if self.model_device is not None:
70+
encoded_input.to(self.model_device)
6971
with torch.no_grad():
7072
model_output = self.model(**encoded_input)
7173

evadb/udfs/sentence_transformer_feature_extractor.py

Lines changed: 0 additions & 63 deletions
This file was deleted.

evadb/udfs/udf_bootstrap_queries.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,11 @@
193193
EvaDB_INSTALLATION_DIR
194194
)
195195

196+
yolo8n_query = """CREATE UDF IF NOT EXISTS Yolo
197+
TYPE ultralytics
198+
'model' 'yolov8n.pt';
199+
"""
200+
196201

197202
def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
198203
"""Load the built-in UDFs into the system during system bootstrapping.
@@ -234,15 +239,10 @@ def init_builtin_udfs(db: EvaDBDatabase, mode: str = "debug") -> None:
234239
DummyObjectDetector_udf_query,
235240
DummyMultiObjectDetector_udf_query,
236241
DummyFeatureExtractor_udf_query,
242+
yolo8n_query,
237243
]
238244
)
239245

240-
yolo8n = """CREATE UDF IF NOT EXISTS Yolo
241-
TYPE ultralytics
242-
'model' 'yolov8n.pt';
243-
"""
244-
queries.append(yolo8n)
245-
246246
# execute each query in the list of UDF queries
247247
for query in queries:
248248
execute_query_fetch_all(db, query)

evadb/utils/generic_utils.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,3 +218,18 @@ def remove_directory_contents(dir_path):
218218
shutil.rmtree(file_path)
219219
except Exception as e:
220220
logger.warning(f"Failed to delete {file_path}. Reason: {str(e)}")
221+
222+
223+
##############################
224+
225+
## TRY TO IMPORT PACKAGES
226+
227+
228+
def try_import_decord():
229+
try:
230+
import decord # noqa: F401
231+
except ImportError:
232+
raise ValueError(
233+
"""Could not import decord python package.
234+
Please install it with `pip install eva-decord`."""
235+
)

script/test/package.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ if [ "$test_code" -ne 0 ];
3838
then
3939
echo "Server did not start"
4040
echo "$test_code"
41+
cat evadb.log
4142
exit "$test_code"
4243
fi
4344

@@ -58,6 +59,7 @@ if [ "$?" -ne 1 ];
5859
then
5960
echo "Client did not start"
6061
echo "$test_code"
62+
cat client.log
6163
exit "$test_code"
6264
fi
6365

setup.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,12 @@ def read(path, encoding="utf-8"):
5252
"retry>=0.9.2",
5353
"aenum>=2.2.0",
5454
"diskcache>=5.4.0",
55-
"eva-decord>=0.6.1",
5655
"boto3",
5756
"nest_asyncio",
5857
"langchain",
5958
"pymupdf",
6059
"pdfminer.six",
6160
"sentence-transformers"
62-
6361
]
6462

6563
formatter_libs = ["black>=23.1.0", "isort>=5.10.1"]
@@ -120,8 +118,9 @@ def read(path, encoding="utf-8"):
120118
"norfair>=2.2.0", # OBJECT TRACKING
121119
]
122120

123-
### NEEDED FOR EXPERIMENTAL FEATURES
121+
### NEEDED FOR A BATTERIES-LOADED EXPERIENCE
124122
third_party_libs = [
123+
"eva-decord>=0.6.1", # for processing videos
125124
"qdrant-client>=1.1.7", # Qdrant vector store client
126125
"kornia", # SIFT features
127126
"langchain>=0.0.177", # langchain document loaders

0 commit comments

Comments
 (0)