 import itertools
 import json
 import logging
-import mimetypes
 import re
 import uuid
-from datetime import datetime, timezone
 from pathlib import Path

 import numpy as np
@@ -26,7 +24,6 @@
 from .heading import Heading
 from .settings import config
 from .staged_insert import staged_insert1 as _staged_insert1
-from .storage import StorageBackend, build_object_path, verify_or_create_store_metadata
 from .utils import get_master, is_camel_case, user_choice

 logger = logging.getLogger(__name__.split(".")[0])
@@ -255,179 +252,6 @@ def full_table_name(self): |
             )
         return r"`{0:s}`.`{1:s}`".format(self.database, self.table_name)

-    @property
-    def object_storage(self) -> StorageBackend | None:
-        """Get the default object storage backend for this table."""
-        return self.get_object_storage()
-
-    def get_object_storage(self, store_name: str | None = None) -> StorageBackend | None:
-        """
-        Get the object storage backend for a specific store.
-
-        Args:
-            store_name: Name of the store (None for default store)
-
-        Returns:
-            StorageBackend instance or None if not configured
-        """
-        cache_key = f"_object_storage_{store_name or 'default'}"
-        if not hasattr(self, cache_key):
-            try:
-                spec = config.get_object_store_spec(store_name)
-                backend = StorageBackend(spec)
-                # Verify/create store metadata on first use
-                verify_or_create_store_metadata(backend, spec)
-                setattr(self, cache_key, backend)
-            except DataJointError:
-                setattr(self, cache_key, None)
-        return getattr(self, cache_key)
-
-    def _process_object_value(self, name: str, value, row: dict, store_name: str | None = None) -> str:
-        """
-        Process an object attribute value for insert.
-
-        Args:
-            name: Attribute name
-            value: Input value (file path, folder path, or (ext, stream) tuple)
-            row: The full row dict (needed for primary key values)
-            store_name: Name of the object store (None for default store)
-
-        Returns:
-            JSON string for database storage
-        """
-        backend = self.get_object_storage(store_name)
-        if backend is None:
-            store_desc = f"'{store_name}'" if store_name else "default"
-            raise DataJointError(
-                f"Object storage ({store_desc}) is not configured. Set object_storage settings in datajoint.json "
-                "or DJ_OBJECT_STORAGE_* environment variables."
-            )
-
-        # Extract primary key values from row
-        primary_key = {k: row[k] for k in self.primary_key if k in row}
-        if not primary_key:
-            raise DataJointError("Primary key values must be provided before object attributes for insert.")
-
-        # Determine input type and extract extension
-        is_dir = False
-        ext = None
-        size = 0
-        source_path = None
-        stream = None
-
-        if isinstance(value, tuple) and len(value) == 2:
-            # Tuple of (ext, stream)
-            ext, stream = value
-            if hasattr(stream, "read"):
-                # Read stream to buffer for upload
-                content = stream.read()
-                size = len(content)
-            else:
-                raise DataJointError(f"Invalid stream object for attribute {name}")
-        elif isinstance(value, (str, Path)):
-            source_path = Path(value)
-            if not source_path.exists():
-                raise DataJointError(f"File or folder not found: {source_path}")
-            is_dir = source_path.is_dir()
-            if not is_dir:
-                ext = source_path.suffix or None
-                size = source_path.stat().st_size
-        else:
-            raise DataJointError(
-                f"Invalid value type for object attribute {name}. Expected file path, folder path, or (ext, stream) tuple."
-            )
-
-        # Get storage spec for path building
-        spec = config.get_object_store_spec(store_name)
-        partition_pattern = spec.get("partition_pattern")
-        token_length = spec.get("token_length", 8)
-        location = spec.get("location", "")
-
-        # Build storage path
-        relative_path, token = build_object_path(
-            schema=self.database,
-            table=self.class_name,
-            field=name,
-            primary_key=primary_key,
-            ext=ext,
-            partition_pattern=partition_pattern,
-            token_length=token_length,
-        )
-
-        # Prepend location if specified
-        full_storage_path = f"{location}/{relative_path}" if location else relative_path
-
-        # Upload content
-        manifest = None
-        if source_path:
-            if is_dir:
-                manifest = backend.put_folder(source_path, full_storage_path)
-                size = manifest["total_size"]
-            else:
-                backend.put_file(source_path, full_storage_path)
-        elif stream:
-            backend.put_buffer(content, full_storage_path)
-
-        # Build full URL for the object
-        url = self._build_object_url(spec, full_storage_path)
-
-        # Build JSON metadata
-        timestamp = datetime.now(timezone.utc).isoformat()
-        metadata = {
-            "path": full_storage_path,
-            "size": size,
-            "hash": None,  # Hash is optional, not computed by default
-            "ext": ext,
-            "is_dir": is_dir,
-            "timestamp": timestamp,
-        }
-
-        # Add URL and store name
-        if url:
-            metadata["url"] = url
-        if store_name:
-            metadata["store"] = store_name
-
-        # Add mime_type for files
-        if not is_dir and ext:
-            mime_type, _ = mimetypes.guess_type(f"file{ext}")
-            if mime_type:
-                metadata["mime_type"] = mime_type
-
-        # Add item_count for folders
-        if is_dir and manifest:
-            metadata["item_count"] = manifest["item_count"]
-
-        return json.dumps(metadata)
-
-    def _build_object_url(self, spec: dict, path: str) -> str | None:
-        """
-        Build a full URL for an object based on the storage spec.
-
-        Args:
-            spec: Storage configuration dict
-            path: Path within the storage
-
-        Returns:
-            Full URL string or None for local storage
-        """
-        protocol = spec.get("protocol", "")
-        if protocol == "s3":
-            bucket = spec.get("bucket", "")
-            return f"s3://{bucket}/{path}"
-        elif protocol == "gcs":
-            bucket = spec.get("bucket", "")
-            return f"gs://{bucket}/{path}"
-        elif protocol == "azure":
-            container = spec.get("container", "")
-            return f"az://{container}/{path}"
-        elif protocol == "file":
-            # For local storage, return file:// URL
-            location = spec.get("location", "")
-            full_path = f"{location}/{path}" if location else path
-            return f"file://{full_path}"
-        return None
-
     def update1(self, row):
         """
         ``update1`` updates one existing entry in the table.
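For reference, the deleted `_process_object_value` / `_build_object_url` pair stored a JSON payload describing each uploaded object. The sketch below reconstructs that payload for a plain file using only the standard library; the store spec, store name, storage path, and file name are hypothetical, and the actual upload and backend calls are omitted.

```python
import json
import mimetypes
import tempfile
from datetime import datetime, timezone
from pathlib import Path


def sketch_object_metadata(
    file_path: str, storage_path: str, spec: dict, store_name: str | None = None
) -> str:
    """Rebuild the JSON payload the removed _process_object_value produced for a file (sketch)."""
    source = Path(file_path)
    ext = source.suffix or None

    # Mirror _build_object_url: map the spec's protocol to a URL scheme.
    protocol = spec.get("protocol", "")
    if protocol == "s3":
        url = f"s3://{spec.get('bucket', '')}/{storage_path}"
    elif protocol == "gcs":
        url = f"gs://{spec.get('bucket', '')}/{storage_path}"
    elif protocol == "azure":
        url = f"az://{spec.get('container', '')}/{storage_path}"
    elif protocol == "file":
        location = spec.get("location", "")
        url = f"file://{location}/{storage_path}" if location else f"file://{storage_path}"
    else:
        url = None

    metadata = {
        "path": storage_path,
        "size": source.stat().st_size,
        "hash": None,  # hash was optional and not computed by default
        "ext": ext,
        "is_dir": False,  # folders also carried "item_count" from the upload manifest
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    if url:
        metadata["url"] = url
    if store_name:
        metadata["store"] = store_name
    if ext:
        mime_type, _ = mimetypes.guess_type(f"file{ext}")
        if mime_type:
            metadata["mime_type"] = mime_type
    return json.dumps(metadata)


# Hypothetical usage: a throwaway local file standing in for the uploaded object.
with tempfile.NamedTemporaryFile(suffix=".tif", delete=False) as tmp:
    tmp.write(b"example bytes")
print(
    sketch_object_metadata(
        tmp.name,
        "my_schema/Scan/image/animal_id=1/scan_a1b2c3d4.tif",
        {"protocol": "s3", "bucket": "my-bucket"},
        store_name="raw",
    )
)
```

Folder inputs followed the same shape but set `is_dir` to true, took `size` from the upload manifest's `total_size`, and added `item_count` instead of `mime_type`.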