# Characters safe for use in filenames and URLs: the 64 symbols A-Z, a-z,
# 0-9, '-' and '_'.
# NOTE(review): presumably the alphabet drawn from by generate_token() -- confirm.
TOKEN_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
2626
27- # Supported remote URL protocols for copy insert
28- REMOTE_PROTOCOLS = ("s3://" , "gs://" , "gcs://" , "az://" , "abfs://" , "http://" , "https://" )
# Supported URL protocols (scheme prefixes, all lowercase because is_url()
# lowercases its input before matching).
URL_PROTOCOLS = ("file://", "s3://", "gs://", "gcs://", "az://", "abfs://", "http://", "https://")


def is_url(path: str) -> bool:
    """
    Check if a path is a URL.

    The check is a case-insensitive prefix match against ``URL_PROTOCOLS``;
    the remainder of the string is not validated.

    Parameters
    ----------
    path : str
        Path string to check. Non-string values (``None``, ``pathlib.Path``,
        bytes, ...) are accepted and reported as "not a URL" rather than
        raising.

    Returns
    -------
    bool
        True if path starts with a supported URL protocol.

    Examples
    --------
    >>> is_url("s3://bucket/key")
    True
    >>> is_url("/data/file.dat")
    False
    >>> is_url(None)
    False
    """
    # Guard against non-str input: callers use this predicate to choose
    # between the URL branch and the local-path branch, so a Path object or
    # None must fall through to "local" instead of raising AttributeError.
    if not isinstance(path, str):
        return False
    return path.lower().startswith(URL_PROTOCOLS)
4846
4947
50- def parse_remote_url ( url : str ) -> tuple [ str , str ] :
def normalize_to_url(path: str) -> str:
    """
    Return the URL form of ``path``.

    Strings that already carry a supported URL protocol (as decided by
    ``is_url``) are handed back untouched. Anything else is treated as a
    local filesystem path: it is made absolute with ``Path.resolve()`` and
    prefixed with the ``file://`` scheme. No percent-encoding is applied.

    Parameters
    ----------
    path : str
        Path string (local path or URL).

    Returns
    -------
    str
        URL form of the path.

    Examples
    --------
    >>> normalize_to_url("/data/file.dat")
    'file:///data/file.dat'
    >>> normalize_to_url("s3://bucket/key")
    's3://bucket/key'
    >>> normalize_to_url("file:///already/url")
    'file:///already/url'
    """
    if is_url(path):
        return path

    # Local path: resolve to an absolute location first.
    resolved = str(Path(path).resolve())

    if not resolved.startswith("/"):
        # Windows-style result (e.g. C:\dir\file): flip the separators and
        # add the third slash so the drive letter is not read as a host.
        return "file:///" + resolved.replace("\\", "/")

    # POSIX result already begins with "/", giving file:///abs/path.
    return "file://" + resolved
84+
85+
86+ def parse_url (url : str ) -> tuple [str , str ]:
87+ """
88+ Parse a URL into protocol and path.
5389
5490 Parameters
5591 ----------
5692 url : str
57- Remote URL (e.g., ``'s3://bucket/path/file.dat'``).
93+ URL (e.g., ``'s3://bucket/path/file.dat'`` or ``'file:///path/to/file '``).
5894
5995 Returns
6096 -------
@@ -65,11 +101,19 @@ def parse_remote_url(url: str) -> tuple[str, str]:
65101 ------
66102 DataJointError
67103 If URL protocol is not supported.
104+
105+ Examples
106+ --------
107+ >>> parse_url("s3://bucket/key/file.dat")
108+ ('s3', 'bucket/key/file.dat')
109+ >>> parse_url("file:///data/file.dat")
110+ ('file', '/data/file.dat')
68111 """
69112 url_lower = url .lower ()
70113
71114 # Map URL schemes to fsspec protocols
72115 protocol_map = {
116+ "file://" : "file" ,
73117 "s3://" : "s3" ,
74118 "gs://" : "gcs" ,
75119 "gcs://" : "gcs" ,
@@ -84,7 +128,7 @@ def parse_remote_url(url: str) -> tuple[str, str]:
84128 path = url [len (prefix ) :]
85129 return protocol , path
86130
87- raise errors .DataJointError (f"Unsupported remote URL protocol: { url } " )
131+ raise errors .DataJointError (f"Unsupported URL protocol: { url } " )
88132
89133
90134def generate_token (length : int = 8 ) -> str :
@@ -358,6 +402,53 @@ def _full_path(self, path: str | PurePosixPath) -> str:
358402 return str (Path (location ) / path )
359403 return path
360404
405+ def get_url (self , path : str | PurePosixPath ) -> str :
406+ """
407+ Get the full URL for a path in storage.
408+
409+ Returns a consistent URL representation for any storage backend,
410+ including file:// URLs for local filesystem.
411+
412+ Parameters
413+ ----------
414+ path : str or PurePosixPath
415+ Relative path within the storage location.
416+
417+ Returns
418+ -------
419+ str
420+ Full URL (e.g., 's3://bucket/path' or 'file:///data/path').
421+
422+ Examples
423+ --------
424+ >>> backend = StorageBackend({"protocol": "file", "location": "/data"})
425+ >>> backend.get_url("schema/table/file.dat")
426+ 'file:///data/schema/table/file.dat'
427+
428+ >>> backend = StorageBackend({"protocol": "s3", "bucket": "mybucket", ...})
429+ >>> backend.get_url("schema/table/file.dat")
430+ 's3://mybucket/schema/table/file.dat'
431+ """
432+ full_path = self ._full_path (path )
433+
434+ if self .protocol == "file" :
435+ # Ensure absolute path for file:// URL
436+ abs_path = str (Path (full_path ).resolve ())
437+ if abs_path .startswith ("/" ):
438+ return f"file://{ abs_path } "
439+ else :
440+ # Windows path
441+ return f"file:///{ abs_path .replace (chr (92 ), '/' )} "
442+ elif self .protocol == "s3" :
443+ return f"s3://{ full_path } "
444+ elif self .protocol == "gcs" :
445+ return f"gs://{ full_path } "
446+ elif self .protocol == "azure" :
447+ return f"az://{ full_path } "
448+ else :
449+ # Fallback: use protocol prefix
450+ return f"{ self .protocol } ://{ full_path } "
451+
361452 def put_file (self , local_path : str | Path , remote_path : str | PurePosixPath , metadata : dict | None = None ) -> None :
362453 """
363454 Upload a file from local filesystem to storage.
@@ -674,7 +765,7 @@ def copy_from_url(self, source_url: str, dest_path: str | PurePosixPath) -> int:
674765 int
675766 Size of copied file in bytes.
676767 """
677- protocol , source_path = parse_remote_url (source_url )
768+ protocol , source_path = parse_url (source_url )
678769 full_dest = self ._full_path (dest_path )
679770
680771 logger .debug (f"copy_from_url: { protocol } ://{ source_path } -> { self .protocol } :{ full_dest } " )
@@ -774,8 +865,8 @@ def source_is_directory(self, source: str) -> bool:
774865 bool
775866 True if source is a directory.
776867 """
777- if is_remote_url (source ):
778- protocol , path = parse_remote_url (source )
868+ if is_url (source ):
869+ protocol , path = parse_url (source )
779870 source_fs = fsspec .filesystem (protocol )
780871 return source_fs .isdir (path )
781872 else :
@@ -795,8 +886,8 @@ def source_exists(self, source: str) -> bool:
795886 bool
796887 True if source exists.
797888 """
798- if is_remote_url (source ):
799- protocol , path = parse_remote_url (source )
889+ if is_url (source ):
890+ protocol , path = parse_url (source )
800891 source_fs = fsspec .filesystem (protocol )
801892 return source_fs .exists (path )
802893 else :
@@ -817,8 +908,8 @@ def get_source_size(self, source: str) -> int | None:
817908 Size in bytes, or None if directory or cannot determine.
818909 """
819910 try :
820- if is_remote_url (source ):
821- protocol , path = parse_remote_url (source )
911+ if is_url (source ):
912+ protocol , path = parse_url (source )
822913 source_fs = fsspec .filesystem (protocol )
823914 if source_fs .isdir (path ):
824915 return None
0 commit comments