Skip to content

Commit 06b448d

Browse files
authored
better gdrive support for file udfs (#1973)
1 parent 029bcba commit 06b448d

File tree

7 files changed

+176
-14
lines changed

7 files changed

+176
-14
lines changed

files/DOCX_File/DOCX_File.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import html
2+
import io
3+
import urllib.error
4+
import urllib.request
5+
6+
7+
def _load_docx_bytes(path: str) -> bytes:
    """Return the raw bytes of the DOCX file located at *path*.

    Paths beginning with ``/mount/`` or ``gdrive://`` are opened in binary
    mode through ``fsspec``. Every other path is converted to a signed URL
    with ``fused.api.sign_url`` and downloaded with an HTTP GET that times
    out after 120 seconds.

    Exceptions raised by ``fsspec``, ``fused.api.sign_url`` or ``urllib``
    are not caught here; they propagate to the caller.
    """
    # NOTE(review): ``fused`` is referenced without a module-level import in
    # this file -- presumably supplied by the UDF runtime; confirm.
    reads_via_fsspec = path.startswith(("/mount/", "gdrive://"))

    if not reads_via_fsspec:
        request = urllib.request.Request(fused.api.sign_url(path), method="GET")
        with urllib.request.urlopen(request, timeout=120) as response:
            return response.read()

    # Imported lazily so the dependency is only touched for these path kinds.
    import fsspec

    with fsspec.open(path, "rb") as handle:
        return handle.read()
17+
18+
19+
def _docx_bytes_to_plain_text(data: bytes) -> str:
    """Extract the text of an in-memory DOCX document as one string.

    The result contains one line per entry of ``Document.paragraphs``,
    followed by one line per table row in which the cell texts are joined
    with tab characters. All table rows come after all paragraph lines, so
    the output does not interleave tables with the surrounding paragraphs.
    Lines are joined with ``"\\n"``.
    """
    from docx import Document

    document = Document(io.BytesIO(data))

    paragraph_lines: list[str] = [
        paragraph.text for paragraph in document.paragraphs
    ]
    table_lines: list[str] = [
        "\t".join(cell.text for cell in row.cells)
        for table in document.tables
        for row in table.rows
    ]
    return "\n".join(paragraph_lines + table_lines)
30+
31+
32+
def _error_page(heading: str, exc: Exception) -> str:
    """Render a minimal dark-themed HTML error page.

    *heading* is inserted verbatim and must be a trusted constant. The
    exception message is HTML-escaped before insertion; when the message is
    empty, ``repr(exc)`` is shown instead so the page is never blank.
    """
    safe = html.escape(str(exc) or repr(exc), quote=True)
    return f"""
<!DOCTYPE html>
<html>
<head><title>DOCX Viewer</title></head>
<body style="margin:0; padding:24px; font-family: system-ui, sans-serif; background:#1a1a1a; color:#ccc;">
<h2 style="color:#ff6b6b;">{heading}</h2>
<p>{safe}</p>
</body>
</html>
"""


@fused.udf(cache_max_age="30m")
def udf(path: str):
    """Preview a ``.docx`` file as an HTML page of its plain text.

    Args:
        path: Location of the document, passed to ``_load_docx_bytes``.

    Returns:
        An HTML string. On success it shows the HTML-escaped text produced
        by ``_docx_bytes_to_plain_text``. If loading or parsing raises, an
        error page is returned instead of propagating the exception.
    """
    # Loading and parsing are guarded separately so that every load failure
    # (not only urllib.error.URLError, but also e.g. OSError from fsspec or
    # an error from URL signing) is labelled as a loading problem rather
    # than being misreported as a parsing problem.
    try:
        raw = _load_docx_bytes(path)
    except Exception as e:
        return _error_page("Error loading DOCX", e)

    try:
        text = _docx_bytes_to_plain_text(raw)
    except Exception as e:
        return _error_page("Error parsing DOCX", e)

    # Escape the document text so its content cannot inject markup.
    body = html.escape(text)
    return f"""
<!DOCTYPE html>
<html>
<head>
<title>DOCX Viewer</title>
<style>
* {{ margin: 0; padding: 0; box-sizing: border-box; }}
body {{
font-family: 'JetBrains Mono', 'Fira Code', 'Monaco', 'Consolas', monospace;
background: #1a1a1a;
color: #cccccc;
height: 100vh;
overflow: hidden;
}}
.text-content {{
background: #1a1a1a;
color: #cccccc;
padding: 20px 20px 0 20px;
height: 100vh;
width: 100vw;
overflow-y: auto;
font-size: 15px;
white-space: pre-wrap;
word-wrap: break-word;
line-height: 1.6;
}}
.text-content::-webkit-scrollbar {{ width: 8px; }}
.text-content::-webkit-scrollbar-track {{ background: #1a1a1a; }}
.text-content::-webkit-scrollbar-thumb {{ background: #D1E550; border-radius: 4px; }}
.text-content::-webkit-scrollbar-thumb:hover {{ background: #E8FF59; }}
</style>
</head>
<body>
<div class="text-content">{body}</div>
</body>
</html>
"""

files/DOCX_File/README.MD

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<!--fused:pin=99-->
2+
<!--fused:preview-->
3+
<p align="center"><img src="https://fused-magic.s3.us-west-2.amazonaws.com/thumbnails/udf_cards/python_txt.png" width="600" alt="UDF preview image"></p>
4+
5+
<!--fused:filePreview-->
6+
Extensions: `docx`
7+
8+
<!--fused:readme-->
9+
Preview a Word document (.docx) as plain text extracted from paragraphs and tables.

files/DOCX_File/meta.json

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
{
2+
"version": "0.0.3",
3+
"job_config": {
4+
"version": "0.0.3",
5+
"name": null,
6+
"steps": [
7+
{
8+
"type": "udf",
9+
"udf": {
10+
"type": "geopandas_v2",
11+
"name": "DOCX_File",
12+
"entrypoint": "udf",
13+
"parameters": {},
14+
"metadata": {
15+
"fused:description": "Preview a Word document (.docx) as plain text extracted from paragraphs and tables.\n\n<!-- fused:previewExt -->\nExtensions: `docx`",
16+
"fused:udfType": "auto",
17+
"fused:slug": "DOCX_File",
18+
"fused:name": "DOCX_File",
19+
"fused:id": null
20+
},
21+
"source": "DOCX_File.py",
22+
"headers": []
23+
}
24+
}
25+
],
26+
"metadata": null
27+
}
28+
}

files/ImageIO_File/ImageIO_File.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
@fused.udf
22
def udf(path: str, preview: bool):
33
import imageio.v3 as iio
4-
import s3fs
4+
import fsspec
55

6-
with s3fs.S3FileSystem().open(path) as f:
6+
with fsspec.open(path, "rb") as f:
77
im = iio.imread(f)
8-
transposed_image = im.transpose(2, 0, 1)
9-
print(transposed_image)
8+
transposed_image = im.transpose(2, 0, 1)
109

11-
if preview:
12-
w, h = im.shape[1], im.shape[0]
13-
if w > h:
14-
return transposed_image, (0, 0, 1, 1 / (w / h))
15-
else:
16-
return transposed_image, (0, 0, 1 / (h / w), 1)
10+
if preview:
11+
w, h = im.shape[1], im.shape[0]
12+
if w > h:
13+
return transposed_image, (0, 0, 1, 1 / (w / h))
14+
else:
15+
return transposed_image, (0, 0, 1 / (h / w), 1)
1716

18-
return transposed_image
17+
return transposed_image

files/Preview_Parquet/Preview_Parquet.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
11
@fused.udf(cache_max_age='30m')
22
def udf(path: str= ''):
3-
import pandas as pd
3+
import html
44
import urllib.parse
5+
6+
if path and (path.startswith('/mount/') or path.startswith('gdrive://')):
7+
safe_path = html.escape(path, quote=True)
8+
return f"""
9+
<html style="background-color: #1a1a1a">
10+
<body style="margin:0; padding:24px; font-family: system-ui, sans-serif; color:#ccc;">
11+
<h2 style="color:#ff6b6b;">Parquet preview not available</h2>
12+
<p>Signed URLs are not supported for <code>/mount/</code> or <code>gdrive://</code> paths, so the parquet viewer cannot load this file.</p>
13+
<p><strong>Path:</strong> <code>{safe_path}</code></p>
14+
</body>
15+
</html>
16+
"""
517

618
base_url = fused.options.base_web_url
719
signed_url = fused.api.sign_url(path) if path else ''

files/Text_File/Text_File.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,11 @@
22
def udf(
33
path: str,
44
):
5-
if path.startswith('/mount/'):
6-
file_content = open(path, 'r').read()
5+
if path.startswith('/mount/') or path.startswith('gdrive://'):
6+
import fsspec
7+
8+
with fsspec.open(path, 'rb') as f:
9+
file_content = f.read().decode('utf-8')
710
html_content = f"""\n <!DOCTYPE html>\n <html>\n <head>\n <title>Text Viewer</title>\n <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>\n <style>\n * {{\n margin: 0;\n padding: 0;\n box-sizing: border-box;\n }}\n \n body {{\n font-family: 'JetBrains Mono', 'Fira Code', 'Monaco', 'Consolas', monospace;\n background: #1a1a1a;\n color: #cccccc;\n height: 100vh;\n overflow: hidden;\n }}\n \n .text-content {{\n background: #1a1a1a;\n color: #cccccc;\n padding: 20px 20px 0 20px;\n height: 100vh;\n width: 100vw;\n overflow-y: auto;\n font-size: 15px;\n white-space: pre-wrap;\n word-wrap: break-word;\n border: none;\n outline: none;\n resize: none;\n line-height: 1.6;\n }}\n \n .markdown-content {{\n background: #1a1a1a;\n color: #cccccc;\n padding: 20px 20px 0 20px;\n height: 100vh;\n width: 100vw;\n overflow-y: auto;\n font-size: 15px;\n line-height: 1.6;\n }}\n \n .markdown-content h1, .markdown-content h2, .markdown-content h3, \n .markdown-content h4, .markdown-content h5, .markdown-content h6 {{\n color: #D1E550;\n margin: 20px 0 10px 0;\n }}\n \n .markdown-content h1 {{ font-size: 2em; }}\n .markdown-content h2 {{ font-size: 1.5em; }}\n .markdown-content h3 {{ font-size: 1.3em; }}\n \n .markdown-content p {{\n margin: 10px 0;\n }}\n \n .markdown-content code {{\n background: #2a2a2a;\n padding: 2px 6px;\n border-radius: 3px;\n font-family: 'JetBrains Mono', monospace;\n }}\n \n .markdown-content pre {{\n background: #2a2a2a;\n padding: 15px;\n border-radius: 5px;\n overflow-x: auto;\n margin: 15px 0;\n }}\n \n .markdown-content pre code {{\n background: none;\n padding: 0;\n }}\n \n .markdown-content blockquote {{\n border-left: 4px solid #D1E550;\n padding-left: 15px;\n margin: 15px 0;\n color: #999;\n }}\n \n .markdown-content ul, .markdown-content ol {{\n margin: 10px 0;\n padding-left: 20px;\n }}\n \n .markdown-content li {{\n margin: 5px 0;\n }}\n \n .markdown-content table {{\n border-collapse: collapse;\n width: 100%;\n margin: 15px 
0;\n }}\n \n .markdown-content th, .markdown-content td {{\n border: 1px solid #444;\n padding: 8px 12px;\n text-align: left;\n }}\n \n .markdown-content th {{\n background: #2a2a2a;\n color: #D1E550;\n }}\n \n .markdown-content a {{\n color: #D1E550;\n text-decoration: none;\n }}\n \n .markdown-content a:hover {{\n text-decoration: underline;\n }}\n \n .text-content::-webkit-scrollbar,\n .markdown-content::-webkit-scrollbar {{\n width: 8px;\n }}\n \n .text-content::-webkit-scrollbar-track,\n .markdown-content::-webkit-scrollbar-track {{\n background: #1a1a1a;\n }}\n \n .text-content::-webkit-scrollbar-thumb,\n .markdown-content::-webkit-scrollbar-thumb {{\n background: #D1E550;\n border-radius: 4px;\n }}\n \n .text-content::-webkit-scrollbar-thumb:hover,\n .markdown-content::-webkit-scrollbar-thumb:hover {{\n background: #E8FF59;\n }}\n \n .error {{\n color: #ff6b6b;\n padding: 20px;\n text-align: center;\n }}\n \n .spinner {{\n display: inline-block;\n width: 50px;\n height: 50px;\n border: 3px solid rgba(209, 229, 80, 0.3);\n border-radius: 50%;\n border-top-color: #D1E550;\n animation: spin 1s ease-in-out infinite;\n margin-right: 15px;\n }}\n \n @keyframes spin {{\n to {{ transform: rotate(360deg); }}\n }}\n \n .loading-container {{\n display: flex;\n flex-direction: column;\n justify-content: center;\n align-items: center;\n height: 100vh;\n gap: 20px;\n }}\n \n .loading-text {{\n font-size: 18px;\n color: #D1E550;\n font-weight: 500;\n }}\n \n .progress-container {{\n width: 300px;\n margin-top: 10px;\n }}\n \n .progress-bar {{\n width: 100%;\n height: 6px;\n background-color: rgba(209, 229, 80, 0.2);\n border-radius: 3px;\n overflow: hidden;\n margin-bottom: 8px;\n }}\n \n .progress-fill {{\n height: 100%;\n background-color: #D1E550;\n border-radius: 3px;\n width: 0%;\n transition: width 0.3s ease;\n }}\n \n .progress-text {{\n font-size: 14px;\n color: #cccccc;\n text-align: center;\n }}\n \n .truncation-message {{\n background: #2a2a2a;\n border: 2px 
solid #D1E550;\n border-radius: 8px;\n padding: 15px;\n margin: 20px 0;\n color: #D1E550;\n font-weight: bold;\n text-align: center;\n font-size: 16px;\n }}\n \n .truncation-end {{\n background: #2a2a2a;\n border: 1px solid #666;\n border-radius: 5px;\n padding: 0;\n margin: 0;\n color: #D1E550;\n font-style: italic;\n text-align: center;\n font-size: 12px;\n }}\n </style>\n </head>\n <body>\n <div id="content" class="text-content">{file_content.replace('<', '&lt;').replace('>', '&gt;')}</div>\n </body>\n </html>\n """
811
else:
912
signed_url = fused.api.sign_url(path)

files/Video_File/Video_File.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
1+
import html
12
import mimetypes
23
import fused
34
from urllib.parse import urlsplit
45

56

67
@fused.udf(cache_max_age="30m")
78
def udf(path: str):
9+
if path.startswith("/mount/") or path.startswith("gdrive://"):
10+
safe_path = html.escape(path, quote=True)
11+
return f"""
12+
<html>
13+
<body style="margin:0; padding:24px; font-family: system-ui, sans-serif; background:#1a1a1a; color:#ccc;">
14+
<h2 style="color:#ff6b6b;">Video preview not available</h2>
15+
<p>Signed URLs are not supported for <code>/mount/</code> or <code>gdrive://</code> paths, so this viewer cannot load the video from the browser.</p>
16+
<p><strong>Path:</strong> <code>{safe_path}</code></p>
17+
</body>
18+
</html>
19+
"""
820
signed_url = fused.api.sign_url(path)
921
source_type = mimetypes.guess_type(urlsplit(signed_url).path)[0] or "video/mp4"
1022
return f"""

0 commit comments

Comments
 (0)