Skip to content

Commit ecfcac6

Browse files
moved media files from Piccolo Admin to Piccolo API (#168)
* moved media files from Piccolo Admin to Piccolo API * add additional requirements to setup.py * fix lgtm alert * another try to fix lgtm alert * lgtm fixes * more LGTM fixes Co-authored-by: Daniel Townsend <[email protected]>
1 parent 33ca725 commit ecfcac6

File tree

13 files changed

+1428
-3
lines changed

13 files changed

+1428
-3
lines changed

piccolo_api/crud/endpoints.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,5 +1093,11 @@ async def delete_single(
10931093
except ValueError:
10941094
return Response("Unable to delete the resource.", status_code=500)
10951095

1096+
def __eq__(self, other: t.Any) -> bool:
    """
    Defer the equality check to the parent class.

    This override adds no behaviour of its own - it only exists to keep
    the LGTM static analysis check happy.
    """
    return super().__eq__(other)
1101+
10961102

10971103
__all__ = ["PiccoloCRUD"]

piccolo_api/media/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""
2+
Resources:
3+
https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html
4+
"""

piccolo_api/media/base.py

Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
from __future__ import annotations
2+
3+
import abc
4+
import asyncio
5+
import logging
6+
import pathlib
7+
import string
8+
import typing as t
9+
import uuid
10+
11+
from piccolo.apps.user.tables import BaseUser
12+
from piccolo.columns.column_types import Array, Text, Varchar
13+
14+
# NOTE(review): loggers are conventionally named with ``__name__``; the
# ``__file__`` based name is kept so the existing logger name doesn't change.
logger = logging.getLogger(__file__)


#: Pass into ``allowed_extensions`` to just allow audio files.
AUDIO_EXTENSIONS = (
    "mp3",
    "wav",
)

#: Pass into ``allowed_extensions`` to just allow data files.
DATA_EXTENSIONS = (
    "csv",
    "tsv",
)

#: Pass into ``allowed_extensions`` to just allow document files.
DOCUMENT_EXTENSIONS = ("pdf",)

#: Pass into ``allowed_extensions`` to just allow image files.
IMAGE_EXTENSIONS = (
    "gif",
    "jpeg",
    "jpg",
    "png",
    "svg",
    "tif",
    "webp",
)

#: Pass into ``allowed_extensions`` to just allow text files.
TEXT_EXTENSIONS = (
    "rtf",
    "txt",
)

#: Pass into ``allowed_extensions`` to just allow video files.
VIDEO_EXTENSIONS = ("mov", "mp4", "webm")

#: These are the extensions which are allowed by default. Extensions are
#: listed in lower case, without the leading dot.
ALLOWED_EXTENSIONS = (
    *AUDIO_EXTENSIONS,
    *DATA_EXTENSIONS,
    *DOCUMENT_EXTENSIONS,
    *IMAGE_EXTENSIONS,
    *TEXT_EXTENSIONS,
    *VIDEO_EXTENSIONS,
)

#: These are the characters allowed in the file name by default. Pass a
#: different sequence into ``allowed_characters`` to change this.
ALLOWED_CHARACTERS = (
    *string.ascii_letters,
    *string.digits,
    " ",
    "-",
    "_",
    ".",
)
71+
72+
73+
#: The column types which can be used for storing file keys (either directly,
#: or as the ``base_column`` of an ``Array``).
ALLOWED_COLUMN_TYPES = (Varchar, Text)


class MediaStorage(metaclass=abc.ABCMeta):
    """
    If you want to implement your own custom storage backend, create a subclass
    of this class. Override each abstract method.

    Typically, just use :class:`LocalMediaStorage <piccolo_api.media.local.LocalMediaStorage>`
    or :class:`S3MediaStorage <piccolo_api.media.s3.S3MediaStorage>` instead.

    """  # noqa: E501

    def __init__(
        self,
        column: t.Union[Text, Varchar, Array],
        allowed_extensions: t.Optional[t.Sequence[str]] = ALLOWED_EXTENSIONS,
        allowed_characters: t.Optional[t.Sequence[str]] = ALLOWED_CHARACTERS,
    ):
        """
        :param column:
            The column which stores the file keys. It must be a ``Varchar``,
            a ``Text``, or an ``Array`` whose ``base_column`` is one of
            those.
        :param allowed_extensions:
            Only files with these extensions are accepted (compared case
            insensitively). If ``None`` or empty, the extension isn't
            restricted.
        :param allowed_characters:
            Only file names made up of these characters are accepted. If
            ``None`` or empty, the characters aren't restricted.
        :raises ValueError:
            If ``column`` isn't one of the supported column types.

        """
        if not (
            isinstance(column, ALLOWED_COLUMN_TYPES)
            or (
                isinstance(column, Array)
                and isinstance(column.base_column, ALLOWED_COLUMN_TYPES)
            )
        ):
            raise ValueError(
                "The column must be a `Varchar`, `Text`, or `Array`."
            )

        self.column = column
        # Normalise to lower case once, so ``validate_file_name`` can do a
        # case insensitive comparison.
        self.allowed_extensions = (
            [i.lower() for i in allowed_extensions]
            if allowed_extensions
            else None
        )
        self.allowed_characters = allowed_characters

    def validate_file_name(self, file_name: str):
        """
        Checks the file name against the configured restrictions.

        :param file_name:
            The bare file name (without any folders) to check.
        :raises ValueError:
            If the file name is invalid.

        """
        if not file_name:
            # It's unlikely that the file_name is an empty string, but just in
            # case.
            raise ValueError("The file name can't be empty.")

        # Don't allow the file name to begin with a dot, otherwise it will be a
        # hidden file on Unix.
        if file_name.startswith("."):
            raise ValueError("File names must not start with a period.")

        # Don't allow double dots in the file name, in case it allows a file to
        # be written to a parent folder.
        if ".." in file_name:
            raise ValueError("File names must not contain '..'.")

        if self.allowed_characters:
            # Having some restriction on the allowed characters is important,
            # in case there are things like null bytes in there.
            # https://cheatsheetseries.owasp.org/cheatsheets/File_Upload_Cheat_Sheet.html#extension-validation
            for character in file_name:
                if character not in self.allowed_characters:
                    raise ValueError(
                        f"'{character}' is not allowed in the filename."
                    )

        components = file_name.rsplit(".", 1)

        if len(components) != 2:
            raise ValueError("The file has no extension.")

        extension = components[1]

        if (
            self.allowed_extensions
            and extension.lower() not in self.allowed_extensions
        ):
            # TODO - inspect the files to make sure they are what
            # they say they are.
            raise ValueError("This file type isn't allowed.")

    def generate_file_key(
        self, file_name: str, user: t.Optional[BaseUser] = None
    ) -> str:
        """
        Generates a unique file ID. If you have your own strategy for naming
        files, you can override this method.

        By default we add a UUID to the filename, to make it unique::

            >>> self.generate_file_key(file_name='my-poster.jpg')
            my-poster-3beac950-7721-46c9-9e7d-5e908ef51011.jpg

        :param file_name:
            The name of the file. Any leading folders are discarded.
        :param user:
            Not used by the default implementation - it's available for
            subclasses which override this method.
        :raises ValueError:
            If the ``file_name`` is invalid.

        """
        # If the file_name includes the entire path (e.g. /foo/bar.jpg) - we
        # just want bar.jpg.
        file_name = pathlib.Path(file_name).name

        self.validate_file_name(file_name=file_name)

        name, extension = file_name.rsplit(".", 1)

        # Truncate really long names. Otherwise they might be too long for
        # some operating systems, or too long to be stored in a database
        # field. Slicing is a no-op for names which are already short enough.
        name = name[:50]

        file_key = f"{name}-{uuid.uuid4()}"
        if extension:
            file_key += f".{extension}"

        return file_key

    ###########################################################################

    @abc.abstractmethod
    async def store_file(
        self, file_name: str, file: t.IO, user: t.Optional[BaseUser] = None
    ) -> str:
        """
        Stores the file in whichever storage you're using, and returns a key
        which uniquely identifies the file.

        :param file_name:
            The name of the file to store.
        :param file:
            The file to store.
        :param user:
            The Piccolo ``BaseUser`` who requested this.

        """
        raise NotImplementedError  # pragma: no cover

    @abc.abstractmethod
    async def generate_file_url(
        self, file_key: str, root_url: str, user: t.Optional[BaseUser] = None
    ):
        """
        This retrieves an absolute URL for the file. It might be a signed URL,
        if using S3 for storage.

        :param file_key:
            Get the URL for a file with this file_key.
        :param root_url:
            The URL the media is usually served from. The sub class might
            ignore this argument entirely, if it's fetching the data from
            an external source like S3.
        :param user:
            The Piccolo ``BaseUser`` who requested this.
        """
        raise NotImplementedError  # pragma: no cover

    @abc.abstractmethod
    async def get_file(self, file_key: str) -> t.Optional[t.IO]:
        """
        Returns the file object matching the ``file_key``.
        """
        raise NotImplementedError  # pragma: no cover

    @abc.abstractmethod
    async def delete_file(self, file_key: str):
        """
        Deletes the file object matching the ``file_key``.
        """
        raise NotImplementedError  # pragma: no cover

    @abc.abstractmethod
    async def bulk_delete_files(self, file_keys: t.List[str]):
        """
        Deletes the file object matching each of the ``file_keys``.
        """
        raise NotImplementedError  # pragma: no cover

    @abc.abstractmethod
    async def get_file_keys(self) -> t.List[str]:
        """
        Returns the file key for each file we have stored.
        """
        raise NotImplementedError  # pragma: no cover

    ###########################################################################

    async def get_file_keys_from_db(self) -> t.List[str]:
        """
        Returns the file key for each file we have in the database.

        Rows where the column is null are skipped.
        """
        table = self.column._meta.table
        values = await table.select(self.column).output(as_list=True)

        file_keys: t.List[str] = []
        for value in values:
            if isinstance(value, (list, tuple)):
                # ``Array`` columns are accepted by ``__init__``, and
                # presumably yield a list of file keys per row, so flatten
                # them. Otherwise ``get_unused_file_keys`` would fail when
                # adding the (unhashable) lists to a set.
                file_keys.extend(i for i in value if i is not None)
            elif value is not None:
                file_keys.append(value)

        return file_keys

    async def get_unused_file_keys(self) -> t.List[str]:
        """
        Compares the file keys we have stored, vs what's in the database.

        :returns:
            The keys of the stored files which aren't in the database (in no
            particular order).
        """
        db_keys, disk_keys = await asyncio.gather(
            self.get_file_keys_from_db(), self.get_file_keys()
        )
        return list(set(disk_keys) - set(db_keys))

    async def delete_unused_files(
        self, number_shown: int = 10, auto: bool = False
    ):
        """
        Over time, you will end up with files stored which are no longer
        needed. For example, if a row is deleted in the database, which
        referenced a stored file.

        By periodically running this method, it will clean up these unused
        files.

        It's important that each column uses its own folder for storing files.
        If multiple columns store data in the same folder, then we could
        delete some files which are needed by another column.

        :param number_shown:
            This number of unused file names are printed out, so you can be
            sure nothing strange is going on.
        :param auto:
            If ``True``, no confirmation is required before deletion takes
            place.

        """
        unused_file_keys = await self.get_unused_file_keys()

        number_unused = len(unused_file_keys)

        print(f"There are {number_unused} unused files.")

        if not number_unused:
            return

        print("Here are some examples:")
        print("\n".join(unused_file_keys[:number_shown]))

        # NOTE: ``input`` blocks until the user responds, which is fine when
        # this is run from an interactive script. Pass ``auto=True`` to skip
        # the prompt.
        if auto or (
            input("Would you like to delete them? Enter y to confirm") == "y"
        ):
            await self.bulk_delete_files(unused_file_keys)

    def __hash__(self) -> int:
        """
        Identity based hash, used as the fallback for ``__eq__``.

        Python sets ``__hash__`` to ``None`` on any class which defines
        ``__eq__`` without also defining ``__hash__``. Without this method,
        comparing two storages whose class doesn't define its own
        ``__hash__`` would raise a ``TypeError``.

        Subclasses can override this, so instances with the same
        configuration are treated as equal.
        """
        return object.__hash__(self)

    def __eq__(self, value) -> bool:
        """
        Two storages are equal if they have the same hash. Anything which
        isn't a ``MediaStorage`` is never equal.
        """
        if not isinstance(value, MediaStorage):
            return False
        return hash(value) == hash(self)

0 commit comments

Comments
 (0)