Skip to content

Commit c7dbb63

Browse files
authored
Merge pull request #5 from OpenDataServices/2023-07-19
get_file_type_for_flatten_tool: consider content type too
2 parents 82f96f8 + 4fd1b0e commit c7dbb63

File tree

3 files changed

+54
-7
lines changed

3 files changed

+54
-7
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
77

88
## [Unreleased]
99

10+
When upgrading to this version, `ALLOWED_UNKNOWN_CONTENT_TYPES` must be set in the Django settings file, ideally by importing it from the settings file included with this library.
11+
12+
## Added
13+
14+
- utils.py: get_file_type_for_flatten_tool: consider content type too
15+
- settings.ALLOWED_UNKNOWN_CONTENT_TYPES.
16+
17+
## Fixed
18+
19+
- utils.py: get_file_type_for_flatten_tool: include an error message in the exception raised at the end https://github.com/OpenDataServices/lib-cove-web-2/issues/3
20+
1021
## [0.2.0] - 2023-07-11
1122

1223
## Added

libcoveweb2/settings.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,21 +173,29 @@
173173
},
174174
}
175175

176-
ALLOWED_JSON_CONTENT_TYPES = ["application/json", "application/octet-stream"]
176+
# Sometimes uploads happen with a generic content type.
177+
# In this case, we can't rely on the content type to detect the file type.
178+
# But the generic content type is still allowed, so it's added to
179+
# ALLOWED_*_CONTENT_TYPES when they are defined.
180+
ALLOWED_UNKNOWN_CONTENT_TYPES = ["application/octet-stream"]
181+
182+
# JSON details
183+
ALLOWED_JSON_CONTENT_TYPES = ["application/json"] + ALLOWED_UNKNOWN_CONTENT_TYPES
177184
ALLOWED_JSON_EXTENSIONS = [".json"]
178185

186+
# Excel details
179187
ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES = [
180188
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
181-
"application/octet-stream",
182-
]
189+
] + ALLOWED_UNKNOWN_CONTENT_TYPES
183190
ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS = [".xlsx"]
184191

192+
# Open Document details
185193
ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES = [
186194
"application/vnd.oasis.opendocument.spreadsheet",
187-
"application/octet-stream",
188-
]
195+
] + ALLOWED_UNKNOWN_CONTENT_TYPES
189196
ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS = [".ods"]
190197

198+
# Spreadsheet details (sum of details above)
191199
ALLOWED_SPREADSHEET_CONTENT_TYPES = (
192200
ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES
193201
+ ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES
@@ -196,5 +204,6 @@
196204
ALLOWED_SPREADSHEET_EXCEL_EXTENSIONS + ALLOWED_SPREADSHEET_OPENDOCUMENT_EXTENSIONS
197205
)
198206

199-
ALLOWED_CSV_CONTENT_TYPES = ["text/csv", "application/octet-stream"]
207+
# CSV details
208+
ALLOWED_CSV_CONTENT_TYPES = ["text/csv"] + ALLOWED_UNKNOWN_CONTENT_TYPES
200209
ALLOWED_CSV_EXTENSIONS = [".csv"]

libcoveweb2/utils.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,26 @@ def get_file_type_for_flatten_tool(supplied_data_file: SuppliedDataFile):
2929
for extension in settings.ALLOWED_CSV_EXTENSIONS:
3030
if supplied_data_file.filename.lower().endswith(extension):
3131
return "csv"
32+
# Check the content type
33+
if (
34+
supplied_data_file.content_type
35+
and supplied_data_file.content_type
36+
not in settings.ALLOWED_UNKNOWN_CONTENT_TYPES
37+
):
38+
if supplied_data_file.content_type in settings.ALLOWED_JSON_CONTENT_TYPES:
39+
return "json"
40+
if (
41+
supplied_data_file.content_type
42+
in settings.ALLOWED_SPREADSHEET_EXCEL_CONTENT_TYPES
43+
):
44+
return "xlsx"
45+
if (
46+
supplied_data_file.content_type
47+
in settings.ALLOWED_SPREADSHEET_OPENDOCUMENT_CONTENT_TYPES
48+
):
49+
return "ods"
50+
if supplied_data_file.content_type in settings.ALLOWED_CSV_CONTENT_TYPES:
51+
return "csv"
3252
# Try to load the first bit of the file to see if it's JSON.
3353
try:
3454
with open(supplied_data_file.upload_dir_and_filename(), "rb") as fp:
@@ -38,4 +58,11 @@ def get_file_type_for_flatten_tool(supplied_data_file: SuppliedDataFile):
3858
except FileNotFoundError:
3959
pass
4060
# All right, we give up.
41-
raise
61+
raise Exception(
62+
"Could not get file type for file "
63+
+ str(supplied_data_file.id)
64+
+ " with file name "
65+
+ str(supplied_data_file.filename)
66+
+ " and content type "
67+
+ str(supplied_data_file.content_type)
68+
)

0 commit comments

Comments
 (0)