Skip to content

Commit 05f6146

Browse files
committed
Move guess_file_type into hermes.utils
1 parent e49304c commit 05f6146

File tree

2 files changed

+38
-36
lines changed

2 files changed

+38
-36
lines changed

src/hermes/commands/harvest/file_exists.py

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
from collections import defaultdict
88
from dataclasses import dataclass
99
from functools import cache
10-
from mimetypes import guess_type
1110
from pathlib import Path
1211
from typing import Dict, Iterable, List, Optional, Set
1312
from typing_extensions import Self
@@ -16,41 +15,7 @@
1615
from pydantic import BaseModel
1716

1817
from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin
19-
20-
21-
def guess_file_type(path: Path):
22-
"""File type detection for non-standardised formats.
23-
24-
Custom detection for file types not yet supported by Python's ``guess_type``
25-
function.
26-
"""
27-
# YAML was only added to ``guess_type`` in Python 3.14 due to the MIME type only
28-
# having been decided in 2024.
29-
# See: https://www.rfc-editor.org/rfc/rfc9512.html
30-
if path.suffix in [".yml", ".yaml"]:
31-
return ("application/yaml", None)
32-
33-
# TOML is not yet part of ``guess_type`` due to the MIME type only having been
34-
# accepted in October of 2024.
35-
# See: https://www.iana.org/assignments/media-types/application/toml
36-
if path.suffix == ".toml":
37-
return ("application/toml", None)
38-
39-
# cff is yaml.
40-
# See: https://github.com/citation-file-format/citation-file-format/issues/391
41-
if path.name == "CITATION.cff":
42-
return ("application/yaml", None)
43-
44-
# .license files are likely license annotations according to REUSE specification.
45-
# See: https://reuse.software/spec/
46-
if path.suffix == ".license":
47-
return ("text/plain", None)
48-
49-
if path.name == "poetry.lock":
50-
return ("text/plain", None)
51-
52-
# use non-strict mode to cover more file types
53-
return guess_type(path, strict=False)
18+
from hermes.utils import guess_file_type
5419

5520

5621
@dataclass(kw_only=True)

src/hermes/utils.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# SPDX-FileContributor: Stephan Druskat <stephan.druskat@dlr.de>
77

88
from importlib.metadata import metadata
9+
from mimetypes import guess_type
10+
from pathlib import Path
911

1012

1113
def retrieve_project_urls(metadata_urls: list[str]) -> dict[str, str]:
@@ -43,3 +45,38 @@ def retrieve_project_urls(metadata_urls: list[str]) -> dict[str, str]:
4345

4446
# User agent
4547
hermes_user_agent = f"{hermes_name}/{hermes_version} ({hermes_homepage})"
48+
49+
50+
def guess_file_type(path: Path):
51+
"""File type detection for non-standardised formats.
52+
53+
Custom detection for file types not yet supported by Python's ``guess_type``
54+
function.
55+
"""
56+
# YAML was only added to ``guess_type`` in Python 3.14 due to the MIME type only
57+
# having been decided in 2024.
58+
# See: https://www.rfc-editor.org/rfc/rfc9512.html
59+
if path.suffix in [".yml", ".yaml"]:
60+
return ("application/yaml", None)
61+
62+
# TOML is not yet part of ``guess_type`` due to the MIME type only having been
63+
# accepted in October of 2024.
64+
# See: https://www.iana.org/assignments/media-types/application/toml
65+
if path.suffix == ".toml":
66+
return ("application/toml", None)
67+
68+
# cff is yaml.
69+
# See: https://github.com/citation-file-format/citation-file-format/issues/391
70+
if path.name == "CITATION.cff":
71+
return ("application/yaml", None)
72+
73+
# .license files are likely license annotations according to REUSE specification.
74+
# See: https://reuse.software/spec/
75+
if path.suffix == ".license":
76+
return ("text/plain", None)
77+
78+
if path.name == "poetry.lock":
79+
return ("text/plain", None)
80+
81+
# use non-strict mode to cover more file types
82+
return guess_type(path, strict=False)

0 commit comments

Comments
 (0)