|
7 | 7 | from collections import defaultdict |
8 | 8 | from dataclasses import dataclass |
9 | 9 | from functools import cache |
10 | | -from mimetypes import guess_type |
11 | 10 | from pathlib import Path |
12 | 11 | from typing import Dict, Iterable, List, Optional, Set |
13 | 12 | from typing_extensions import Self |
|
16 | 15 | from pydantic import BaseModel |
17 | 16 |
|
18 | 17 | from hermes.commands.harvest.base import HermesHarvestCommand, HermesHarvestPlugin |
19 | | - |
20 | | - |
21 | | -def guess_file_type(path: Path): |
22 | | - """File type detection for non-standardised formats. |
23 | | -
|
24 | | - Custom detection for file types not yet supported by Python's ``guess_type`` |
25 | | - function. |
26 | | - """ |
27 | | - # YAML was only added to ``guess_type`` in Python 3.14 due to the MIME type only |
28 | | - # having been decided in 2024. |
29 | | - # See: https://www.rfc-editor.org/rfc/rfc9512.html |
30 | | - if path.suffix in [".yml", ".yaml"]: |
31 | | - return ("application/yaml", None) |
32 | | - |
33 | | - # TOML is not yet part of ``guess_type`` due to the MIME type only having been |
34 | | - # accepted in October of 2024. |
35 | | - # See: https://www.iana.org/assignments/media-types/application/toml |
36 | | - if path.suffix == ".toml": |
37 | | - return ("application/toml", None) |
38 | | - |
39 | | - # CITATION.cff files are YAML. |
40 | | - # See: https://github.com/citation-file-format/citation-file-format/issues/391 |
41 | | - if path.name == "CITATION.cff": |
42 | | - return ("application/yaml", None) |
43 | | - |
44 | | - # .license files are likely license annotations according to the REUSE specification. |
45 | | - # See: https://reuse.software/spec/ |
46 | | - if path.suffix == ".license": |
47 | | - return ("text/plain", None) |
48 | | - |
49 | | - if path.name == "poetry.lock": |
50 | | - return ("text/plain", None) |
51 | | - |
52 | | - # use non-strict mode to cover more file types |
53 | | - return guess_type(path, strict=False) |
| 18 | +from hermes.utils import guess_file_type |
54 | 19 |
|
55 | 20 |
|
56 | 21 | @dataclass(kw_only=True) |
|
0 commit comments