Skip to content

Commit 74ac1e8

Browse files
committed
Python: improving FS api for reading
1 parent a8f376f commit 74ac1e8

File tree

5 files changed

+171
-48
lines changed

5 files changed

+171
-48
lines changed

CHANGELOG.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,20 @@ _# Changelog
22

33
All notable changes to this project will be documented in this file.
44

5-
## [0.0.7 - 2022-12-11]
5+
## [0.0.7 - 2022-12-12]
66

77
### Added
88

9-
- Python, FS: `list_directory`, `get_file_data` functions.
9+
- FS functions:
10+
* `fs_apply_exclude_lists`
11+
* `fs_apply_ignore_flags`
12+
* `fs_extract_sub_dirs`
13+
* `fs_filter_by`
14+
* `fs_get_file_data`
15+
* `fs_get_obj_info`
16+
* `fs_get_objs_info`
17+
* `fs_list_directory`
18+
* `fs_sort_by_id`
1019

1120
### Changed
1221

nc_py_api/__init__.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
1-
from . import signal_handler
1+
from . import mimetype
22
from ._version import __version__
33
from .config import CONFIG
44
from .db_api import close_connection, execute_commit, execute_fetchall
55
from .db_misc import TABLES, get_time
6-
from .db_requests import get_mimetype_id, get_paths_by_ids
7-
from .files import get_file_data, list_directory
6+
from .db_requests import get_mimetype_id
7+
from .files import (
8+
FsNodeInfo,
9+
fs_apply_exclude_lists,
10+
fs_apply_ignore_flags,
11+
fs_extract_sub_dirs,
12+
fs_filter_by,
13+
fs_get_file_data,
14+
fs_get_obj_info,
15+
fs_get_objs_info,
16+
fs_list_directory,
17+
fs_sort_by_id,
18+
)
819
from .log import cpa_logger
920
from .occ import get_cloud_app_config_value, occ_call, occ_call_decode

nc_py_api/db_requests.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@
44
from .db_api import execute_fetchall
55
from .db_misc import TABLES
66

7+
# Columns selected from the file cache table (aliased as ``fcache``) by the info/listing queries.
# Each column is listed exactly once: rows are read by column name, so a repeated
# ``fcache.storage`` only adds a redundant column to every result row.
FIELD_NAME_LIST = (
    "fcache.fileid, fcache.storage, fcache.path, fcache.name, "
    "fcache.mimetype, fcache.mimepart, "
    "fcache.size, fcache.mtime, fcache.encrypted, fcache.etag, fcache.permissions, fcache.checksum"
)
12+
713

814
def get_paths_by_ids(file_ids: list) -> list:
    """For each element of list in file_ids return [path, fileid, storage].

    Rows are ordered by ``fileid`` ascending, so the order of `file_ids` is not preserved.
    An empty `file_ids` returns an empty list without running a query, because the
    generated ``IN ()`` clause would not be valid SQL on every database backend.
    """

    if not file_ids:
        return []
    ids_csv = ",".join(str(x) for x in file_ids)
    query = (
        "SELECT fcache.path, fcache.fileid, fcache.storage "
        f"FROM {TABLES.file_cache} AS fcache "
        f"WHERE fileid IN ({ids_csv}) "
        "ORDER BY fcache.fileid ASC;"
    )
    return execute_fetchall(query)
1824

@@ -61,6 +67,26 @@ def get_mimetype_id(mimetype: str) -> int:
6167
return result[0]["id"]
6268

6369

70+
def get_fileid_info(file_id: int) -> dict:
    """Fetch the file cache record for a single file id.

    Returns the first row produced by the query, or an empty dictionary when the
    query yields no rows.
    """

    rows = execute_fetchall(
        f"SELECT {FIELD_NAME_LIST} FROM {TABLES.file_cache} AS fcache WHERE fcache.fileid = {file_id};"
    )
    return rows[0] if rows else {}
78+
79+
80+
def get_fileids_info(file_ids: list[int]) -> list[dict]:
    """Returns dictionaries with information for given file ids.

    An empty `file_ids` returns an empty list without running a query, because the
    generated ``IN ()`` clause would not be valid SQL on every database backend.
    No ordering is requested from the database.
    """

    if not file_ids:
        return []
    ids_csv = ",".join(str(x) for x in file_ids)
    query = (
        f"SELECT {FIELD_NAME_LIST} FROM {TABLES.file_cache} AS fcache "
        f"WHERE fcache.fileid IN ({ids_csv});"
    )
    return execute_fetchall(query)
88+
89+
6490
def get_directory_list(dir_id: int, mount_points_ids: list[int]) -> list[dict]:
6591
"""Lists the provided directory
6692
@@ -72,12 +98,7 @@ def get_directory_list(dir_id: int, mount_points_ids: list[int]) -> list[dict]:
7298
mp_query = ""
7399
if mount_points_ids:
74100
mp_query = f" OR fcache.fileid IN ({','.join(str(x) for x in mount_points_ids)})"
75-
query = (
76-
"SELECT fcache.fileid, fcache.storage, fcache.path, fcache.storage, fcache.name, fcache.mimetype, fcache.size, "
77-
"fcache.mtime, fcache.encrypted, fcache.etag, fcache.permissions, fcache.checksum "
78-
f"FROM {TABLES.file_cache} AS fcache "
79-
f"WHERE (fcache.parent = {dir_id}{mp_query});"
80-
)
101+
query = f"SELECT {FIELD_NAME_LIST} FROM {TABLES.file_cache} AS fcache WHERE (fcache.parent = {dir_id}{mp_query});"
81102
return execute_fetchall(query)
82103

83104

nc_py_api/files.py

Lines changed: 111 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
"""
22
Helper functions related to get files content or storages info.
33
"""
4+
from fnmatch import fnmatch
45
from os import environ, path
56
from pathlib import Path
6-
from typing import TypedDict
7+
from typing import Literal, Optional, TypedDict
78

9+
from . import mimetype
810
from .config import CONFIG
911
from .db_requests import (
1012
get_directory_list,
11-
get_mimetype_id,
13+
get_fileid_info,
14+
get_fileids_info,
1215
get_non_direct_access_filesize_limit,
1316
get_paths_by_ids,
1417
get_storages_info,
@@ -21,7 +24,8 @@ class FsNodeInfo(TypedDict):
2124
id: int
2225
is_dir: bool
2326
is_local: bool
24-
mimetype: str
27+
mimetype: int
28+
mimepart: int
2529
name: str
2630
internal_path: str
2731
abs_path: str
@@ -37,52 +41,92 @@ class FsNodeInfo(TypedDict):
3741
direct_access: bool
3842

3943

44+
FsNodeInfoField = Literal["is_dir", "is_local", "mimetype", "mimepart", "name", "direct_access"]
45+
46+
4047
USER_ID = environ.get("USER_ID", "")
41-
DIR_MIMETYPE = get_mimetype_id("'httpd/unix-directory'")
4248
STORAGES_INFO = get_storages_info()
4349
ND_ACCESS_LIMIT = get_non_direct_access_filesize_limit()
4450
"""A value from the config that defines the maximum file size allowed to be requested from php."""
4551

4652

47-
def list_directory(file_id: int, user_id=USER_ID) -> list[FsNodeInfo]:
53+
def fs_get_obj_info(file_id: int) -> Optional[FsNodeInfo]:
    """Return the node description for `file_id`, or ``None`` when no record is found."""
    db_record = get_fileid_info(file_id)
    if not db_record:
        return None
    return db_record_to_fs_node(db_record)
58+
59+
60+
def fs_get_objs_info(file_ids: list[int]) -> list[FsNodeInfo]:
    """Return node descriptions for every record found for `file_ids`.

    The result follows the order of the rows returned by `get_fileids_info`.
    An empty `file_ids` short-circuits to an empty list, so no query with an
    empty ``IN ()`` clause is ever built for it.
    """
    if not file_ids:
        return []
    return [db_record_to_fs_node(record) for record in get_fileids_info(file_ids)]
63+
64+
65+
def fs_list_directory(file_id: int, user_id=USER_ID) -> list[FsNodeInfo]:
    """List the nodes of the directory identified by `file_id`.

    The directory's own record is looked up first; when it exists, the ids returned by
    `get_mounts_to` for its storage and path are passed to `get_directory_list` together
    with `file_id`. Every returned record is converted with `db_record_to_fs_node`.
    `user_id` is currently ignored.
    """
    _ = user_id  # noqa # will be used in 0.4.0 version
    mount_ids = []
    dir_rows = get_paths_by_ids([file_id])
    if dir_rows:
        dir_row = dir_rows[0]
        mount_ids = get_mounts_to(dir_row["storage"], dir_row["path"])
    return [db_record_to_fs_node(record) for record in get_directory_list(file_id, mount_ids)]
73+
74+
75+
def fs_apply_exclude_lists(fs_objs: list[FsNodeInfo], excl_file_ids: list[int], excl_mask: list[str]) -> None:
    """Remove, in place, every node of `fs_objs` that matches one of the exclude lists.

    A node is removed when its ``id`` is in `excl_file_ids`, or when `is_path_in_exclude`
    reports that its ``internal_path`` matches one of the patterns in `excl_mask`.
    The relative order of the remaining nodes is kept. Nothing is returned.
    """

    # Set membership keeps the id check constant-time per node instead of scanning the list.
    excluded_ids = set(excl_file_ids)
    # Slice assignment mutates the caller's list object rather than rebinding a local name.
    fs_objs[:] = [
        node
        for node in fs_objs
        if node["id"] not in excluded_ids and not is_path_in_exclude(node["internal_path"], excl_mask)
    ]
86+
87+
88+
def fs_extract_sub_dirs(fs_objs: list[FsNodeInfo]) -> list[FsNodeInfo]:
    """Move the directory nodes out of `fs_objs` and return them.

    A node counts as a directory when its ``mimetype`` equals ``mimetype.DIR``.
    `fs_objs` is modified in place and afterwards holds only the other nodes;
    both lists keep the original relative order.
    """
    sub_dirs = []
    remaining = []
    for node in fs_objs:
        target = sub_dirs if node["mimetype"] == mimetype.DIR else remaining
        target.append(node)
    fs_objs[:] = remaining
    return sub_dirs
98+
99+
100+
def fs_apply_ignore_flags(fs_objs: list[FsNodeInfo]) -> None:
    """Honour ``.noimage`` / ``.nomedia`` marker files found in `fs_objs`.

    When any node is named ``.noimage`` or ``.nomedia``, the nodes whose ``mimepart`` is
    ``mimetype.IMAGE`` or ``mimetype.VIDEO`` are removed in place, followed by the marker
    files themselves. Without a marker the list is left untouched.
    """
    has_marker = any(node["name"] in (".noimage", ".nomedia") for node in fs_objs)
    if not has_marker:
        return
    fs_filter_by(fs_objs, "mimepart", [mimetype.IMAGE, mimetype.VIDEO], reverse_filter=True)
    fs_apply_exclude_lists(fs_objs, [], [".noimage", ".nomedia"])
105+
106+
107+
def fs_filter_by(fs_objs: list[FsNodeInfo], field: FsNodeInfoField, values: list, reverse_filter=False) -> None:
    """Filter `fs_objs` in place by the value stored under `field`.

    By default only the nodes whose `field` value is in `values` are kept.
    With a truthy `reverse_filter` the selection is inverted: nodes whose `field`
    value is in `values` are removed and all others are kept.
    """
    drop_on_match = bool(reverse_filter)
    # A node survives when "its value is listed" differs from "listed values get dropped".
    fs_objs[:] = [node for node in fs_objs if (node[field] in values) != drop_on_match]
119+
120+
121+
def fs_sort_by_id(fs_objs: list[FsNodeInfo]) -> list[FsNodeInfo]:
    """Return a new list with the nodes of `fs_objs` ordered by ascending ``id``.

    The input list itself is not modified.
    """
    ordered = list(fs_objs)
    ordered.sort(key=lambda node: node["id"])
    return ordered
123+
124+
125+
def fs_get_file_data(file_info: FsNodeInfo) -> bytes:
81126
if file_info["direct_access"]:
82127
try:
83128
with open(file_info["abs_path"], "rb") as h_file:
84-
data = h_file.read()
85-
return data
129+
return h_file.read()
86130
except Exception: # noqa # pylint: disable=broad-except
87131
log.exception("Exception during reading %s", file_info["abs_path"])
88132
return request_file_from_php(file_info)
@@ -172,3 +216,36 @@ def get_mounts_to(storage_id: int, dir_path: str) -> list[int]:
172216
if mount_point_with_dir_path == str(Path(storage_info["mount_point"]).parent):
173217
return_list.append(storage_info["root_id"])
174218
return return_list
219+
220+
221+
def db_record_to_fs_node(fs_record: dict) -> FsNodeInfo:
    """Convert one file cache record into an `FsNodeInfo` dictionary.

    Column values are copied under their `FsNodeInfo` names; the storage-related and
    access-related fields are derived through the helper functions called below.
    """
    storage_id = fs_record["storage"]
    internal_path = fs_record["path"]
    mime_id = fs_record["mimetype"]
    return {
        "id": fs_record["fileid"],
        "is_dir": mime_id == mimetype.DIR,
        "is_local": is_local_storage(storage_id),
        "mimetype": mime_id,
        "mimepart": fs_record["mimepart"],
        "name": fs_record["name"],
        "internal_path": internal_path,
        "abs_path": get_file_full_path(storage_id, internal_path),
        "size": fs_record["size"],
        "permissions": fs_record["permissions"],
        "mtime": fs_record["mtime"],
        "checksum": fs_record["checksum"],
        "encrypted": fs_record["encrypted"],
        "etag": fs_record["etag"],
        "ownerName": get_storage_user_id(storage_id),
        "storageId": storage_id,
        "mountId": get_storage_root_id(storage_id),
        "direct_access": can_directly_access_file(fs_record),
    }
242+
243+
244+
def is_path_in_exclude(fs_path: str, exclude_patterns: list[str]) -> bool:
    """Tell whether the last component of `fs_path` matches any of `exclude_patterns`.

    Only the base name of `fs_path` is compared, using `fnmatch`.
    Returns ``True`` on the first matching pattern, ``False`` when none match.
    """

    base_name = path.basename(fs_path)
    return any(fnmatch(base_name, pattern) for pattern in exclude_patterns)

nc_py_api/mimetype.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from .db_requests import get_mimetype_id
2+
3+
DIR = get_mimetype_id("'httpd/unix-directory'")
4+
IMAGE = get_mimetype_id("'image'")
5+
VIDEO = get_mimetype_id("'video'")

0 commit comments

Comments
 (0)