Skip to content

Commit 9a13d8c

Browse files
committed
split FileArtifact into Binary/Text
1 parent a79ea7e commit 9a13d8c

File tree

8 files changed

+128
-118
lines changed

8 files changed

+128
-118
lines changed

nodescraper/base/inbandcollectortask.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from typing import Generic, Optional
2828

2929
from nodescraper.connection.inband import InBandConnection
30-
from nodescraper.connection.inband.inband import CommandArtifact, FileArtifact
30+
from nodescraper.connection.inband.inband import BaseFileArtifact, CommandArtifact
3131
from nodescraper.enums import EventPriority, OSFamily, SystemInteractionLevel
3232
from nodescraper.generictypes import TCollectArg, TDataModel
3333
from nodescraper.interfaces import DataCollector, TaskResultHook
@@ -99,7 +99,7 @@ def _run_sut_cmd(
9999

100100
def _read_sut_file(
101101
self, filename: str, encoding="utf-8", strip: bool = True, log_artifact=True
102-
) -> FileArtifact:
102+
) -> BaseFileArtifact:
103103
"""
104104
Read a file from the SUT and return its content.
105105
@@ -110,7 +110,7 @@ def _read_sut_file(
110110
log_artifact (bool, optional): whether we should log the contents of the file. Defaults to True.
111111
112112
Returns:
113-
FileArtifact: The content of the file read from the SUT, which includes the file name and content
113+
BaseFileArtifact: The content of the file read from the SUT, which includes the file name and content
114114
"""
115115
file_res = self.connection.read_file(filename=filename, encoding=encoding, strip=strip)
116116
if log_artifact:

nodescraper/connection/inband/__init__.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@
2323
# SOFTWARE.
2424
#
2525
###############################################################################
26-
from .inband import CommandArtifact, FileArtifact, InBandConnection
26+
from .inband import (
27+
BaseFileArtifact,
28+
BinaryFileArtifact,
29+
CommandArtifact,
30+
InBandConnection,
31+
TextFileArtifact,
32+
)
2733
from .inbandlocal import LocalShell
2834
from .inbandmanager import InBandConnectionManager
2935
from .sshparams import SSHConnectionParams
@@ -33,6 +39,8 @@
3339
"LocalShell",
3440
"InBandConnectionManager",
3541
"InBandConnection",
36-
"FileArtifact",
42+
"BaseFileArtifact",
43+
"TextFileArtifact",
44+
"BinaryFileArtifact",
3745
"CommandArtifact",
3846
]

nodescraper/connection/inband/inband.py

Lines changed: 48 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@
2424
#
2525
###############################################################################
2626
import abc
27-
import io
2827
import os
2928
from typing import Optional
3029

31-
from pydantic import BaseModel, Field, field_validator
30+
from pydantic import BaseModel
3231

3332

3433
class CommandArtifact(BaseModel):
@@ -40,45 +39,56 @@ class CommandArtifact(BaseModel):
4039
exit_code: int
4140

4241

43-
class FileArtifact(BaseModel):
44-
"""Artifact to contains contents of file read into memory"""
45-
42+
class BaseFileArtifact(BaseModel, abc.ABC):
4643
filename: str
47-
contents: str | bytes = Field(exclude=True)
4844

49-
@field_validator("contents", mode="before")
45+
@abc.abstractmethod
46+
def log_model(self, log_path: str) -> None:
47+
pass
48+
49+
@abc.abstractmethod
50+
def contents_str(self) -> str:
51+
pass
52+
5053
@classmethod
51-
def validate_contents(cls, value: io.BytesIO | str | bytes):
52-
if isinstance(value, io.BytesIO):
53-
return value.getvalue()
54-
if isinstance(value, str):
55-
return value.encode("utf-8")
56-
return value
54+
def from_bytes(
55+
cls,
56+
filename: str,
57+
raw_contents: bytes,
58+
encoding: Optional[str] = "utf-8",
59+
strip: bool = True,
60+
) -> "BaseFileArtifact":
61+
if encoding is None:
62+
return BinaryFileArtifact(filename=filename, contents=raw_contents)
5763

58-
def log_model(self, log_path: str, encoding: Optional[str] = None) -> None:
59-
"""Log the file contents to disk.
64+
try:
65+
text = raw_contents.decode(encoding)
66+
return TextFileArtifact(filename=filename, contents=text.strip() if strip else text)
67+
except UnicodeDecodeError:
68+
return BinaryFileArtifact(filename=filename, contents=raw_contents)
6069

61-
Args:
62-
log_path (str): path to write the file
63-
encoding (str | None): if None, auto-detect binary or not
64-
"""
70+
71+
class TextFileArtifact(BaseFileArtifact):
72+
contents: str
73+
74+
def log_model(self, log_path: str) -> None:
75+
path = os.path.join(log_path, self.filename)
76+
with open(path, "w", encoding="utf-8") as f:
77+
f.write(self.contents)
78+
79+
def contents_str(self) -> str:
80+
return self.contents
81+
82+
83+
class BinaryFileArtifact(BaseFileArtifact):
84+
contents: bytes
85+
86+
def log_model(self, log_path: str) -> None:
6587
log_name = os.path.join(log_path, self.filename)
66-
contents = self.contents
67-
68-
if encoding:
69-
with open(log_name, "w", encoding=encoding) as f:
70-
f.write(contents.decode(encoding))
71-
else:
72-
try:
73-
decoded = contents.decode("utf-8")
74-
with open(log_name, "w", encoding="utf-8") as f:
75-
f.write(decoded)
76-
except UnicodeDecodeError:
77-
with open(log_name, "wb") as f:
78-
f.write(contents)
88+
with open(log_name, "wb") as f:
89+
f.write(self.contents)
7990

8091
def contents_str(self) -> str:
81-
"""Safe string representation of contents (for logs)."""
8292
try:
8393
return self.contents.decode("utf-8")
8494
except UnicodeDecodeError:
@@ -104,14 +114,16 @@ def run_command(
104114
"""
105115

106116
@abc.abstractmethod
107-
def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact:
108-
"""Read a file into a FileArtifact
117+
def read_file(
118+
self, filename: str, encoding: str = "utf-8", strip: bool = True
119+
) -> BaseFileArtifact:
120+
"""Read a file into a BaseFileArtifact
109121
110122
Args:
111123
filename (str): filename
112124
encoding (str, optional): encoding to use when opening file. Defaults to "utf-8".
113125
strip (bool): automatically strip file contents
114126
115127
Returns:
116-
FileArtifact: file artifact
128+
BaseFileArtifact: file artifact
117129
"""

nodescraper/connection/inband/inbandlocal.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,11 @@
2626
import os
2727
import subprocess
2828

29-
from .inband import CommandArtifact, FileArtifact, InBandConnection
29+
from .inband import (
30+
BaseFileArtifact,
31+
CommandArtifact,
32+
InBandConnection,
33+
)
3034

3135

3236
class LocalShell(InBandConnection):
@@ -64,30 +68,25 @@ def run_command(
6468
exit_code=res.returncode,
6569
)
6670

67-
def read_file(self, filename: str, encoding: str = "utf-8", strip: bool = True) -> FileArtifact:
68-
"""Read a local file into a FileArtifact
71+
def read_file(
72+
self, filename: str, encoding: str = "utf-8", strip: bool = True
73+
) -> BaseFileArtifact:
74+
"""Read a local file into a BaseFileArtifact
6975
7076
Args:
7177
filename (str): filename
7278
encoding (str, optional): encoding used to decode the raw bytes read from the file; if decoding fails, the contents are kept as bytes in a binary artifact. Defaults to "utf-8".
7379
strip (bool): strip leading and trailing whitespace from decoded text contents
7480
7581
Returns:
76-
FileArtifact: file artifact
82+
BaseFileArtifact: file artifact
7783
"""
84+
with open(filename, "rb") as f:
85+
raw_contents = f.read()
7886

79-
if encoding is None:
80-
# Read as binary
81-
with open(filename, "rb") as f:
82-
contents = f.read()
83-
else:
84-
# Read as text
85-
with open(filename, "r", encoding=encoding) as f:
86-
contents = f.read()
87-
if strip:
88-
contents = contents.strip()
89-
90-
return FileArtifact(
87+
return BaseFileArtifact.from_bytes(
9188
filename=os.path.basename(filename),
92-
contents=contents,
89+
raw_contents=raw_contents,
90+
encoding=encoding,
91+
strip=strip,
9392
)

nodescraper/connection/inband/inbandremote.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@
3434
SSHException,
3535
)
3636

37-
from .inband import CommandArtifact, FileArtifact, InBandConnection
37+
from .inband import (
38+
BaseFileArtifact,
39+
CommandArtifact,
40+
InBandConnection,
41+
)
3842
from .sshparams import SSHConnectionParams
3943

4044

@@ -94,27 +98,26 @@ def connect_ssh(self):
9498
def read_file(
9599
self,
96100
filename: str,
97-
encoding="utf-8",
101+
encoding: str | None = "utf-8",
98102
strip: bool = True,
99-
) -> FileArtifact:
100-
"""Read a remote file into a file artifact
103+
) -> BaseFileArtifact:
104+
"""Read a remote file into a BaseFileArtifact.
101105
102106
Args:
103-
filename (str): filename
104-
encoding (str, optional): remote file encoding. Defaults to "utf-8".
105-
strip (bool): automatically strip file contents
107+
filename (str): Path to file on remote host
108+
encoding (str | None, optional): If None, the file is returned as a binary artifact. If str, the raw bytes are decoded using that encoding, falling back to a binary artifact if decoding fails. Defaults to "utf-8".
109+
strip (bool): Strip leading and trailing whitespace for text files. Ignored for binary.
106110
107111
Returns:
108-
FileArtifact: file artifact
112+
BaseFileArtifact: Object representing file contents
109113
"""
110-
contents = ""
111-
112-
with self.client.open_sftp().open(filename) as remote_file:
113-
contents = remote_file.read().decode(encoding=encoding, errors="ignore")
114-
115-
return FileArtifact(
114+
with self.client.open_sftp().open(filename, "rb") as remote_file:
115+
raw_contents = remote_file.read()
116+
return BaseFileArtifact.from_bytes(
116117
filename=os.path.basename(filename),
117-
contents=contents.strip() if strip else contents,
118+
raw_contents=raw_contents,
119+
encoding=encoding,
120+
strip=strip,
118121
)
119122

120123
def run_command(

nodescraper/plugins/inband/dmesg/dmesg_analyzer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from typing import Optional
2929

3030
from nodescraper.base.regexanalyzer import ErrorRegex, RegexAnalyzer
31-
from nodescraper.connection.inband import FileArtifact
31+
from nodescraper.connection.inband import BaseFileArtifact
3232
from nodescraper.enums import EventCategory, EventPriority
3333
from nodescraper.models import Event, TaskResult
3434

@@ -386,7 +386,7 @@ def analyze_data(
386386
args.analysis_range_end,
387387
)
388388
self.result.artifacts.append(
389-
FileArtifact(filename="filtered_dmesg.log", contents=dmesg_content)
389+
BaseFileArtifact(filename="filtered_dmesg.log", contents=dmesg_content)
390390
)
391391
else:
392392
dmesg_content = data.dmesg_content

nodescraper/taskresulthooks/filesystemloghook.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import os
2828
from typing import Optional
2929

30-
from nodescraper.connection.inband import FileArtifact
30+
from nodescraper.connection.inband import BaseFileArtifact
3131
from nodescraper.interfaces.taskresulthook import TaskResultHook
3232
from nodescraper.models import DataModel, TaskResult
3333
from nodescraper.utils import get_unique_filename, pascal_to_snake
@@ -60,7 +60,7 @@ def process_result(self, task_result: TaskResult, data: Optional[DataModel] = No
6060

6161
artifact_map = {}
6262
for artifact in task_result.artifacts:
63-
if isinstance(artifact, FileArtifact):
63+
if isinstance(artifact, BaseFileArtifact):
6464
log_name = get_unique_filename(log_path, artifact.filename)
6565
artifact.log_model(log_path)
6666

0 commit comments

Comments
 (0)