Skip to content

Commit 5ed3d56

Browse files
authored
refactor!: organize project in source layout to make easily installable 🆕 (#45)
* refactor: organize project in source layout to make easily installable * refactor: update installation of project * refactor: add linting checks to build action * refactor: add pywinauto dependency back in * refactor: extend ruff to tools/
1 parent e5eefab commit 5ed3d56

19 files changed

+260
-124
lines changed

.github/workflows/build.yaml

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ jobs:
1515
- name: Install dependencies
1616
run: |
1717
python -m pip install --upgrade pip
18-
python -m pip install -r requirements.txt pyinstaller
18+
python -m pip install .[dev] pyinstaller
19+
- name: Lint code 🦄
20+
run: tox -e lint
1921
- name: Build binary 🔢
2022
run: pyinstaller "main.spec"
2123
- name: Run conversion ↩️
@@ -24,9 +26,10 @@ jobs:
2426
.\dist\ms_teams_parser.exe -f ".\forensicsim-data\john_doe_v_1_4_00_11161\IndexedDB\https_teams.microsoft.com_0.indexeddb.leveldb" -o "john_doe.json"
2527
- name: Test calling script 📞
2628
run: |
27-
python utils/dump_leveldb.py --help
28-
python utils/dump_localstorage.py --help
29-
python utils/dump_sessionstorage.py --help
29+
python tools/main.py --help
30+
python tools/dump_leveldb.py --help
31+
python tools/dump_localstorage.py --help
32+
python tools/dump_sessionstorage.py --help
3033
# python utils/populate_teams.py --help
3134
# python utils/populate_teams_2.py --help
3235
# python utils/populate_skype.py --help

.github/workflows/release.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ jobs:
1616
- name: Install dependencies
1717
run: |
1818
python -m pip install --upgrade pip
19-
python -m pip install -r requirements.txt pyinstaller
20-
- name: Build binary 🚧
19+
python -m pip install . pyinstaller
20+
- name: Build binary
2121
run: pyinstaller "main.spec"
2222
- name: Zip files 🗜️
2323
run: |

.pre-commit-config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,8 @@ repos:
1717
- repo: https://github.com/astral-sh/ruff-pre-commit
1818
rev: v0.1.11
1919
hooks:
20+
- id: ruff
21+
args:
22+
- --fix
2023
- id: ruff-format
2124
exclude: "^(export|populationdata|testdata)"

README.md

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ as following. Simply specify the path to the database and where you want to outp
121121
122122
usage: dump_leveldb.py [-h] -f FILEPATH -o OUTPUTPATH
123123
dump_leveldb.py: error: the following arguments are required: -f/--filepath, -o/--outputpath
124-
125124
```
126125
---
127126

@@ -131,24 +130,23 @@ dump_leveldb.py: error: the following arguments are required: -f/--filepath, -o/
131130

132131
A wee script for populating *Skype for Desktop* in a lab environment. The script can be used like this:
133132

134-
```
135-
utils\populate_skype.py -a 0 -f conversation.json
133+
```bash
134+
tools\populate_skype.py -a 0 -f conversation.json
136135
```
137136

138137
## populate_teams.py
139138

140139
A wee script for populating *Microsoft Teams* in a lab environment. The script can be used like this:
141140

142-
```
143-
utils\populate_teams.py -a 0 -f conversation.json
141+
```bash
142+
tools\populate_teams.py -a 0 -f conversation.json
144143
```
145144

146145
---
147146
# Datasets
148147
This repository comes with two datasets that allow reproducing the findings of this work. The `testdata` folder contains the *LevelDB* databases that have been extracted from two test clients. These can be used for benchmarking without having to perform a (lengthy) data population.
149148

150149
The `populationdata` contains *JSON* files of the communication that has been populated into the testing environment. These can be used to reproduce the experiment from scratch. However, for a rerun, it will be essential to adjust the dates to future dates, as the populator script relies on sufficient breaks between the individual messages.
151-
152150
---
153151

154152
# Acknowledgements & Thanks

main.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
block_cipher = None
44

55

6-
a = Analysis(['utils\\main.py'],
6+
a = Analysis(['tools\\main.py'],
77
binaries=[],
88
datas=[],
99
hiddenimports=[],

pyproject.toml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
[build-system]
2+
requires = ["setuptools >= 61.0"]
3+
build-backend = "setuptools.build_meta"
4+
5+
6+
[project]
7+
name = "forensicsim"
8+
description = "A forensic open-source parser module for Autopsy that allows extracting the messages, comments, posts, contacts, calendar entries and reactions from a Microsoft Teams IndexedDB LevelDB database."
9+
readme = "README.md"
10+
license = {file = "LICENSE.md"}
11+
requires-python = ">=3.9"
12+
authors = [
13+
{ name = "Alexander Bilz", email = "github@markusbilz.com" },
14+
{ name = "Markus Bilz", email = "github@markusbilz.com" }
15+
]
16+
dependencies = [
17+
"beautifulsoup4~=4.9.3",
18+
"click~=8.0.1",
19+
"chromedb @ git+https://github.com/karelze/ccl_chrome_indexeddb@master",
20+
"pause~=0.3",
21+
"pyautogui~=0.9.54",
22+
"pywinauto~=0.6.8"
23+
]
24+
25+
dynamic = ["version"]
26+
27+
[tool.setuptools.dynamic]
28+
version = {attr = "forensicsim.__version__"}
29+
30+
[project.urls]
31+
"Homepage" = "https://forensics.im/"
32+
"Bug Tracker" = "https://github.com/lxndrblz/forensicsim/issues"
33+
34+
[project.optional-dependencies]
35+
dev=[
36+
"build",
37+
"pre-commit",
38+
"ruff",
39+
"tox",
40+
]
41+
42+
43+
[tool.ruff]
44+
45+
target-version = "py39"
46+
47+
# See rules: https://beta.ruff.rs/docs/rules/
48+
select = [
49+
"C", # flake8-comprehensions
50+
"F", # pyflakes
51+
"FURB", # refurb
52+
"I", # isort
53+
"PIE", # misc lints
54+
"PT", # pytest
55+
"PGH", # pygrep
56+
"RUF", # ruff-specific rules
57+
"UP", # pyupgrade
58+
"SIM", # flake8-simplify
59+
]
60+
61+
include = ["*.py", "*.pyi", "**/pyproject.toml"]
62+
63+
ignore = [
64+
"C901", # too complex
65+
"E501", # line too long, handled by black
66+
"D206", # indent with white space
67+
"W191", # tab identation
68+
]
69+
70+
[tool.ruff.lint]
71+
preview = true
72+
# exclude = ["tools/**.py"]
73+
74+
[tool.ruff.format]
75+
preview = true
76+
77+
[tool.ruff.isort]
78+
known-first-party = ["forensicsim"]
79+
section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
80+
81+
[tool.tox]
82+
legacy_tox_ini = """
83+
84+
[tox]
85+
envlist = format, lint, pre-commit
86+
skipdist = True
87+
isolated_build = True
88+
89+
[testenv]
90+
deps = .[dev]
91+
92+
# Cleanup tasks
93+
[testenv:clean]
94+
commands =
95+
sh -c "rm -rf build cover dist .hypothesis .mypy_cache .pytest_cache site"
96+
97+
# Auto Formatting
98+
[testenv:format]
99+
commands =
100+
python -m ruff src tests --fix
101+
python -m ruff format src
102+
103+
# Syntax Checks
104+
[testenv:lint]
105+
commands =
106+
python -m ruff --output-format=github src
107+
python -m ruff format src --check
108+
109+
# Pre-Commit
110+
[testenv:pre-commit]
111+
commands =
112+
python -m pre-commit run --all-files --show-diff-on-failure
113+
114+
"""

requirements.txt

Lines changed: 0 additions & 8 deletions
This file was deleted.

src/forensicsim/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = "0.5.3"
Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@
2929
from chromedb import (
3030
ccl_blink_value_deserializer,
3131
ccl_chromium_indexeddb,
32-
ccl_v8_value_deserializer,
33-
ccl_leveldb,
3432
ccl_chromium_localstorage,
3533
ccl_chromium_sessionstorage,
34+
ccl_leveldb,
35+
ccl_v8_value_deserializer,
3636
)
3737
from chromedb.ccl_chromium_indexeddb import (
3838
DatabaseMetadataType,
@@ -77,19 +77,18 @@ def fetch_data(self):
7777
if (
7878
record.key.startswith(b"\x00\x00\x00\x00")
7979
and record.state == ccl_leveldb.KeyState.Live
80+
) and (
81+
record.key not in global_metadata_raw
82+
or global_metadata_raw[record.key].seq < record.seq
8083
):
81-
if (
82-
record.key not in global_metadata_raw
83-
or global_metadata_raw[record.key].seq < record.seq
84-
):
85-
global_metadata_raw[record.key] = record
84+
global_metadata_raw[record.key] = record
8685

8786
# Convert the raw metadata to a nice GlobalMetadata Object
8887
global_metadata = ccl_chromium_indexeddb.GlobalMetadata(global_metadata_raw)
8988

9089
# Loop through the database IDs
9190
for db_id in global_metadata.db_ids:
92-
if None == db_id.dbid_no:
91+
if db_id.dbid_no == None:
9392
continue
9493

9594
if db_id.dbid_no > 0x7F:
@@ -130,9 +129,11 @@ def fetch_data(self):
130129

131130
meta_type = record.key[len(prefix_objectstore) + len(varint_raw)]
132131

133-
old_version = objectstore_metadata_raw.get(
134-
(db_id.dbid_no, objstore_id, meta_type)
135-
)
132+
old_version = objectstore_metadata_raw.get((
133+
db_id.dbid_no,
134+
objstore_id,
135+
meta_type,
136+
))
136137

137138
if old_version is None or old_version.seq < record.seq:
138139
objectstore_metadata_raw[
@@ -160,7 +161,7 @@ def iterate_records(self, do_not_filter=False):
160161
# Loop through the databases and object stores based on their ids
161162
for global_id in self.global_metadata.db_ids:
162163
# print(f"Processing database: {global_id.name}")
163-
if None == global_id.dbid_no:
164+
if global_id.dbid_no == None:
164165
print(f"WARNING: Skipping database {global_id.name}")
165166
continue
166167

@@ -188,7 +189,7 @@ def iterate_records(self, do_not_filter=False):
188189
if record.value == b"":
189190
continue
190191
(
191-
value_version,
192+
_value_version,
192193
varint_raw,
193194
) = ccl_chromium_indexeddb.le_varint_from_bytes(
194195
record.value
@@ -201,7 +202,7 @@ def iterate_records(self, do_not_filter=False):
201202
val_idx += 1
202203

203204
(
204-
blink_version,
205+
_blink_version,
205206
varint_raw,
206207
) = ccl_chromium_indexeddb.le_varint_from_bytes(
207208
record.value[val_idx:]
@@ -226,7 +227,7 @@ def iterate_records(self, do_not_filter=False):
226227
"state": record.state,
227228
"seq": record.seq,
228229
}
229-
except Exception as e:
230+
except Exception:
230231
# TODO Some proper error handling wouldn't hurt
231232
continue
232233
# print(f"{datastore} {global_id.name} {records_per_object_store}")
@@ -280,7 +281,7 @@ def write_results_to_json(data, outputpath):
280281
json.dump(
281282
data, f, indent=4, sort_keys=True, default=str, ensure_ascii=False
282283
)
283-
except EnvironmentError as e:
284+
except OSError as e:
284285
print(e)
285286

286287

@@ -290,5 +291,5 @@ def parse_json():
290291
with open("teams.json") as json_file:
291292
data = json.load(json_file)
292293
return data
293-
except EnvironmentError as e:
294+
except OSError as e:
294295
print(e)

0 commit comments

Comments
 (0)