Skip to content

Commit 6c0a742

Browse files
authored
Merge pull request dod-cyber-crime-center#49 from kchason/pre-commit
Pre-Commit
2 parents 76d3c32 + 3c37742 commit 6c0a742

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+12481
-5463
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,16 @@ on:
77
env:
88
# The Python version for the build jobs as well as the primary one for the test and artifact generation. This MUST be
99
# in the python-version matrix in the `test` job.
10-
PYTHON_VERSION: "3.13"
10+
PYTHON_VERSION: "3.13"
1111
jobs:
12-
test:
12+
test:
1313
runs-on: ubuntu-latest
1414
strategy:
1515
matrix:
1616
# This allows the pipeline to be run against multiple Python versions, e.g. ["3.9", "3.10", "3.11"] (quote each version so YAML does not read 3.10 as 3.1). This results
1717
# in linting and unit tests running for all listed versions as well as the creation of packages and wheels on
1818
# creation of a tag in Git.
19-
python-version: [ "3.8", "3.10", "3.12", "3.13" ]
19+
python-version: [ "3.9", "3.11", "3.13" ]
2020

2121
steps:
2222
# Get the code from the repository to be packaged
@@ -35,7 +35,7 @@ jobs:
3535
sudo apt install python3-setuptools
3636
python -m pip install -q --upgrade pip
3737
pip install .
38-
pip install -q flake8 pytest pytest-cov build twine wheel
38+
pip install -q flake8 pytest pytest-cov build twine wheel pre-commit
3939
4040
# Lint the Python code to check for syntax errors
4141
- name: Lint with Flake8
@@ -45,6 +45,10 @@ jobs:
4545
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
4646
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
4747
48+
# Run pre-commit on the repository
49+
- name: Pre-Commit
50+
run: pre-commit run -a
51+
4852
# Test the Python unit tests
4953
- name: PyTest
5054
run: |
@@ -81,7 +85,7 @@ jobs:
8185
uses: actions/setup-python@v5
8286
with:
8387
python-version: ${{ env.PYTHON_VERSION }}
84-
88+
8589
# Install the packages to build the SQLite Dissect package
8690
- name: Prepare Build Environment
8791
run: |
@@ -105,8 +109,8 @@ jobs:
105109
path: sqlite-dissect-windows-x64-${{ env.PYTHON_VERSION }}-binary.zip
106110

107111
linux-build:
108-
runs-on: ubuntu-latest
109-
112+
runs-on: ubuntu-latest
113+
110114
steps:
111115
# Get the code from the repository to be packaged
112116
- name: Get Repo
@@ -146,7 +150,7 @@ jobs:
146150
# Build the Sphinx documentation into a PDF for easier distribution
147151
- name: Build Documentation
148152
run: |
149-
pip install -q sphinx
153+
pip install -q sphinx
150154
pip install -q sphinx-rtd-theme
151155
sphinx-build -b html ./docs/source/ ./docs/build/
152156

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,3 @@
3939
/htmlcov
4040
/docs/build
4141
*.coverage
42-

.pre-commit-config.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v5.0.0
4+
hooks:
5+
- id: check-yaml
6+
- id: end-of-file-fixer
7+
exclude_types:
8+
- svg
9+
- id: trailing-whitespace
10+
exclude_types:
11+
- svg
12+
- repo: https://github.com/asottile/pyupgrade
13+
rev: v3.18.0
14+
hooks:
15+
- id: pyupgrade
16+
exclude: migrations/
17+
args:
18+
- --py37-plus
19+
- repo: https://github.com/myint/autoflake
20+
rev: v2.3.1
21+
hooks:
22+
- id: autoflake
23+
exclude: ^migrations/
24+
args:
25+
- --remove-all-unused-imports
26+
- repo: https://github.com/pycqa/isort
27+
rev: 5.13.2
28+
hooks:
29+
- id: isort
30+
name: isort (python)
31+
args: ["--profile", "black", "--filter-files"]
32+
- id: isort
33+
name: isort (cython)
34+
types: [cython]
35+
args: ["--profile", "black", "--filter-files"]
36+
- id: isort
37+
name: isort (pyi)
38+
types: [pyi]
39+
args: ["--profile", "black", "--filter-files"]
40+
- repo: https://github.com/psf/black
41+
rev: 24.10.0
42+
hooks:
43+
- id: black
44+
45+
exclude: ^.+.(min|pack).(js|css)|vendor/.+$

README.md

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ SQLite Dissect is a SQLite parser with recovery abilities over SQLite databases
1414
and their accompanying journal files. If no options are set other than the file
1515
name, the default behaviour will be to check for any journal files and print to
1616
the console the output of the SQLite files. The directory of the SQLite file
17-
specified will be searched through to find the associated journal files. If
17+
specified will be searched through to find the associated journal files. If
1818
they are not in the same directory as the specified file, they will not be found
1919
and their location will need to be specified in the command. SQLite carving
2020
will not be done by default. Please see the options below to enable carving.
@@ -23,7 +23,7 @@ will not be done by default. Please see the options below to enable carving.
2323

2424
| Argument | Description | Example Usage |
2525
|-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------|
26-
| SQLITE_PATH | The path and filename of the SQLite file or directory to be carved. If a directory is provided, it will recursively search for files with the extensions: `.db`, `.sqlite`, `.sqlite3`. | `sqlite_dissect SQLITE_PATH` |
26+
| SQLITE_PATH | The path and filename of the SQLite file or directory to be carved. If a directory is provided, it will recursively search for files with the extensions: `.db`, `.sqlite`, `.sqlite3`. | `sqlite_dissect SQLITE_PATH` |
2727

2828

2929
#### Optional Arguments:
@@ -85,18 +85,18 @@ sqlite_dissect [SQLITE_PATH] --signatures -d [OUTPUT_DIRECTORY] -e sqlite --carv
8585
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e sqlite --carve --carve-freelists -b [TABLES]
8686
```
8787

88-
6. Parse a SQLite database file and print the output to a xlsx workbook along with generating signatures and
89-
carving entries. The schema history (schema updates throughout the WAL are included if a WAL file is detected) and
88+
6. Parse a SQLite database file and print the output to a xlsx workbook along with generating signatures and
89+
carving entries. The schema history (schema updates throughout the WAL are included if a WAL file is detected) and
9090
signatures will be printed to standard output. The log level will be set to debug and all log messages will be
9191
output to the specified log file.
9292

9393
```shell
9494
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e xlsx --schema-history --carve --signatures --log-level debug -i [LOG_FILE]
9595
```
9696

97-
7. Parse a SQLite database file along with a specified rollback journal file and send the output to CSV files.
97+
7. Parse a SQLite database file along with a specified rollback journal file and send the output to CSV files.
9898
(CSV is the only output option currently implemented for rollback journal files)
99-
99+
100100
```shell
101101
sqlite_dissect [SQLITE_PATH] -d [OUTPUT_DIRECTORY] -e csv --carve -j [ROLLBACK_JOURNAL]
102102
```
@@ -149,17 +149,17 @@ export SQLD_EXPORT_TYPE="[text, sqlite, case]"
149149
This application focuses on carving by analyzing the allocated content within each of the SQLite
150150
database tables and creating signatures. Where there is no content in the table, the signature
151151
is based off of analyzing the create table statement in the master schema table. The signature
152-
contains the series of possible serial types that can be stored within the file for that table.
152+
contains the series of possible serial types that can be stored within the file for that table.
153153
This signature is then applied to the unallocated content and freeblocks of the table b-tree in
154-
the file. This includes both interior and leaf table b-tree pages for that table. The signatures
154+
the file. This includes both interior and leaf table b-tree pages for that table. The signatures
155155
are only applied to the pages belonging to the particular b-tree page it was generated from due
156156
to initial research showing that the pages when created or pulled from the freelist set are
157157
overwritten with zeros for the unallocated portions. Fragments within the pages can be reported
158158
on but, due to the size (<4 bytes), are not carved. Due to the fact that entries are added into
159159
tables in SQLite from the end of the page and moving toward the beginning, the carving works
160-
in the same manner in order to detect previously partially overwritten entries better. This
160+
in the same manner in order to detect previously partially overwritten entries better. This
161161
carving can also be applied to the set of freelist pages within the SQLite file if specified
162-
but the freelist pages are currently treated as sets of unallocated data with the exception
162+
but the freelist pages are currently treated as sets of unallocated data with the exception
163163
of the freelist page metadata.
164164

165165
The carving process does not currently account for index b-trees as the more pertinent information
@@ -185,12 +185,12 @@ a full unallocated block and only support export to csv files.
185185
SQLite Dissect can support output to various forms: text, csv, xlsx, and sqlite. Due to certain
186186
constraints on what can be written to some file types, certain modifications need to be made. For
187187
instance, when writing SQLite columns such as row_id that are already going to pre-exist in the table
188-
for export to a SQLite file we need to preface the columns with "sd_" so they will not conflict with
189-
the actual row_id column. This also applies to internal schema objects. If certain SQLite tables are
190-
requested to be written to a SQLite file, than these will be prefaced with "iso_" so they will not
191-
conflict with similar internal schema objects that may already exist in the SQLite file bring written
192-
to. In xlsx or csv, due to a "=" symbol indicating a type of equation, these are prefaced with a " "
193-
character to avoid this issue. More details can be found in the code documentation of the export classes
188+
for export to a SQLite file we need to preface the columns with "sd_" so they will not conflict with
189+
the actual row_id column. This also applies to internal schema objects. If certain SQLite tables are
190+
requested to be written to a SQLite file, then these will be prefaced with "iso_" so they will not
191+
conflict with similar internal schema objects that may already exist in the SQLite file being written
192+
to. In xlsx or csv, due to a "=" symbol indicating a type of equation, these are prefaced with a " "
193+
character to avoid this issue. More details can be found in the code documentation of the export classes
194194
themselves.
195195

196196
SQLite Dissect opens the file as read only and acts as a read only interpreter when parsing and carving
@@ -203,10 +203,10 @@ specified for output.
203203
(WAL or rollback) file. Journal files by themselves are not supported yet.
204204

205205
#### Currently not implemented:
206-
1. Signatures and carving are not implemented for "without rowid" tables or indexes. This will not cause an error
206+
1. Signatures and carving are not implemented for "without rowid" tables or indexes. This will not cause an error
207207
but will skip signature generation and carving processes.
208-
2. Signatures and carving are not implemented for virtual tables. This will not cause an error but will skip
209-
signature generation and carving processes. `Note: Even though virtual tables are skipped, virtual tables may
208+
2. Signatures and carving are not implemented for virtual tables. This will not cause an error but will skip
209+
signature generation and carving processes. `Note: Even though virtual tables are skipped, virtual tables may
210210
create other non-virtual tables which are not skipped. Currently nothing ties these tables back to the virtual
211211
table that created them.`
212212
3. Invalidated frames in WAL files are currently skipped and not parsed. `Note: This applies to previous WAL records
@@ -286,16 +286,16 @@ TODO:
286286
- [ ] Incorporate signature generation input and output files once implemented.
287287
- [ ] Incorporate "store in memory" arguments (currently set to False, more in depth operations may want it True).
288288
- [ ] Implement multiple passes/depths.
289-
- [ ] Test use cases for exempted tables with rollback journal and when combined with specified tables.
290-
- [ ] Check on name vs table_name properties of the master schema entry.
289+
- [ ] Test use cases for exempted tables with rollback journal and when combined with specified tables.
290+
- [ ] Check on name vs table_name properties of the master schema entry.
291291
- [ ] Test cases where the schema changes throughout the WAL file.
292292
- [ ] Investigate handling of virtual and "without rowid" tables when creating table signatures through the interface.
293293
- [ ] Documentation on "without rowid" tables and indexes in references to carving in help documentation.
294294
- [ ] Make sure to address/print unallocated space (especially uncarved) from updated page numbers in commit records.
295295
- [ ] Research if there can be journal files with a zero length database file or zero-length journal files.
296296
- [ ] Research if there can be combinations of multiple rollback journal and WAL files with the SQLite database.
297297
- [ ] Validate initial research that allocation of freelist pages to a b-tree results in a wipe of the page data.
298-
- [ ] Add additional logging messages to the master schema entries skipped in signature generation.
298+
- [ ] Add additional logging messages to the master schema entries skipped in signature generation.
299299
- [ ] Integrate in the SQLite Forensic Corpus into tests.
300300
- [ ] Look into updating terminology for versioning to timelining.
301301
- [ ] Create PyUnit tests.

api_usage.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import os
3+
34
import sqlite_dissect.constants as sqlite_constants
45
import sqlite_dissect.interface as sqlite_interface
56

@@ -13,9 +14,11 @@
1314

1415
# Setup logging
1516
logging_level = logging.ERROR
16-
logging_format = '%(levelname)s %(asctime)s [%(pathname)s] %(funcName)s at line %(lineno)d: %(message)s'
17-
logging_date_format = '%d %b %Y %H:%M:%S'
18-
logging.basicConfig(level=logging_level, format=logging_format, datefmt=logging_date_format)
17+
logging_format = "%(levelname)s %(asctime)s [%(pathname)s] %(funcName)s at line %(lineno)d: %(message)s"
18+
logging_date_format = "%d %b %Y %H:%M:%S"
19+
logging.basicConfig(
20+
level=logging_level, format=logging_format, datefmt=logging_date_format
21+
)
1922

2023
# Setup console logging
2124
console_logger = logging.StreamHandler()
@@ -47,35 +50,53 @@
4750

4851
# Create the write ahead log
4952
wal_file_name = file_name + sqlite_constants.WAL_FILE_POSTFIX
50-
write_ahead_log = sqlite_interface.create_write_ahead_log(wal_file_name) if os.path.exists(wal_file_name) else None
53+
write_ahead_log = (
54+
sqlite_interface.create_write_ahead_log(wal_file_name)
55+
if os.path.exists(wal_file_name)
56+
else None
57+
)
5158

5259
# Create the version history
5360
version_history = sqlite_interface.create_version_history(database, write_ahead_log)
5461

5562
# Create the signature we are interested in carving
56-
table_signature = sqlite_interface.create_table_signature(table_name, database, version_history)
63+
table_signature = sqlite_interface.create_table_signature(
64+
table_name, database, version_history
65+
)
5766

5867
# Account for "without rowid"/virtual table signatures until supported
5968
if not table_signature:
60-
print("Table signature not supported (\"without rowid\" table or virtual table)")
69+
print('Table signature not supported ("without rowid" table or virtual table)')
6170
exit(0)
6271

6372
# Get the column indices of the columns we are interested in
6473
column_name_indices = {}
6574
for column_name in column_names:
66-
column_name_indices[column_name] = sqlite_interface.get_column_index(column_name, table_name, version_history)
75+
column_name_indices[column_name] = sqlite_interface.get_column_index(
76+
column_name, table_name, version_history
77+
)
6778

6879
# Get a version history iterator for the table
6980
carve_freelists = True
70-
table_history_iterator = sqlite_interface.get_version_history_iterator(table_name, version_history,
71-
table_signature, carve_freelists)
81+
table_history_iterator = sqlite_interface.get_version_history_iterator(
82+
table_name, version_history, table_signature, carve_freelists
83+
)
7284
# Iterate through the commits in the history for this table
7385
for commit in table_history_iterator:
7486
# Only process commits that were updated for this table and that contain carved cells
7587
if commit.updated and commit.carved_cells:
7688
carved_cells = commit.carved_cells
7789
for carved_cell in carved_cells.itervalues():
7890
for column_name in column_name_indices.keys():
79-
record_column = carved_cell.payload.record_columns[column_name_indices.get(column_name)]
80-
print("Commit version: %s table record column: %s has serial type: %s with value of: \"%s\"." %\
81-
(commit.version_number, column_name, record_column.serial_type, record_column.value))
91+
record_column = carved_cell.payload.record_columns[
92+
column_name_indices.get(column_name)
93+
]
94+
print(
95+
'Commit version: %s table record column: %s has serial type: %s with value of: "%s".'
96+
% (
97+
commit.version_number,
98+
column_name,
99+
record_column.serial_type,
100+
record_column.value,
101+
)
102+
)

docs/source/conf.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,32 +13,32 @@
1313
import os
1414
import sys
1515

16-
sys.path.insert(0, os.path.abspath('../..'))
16+
sys.path.insert(0, os.path.abspath("../.."))
1717

1818
# -- Project information -----------------------------------------------------
1919

20-
project = 'DC3 SQLite Dissect'
21-
copyright = '2022, Department of Defense Cyber Crime Center (DC3)'
22-
author = 'Department of Defense Cyber Crime Center (DC3)'
20+
project = "DC3 SQLite Dissect"
21+
copyright = "2022, Department of Defense Cyber Crime Center (DC3)"
22+
author = "Department of Defense Cyber Crime Center (DC3)"
2323

2424
# The full version, including alpha/beta/rc tags
25-
release = '1.0.0'
25+
release = "1.0.0"
2626

2727
# -- General configuration ---------------------------------------------------
28-
master_doc = 'index'
28+
master_doc = "index"
2929

3030
# Add any Sphinx extension module names here, as strings. They can be
3131
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
3232
# ones.
3333
extensions = [
34-
'sphinx.ext.autodoc',
35-
'sphinx.ext.autosummary',
36-
'sphinx.ext.coverage',
37-
'sphinx.ext.napoleon'
34+
"sphinx.ext.autodoc",
35+
"sphinx.ext.autosummary",
36+
"sphinx.ext.coverage",
37+
"sphinx.ext.napoleon",
3838
]
3939

4040
# Add any paths that contain templates here, relative to this directory.
41-
templates_path = ['_templates']
41+
templates_path = ["_templates"]
4242

4343
# List of patterns, relative to source directory, that match files and
4444
# directories to ignore when looking for source files.
@@ -50,15 +50,15 @@
5050
# The theme to use for HTML and HTML Help pages. See the documentation for
5151
# a list of builtin themes.
5252
#
53-
html_theme = 'sphinx_rtd_theme'
53+
html_theme = "sphinx_rtd_theme"
5454

5555
# Add any paths that contain custom static files (such as style sheets) here,
5656
# relative to this directory. They are copied after the builtin static files,
5757
# so a file named "default.css" will overwrite the builtin "default.css".
58-
html_static_path = ['_static']
58+
html_static_path = ["_static"]
5959

6060
# These paths are either relative to html_static_path
6161
# or fully qualified paths (eg. https://...)
6262
html_css_files = [
63-
'docs.css',
63+
"docs.css",
6464
]

0 commit comments

Comments
 (0)