Skip to content

Commit 8431759

Browse files
committed
molprint v0.1.0
High-performance molecular fingerprint computation and similarity search in Rust.

- SMILES parser (lexer → MolGraph, ring closure, branches, bracket atoms)
- Ring perception via SSSR (Horton BFS algorithm)
- Aromaticity perception
- SMARTS engine with VF2 subgraph isomorphism
- Morgan/ECFP fingerprints (configurable radius, 512–4096 bit, deterministic)
- MACCS-166 structural keys (100% RDKit-accurate on ChEMBL 10k)
- Tanimoto, Dice, Cosine similarity via POPCNT on u64 word arrays
- Parallel threshold and top-k screening via Rayon
- FPS (chemfp-compatible), SDF V2000 (+ gzip), SMILES file I/O
- CLI: fp and search subcommands
- mdBook documentation site
0 parents  commit 8431759

82 files changed

Lines changed: 34513 additions & 0 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/rdkit-env.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Conda environment used by the CI job that generates RDKit reference data.
name: rdkit-env

# Channels are searched in the order listed.
channels:
  - conda-forge
  - defaults

# Package specs are quoted so the `=` / `>=` constraints are plainly strings.
dependencies:
  - "python=3.11"
  - "rdkit>=2023.09"

.github/workflows/ci.yml

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# Continuous integration: core Rust checks on every push to main and every
# pull request, plus heavier fuzzing and RDKit validation jobs that run on a
# weekly schedule or on demand.
name: CI

on:
  push:
    branches: [main]
  pull_request:
  schedule:
    # Weekly run every Monday at 03:00 UTC (for fuzz + RDKit validation jobs)
    - cron: "0 3 * * 1"

env:
  CARGO_TERM_COLOR: always

jobs:
  # ── Core checks ─────────────────────────────────────────────────────────────

  test:
    name: Test
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: cargo test --workspace

  clippy:
    name: Clippy
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: clippy
      - uses: Swatinem/rust-cache@v2
      # Any clippy warning fails the job.
      - run: cargo clippy --workspace -- -D warnings

  fmt:
    name: Format
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
        with:
          components: rustfmt
      - run: cargo fmt --check

  doc:
    name: Docs
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: cargo doc --no-deps --workspace

  # ── MSRV check ───────────────────────────────────────────────────────────────

  msrv:
    name: MSRV (1.75)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # The toolchain version here must stay in sync with the job name above.
      - uses: dtolnay/rust-toolchain@1.75
      - uses: Swatinem/rust-cache@v2
      - run: cargo check --workspace

  # ── Dependency audit ─────────────────────────────────────────────────────────

  audit:
    name: Security Audit
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
      - run: cargo install cargo-audit --quiet
      - run: cargo audit

  # ── Fuzz (short CI run) ──────────────────────────────────────────────────────

  fuzz:
    name: Fuzz SMILES (60s)
    runs-on: ubuntu-latest
    # Only run on the weekly schedule, or on demand when the pushed head
    # commit message contains "[fuzz]". (There is no path-based trigger.)
    if: >
      github.event_name == 'schedule' ||
      contains(github.event.head_commit.message, '[fuzz]')
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@nightly
      - uses: Swatinem/rust-cache@v2
      - run: cargo install cargo-fuzz --quiet
      # libFuzzer stops after 60 seconds of total fuzzing time.
      - run: cargo +nightly fuzz run fuzz_smiles -- -max_total_time=60
        working-directory: fuzz

  # ── RDKit validation (weekly / fingerprint changes) ──────────────────────────

  # Detects whether a push touched the fingerprint crates. The push payload
  # that GitHub Actions exposes does not carry per-commit added/removed/modified
  # file lists, so the changed paths are computed with git instead.
  fp-changes:
    name: Detect fingerprint changes
    runs-on: ubuntu-latest
    outputs:
      changed: ${{ steps.diff.outputs.changed }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so the pre-push commit is available for the diff.
          fetch-depth: 0
      - id: diff
        shell: bash
        env:
          EVENT_NAME: ${{ github.event_name }}
          BEFORE_SHA: ${{ github.event.before }}
        run: |
          changed=false
          if [ "$EVENT_NAME" = "push" ]; then
            if git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
              if ! git diff --quiet "$BEFORE_SHA" "$GITHUB_SHA" -- \
                  crates/molprint-fp crates/molprint-core; then
                changed=true
              fi
            else
              # No usable base commit (new branch or force-push): validate to be safe.
              changed=true
            fi
          fi
          echo "changed=$changed" >> "$GITHUB_OUTPUT"

  rdkit-validate:
    name: RDKit MACCS Validation
    runs-on: ubuntu-latest
    needs: fp-changes
    # Run weekly, or when a push changed crates/molprint-fp or crates/molprint-core.
    if: >
      github.event_name == 'schedule' ||
      needs.fp-changes.outputs.changed == 'true'
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2

      - name: Install conda (Miniforge)
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-version: latest
          activate-environment: rdkit-env
          environment-file: .github/rdkit-env.yml

      - name: Build release binary
        run: cargo build --release

      - name: Generate RDKit reference data
        # Login shell (-l) so the activated conda environment is on PATH.
        shell: bash -el {0}
        run: python scripts/generate_reference.py

      - name: Run MACCS validation test
        run: cargo test -p molprint-fp validate_maccs_against_rdkit -- --nocapture

.github/workflows/docs.yml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# Builds the mdBook site under docs/ and deploys it to GitHub Pages.
name: Docs

on:
  push:
    branches:
      - main
  workflow_dispatch:

# Scopes granted to the workflow token for the Pages deployment.
permissions:
  contents: read
  pages: write
  id-token: write

# All runs share the "pages" group; a newer run cancels one still in progress.
concurrency:
  group: pages
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Unpacks the pinned v0.4.40 release tarball straight into /usr/local/bin.
      - name: Install mdBook
        run: |
          curl -sSL https://github.com/rust-lang/mdBook/releases/download/v0.4.40/mdbook-v0.4.40-x86_64-unknown-linux-gnu.tar.gz \
          | tar -xz --directory /usr/local/bin

      - name: Build book
        run: mdbook build docs

      # The rendered book in docs/book is what gets published.
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs/book

  deploy:
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      # Filled in from the output of the "deployment" step below.
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4

.github/workflows/release.yml

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# Builds the molprint CLI for each target in the matrix when a "v*" tag is
# pushed, packages each binary with the README, and attaches the archives to a
# GitHub release.
name: Release

on:
  push:
    tags:
      - "v*"

# Needed so the release job can create the release and upload assets.
permissions:
  contents: write

jobs:
  build:
    name: Build ${{ matrix.target }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        include:
          - target: x86_64-unknown-linux-musl
            os: ubuntu-latest
            archive: tar.gz
          - target: aarch64-unknown-linux-musl
            os: ubuntu-latest
            archive: tar.gz
          - target: x86_64-apple-darwin
            os: macos-latest
            archive: tar.gz
          - target: aarch64-apple-darwin
            os: macos-latest
            archive: tar.gz
          - target: x86_64-pc-windows-msvc
            os: windows-latest
            archive: zip

    steps:
      - uses: actions/checkout@v4

      - uses: dtolnay/rust-toolchain@stable
        with:
          targets: ${{ matrix.target }}

      - name: Install musl tools (Linux)
        if: contains(matrix.target, 'musl')
        # Refresh the package index first: the runner image's cached index can
        # be stale, which makes `apt-get install` fail with 404s.
        run: |
          sudo apt-get update
          sudo apt-get install -y musl-tools

      - name: Install cross-compiler (aarch64 Linux)
        if: matrix.target == 'aarch64-unknown-linux-musl'
        run: |
          sudo apt-get update
          sudo apt-get install -y gcc-aarch64-linux-gnu
          echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_LINKER=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"

      - uses: Swatinem/rust-cache@v2
        with:
          # Separate cache per target triple.
          key: ${{ matrix.target }}

      - name: Build
        run: cargo build --release --target ${{ matrix.target }} -p molprint-cli

      - name: Package (Unix)
        if: matrix.archive == 'tar.gz'
        shell: bash
        env:
          # Passed through the environment rather than interpolated into the
          # script, so the tag name is never parsed as shell code.
          REF_NAME: ${{ github.ref_name }}
        run: |
          BIN=molprint
          STAGING="molprint-${REF_NAME}-${{ matrix.target }}"
          mkdir "$STAGING"
          cp "target/${{ matrix.target }}/release/$BIN" "$STAGING/"
          cp README.md "$STAGING/"
          tar -czf "$STAGING.tar.gz" "$STAGING"
          # Hand the archive name to the upload step below.
          echo "ASSET=$STAGING.tar.gz" >> "$GITHUB_ENV"

      - name: Package (Windows)
        if: matrix.archive == 'zip'
        shell: pwsh
        env:
          # Same reasoning as the Unix step: keep the tag name out of the script text.
          REF_NAME: ${{ github.ref_name }}
        run: |
          $STAGING = "molprint-${env:REF_NAME}-${{ matrix.target }}"
          New-Item -ItemType Directory -Path $STAGING
          Copy-Item "target\${{ matrix.target }}\release\molprint.exe" "$STAGING\"
          Copy-Item "README.md" "$STAGING\"
          Compress-Archive -Path "$STAGING\*" -DestinationPath "$STAGING.zip"
          echo "ASSET=$STAGING.zip" | Out-File -Append -FilePath $env:GITHUB_ENV

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.target }}
          path: ${{ env.ASSET }}

  release:
    name: Create release
    needs: build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts
          # Flatten every per-target artifact into the single artifacts/ directory.
          merge-multiple: true

      - name: Create GitHub release
        uses: softprops/action-gh-release@v2
        with:
          files: artifacts/*
          generate_release_notes: true
          make_latest: true

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Cargo build output; the leading slash anchors the pattern to the repository root.
/target/
# Local tooling directory — presumably per-developer assistant state; confirm.
.claude/
# Presumably a local Python virtual environment; confirm.
.venv/

0 commit comments

Comments
 (0)