Skip to content

Commit bb481c0

Browse files
authored
Merge pull request #73 from audy/audy/add-github-release-action
2 parents 47850b0 + a6c29c1 commit bb481c0

File tree

14 files changed

+216
-66
lines changed

14 files changed

+216
-66
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
tests:
4646
strategy:
4747
matrix:
48-
rust-version: [1.62.1, stable]
48+
rust-version: [stable]
4949
runs-on: ubuntu-latest
5050
steps:
5151
- name: Checkout

.github/workflows/release.yml

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
# This file is autogenerated by maturin v1.8.2
2+
# To update, run
3+
#
4+
# maturin generate-ci github
5+
#
6+
name: build wheels
7+
on:
8+
push:
9+
branches:
10+
- main
11+
- master
12+
tags:
13+
- "*"
14+
pull_request:
15+
workflow_dispatch:
16+
release:
17+
types: [published]
18+
19+
jobs:
20+
linux:
21+
runs-on: ${{ matrix.platform.runner }}
22+
strategy:
23+
matrix:
24+
platform:
25+
- runner: ubuntu-22.04
26+
target: x86_64
27+
python-version:
28+
- "3.11"
29+
- "3.12"
30+
- "3.13"
31+
steps:
32+
- uses: actions/checkout@v4
33+
- name: Build wheels
34+
uses: PyO3/maturin-action@v1
35+
with:
36+
target: ${{ matrix.platform.target }}
37+
args: --features python --release --out dist --interpreter python${{ matrix.python-version }}
38+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
39+
manylinux: auto
40+
- name: Upload wheels
41+
uses: actions/upload-artifact@v4
42+
with:
43+
name: wheels-linux-${{ matrix.platform.target }}-${{ matrix.python-version }}
44+
path: dist
45+
46+
macos:
47+
runs-on: ${{ matrix.platform.runner }}
48+
strategy:
49+
matrix:
50+
platform:
51+
- runner: macos-13
52+
target: x86_64
53+
- runner: macos-14
54+
target: aarch64
55+
python-version:
56+
- "3.11"
57+
- "3.12"
58+
- "3.13"
59+
steps:
60+
- uses: actions/checkout@v4
61+
- name: Build wheels
62+
uses: PyO3/maturin-action@v1
63+
with:
64+
target: ${{ matrix.platform.target }}
65+
args: --features python --release --out dist --interpreter python${{ matrix.python-version }}
66+
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
67+
manylinux: auto
68+
- name: Upload wheels
69+
uses: actions/upload-artifact@v4
70+
with:
71+
name: wheels-macos-${{ matrix.platform.target }}-${{ matrix.python-version }}
72+
path: dist
73+
74+
sdist:
75+
runs-on: ubuntu-latest
76+
steps:
77+
- uses: actions/checkout@v4
78+
- name: Build sdist
79+
uses: PyO3/maturin-action@v1
80+
with:
81+
command: sdist
82+
args: --out dist
83+
- name: Upload sdist
84+
uses: actions/upload-artifact@v4
85+
with:
86+
name: wheels-sdist
87+
path: dist
88+
89+
upload-release:
90+
name: Upload release artifacts to GitHub
91+
needs: [linux, macos, sdist]
92+
runs-on: ubuntu-latest
93+
if: github.event.release.tag_name
94+
steps:
95+
- name: Download wheel artifacts
96+
uses: actions/download-artifact@v4
97+
98+
- name: List downloaded wheels
99+
run: ls -l wheels-*/*
100+
101+
- name: Upload Wheels to Release
102+
uses: svenstaro/upload-release-action@v2
103+
with:
104+
repo_token: ${{ secrets.GITHUB_TOKEN }}
105+
file: wheels-*/**
106+
tag: ${{ github.event.release.tag_name }}
107+
file_glob: true
108+
109+
release:
110+
name: Release to PyPI
111+
runs-on: ubuntu-latest
112+
if: github.event.release.tag_name
113+
needs: [linux, macos, sdist]
114+
permissions:
115+
# Used to sign the release artifacts
116+
id-token: write
117+
# Used to upload release artifacts
118+
contents: write
119+
# Used to generate artifact attestation
120+
attestations: write
121+
steps:
122+
- uses: actions/download-artifact@v4
123+
- name: Generate artifact attestation
124+
uses: actions/attest-build-provenance@v1
125+
with:
126+
subject-path: "wheels-*/**"
127+
128+
- name: Publish to PyPI
129+
if: ${{ startsWith(github.ref, 'refs/tags/') }}
130+
uses: PyO3/maturin-action@v1
131+
env:
132+
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
133+
with:
134+
command: upload
135+
args: --non-interactive --skip-existing wheels-*/**.whl wheels-*/**.tar.gz

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,26 @@ This repository provides a library and command-line interface that reimplements
1212
You may build Finch from source, which requires Rust >= `1.49`. Rust's Cargo package manager (see [rustup](https://www.rustup.rs) for Cargo installation instructions) can automatically build and install Finch with `cargo install finch_cli`.
1313
If you require Python bindings, you must take extra steps (see [python support](#python-support)). Alternatively, [download a prebuilt binary](https://github.com/onecodex/finch-rs/releases) or install from [PyPI](https://pypi.org/project/finch-sketch/) with `pip install finch-sketch`.
1414

15+
### Development ###
16+
17+
To build wheels locally, run:
18+
19+
```sh
20+
uv venv --python 3.11
21+
source .venv/bin/activate
22+
uv build
23+
24+
# or, using maturin by itself:
25+
26+
maturin build --features python --release --strip
27+
```
28+
29+
#### Building binary wheels and pushing to PyPI
30+
31+
There is a GitHub Actions workflow that builds Python wheels for macOS (x86 and
32+
ARM) and Ubuntu (x86). To run it, publish a new release.
33+
34+
1535
### Example Usage ###
1636
To get started, we first compute sketches for several FASTA or FASTQ files. These sketches are compact, sampled representations of the underlying genomic data, and are what allow `finch` to rapidly estimate distances between datasets. Sketching files uses the `finch sketch` command:
1737

@@ -206,7 +226,7 @@ cont, jacc = sketch_one.compare(sketch_two)
206226

207227
## Cap'n Proto
208228

209-
There is a `finch.capnp` in `src/serialization` file and the output of the MinHash schema (https://github.com/marbl/Mash/blob/54e6d66b7720035a2605a02892cad027ef3231ef/src/mash/capnp/MinHash.capnp)
229+
There is a `finch.capnp` file in `src/serialization`, together with the output of the MinHash schema (https://github.com/marbl/Mash/blob/54e6d66b7720035a2605a02892cad027ef3231ef/src/mash/capnp/MinHash.capnp)
210230
+ the changes by @bovee in https://github.com/bovee/Mash/blob/master/src/mash/capnp/MinHash.capnp
211231

212232
Both are generated after installing `capnp` and `cargo install capnpc` with the following command:

cli/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ pub fn update_sketch_params(
413413
if let Some(new_scale_num) = new_scale {
414414
if matches.occurrences_of("scale") == 0 {
415415
*scale = new_scale_num;
416-
} else if (*scale - new_scale_num).abs() < std::f64::EPSILON {
416+
} else if (*scale - new_scale_num).abs() < f64::EPSILON {
417417
// TODO: maybe this should have a slightly larger delta?
418418
bail!(
419419
"Specified scale {} does not match {} from sketch {}",

lib/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "finch"
3-
version = "0.6.1"
3+
version = "0.6.2"
44
authors = ["One Codex <[email protected]>"]
55
description = "An implementation of min-wise independent permutation locality sensitive hashing ('MinHashing') for genomic data and command-line utility for manipulation."
66
keywords = ["minhash", "bioinformatics", "sketches"]

lib/src/distance.rs

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ pub fn raw_distance(
9797
// at this point we've exhausted one of the two sketches, but we may have
9898
// more counts in the other to compare if these were scaled sketches
9999
if scale > 0. {
100-
let max_hash = u64::max_value() / scale.recip() as u64;
100+
let max_hash = u64::MAX / scale.recip() as u64;
101101
while query_hashes
102102
.get(i)
103103
.map(|kmer_count| kmer_count.hash < max_hash)
@@ -125,6 +125,37 @@ pub fn raw_distance(
125125
(containment, jaccard, common, total)
126126
}
127127

128+
/// This computes set statistics from one set of hashes to another.
129+
///
130+
/// Every hash in the reference set is considered while only those hashes in the
131+
/// query set that are in the same range as the reference set are compared. This
132+
/// should be a more accurate representation of the query set's containment in
133+
/// the reference set because we consider all of the reference set. In
134+
/// practice, there may be issues especially if the query is sketched to a
135+
/// different effective scale than the reference.
136+
pub fn old_distance(query_sketch: &[KmerCount], ref_sketch: &[KmerCount]) -> (f64, f64, u64, u64) {
137+
let mut i: usize = 0;
138+
let mut common: u64 = 0;
139+
let mut total: u64 = 0;
140+
141+
for ref_hash in ref_sketch {
142+
while (query_sketch[i].hash < ref_hash.hash) && (i < query_sketch.len() - 1) {
143+
i += 1;
144+
}
145+
146+
if query_sketch[i].hash == ref_hash.hash {
147+
common += 1;
148+
}
149+
150+
total += 1;
151+
}
152+
153+
// Numerator is A-intersect-B, |A| is the denominator, we enforce |A| == |B|
154+
let containment: f64 = common as f64 / total as f64;
155+
let jaccard: f64 = common as f64 / (common + 2 * (total - common)) as f64;
156+
(containment, jaccard, common, total)
157+
}
158+
128159
#[cfg(test)]
129160
mod tests {
130161
use super::*;
@@ -306,37 +337,6 @@ mod tests {
306337
}
307338
}
308339

309-
/// This computes set statistics from one set of hashes to another.
310-
///
311-
/// Every hash in the reference set is considered while only those hashes in the
312-
/// query set that are in the same range as the reference set are compared. This
313-
/// should be a more accurate representation of the query set's containment in
314-
/// the reference set because we consider all of the reference set. In
315-
/// practice, there may be issues especially if the query is sketched to a
316-
/// different effective scale than the reference.
317-
pub fn old_distance(query_sketch: &[KmerCount], ref_sketch: &[KmerCount]) -> (f64, f64, u64, u64) {
318-
let mut i: usize = 0;
319-
let mut common: u64 = 0;
320-
let mut total: u64 = 0;
321-
322-
for ref_hash in ref_sketch {
323-
while (query_sketch[i].hash < ref_hash.hash) && (i < query_sketch.len() - 1) {
324-
i += 1;
325-
}
326-
327-
if query_sketch[i].hash == ref_hash.hash {
328-
common += 1;
329-
}
330-
331-
total += 1;
332-
}
333-
334-
// Numerator is A-intersect-B, |A| is the denominator, we enforce |A| == |B|
335-
let containment: f64 = common as f64 / total as f64;
336-
let jaccard: f64 = common as f64 / (common + 2 * (total - common)) as f64;
337-
(containment, jaccard, common, total)
338-
}
339-
340340
// TODO: add another method like this to allow 0's in ref sketch for hashes present in sketches?
341341
// TODO: maybe we want to do NNLS on these matrices in Rust? for example code, see:
342342
// https://github.com/igmanthony/fnnls/blob/master/src/fnnls.rs

lib/src/filtering.rs

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,7 @@ impl FilterParams {
3131
Some(u32::max(l, sketch.filter_params.abun_filter.0.unwrap_or(0))),
3232
Some(u32::min(
3333
h,
34-
sketch
35-
.filter_params
36-
.abun_filter
37-
.1
38-
.unwrap_or(u32::max_value()),
34+
sketch.filter_params.abun_filter.1.unwrap_or(u32::MAX),
3935
)),
4036
),
4137
(Some(l), None) => (
@@ -46,11 +42,7 @@ impl FilterParams {
4642
None,
4743
Some(u32::min(
4844
h,
49-
sketch
50-
.filter_params
51-
.abun_filter
52-
.1
53-
.unwrap_or(u32::max_value()),
45+
sketch.filter_params.abun_filter.1.unwrap_or(u32::MAX),
5446
)),
5547
),
5648
(None, None) => (None, None),
@@ -341,7 +333,7 @@ pub fn filter_abundance(
341333
) -> Vec<KmerCount> {
342334
let mut filtered = Vec::new();
343335
let lo_threshold = low.unwrap_or(0u32);
344-
let hi_threshold = high.unwrap_or(u32::max_value());
336+
let hi_threshold = high.unwrap_or(u32::MAX);
345337
for kmer in sketch {
346338
if lo_threshold <= kmer.count && kmer.count <= hi_threshold {
347339
filtered.push(kmer.clone());

lib/src/serialization/json.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ impl<'de> Deserialize<'de> for QuotedU64 {
247247
{
248248
struct QuotedU64Visitor;
249249

250-
impl<'de> Visitor<'de> for QuotedU64Visitor {
250+
impl Visitor<'_> for QuotedU64Visitor {
251251
type Value = QuotedU64;
252252

253253
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {

lib/src/serialization/mod.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,13 +151,8 @@ pub fn write_finch_file(file: &mut dyn Write, sketches: &[Sketch]) -> FinchResul
151151
let mut cap_filter_params = cap_sketch.reborrow().init_filter_params();
152152
cap_filter_params.set_filtered(sketch.filter_params.filter_on.unwrap_or(false));
153153
cap_filter_params.set_low_abun_filter(sketch.filter_params.abun_filter.0.unwrap_or(0));
154-
cap_filter_params.set_high_abun_filter(
155-
sketch
156-
.filter_params
157-
.abun_filter
158-
.1
159-
.unwrap_or(::std::u32::MAX),
160-
);
154+
cap_filter_params
155+
.set_high_abun_filter(sketch.filter_params.abun_filter.1.unwrap_or(u32::MAX));
161156
cap_filter_params.set_err_filter(sketch.filter_params.err_filter);
162157
cap_filter_params.set_strand_filter(sketch.filter_params.strand_filter);
163158

0 commit comments

Comments
 (0)