35 changes: 35 additions & 0 deletions .github/workflows/run-commit-hooks.yml
@@ -0,0 +1,35 @@
name: Check pre-commit hooks

on:
push:
branches: [main]
pull_request:
workflow_dispatch:

env:
FORCE_COLOR: "1"

jobs:
tests-using-pixi:
timeout-minutes: 10
runs-on: ubuntu-latest
steps:
- name: Check out the repository
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Install Pixi
uses: prefix-dev/[email protected]
with:
pixi-version: "latest"
run-install: false

- name: Install pre-commit
run: pixi global install pre-commit

- name: Install pre-commit hooks
run: pre-commit install

- name: Run pre-commit hooks
run: pre-commit run --all-files --show-diff-on-failure
29 changes: 0 additions & 29 deletions .github/workflows/run_tests.yaml

This file was deleted.

38 changes: 38 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,38 @@
name: Tests-With-Pixi

on:
push:
branches: [main]
pull_request:
workflow_dispatch:

env:
FORCE_COLOR: "1"

jobs:
tests-using-pixi:
timeout-minutes: 10
runs-on: ubuntu-22.04-gpu-t4
steps:
- name: Check out the repository
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Run nvidia-smi
run: nvidia-smi

- name: Install Pixi
uses: prefix-dev/[email protected]
with:
pixi-version: "latest"
run-install: false

- name: Install pre-commit
run: pixi global install pre-commit

- name: Install dev environment using pixi
run: pixi install --environment dev

- name: Run tests
run: pixi run --environment dev test
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -29,16 +29,16 @@ repos:
hooks:
- id: mypy
additional_dependencies:
- pydantic==2.1.1
- pydantic-settings==2.0.3
- pydantic==2.12.4
- pydantic-settings==2.12.0
exclude: "^(build|docs|tests|benchmark|examples)"
- repo: https://github.com/asottile/pyupgrade
rev: v3.10.1
hooks:
- id: pyupgrade
args: [--py37-plus, --keep-runtime-typing]
- repo: https://github.com/PyCQA/bandit
rev: '1.7.5'
rev: '1.8.6'
hooks:
- id: bandit
args: [ "-c", "pyproject.toml" ]
56 changes: 43 additions & 13 deletions README.md
@@ -3,8 +3,33 @@
Fast batched dataloading of BigWig files containing epigenetic track data and corresponding sequences, powered by the GPU
for deep learning applications.

> ⚠️ **BREAKING CHANGE (v0.3.0+)**: The output matrix dimensionality has changed from `(n_tracks, batch_size, sequence_length)` to `(batch_size, sequence_length, n_tracks)`. This change was long overdue and eliminates the need for (potentially memory-expensive) transpose operations downstream. If you're upgrading from an earlier version, please update your code accordingly (most likely you only need to delete one transpose), as sketched below.
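
A minimal sketch of the kind of downstream fix this usually amounts to (the helper below and its tensor names are hypothetical, not part of bigwig-loader):

```python
import torch


def prepare_targets(values: torch.Tensor) -> torch.Tensor:
    # Before v0.3.0 the loader returned values as (n_tracks, batch_size, sequence_length),
    # so downstream code typically permuted the axes to put the batch dimension first:
    #     return values.permute(1, 2, 0)
    # From v0.3.0 on, values already arrive as (batch_size, sequence_length, n_tracks),
    # so the permute/transpose can simply be deleted.
    return values
```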

> ✨ **NEW FEATURE (v0.3.0+)**: Full `bfloat16` support! You can now specify `dtype="bfloat16"` to get output tensors in bfloat16 format, reducing memory usage by 50% (see the conversion sketch below).
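
Because CuPy has no native bfloat16 dtype, the returned CuPy array exposes the raw bits as `uint16`; in PyTorch they can be reinterpreted without a copy. A minimal sketch, where `values` stands in for the loader output produced with `dtype="bfloat16"`:

```python
import cupy as cp
import torch

# Stand-in for the loader output: a uint16 CuPy array carrying bfloat16 bits.
values = cp.zeros((1, 1000, 2), dtype=cp.uint16)

# torch.as_tensor shares (rather than copies) the underlying GPU buffer where possible;
# .view() reinterprets the same 16-bit payload as bfloat16 without copying.
target = torch.as_tensor(values)       # dtype: torch.uint16
target = target.view(torch.bfloat16)   # same bits, now read as bfloat16
```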




## Quickstart

### Installation with Pixi
Using [pixi](https://pixi.sh/) to install bigwig-loader is highly recommended.
Please take a look at the pixi.toml file. If you only want to use bigwig-loader,
copy that pixi.toml, add the other libraries you need, and use the "prod" environment
(you don't need to clone this repo; pixi will download bigwig-loader from the
conda "dataloading" channel):

* Install pixi, if not installed:
```shell
curl -fsSL https://pixi.sh/install.sh | sh
```

* Change directory to wherever you put the pixi.toml, and run:
```shell
pixi run -e prod <my_training_command>
```


### Installation with conda/mamba

Bigwig-loader mainly depends on the rapidsai kvikio library and cupy, both of which are best installed using
@@ -65,16 +90,17 @@ dataset = PytorchBigWigDataset(
regions_of_interest=train_regions,
collection=example_bigwigs_directory,
reference_genome_path=reference_genome_file,
sequence_length=1000,
center_bin_to_predict=500,
sequence_length=1_000_000,
center_bin_to_predict=500_000,
window_size=1,
batch_size=32,
super_batch_size=1024,
batches_per_epoch=20,
batch_size=1,
super_batch_size=4,
batches_per_epoch=100,
maximum_unknown_bases_fraction=0.1,
sequence_encoder="onehot",
n_threads=4,
return_batch_objects=True,
dtype="bfloat16"
)

# Don't use num_workers > 0 in DataLoader. The heavy
@@ -88,7 +114,7 @@ class MyTerribleModel(torch.nn.Module):
self.linear = torch.nn.Linear(4, 2)

def forward(self, batch):
return self.linear(batch).transpose(1, 2)
return self.linear(batch)


model = MyTerribleModel()
@@ -98,10 +124,10 @@ def poisson_loss(pred, target):
return (pred - target * torch.log(pred.clamp(min=1e-8))).mean()

for batch in dataloader:
# batch.sequences.shape = n_batch (32), sequence_length (1000), onehot encoding (4)
# batch.sequences.shape = n_batch x sequence_length x onehot encoding (4)
pred = model(batch.sequences)
# batch.values.shape = n_batch (32), n_tracks (2) center_bin_to_predict (500)
loss = poisson_loss(pred[:, :, 250:750], batch.values)
# batch.values.shape = n_batch x center_bin_to_predict x n_tracks
loss = poisson_loss(pred[:, 250000:750000, :], batch.values)
print(loss)
optimizer.zero_grad()
loss.backward()
@@ -166,19 +192,23 @@ anything is unclear, please open an issue.

### Environment

The pixi.toml includes a dev environment that has bigwig-loader installed
as an editable pypi dependency.

1. `git clone [email protected]:pfizer-opensource/bigwig-loader`
2. `cd bigwig-loader`
3. create the conda environment: `conda env create -f environment.yml`
4. `pip install -e '.[dev]'`
5. run `pre-commit install` to install the pre-commit hooks
3. optional: `pixi install -e dev`
4. run `pre-commit install` to install the pre-commit hooks

### Run Tests
Tests are in the tests directory. One of the most important tests is
test_against_pybigwig, which makes sure that if there is a mistake in
pyBigWig, it is also in bigwig-loader.

In order to run these tests you need a GPU.

```shell
pytest -vv .
pixi run -e dev test
```

When GitHub runners with GPUs become available we would also
30 changes: 22 additions & 8 deletions bigwig_loader/batch_processor.py
@@ -2,6 +2,7 @@

# from typing import TYPE_CHECKING
from typing import Callable
from typing import Literal
from typing import Optional
from typing import Sequence
from typing import Union
@@ -12,6 +13,7 @@

from bigwig_loader.bigwig import BigWig
from bigwig_loader.decompressor import Decoder
from bigwig_loader.default_value import replace_out_tensor_if_needed
from bigwig_loader.functional import load_decode_search
from bigwig_loader.intervals_to_values import intervals_to_values
from bigwig_loader.memory_bank import MemoryBank
@@ -59,19 +61,29 @@ def decoder(self) -> Decoder:
def memory_bank(self) -> MemoryBank:
return MemoryBank(elastic=True)

def _get_out_tensor(self, batch_size: int, sequence_length: int) -> cp.ndarray:
def _get_out_tensor(
self,
batch_size: int,
sequence_length: int,
dtype: Literal["bfloat16", "float32"] = "float32",
) -> cp.ndarray:
"""Reuses a reserved tensor if possible (when out shape is constant),
otherwise creates a new one.
args:
batch_size: batch size
sequence_length: length of genomic sequence
dtype: output dtype ('float32' or 'bfloat16')
returns:
tensor of shape (number of bigwig files, batch_size, sequence_length)
tensor of shape (batch_size, sequence_length, number of bigwig files)
"""

shape = (len(self._bigwigs), batch_size, sequence_length)
if self._out.shape != shape:
self._out = cp.zeros(shape, dtype=cp.float32)
self._out = replace_out_tensor_if_needed(
self._out,
batch_size=batch_size,
sequence_length=sequence_length,
number_of_tracks=len(self._bigwigs),
dtype=dtype,
)
return self._out

def preprocess(
@@ -105,6 +117,7 @@ def get_batch(
window_size: int = 1,
scaling_factors_cupy: Optional[cp.ndarray] = None,
default_value: float = 0.0,
dtype: Literal["float32", "bfloat16"] = "float32",
out: Optional[cp.ndarray] = None,
) -> cp.ndarray:
(
@@ -139,9 +152,10 @@
query_ends=abs_end,
window_size=window_size,
default_value=default_value,
dtype=dtype,
out=out,
)
batch = cp.transpose(out, (1, 0, 2))
# batch = cp.transpose(out, (1, 0, 2))
if scaling_factors_cupy is not None:
batch *= scaling_factors_cupy
return batch
out *= scaling_factors_cupy
return out
2 changes: 1 addition & 1 deletion bigwig_loader/bigwig.py
@@ -424,7 +424,7 @@ def _guess_max_rows_per_chunk(
data_offsets = self.rtree_leaf_nodes["data_offset"]
data_sizes = self.rtree_leaf_nodes["data_size"]
if len(data_offsets) > sample_size:
sample_indices = sample(range(len(data_offsets)), sample_size)
sample_indices = sample(range(len(data_offsets)), sample_size) # nosec
data_offsets = data_offsets[sample_indices]
data_sizes = data_sizes[sample_indices]

4 changes: 2 additions & 2 deletions bigwig_loader/collection.py
@@ -97,7 +97,7 @@ def reset_gpu(self) -> None:
need to be recreated on the new gpu.
"""

self._out = cp.zeros((len(self), 1, 1), dtype=cp.float32)
# self._out = cp.zeros(1, (len(self), 1), dtype=cp.float32)
if "decoder" in self.__dict__:
del self.__dict__["decoder"]
if "memory_bank" in self.__dict__:
@@ -131,7 +131,7 @@ def batch_processor(self) -> BatchProcessor:
@cached_property
def scaling_factors_cupy(self) -> cp.ndarray:
return cp.asarray(self._scaling_factors, dtype=cp.float32).reshape(
1, len(self._scaling_factors), 1
1, 1, len(self._scaling_factors)
)

def get_batch(
12 changes: 11 additions & 1 deletion bigwig_loader/dataset.py
@@ -87,6 +87,12 @@ class BigWigDataset:
tracks in case sub_sample_tracks is set. Should be Iterable batches of track indices.
return_batch_objects: if True, the batches will be returned as instances of
bigwig_loader.batch.Batch
dtype: float32 or bfloat16 output encoding of the target values (not the sequence encoding).
CuPy does not support bfloat16 yet, but the CUDA kernel that creates the target values
does. When bfloat16 is chosen, the CuPy array will appear to have data type uint16,
which can, for example, be converted to torch.bfloat16 by
torch_tensor = torch.as_tensor(out)  # torch uint16
torch_tensor = torch_tensor.view(torch.bfloat16)  # reinterpret as bfloat16
"""

def __init__(
@@ -107,7 +113,7 @@ def __init__(
] = "onehot",
file_extensions: Sequence[str] = (".bigWig", ".bw"),
crawl: bool = True,
scale: Optional[dict[Union[str | Path], Any]] = None,
scale: Optional[dict[Union[str, Path], Any]] = None,
default_value: float = 0.0,
first_n_files: Optional[int] = None,
position_sampler_buffer_size: int = 100000,
@@ -117,6 +123,7 @@
custom_position_sampler: Optional[Iterable[tuple[str, int]]] = None,
custom_track_sampler: Optional[Iterable[list[int]]] = None,
return_batch_objects: bool = False,
dtype: Literal["float32", "bfloat16"] = "float32",
):
super().__init__()

@@ -176,6 +183,8 @@ def __init__(
else:
self._track_sampler = None

self._dtype = dtype

def _create_dataloader(self) -> StreamedDataloader:
sequence_sampler = GenomicSequenceSampler(
reference_genome_path=self.reference_genome_path,
@@ -199,6 +208,7 @@ def _create_dataloader(self) -> StreamedDataloader:
slice_size=self.batch_size,
window_size=self.window_size,
default_value=self._default_value,
dtype=self._dtype,
)

def __iter__(