add citations, references to libraries used

henrykrumb · henrykrumb · commit 8d3f71494289 · 2025-08-12T09:53:01.000+02:00
diff --git a/whitepaper/paper.bib b/whitepaper/paper.bib
@@ -1,4 +1,34 @@
-@article{kalkhof2023m3dnca,
+@article{hunter2007matplotlib,
+  title = {Matplotlib: {{A 2D}} Graphics Environment},
+  author = {Hunter, John D.},
+  year = {2007},
+  journal = {Computing in Science \& Engineering},
+  volume = {9},
+  number = {3},
+  pages = {90--95},
+  publisher = {IEEE Computer Society},
+  doi = {10.1109/MCSE.2007.55},
+  abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.}
+}
+
+@misc{jha2019kvasirseg,
+  title = {Kvasir-{{SEG}}: {{A Segmented Polyp Dataset}}},
+  shorttitle = {Kvasir-{{SEG}}},
+  author = {Jha, Debesh and Smedsrud, Pia H. and Riegler, Michael A. and Halvorsen, P{\aa}l and de Lange, Thomas and Johansen, Dag and Johansen, H{\aa}vard D.},
+  year = {2019},
+  month = nov,
+  number = {arXiv:1911.07069},
+  eprint = {1911.07069},
+  primaryclass = {eess},
+  publisher = {arXiv},
+  doi = {10.48550/arXiv.1911.07069},
+  urldate = {2025-08-12},
+  abstract = {Pixel-wise image segmentation is a highly demanding task in medical-image analysis. In practice, it is difficult to find annotated medical images with corresponding segmentation masks. In this paper, we present Kvasir-SEG: an open-access dataset of gastrointestinal polyp images and corresponding segmentation masks, manually annotated by a medical doctor and then verified by an experienced gastroenterologist. Moreover, we also generated the bounding boxes of the polyp regions with the help of segmentation masks. We demonstrate the use of our dataset with a traditional segmentation approach and a modern deep-learning based Convolutional Neural Network (CNN) approach. The dataset will be of value for researchers to reproduce results and compare methods. By adding segmentation masks to the Kvasir dataset, which only provide frame-wise annotations, we enable multimedia and computer vision researchers to contribute in the field of polyp segmentation and automatic analysis of colonoscopy images.},
+  archiveprefix = {arXiv},
+  keywords = {Computer Science - Computer Vision and Pattern Recognition,Electrical Engineering and Systems Science - Image and Video Processing}
+}
+
+@article{kalkhof2023m3dncaa,
   title = {{{M3D-NCA}}: {{Robust 3D Segmentation}} with {{Built-in Quality Control}}},
   author = {Kalkhof, John and Mukhopadhyay, A.},
   year = {2023},
@@ -29,10 +59,29 @@ @misc{kalkhof2024frequencytime
   urldate = {2025-07-10},
   abstract = {Despite considerable success, large Denoising Diffusion Models (DDMs) with UNet backbone pose practical challenges, particularly on limited hardware and in processing gigapixel images. To address these limitations, we introduce two Neural Cellular Automata (NCA)-based DDMs: Diff-NCA and FourierDiff-NCA. Capitalizing on the local communication capabilities of NCA, Diff-NCA significantly reduces the parameter counts of NCA-based DDMs. Integrating Fourier-based diffusion enables global communication early in the diffusion process. This feature is particularly valuable in synthesizing complex images with important global features, such as the CelebA dataset. We demonstrate that even a 331k parameter Diff-NCA can generate 512x512 pathology slices, while FourierDiff-NCA (1.1m parameters) reaches a three times lower FID score of 43.86, compared to the four times bigger UNet (3.94m parameters) with a score of 128.2. Additionally, FourierDiff-NCA can perform diverse tasks such as super-resolution, out-of-distribution image synthesis, and inpainting without explicit training.},
   archiveprefix = {arXiv},
-  keywords = {Application:Medical Imaging,Dataset:CelebA,Neural Cellular Automata,Subject:Digital Pathology,Task:Synthesis},
+  keywords = {Application:Medical Imaging,Dataset:CelebA,Neural Cellular Automata,Subject:Digital Pathology,Task:Synthesis}
+}
+
+@article{kalkhof2025parameterefficient,
+  title = {Parameter-Efficient Diffusion with Neural Cellular Automata},
+  author = {Kalkhof, John and K{\"u}hn, Arlene and Frisch, Yannik and Mukhopadhyay, Anirban},
+  year = {2025},
+  month = may,
+  journal = {npj Unconventional Computing},
+  volume = {2},
+  number = {1},
+  pages = {10},
+  publisher = {Nature Publishing Group},
+  issn = {3004-8672},
+  doi = {10.1038/s44335-025-00026-4},
+  urldate = {2025-07-10},
+  abstract = {Traditional Denoising Diffusion Models (DDMs) with UNet backbones are over-parameterized, compromising their effectiveness on limited hardware and in processing gigapixel images. To address this inefficiency, we introduce two Neural Cellular Automata (NCA)-based DDMs: Diff-NCA and FourierDiff-NCA. Leveraging the efficient local communication of NCA, Diff-NCA drastically reduces parameter counts, effectively generating 512 {\texttimes} 512 pathology slices with just 336k parameters. Extending this approach, FourierDiff-NCA integrates Fourier-based diffusion to facilitate early global communication, essential for handling complex datasets such as CelebA. With only 1.1 m parameters, it achieves a more than two times lower FID score of 49.48 compared to the four times larger UNet, which scores 128.2. This performance disparity underscores the utility of NCA-based methods in enhancing parameter efficiency. FourierDiff-NCA also demonstrates versatility by performing tasks such as super-resolution, out-of-distribution image synthesis, and inpainting without task-specific training.},
+  copyright = {2025 The Author(s)},
+  langid = {english},
+  keywords = {Application:Medical Imaging,Dataset:BCSS,Dataset:CelebA,Neural Cellular Automata,Subject:Digital Pathology,tagme,Task:Image Synthesis}
 }
 
-@article{krumb2025encapsulate,
+@article{krumb2025encapsulatea,
   title = {{{eNCApsulate}}: Neural Cellular Automata for Precision Diagnosis on Capsule Endoscopes},
   shorttitle = {{{eNCApsulate}}},
   author = {Krumb, Henry John and Mukhopadhyay, Anirban},
@@ -44,7 +93,40 @@ @article{krumb2025encapsulate
   urldate = {2025-07-10},
   abstract = {Wireless capsule endoscopy (WCE) is a noninvasive imaging method for the entire gastrointestinal tract and is a pain-free alternative to traditional endoscopy. It generates extensive video data that requires significant review time, and localizing the capsule after ingestion is a challenge. Techniques like bleeding detection and depth estimation can help with localization of pathologies, but deep learning models are typically too large to run directly on the capsule.},
   langid = {english},
-  keywords = {Application:Medical Imaging,Dataset:KID2,Dataset:KvasirCapsule,Modality:WCE,Neural Cellular Automata,Subject:Intestine,Task:Depth Estimation,Task:Segmentation},
+  keywords = {Application:Medical Imaging,Dataset:KID2,Dataset:KvasirCapsule,Modality:WCE,Neural Cellular Automata,Subject:Intestine,Task:Depth Estimation,Task:Segmentation}
+}
+
+@article{mordvintsev2020growingb,
+  title = {Growing {{Neural Cellular Automata}}},
+  author = {Mordvintsev, Alexander and Randazzo, Ettore and Niklasson, Eyvind and Levin, Michael},
+  year = {2020},
+  month = feb,
+  journal = {Distill},
+  volume = {5},
+  number = {2},
+  pages = {e23},
+  issn = {2476-0757},
+  doi = {10.23915/distill.00023},
+  urldate = {2025-08-12},
+  abstract = {Training an end-to-end differentiable, self-organising cellular automata model of morphogenesis, able to both grow and regenerate specific patterns.},
+  langid = {english}
+}
+
+@misc{paszke2019pytorch,
+  title = {{{PyTorch}}: {{An Imperative Style}}, {{High-Performance Deep Learning Library}}},
+  shorttitle = {{{PyTorch}}},
+  author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and K{\"o}pf, Andreas and Yang, Edward and DeVito, Zach and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
+  year = {2019},
+  month = dec,
+  number = {arXiv:1912.01703},
+  eprint = {1912.01703},
+  primaryclass = {cs},
+  publisher = {arXiv},
+  doi = {10.48550/arXiv.1912.01703},
+  urldate = {2025-08-12},
+  abstract = {Deep learning frameworks have often focused on either usability or speed, but not both. PyTorch is a machine learning library that shows that these two goals are in fact compatible: it provides an imperative and Pythonic programming style that supports code as a model, makes debugging easy and is consistent with other popular scientific computing libraries, while remaining efficient and supporting hardware accelerators such as GPUs. In this paper, we detail the principles that drove the implementation of PyTorch and how they are reflected in its architecture. We emphasize that every aspect of PyTorch is a regular Python program under the full control of its user. We also explain how the careful and pragmatic implementation of the key components of its runtime enables them to work together to achieve compelling performance. We demonstrate the efficiency of individual subsystems, as well as the overall speed of PyTorch on several common benchmarks.},
+  archiveprefix = {arXiv},
+  keywords = {Computer Science - Machine Learning,Computer Science - Mathematical Software,Statistics - Machine Learning}
 }
 
 @article{ranem2024ncamorph,
@@ -55,3 +137,9 @@ @article{ranem2024ncamorph
   volume = {abs/2410.22265},
   keywords = {Application:Medical Imaging,Neural Cellular Automata,tagme,Task:Registration}
 }
+
+@misc{torcheval,
+  title = {{{TorchEval}} --- {{TorchEval}} Main Documentation},
+  urldate = {2025-08-12},
+  url = {https://docs.pytorch.org/torcheval/stable/}
+}
diff --git a/whitepaper/paper.md b/whitepaper/paper.md
@@ -13,6 +13,8 @@ authors:
   - name: Richard Sattel
     orcid: 0009-0003-1060-3462
     affiliation: 1
+  - name: Jonathan Dewenter
+    affiliation: 1
   - name: Dennis Grotz
     affiliation: 1
   - name: Anirban Mukhopadhyay
@@ -32,26 +34,25 @@ bibliography: paper.bib
 
 # Summary
 
-
 Neural Cellular Automata (NCA) are lightweight neural network models that can be employed in various image analysis tasks such as image segmentation, classification and generation.
-These models are recently getting attention in the medical imaging community, thanks to their small size, their robustness and their overall versatility.
-In terms of accuracy, they are often on-par with state-of-the art models, while being orders of magnitude smaller in size.
+Initially proposed in 2020 [@mordvintsev2020growingb], these models have recently been gaining attention thanks to their small size, their robustness and their overall versatility.
+In terms of accuracy, they are often on par with state-of-the-art models for the respective downstream task, while being orders of magnitude smaller in size.
 However, the training dynamics of NCAs are not yet fully understood, and there is potential for investigating practical tweaks to increase accuracy, reduce VRAM requirements and increase the overall training stability.
 `NCALab` provides a unified and extensible research framework for training and evaluating NCAs, conducting hyperparameter searches and prototyping applications that build on NCAs for image processing.
 
 
 # Statement of Need
 
-NCAs are recently gaining attention in medical imaging, where they are deployed for various modalities in different downstream tasks, including 3D prostate segmentation on MRI [@kalkhof2023mednca] [@kalkhof2023m3dnca], image registration [@ranem2024ncamorph] or image synthesis [@kalkhof2024frequencytime,@kalkhof2025parameterefficient].
+NCAs have recently been gaining attention, especially in medical imaging, where they are deployed for various modalities in different downstream tasks, including 3D prostate segmentation on MRI [@kalkhof2023mednca; @kalkhof2023m3dncaa], image registration [@ranem2024ncamorph] or image synthesis [@kalkhof2024frequencytime; @kalkhof2025parameterefficient].
 In most cases, they outperform other Convolutional Neural Network or Vision Transformer architectures in terms of model size and robustness, while yielding similarly accurate predictions.
 However, there is no unified framework or reference implementation for training, evaluating and experimentation with NCAs.
 
 Research code for Neural Cellular Automata is typically organized in individual repositories for each downstream task under investigation.
-Code bases often follow different approaches, even though the underlying architecture is universal; in most cases, it can be defined by the number of input channels, hidden channels and output channels and the weights of the trained network.
+Code bases often follow different approaches, even though the underlying architecture is largely universal; in most cases, it can be defined by the number of input, hidden and output channels, together with the weights of the trained network.
 
 The goal of NCALab is to provide a uniform and easy-to-use code base for various downstream tasks with NCAs in a shared project.
 Within minutes, researchers and practitioners should be able to create prototypes for their ideas, inspired by the numerous example tasks provided in this code repository.
-Code quality is ensured by unit tests and automated static code analysis through mypy (type checking) and flake8 (linting).
+Code quality is ensured by unit tests and automated static code analysis through mypy (type checking) and ruff (linting).
 
 # Features
 
@@ -60,8 +61,7 @@ NCALab provides dedicated models and example tasks for recurring image analysis
 * Growing Neural Cellular Automata for emoji generation and other experiments
 * Pixel-wise image segmentation
 * Image classification
-* Per-pixel image classification
-* Monocular depth estimation
+* Per-pixel image classification (MNIST digits)
 
 Until now, NCALab provides the following key features:
 
@@ -73,20 +73,15 @@ Until now, NCALab provides the following key features:
 * Visualization and animation of the NCA inference process
 
 
-# Example use case: Training and Fine-Tuning
-
-```python
-
-```
-
-
 # Ongoing Research
 
-A conference paper utilizing NCALab was recently accepted for presentation in [IPCAI 2025](https://ipcai.org), and was published in the _International Journal of Computer-Assisted Radiology and Surgery_ (@krumb2025encapsulate).
+A conference paper utilizing NCALab was recently accepted for presentation at [IPCAI 2025](https://ipcai.org) and published in the _International Journal of Computer-Assisted Radiology and Surgery_ [@krumb2025encapsulatea].
 
 
 # Acknowledgements
 
 This work is partially supported by Norwegian Research Council project number 322600 (Capsnetwork).
 
+NCALab depends on PyTorch [@paszke2019pytorch], NumPy and Matplotlib [@hunter2007matplotlib].
+
 # References