You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: whitepaper/paper.bib
+92-4Lines changed: 92 additions & 4 deletions
Original file line number
Diff line number
Diff line change
@@ -1,4 +1,34 @@
1
-
@article{kalkhof2023m3dnca,
1
+
@article{hunter2007matplotlib,
2
+
title = {Matplotlib: {{A 2D}} Graphics Environment},
3
+
author = {Hunter, John D.},
4
+
year = {2007},
5
+
journal = {Computing in Science \& Engineering},
6
+
volume = {9},
7
+
number = {3},
8
+
pages = {90--95},
9
+
publisher = {IEEE Computer Society},
10
+
doi = {10.1109/MCSE.2007.55},
11
+
abstract = {Matplotlib is a 2D graphics package used for Python for application development, interactive scripting, and publication-quality image generation across user interfaces and operating systems.}
12
+
}
13
+
14
+
@misc{jha2019kvasirseg,
15
+
title = {Kvasir-{{SEG}}: {{A Segmented Polyp Dataset}}},
16
+
shorttitle = {Kvasir-{{SEG}}},
17
+
author = {Jha, Debesh and Smedsrud, Pia H. and Riegler, Michael A. and Halvorsen, P{\aa}l and de Lange, Thomas and Johansen, Dag and Johansen, H{\aa}vard D.},
18
+
year = {2019},
19
+
month = nov,
20
+
number = {arXiv:1911.07069},
21
+
eprint = {1911.07069},
22
+
primaryclass = {eess},
23
+
publisher = {arXiv},
24
+
doi = {10.48550/arXiv.1911.07069},
25
+
urldate = {2025-08-12},
26
+
abstract = {Pixel-wise image segmentation is a highly demanding task in medical-image analysis. In practice, it is difficult to find annotated medical images with corresponding segmentation masks. In this paper, we present Kvasir-SEG: an open-access dataset of gastrointestinal polyp images and corresponding segmentation masks, manually annotated by a medical doctor and then verified by an experienced gastroenterologist. Moreover, we also generated the bounding boxes of the polyp regions with the help of segmentation masks. We demonstrate the use of our dataset with a traditional segmentation approach and a modern deep-learning based Convolutional Neural Network (CNN) approach. The dataset will be of value for researchers to reproduce results and compare methods. By adding segmentation masks to the Kvasir dataset, which only provide frame-wise annotations, we enable multimedia and computer vision researchers to contribute in the field of polyp segmentation and automatic analysis of colonoscopy images.},
27
+
archiveprefix = {arXiv},
28
+
keywords = {Computer Science - Computer Vision and Pattern Recognition,Electrical Engineering and Systems Science - Image and Video Processing}
29
+
}
30
+
31
+
@article{kalkhof2023m3dncaa,
2
32
title = {{{M3D-NCA}}: {{Robust 3D Segmentation}} with {{Built-in Quality Control}}},
abstract = {Despite considerable success, large Denoising Diffusion Models (DDMs) with UNet backbone pose practical challenges, particularly on limited hardware and in processing gigapixel images. To address these limitations, we introduce two Neural Cellular Automata (NCA)-based DDMs: Diff-NCA and FourierDiff-NCA. Capitalizing on the local communication capabilities of NCA, Diff-NCA significantly reduces the parameter counts of NCA-based DDMs. Integrating Fourier-based diffusion enables global communication early in the diffusion process. This feature is particularly valuable in synthesizing complex images with important global features, such as the CelebA dataset. We demonstrate that even a 331k parameter Diff-NCA can generate 512x512 pathology slices, while FourierDiff-NCA (1.1m parameters) reaches a three times lower FID score of 43.86, compared to the four times bigger UNet (3.94m parameters) with a score of 128.2. Additionally, FourierDiff-NCA can perform diverse tasks such as super-resolution, out-of-distribution image synthesis, and inpainting without explicit training.},
title = {Parameter-Efficient Diffusion with Neural Cellular Automata},
67
+
author = {Kalkhof, John and K{\"u}hn, Arlene and Frisch, Yannik and Mukhopadhyay, Anirban},
68
+
year = {2025},
69
+
month = may,
70
+
journal = {npj Unconventional Computing},
71
+
volume = {2},
72
+
number = {1},
73
+
pages = {10},
74
+
publisher = {Nature Publishing Group},
75
+
issn = {3004-8672},
76
+
doi = {10.1038/s44335-025-00026-4},
77
+
urldate = {2025-07-10},
78
+
abstract = {Traditional Denoising Diffusion Models (DDMs) with UNet backbones are over-parameterized, compromising their effectiveness on limited hardware and in processing gigapixel images. To address this inefficiency, we introduce two Neural Cellular Automata (NCA)-based DDMs: Diff-NCA and FourierDiff-NCA. Leveraging the efficient local communication of NCA, Diff-NCA drastically reduces parameter counts, effectively generating 512 {\texttimes} 512 pathology slices with just 336k parameters. Extending this approach, FourierDiff-NCA integrates Fourier-based diffusion to facilitate early global communication, essential for handling complex datasets such as CelebA. With only 1.1 m parameters, it achieves a more than two times lower FID score of 49.48 compared to the four times larger UNet, which scores 128.2. This performance disparity underscores the utility of NCA-based methods in enhancing parameter efficiency. FourierDiff-NCA also demonstrates versatility by performing tasks such as super-resolution, out-of-distribution image synthesis, and inpainting without task-specific training.},
title = {{{eNCApsulate}}: Neural Cellular Automata for Precision Diagnosis on Capsule Endoscopes},
37
86
shorttitle = {{{eNCApsulate}}},
38
87
author = {Krumb, Henry John and Mukhopadhyay, Anirban},
@@ -44,7 +93,40 @@ @article{krumb2025encapsulate
44
93
urldate = {2025-07-10},
45
94
abstract = {Wireless capsule endoscopy (WCE) is a noninvasive imaging method for the entire gastrointestinal tract and is a pain-free alternative to traditional endoscopy. It generates extensive video data that requires significant review time, and localizing the capsule after ingestion is a challenge. Techniques like bleeding detection and depth estimation can help with localization of pathologies, but deep learning models are typically too large to run directly on the capsule.},
author = {Mordvintsev, Alexander and Randazzo, Ettore and Niklasson, Eyvind and Levin, Michael},
102
+
year = {2020},
103
+
month = feb,
104
+
journal = {Distill},
105
+
volume = {5},
106
+
number = {2},
107
+
pages = {e23},
108
+
issn = {2476-0757},
109
+
doi = {10.23915/distill.00023},
110
+
urldate = {2025-08-12},
111
+
abstract = {Training an end-to-end differentiable, self-organising cellular automata model of morphogenesis, able to both grow and regenerate specific patterns.},
112
+
langid = {english}
113
+
}
114
+
115
+
@misc{paszke2019pytorch,
116
+
title = {{{PyTorch}}: {{An Imperative Style}}, {{High-Performance Deep Learning Library}}},
117
+
shorttitle = {{{PyTorch}}},
118
+
author = {Paszke, Adam and Gross, Sam and Massa, Francisco and Lerer, Adam and Bradbury, James and Chanan, Gregory and Killeen, Trevor and Lin, Zeming and Gimelshein, Natalia and Antiga, Luca and Desmaison, Alban and K{\"o}pf, Andreas and Yang, Edward and DeVito, Zach and Raison, Martin and Tejani, Alykhan and Chilamkurthy, Sasank and Steiner, Benoit and Fang, Lu and Bai, Junjie and Chintala, Soumith},
119
+
year = {2019},
120
+
month = dec,
121
+
number = {arXiv:1912.01703},
122
+
eprint = {1912.01703},
123
+
primaryclass = {cs},
124
+
publisher = {arXiv},
125
+
doi = {10.48550/arXiv.1912.01703},
126
+
urldate = {2025-08-12},
127
+
abstract = {Deep learning frameworks have often focused on either usability or speed, but not both. PyTorch is a machine learning library that shows that these two goals are in fact compatible: it provides an imperative and Pythonic programming style that supports code as a model, makes debugging easy and is consistent with other popular scientific computing libraries, while remaining efficient and supporting hardware accelerators such as GPUs. In this paper, we detail the principles that drove the implementation of PyTorch and how they are reflected in its architecture. We emphasize that every aspect of PyTorch is a regular Python program under the full control of its user. We also explain how the careful and pragmatic implementation of the key components of its runtime enables them to work together to achieve compelling performance. We demonstrate the efficiency of individual subsystems, as well as the overall speed of PyTorch on several common benchmarks.},
Copy file name to clipboardExpand all lines: whitepaper/paper.md
+11-16Lines changed: 11 additions & 16 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -13,6 +13,8 @@ authors:
13
13
- name: Richard Sattel
14
14
orcid: 0009-0003-1060-3462
15
15
affiliation: 1
16
+
- name: Jonathan Dewenter
17
+
affiliation: 1
16
18
- name: Dennis Grotz
17
19
affiliation: 1
18
20
- name: Anirban Mukhopadhyay
@@ -32,26 +34,25 @@ bibliography: paper.bib
32
34
33
35
# Summary
34
36
35
-
36
37
Neural Cellular Automata (NCA) are lightweight neural network models that can be employed in various image analysis tasks such as image segmentation, classification and generation.
37
-
These models are recently getting attention in the medical imaging community, thanks to their small size, their robustness and their overall versatility.
38
-
In terms of accuracy, they are often on-par with state-of-the art models, while being orders of magnitude smaller in size.
38
+
Initially proposed in 2020 [@mordvintsev2020growingb], these models have recently been gaining attention thanks to their small size, their robustness and their overall versatility.
39
+
In terms of accuracy, they are often on par with state-of-the-art models for the respective downstream task, while being orders of magnitude smaller in size.
39
40
However, the training dynamics of NCAs are not yet fully understood, and there is potential for investigating practical tweaks to increase accuracy, reduce VRAM requirements and increase the overall training stability.
40
41
`NCALab` provides a unified and extensible research framework for training and evaluating NCAs, conducting hyperparameter searches and prototyping applications that build on NCAs for image processing.
41
42
42
43
43
44
# Statement of Need
44
45
45
-
NCAs are recently gaining attention in medical imaging, where they are deployed for various modalities in different downstream tasks, including 3D prostate segmentation on MRI [@kalkhof2023mednca][@kalkhof2023m3dnca], image registration [@ranem2024ncamorph] or image synthesis [@kalkhof2024frequencytime,@kalkhof2025parameterefficient].
46
+
NCAs have recently been gaining attention, especially in medical imaging, where they are deployed for various modalities in different downstream tasks, including 3D prostate segmentation on MRI [@kalkhof2023mednca; @kalkhof2023m3dncaa], image registration [@ranem2024ncamorph] or image synthesis [@kalkhof2024frequencytime; @kalkhof2025parameterefficient].
46
47
In most cases, they outperform other Convolutional Neural Network or Vision Transformer architectures in terms of model size and robustness, while yielding similarly accurate predictions.
47
48
However, there is no unified framework or reference implementation for training, evaluating and experimenting with NCAs.
48
49
49
50
Research code for Neural Cellular Automata is typically organized in individual repositories for each downstream task under investigation.
50
-
Code bases often follow different approaches, even though the underlying architecture is universal; in most cases, it can be defined by the number of input channels, hidden channels and output channels and the weights of the trained network.
51
+
Code bases often follow different approaches, even though the underlying architecture is largely universal; in most cases, it can be defined by the number of input channels, hidden channels and output channels and the weights of the trained network.
51
52
52
53
The goal of NCALab is to provide a uniform and easy-to-use code base for various downstream tasks with NCAs in a shared project.
53
54
Within minutes, researchers and practitioners should be able to create prototypes for their ideas, inspired by the numerous example tasks provided in this code repository.
54
-
Code quality is ensured by unit tests and automated static code analysis through mypy (type checking) and flake8 (linting).
55
+
Code quality is ensured by unit tests and automated static code analysis through mypy (type checking) and ruff (linting).
55
56
56
57
# Features
57
58
@@ -60,8 +61,7 @@ NCALab provides dedicated models and example tasks for recurring image analysis
60
61
* Growing Neural Cellular Automata for emoji generation and other experiments
61
62
* Pixel-wise image segmentation
62
63
* Image classification
63
-
* Per-pixel image classification
64
-
* Monocular depth estimation
64
+
* Per-pixel image classification (MNIST digits)
65
65
66
66
Currently, NCALab provides the following key features:
67
67
@@ -73,20 +73,15 @@ Until now, NCALab provides the following key features:
73
73
* Visualization and animation of the NCA inference process
74
74
75
75
76
-
# Example use case: Training and Fine-Tuning
77
-
78
-
```python
79
-
80
-
```
81
-
82
-
83
76
# Ongoing Research
84
77
85
-
A conference paper utilizing NCALab was recently accepted for presentation in [IPCAI 2025](https://ipcai.org), and was published in the _International Journal of Computer-Assisted Radiology and Surgery_(@krumb2025encapsulate).
78
+
A conference paper utilizing NCALab was recently accepted for presentation at [IPCAI 2025](https://ipcai.org), and was published in the _International Journal of Computer-Assisted Radiology and Surgery_ [@krumb2025encapsulatea].
86
79
87
80
88
81
# Acknowledgements
89
82
90
83
This work is partially supported by the Norwegian Research Council, project number 322600 (Capsnetwork).
91
84
85
+
NCALab depends on PyTorch [@paszke2019pytorch], NumPy, and Matplotlib [@hunter2007matplotlib].
0 commit comments