diff --git a/README.md b/README.md
index e1bbdc1219..ff815f7cbc 100644
--- a/README.md
+++ b/README.md
@@ -5,6 +5,7 @@
 [![DOI](https://zenodo.org/badge/108329371.svg)](https://zenodo.org/badge/latestdoi/108329371)
 [![PyPI version](https://badge.fury.io/py/hls4ml.svg)](https://badge.fury.io/py/hls4ml)
 [![Supported Python versions](https://img.shields.io/pypi/pyversions/hls4ml.svg)](https://pypi.org/project/hls4ml/)
+[![Documentation Status](https://github.com/fastmachinelearning/hls4ml/actions/workflows/build-sphinx.yml/badge.svg)](https://fastmachinelearning.org/hls4ml)
 
 A package for machine learning inference in FPGAs. We create firmware implementations of machine learning algorithms using high level synthesis language (HLS). We translate traditional open-source machine learning package models into HLS that can be configured for your use-case!
 
@@ -17,13 +18,13 @@ For more information visit the webpage: [https://fastmachinelearning.org/hls4ml/
 Detailed tutorials on how to use `hls4ml`'s various functionalities can be found [here](https://github.com/hls-fpga-machine-learning/hls4ml-tutorial).
 
 # Installation
-```
+```bash
 pip install hls4ml
 ```
 
-To install the extra dependencies for profiling: 
+To install the extra dependencies for profiling:
 
-```
+```bash
 pip install hls4ml[profiling]
 ```
 
@@ -32,13 +33,14 @@ pip install hls4ml[profiling]
 ```Python
 import hls4ml
 
-#Fetch a keras model from our example repository
-#This will download our example model to your working directory and return an example configuration file
+# Fetch a keras model from our example repository
+# This will download our example model to your working directory and return an example configuration file
 config = hls4ml.utils.fetch_example_model('KERAS_3layer.json')
 
-print(config) #You can print the configuration to see some default parameters
+# You can print the configuration to see some default parameters
+print(config)
 
-#Convert it to a hls project
+# Convert it to a hls project
 hls_model = hls4ml.converters.keras_to_hls(config)
 
 # Print full list of example models if you want to explore more
@@ -49,11 +51,11 @@ hls4ml.utils.fetch_example_list()
 Note: Vitis HLS is not yet supported. Vivado HLS versions between 2018.2 and 2020.1 are recommended.
 
 ```Python
-#Use Vivado HLS to synthesize the model
-#This might take several minutes
+# Use Vivado HLS to synthesize the model
+# This might take several minutes
 hls_model.build()
 
-#Print out the report if you want
+# Print out the report if you want
 hls4ml.report.read_vivado_report('my-hls-test')
 ```
 
diff --git a/contrib/kl_layer/README.md b/contrib/kl_layer/README.md
new file mode 100644
index 0000000000..5d306ae69a
--- /dev/null
+++ b/contrib/kl_layer/README.md
@@ -0,0 +1,18 @@
+This folder contains the implementation of a custom KL divergence layer.
+This is a custom implementation and not a built-in layer in any deep learning framework.
+It was developed specifically for the [AD@L1 CMS paper](https://www.nature.com/articles/s42256-022-00441-3).
+
+# Files
+
+* `kl_layer.py`: contains the standalone implementation of the custom KL divergence layer
+* `kl_layer.h`: contains the HLS implementation of the KL layer
+
+
+# Usage
+
+`kl_layer.py` contains an example of how to use the KL layer.
+To run it, do:
+
+```
+python kl_layer.py
+```
diff --git a/contrib/kl_layer/kl_layer.h b/contrib/kl_layer/kl_layer.h
new file mode 100644
index 0000000000..0435b9a22e
--- /dev/null
+++ b/contrib/kl_layer/kl_layer.h
@@ -0,0 +1,87 @@
+#ifndef KL_LAYER_H_
+#define KL_LAYER_H_
+
+#include "nnet_activation.h"
+#include "nnet_common.h"
+#include <cmath>
+#include <cstdlib>
+
+namespace nnet {
+
+struct distance_config { // Default compile-time configuration read by klloss through CONFIG_T
+    // IO size: n_in is the length of the mean / log_var arrays, n_out the length of the result array
+    static const unsigned n_in = 10;
+    static const unsigned n_out = 1;
+
+    // Internal data type definitions
+    typedef float accum_t; // type of the per-element terms and of their sum in klloss
+    typedef float sum_t;   // not referenced by klloss in this header
+    typedef ap_fixed<18, 8> exp_table_t; // type of the exp lookup-table entries
+
+    // Lookup-table parameters: table_size entries covering inputs in [-exp_range, +exp_range)
+    static const unsigned table_size = 1024;
+    static constexpr unsigned exp_range = 8;
+};
+
+template <typename CONFIG_T, int N_TABLE> void init_klloss_exp_table(typename CONFIG_T::exp_table_t table_out[N_TABLE]) {
+    for (int ii = 0; ii < N_TABLE; ii++) { // fill all N_TABLE entries of table_out
+        // First, convert from table index to X-value: ii = 0 maps to -exp_range, ii = N_TABLE / 2 maps to 0 (range [-exp_range, +exp_range))
+        float in_val = 2 * CONFIG_T::exp_range * (ii - float(N_TABLE) / 2.0) / float(N_TABLE);
+        // Next, compute lookup table function (exp_fcn_float is defined outside this header; result is converted to exp_table_t)
+        typename CONFIG_T::exp_table_t real_val = exp_fcn_float(in_val);
+        // std::cout << "Lookup table In Value: " << in_val << " Result: " << real_val << " Index: " << ii << std::endl;
+        table_out[ii] = real_val;
+    }
+}
+template <class data1_T, class data2_T, class res_T, typename CONFIG_T>
+void klloss(data1_T mean[CONFIG_T::n_in], data2_T log_var[CONFIG_T::n_in], res_T res[CONFIG_T::n_out]) {
+    #pragma HLS PIPELINE
+    // Initialize the exp lookup table: plain locals when __HLS_SYN__ is defined, function-local statics otherwise
+#ifdef __HLS_SYN__
+    bool initialized = false;
+    typename CONFIG_T::exp_table_t exp_table[CONFIG_T::table_size];
+#else
+    static bool initialized = false;
+    static typename CONFIG_T::exp_table_t exp_table[CONFIG_T::table_size];
+#endif
+    if (!initialized) {
+        init_klloss_exp_table<CONFIG_T, CONFIG_T::table_size>(exp_table);
+        initialized = true;
+    }
+    typename CONFIG_T::accum_t kl[CONFIG_T::n_in]; // per-element term, built up as 1 + log_var - exp(log_var) - mean^2
+    #pragma HLS ARRAY_PARTITION variable=kl complete
+    typename CONFIG_T::accum_t mean_sq[CONFIG_T::n_in]; // mean[i] * mean[i]
+    #pragma HLS ARRAY_PARTITION variable=mean_sq complete
+    typename CONFIG_T::accum_t kl_sum(0);
+    for (unsigned i = 0; i < CONFIG_T::n_in; i++) { // step 1: square the means and start each term with 1 + log_var
+        #pragma HLS UNROLL
+        mean_sq[i] = mean[i] * mean[i];
+        kl[i] = data2_T(1.) + log_var[i];
+        // std::cout << "Log var: " << log_var[i] << " Result: " << kl[i] << std::endl;
+    }
+    constexpr unsigned table_scale = (unsigned)(CONFIG_T::table_size / (2 * CONFIG_T::exp_range)); // table entries per unit of log_var
+    constexpr unsigned index_scale = (unsigned)(CONFIG_T::exp_range * table_scale); // table index that corresponds to log_var == 0
+    for (unsigned i = 0; i < CONFIG_T::n_in; i++) { // step 2: subtract exp(log_var), read from the lookup table
+        #pragma HLS UNROLL
+        auto data_round = log_var[i] * table_scale; // NOTE(review): type is deduced from data2_T; no explicit rounding to an integer happens here - confirm
+        auto index = data_round + index_scale;
+        if (index < 0) // clamp inputs below the table range to the first entry
+            index = 0;
+        if (index > CONFIG_T::table_size - 1) // clamp inputs above the table range to the last entry
+            index = CONFIG_T::table_size - 1;
+        kl[i] -= exp_table[index];
+        // std::cout << "Exp var: " << exp_table[index] << " Result: " << kl[i] << " Index: " << index << std::endl;
+    }
+    for (unsigned i = 0; i < CONFIG_T::n_in; i++) { // step 3: subtract mean^2
+        #pragma HLS UNROLL
+        kl[i] -= mean_sq[i];
+    }
+    Op_add<typename CONFIG_T::accum_t> op_add; // reduce and Op_add are defined outside this header; presumably this sums the n_in terms
+    kl_sum = reduce<typename CONFIG_T::accum_t, CONFIG_T::n_in, Op_add<typename CONFIG_T::accum_t>>(kl, op_add);
+    // std::cout << "KL sum: " << kl_sum << std::endl;
+    kl_sum *= typename CONFIG_T::accum_t(1. / CONFIG_T::n_in); // scale by 1 / n_in
+    res[0] = res_T(-0.5) * kl_sum; // only res[0] is written
+}
+} // namespace nnet
+
+#endif
diff --git a/contrib/kl_layer/kl_layer.py b/contrib/kl_layer/kl_layer.py
new file mode 100644
index 0000000000..ec2af1b797
--- /dev/null
+++ b/contrib/kl_layer/kl_layer.py
@@ -0,0 +1,185 @@
+"""
+    Usage example for a custom KL loss layer
+    Takes two arrays as input, z_mean and z_log_var,
+    and computes the KL "distance" between the standard normal distribution
+    and a Gaussian with mean z_mean and log-variance z_log_var
+
+    The HLS part is in contrib/kl_layer/kl_layer.h
+"""
+from pathlib import Path
+
+import numpy as np
+import tensorflow as tf
+
+try:
+    from keras.layers.merge import _Merge as Merge
+except Exception:
+    from keras.layers.merging.base_merge import _Merge as Merge
+
+from tensorflow.python.keras.utils import tf_utils
+from tensorflow.python.ops import math_ops
+
+import hls4ml
+from hls4ml.converters.keras_to_hls import parse_default_keras_layer
+from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
+from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode
+
+
+# Keras implementation of a KL layer
+class KLLoss(Merge):
+    '''Keras implementation of a KL loss custom layer; merges the two inputs [mean, log_var] into one value per sample'''
+
+    @tf_utils.shape_type_conversion
+    def build(self, input_shape):
+        super().build(input_shape)  # no extra state; defers entirely to the parent build
+
+    def _merge_function(self, inputs):
+        # inputs is indexed as [mean, log_var]
+        mean = inputs[0]
+        log_var = inputs[1]
+        # element-wise term, then -0.5 * mean over the last axis (that axis is kept with size 1)
+        kl = 1.0 + log_var - math_ops.square(mean) - math_ops.exp(log_var)
+        kl = -0.5 * math_ops.reduce_mean(kl, axis=-1, keepdims=True)
+
+        return kl
+
+
+# hls4ml implementations
+class HKLLoss(hls4ml.model.layers.Layer):
+    '''hls4ml implementation of a KL loss custom layer; declares its attributes and a single-element output'''
+
+    _expected_attributes = [
+        ConfigurableAttribute('table_size', default=1024),  # same default as distance_config::table_size in kl_layer.h
+        ConfigurableAttribute('exp_range', default=8),  # same default as distance_config::exp_range in kl_layer.h
+        TypeAttribute('accum'),  # no default given here
+        TypeAttribute(
+            'sum',
+            default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+        ),
+        TypeAttribute(
+            'exp_table',
+            default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+        ),
+    ]
+
+    def initialize(self):
+        self.add_output_variable(shape=[1], dim_names=[f'KL_LOSS_{self.index}'])  # one output value, dimension named after the layer index
+
+
+# Templates: C++ config struct and function call text for the KL layer; {placeholders} are filled with str.format
+distance_config_template = """struct config{index} : nnet::distance_config {{
+    static const unsigned n_in = {n_in};
+    static const unsigned n_out = 1;
+    typedef {accum_t.name} accum_t;
+    typedef {sum_t.name} sum_t;
+    typedef {exp_table_t.name} exp_table_t;
+    static const unsigned table_size = {table_size};
+    static constexpr float exp_range = {exp_range};
+}};\n"""  # NOTE(review): exp_range is float here but unsigned in nnet::distance_config (kl_layer.h) - confirm intended
+distance_function_template = 'nnet::klloss<{input1_t}, {input2_t}, {output_t}, {config}>({input1}, {input2}, {output});'
+distance_include_list = ['nnet_utils/kl_layer.h']  # header passed as include_header to the function call template
+
+
+class HKLLossConfigTemplate(hls4ml.backends.template.LayerConfigTemplate):  # renders distance_config_template for an HKLLoss node
+    def __init__(self):
+        super().__init__(HKLLoss)  # bind this template to the HKLLoss layer class
+        self.template = distance_config_template
+
+    def format(self, node):
+        params = self._default_config_params(node)  # presumably supplies index, table_size, exp_range and the *_t types - confirm
+        params['n_in'] = node.get_input_variable(node.inputs[0]).shape[0]  # first dimension of the first input
+        params['n_out'] = 1  # the template has no {n_out} placeholder (it hard-codes 1), so this value is not substituted
+        return self.template.format(**params)
+
+
+class HKLLossFunctionTemplate(hls4ml.backends.template.FunctionCallTemplate):  # renders the nnet::klloss call for an HKLLoss node
+    def __init__(self):
+        super().__init__(HKLLoss, include_header=distance_include_list)
+        self.template = distance_function_template
+
+    def format(self, node):
+        params = {}  # built by hand: the layer has two inputs (inputs[0] and inputs[1])
+        params['config'] = f'config{node.index}'  # same struct name pattern as in distance_config_template
+        params['input1_t'] = node.get_input_variable(node.inputs[0]).type.name
+        params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name
+        params['output_t'] = node.get_output_variable().type.name
+        params['input1'] = node.get_input_variable(node.inputs[0]).name
+        params['input2'] = node.get_input_variable(node.inputs[1]).name
+        params['output'] = node.get_output_variable().name
+        # fill distance_function_template with the type and variable names collected above
+        return self.template.format(**params)
+
+
+# Parser for converter
+def parse_klloss_layer(keras_layer, input_names, input_shapes, data_reader):  # data_reader is not used in this function
+    assert 'KLLoss' in keras_layer['class_name']
+    # build the layer dictionary with the default Keras parser
+    layer = parse_default_keras_layer(keras_layer, input_names)
+    # first dimension of the first input (presumably the batch dimension) followed by a single output value
+    output_shape = [input_shapes[0][0], 1]
+
+    return layer, output_shape
+
+
+def main():
+    # Register the converter for custom Keras layer
+    hls4ml.converters.register_keras_layer_handler('KLLoss', parse_klloss_layer)
+
+    # Register the hls4ml's IR layer
+    hls4ml.model.layers.register_layer('KLLoss', HKLLoss)
+
+    # Get the Vivado backend (no optimization passes are registered for this layer)
+    backend = hls4ml.backends.get_backend('Vivado')
+
+    # Register template passes for the given backend
+    backend.register_template(HKLLossConfigTemplate)
+    backend.register_template(HKLLossFunctionTemplate)
+
+    # Register HLS implementation (kl_layer.h located next to this script)
+    p = Path(__file__).parent / 'kl_layer.h'
+    backend.register_source(p)
+
+    # Test if it works
+    # Create a dummy Keras model with KL loss layer fed by two Dense(10) branches
+    inp = tf.keras.layers.Input(shape=(19, 3, 1))
+    z_mean = tf.keras.layers.Dense(10)(inp)
+    z_log_var = tf.keras.layers.Dense(10)(inp)
+    custom_output = KLLoss()([z_mean, z_log_var])
+    # create new model
+    kmodel = tf.keras.models.Model(inputs=inp, outputs=custom_output)
+    kmodel.summary()
+
+    # test on random integer inputs drawn from [-5, 5), batch of one sample
+    x = np.random.randint(-5, 5, (1, 19, 3, 1), dtype='int32')
+    kres = kmodel(x)
+
+    # Create dummy config (model-level settings only)
+    config = {}
+    config['Model'] = {
+        'Precision': 'ap_fixed<16,6>',
+        'ReuseFactor': 1,
+        'ParallelizationFactor': 1,
+        'Strategy': 'Resource',
+    }
+    hmodel = hls4ml.converters.convert_from_keras_model(
+        kmodel,
+        output_dir='hls4mlprj_kl_layer',
+        backend='Vivado',
+        io_type='io_parallel',
+        part='xcvu9p-flga2577-2-e',
+        hls_config=config,
+    )
+    # compile the generated project and run the same input through it
+    hmodel.compile()
+    hres = hmodel.predict(x.astype('float32'))
+
+    print('Compare prediction by hls4ml model to Keras one')
+    print(kres - hres)  # difference is only printed, not asserted
+
+    print('Building model')
+    report = hmodel.build(reset=True, csim=False, cosim=True, synth=True, vsynth=True)  # csim is the only step switched off
+    print(report)
+
+
+if __name__ == '__main__':  # run the demo only when this file is executed as a script
+    main()
diff --git a/docs/conf.py b/docs/conf.py
index 04df6dba96..ab7d6c33bf 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -12,15 +12,17 @@
 #
 import os
 import sys
+
 sys.path.insert(0, os.path.abspath('../'))
 
 import datetime
+
 from setuptools_scm import get_version
 
 # -- Project information -----------------------------------------------------
 
 project = 'hls4ml'
-copyright = str(datetime.datetime.now().year)+', Fast Machine Learning Lab'
+copyright = str(datetime.datetime.now().year) + ', Fast Machine Learning Lab'
 author = 'Fast Machine Learning Lab'
 
 # The full version, including alpha/beta/rc tags
@@ -36,7 +38,8 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.githubpages',
     'sphinx_rtd_theme',
-    'sphinx.ext.napoleon'
+    'sphinx.ext.napoleon',
+    'sphinx_contributors',
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -72,12 +75,11 @@
     'display_version': True,
     'prev_next_buttons_location': 'bottom',
     'style_external_links': False,
-
     'style_nav_header_background': '#2980B9',
     # Toc options
     'collapse_navigation': True,
     'sticky_navigation': True,
     'navigation_depth': 2,
     'includehidden': True,
-    'titles_only': False
+    'titles_only': False,
 }
diff --git a/docs/extension.rst b/docs/extension.rst
new file mode 100644
index 0000000000..2836430ea8
--- /dev/null
+++ b/docs/extension.rst
@@ -0,0 +1,185 @@
+========================
+Extension API
+========================
+
+hls4ml natively supports a large number of neural network layers.
+But what if a desired layer is not supported?
+If it is standard enough and its implementation would benefit the community as a whole, we would welcome a contribution to add it to the standard set of supported layers.
+However, if it is a somewhat niche custom layer, there is another approach we can take to extend hls4ml through the *extension API*.
+
+This documentation will walk through a `complete end-to-end example <https://github.com/fastmachinelearning/hls4ml/blob/main/test/pytest/test_extensions.py>`_, which is part of our testing suite.
+To implement a custom layer in hls4ml with the extension API, the required components are:
+
+* Your custom layer class
+* Equivalent hls4ml custom layer class
+* Parser for the converter
+* HLS implementation
+* Layer config template
+* Function config template
+* Registration of layer, source code, and templates
+
+Complete example
+================
+
+For concreteness, let's say our custom layer ``KReverse`` is implemented in Keras and reverses the order of the last dimension of the input.
+
+.. code-block:: Python
+
+    # Keras implementation of a custom layer
+    class KReverse(tf.keras.layers.Layer):
+        '''Keras implementation of a hypothetical custom layer'''
+
+        def __init__(self):
+            super().__init__()
+
+        def call(self, inputs):
+            return tf.reverse(inputs, axis=[-1])
+
+We can define the equivalent layer in hls4ml ``HReverse``, which inherits from ``hls4ml.model.layers.Layer``.
+
+.. code-block:: Python
+
+    # hls4ml layer implementation
+    class HReverse(hls4ml.model.layers.Layer):
+        '''hls4ml implementation of a hypothetical custom layer'''
+
+        def initialize(self):
+            inp = self.get_input_variable()
+            shape = inp.shape
+            dims = inp.dim_names
+            self.add_output_variable(shape, dims)
+
+A parser for the Keras to HLS converter is also required.
+This parser reads the attributes of the Keras layer instance and populates a dictionary of attributes for the hls4ml layer.
+It also returns a list of output shapes (one shape for each output).
+In this case, there is a single output with the same shape as the input.
+
+.. code-block:: Python
+
+    # Parser for converter
+    def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader):
+        layer = {}
+        layer['class_name'] = 'HReverse'
+        layer['name'] = keras_layer['config']['name']
+        layer['n_in'] = input_shapes[0][1]
+
+        if input_names is not None:
+            layer['inputs'] = input_names
+
+        return layer, [shape for shape in input_shapes[0]]
+
+Next, we need the actual HLS implementation of the function, which can be written in a header file ``nnet_reverse.h``.
+
+.. code-block:: C++
+
+    #ifndef NNET_REVERSE_H_
+    #define NNET_REVERSE_H_
+
+    #include "nnet_common.h"
+
+    namespace nnet {
+
+    struct reverse_config {
+        static const unsigned n_in = 10;
+    };
+
+    template<class data_T, typename CONFIG_T>
+    void reverse(
+        data_T input[CONFIG_T::n_in],
+        data_T reversed[CONFIG_T::n_in]
+    ) {
+        for (int i = 0; i < CONFIG_T::n_in; i++) {
+            reversed[CONFIG_T::n_in - 1 - i] = input[i];
+        }
+    }
+
+    }
+
+    #endif
+
+Now, we can define the layer config and function call templates.
+These two templates determine how to populate the config template based on the layer attributes and the function call signature for the layer in HLS, respectively.
+
+.. code-block:: Python
+
+    rev_config_template = """struct config{index} : nnet::reverse_config {{
+        static const unsigned n_in = {n_in};
+    }};\n"""
+
+    rev_function_template = 'nnet::reverse<{input_t}, {config}>({input}, {output});'
+    rev_include_list = ['nnet_utils/nnet_reverse.h']
+
+
+    class HReverseConfigTemplate(hls4ml.backends.template.LayerConfigTemplate):
+        def __init__(self):
+            super().__init__(HReverse)
+            self.template = rev_config_template
+
+        def format(self, node):
+            params = self._default_config_params(node)
+            return self.template.format(**params)
+
+
+    class HReverseFunctionTemplate(hls4ml.backends.template.FunctionCallTemplate):
+        def __init__(self):
+            super().__init__(HReverse, include_header=rev_include_list)
+            self.template = rev_function_template
+
+        def format(self, node):
+            params = self._default_function_params(node)
+            return self.template.format(**params)
+
+Now, we need to tell hls4ml about the existence of this new layer by registering it.
+We also need to register the parser (a.k.a. the layer handler), the template passes, and HLS implementation source code with the particular backend.
+In this case, the HLS code is valid for both the Vivado and Quartus backends.
+
+.. code-block:: Python
+
+    # Register the converter for custom Keras layer
+    hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer)
+
+    # Register the hls4ml's IR layer
+    hls4ml.model.layers.register_layer('HReverse', HReverse)
+
+    for backend_id in ['Vivado', 'Quartus']:
+        # Register the optimization passes (if any)
+        backend = hls4ml.backends.get_backend(backend_id)
+        backend.register_pass('remove_duplicate_reverse', RemoveDuplicateReverse, flow=f'{backend_id.lower()}:optimize')
+
+        # Register template passes for the given backend
+        backend.register_template(HReverseConfigTemplate)
+        backend.register_template(HReverseFunctionTemplate)
+
+        # Register HLS implementation
+        backend.register_source('nnet_reverse.h')
+
+Finally, we can actually test the hls4ml custom layer compared to the Keras one.
+
+.. code-block:: Python
+
+    # Test if it works
+    kmodel = tf.keras.models.Sequential(
+        [
+            tf.keras.layers.Input(shape=(8,)),
+            KReverse(),
+            tf.keras.layers.ReLU(),
+        ]
+    )
+
+    x = np.random.randint(-5, 5, (8,), dtype='int32')
+    kres = kmodel(x)
+
+    for backend_id in ['Vivado', 'Quartus']:
+
+        hmodel = hls4ml.converters.convert_from_keras_model(
+            kmodel,
+            output_dir=str(f'hls4mlprj_extensions_{backend_id}'),
+            backend=backend_id,
+            io_type='io_parallel',
+            hls_config={'Model': {'Precision': 'ap_int<6>', 'ReuseFactor': 1}},
+        )
+
+        hmodel.compile()
+        hres = hmodel.predict(x.astype('float32'))
+
+        np.testing.assert_array_equal(kres, hres)
diff --git a/docs/flows.rst b/docs/flows.rst
new file mode 100644
index 0000000000..28d423aa8e
--- /dev/null
+++ b/docs/flows.rst
@@ -0,0 +1,6 @@
+====================
+Flows and Optimizers
+====================
+
+- Explain concept of flows and optimizers
+- Describe FIFO buffer optimizer as an example?
diff --git a/docs/index.rst b/docs/index.rst
index da7e445348..c766f18365 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,12 +1,14 @@
 .. toctree::
     :hidden:
-    
+
     Home <self>
     release_notes
     status
-    setup 
+    setup
     command
     concepts
+    flows
+    extension
     reference
 
 .. toctree::
@@ -23,7 +25,7 @@
 
     autodoc/hls4ml
     autodoc/hls4ml.*
-   
+
 
 ==================================
 Welcome to hls4ml's documentation!
@@ -40,92 +42,10 @@ The project is currently in development, so please let us know if you are intere
 
 Project Status
 =================================
-For the latest status including current and planned features, see the :doc:`Status and Features <status>` page. 
+For the latest status including current and planned features, see the :doc:`Status and Features <status>` page.
 
 Tutorials
 =================================
 Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found at:
 
 https://github.com/fastmachinelearning/hls4ml-tutorial
-
-Citation
-=================================
-If you use this software in a publication, please cite the software
-
-..  code-block:: bibtex
-
-    @software{vloncar_2021_5680908,
-    author       = {{FastML Team}},
-    title        = {fastmachinelearning/hls4ml},
-    year         = 2021,
-    publisher    = {Zenodo},
-    doi          = {10.5281/zenodo.1201549},
-    url          = {https://github.com/fastmachinelearning/hls4ml}
-    }
-
-and first publication:
-
-..  code-block:: bibtex
-    
-    @article{Duarte:2018ite,
-        author = "Duarte, Javier and others",
-        title = "{Fast inference of deep neural networks in FPGAs for particle physics}",
-        eprint = "1804.06913",
-        archivePrefix = "arXiv",
-        primaryClass = "physics.ins-det",
-        reportNumber = "FERMILAB-PUB-18-089-E",
-        doi = "10.1088/1748-0221/13/07/P07027",
-        journal = "JINST",
-        volume = "13",
-        number = "07",
-        pages = "P07027",
-        year = "2018"
-    }
-
-Additionally, if you use specific features developed in later papers, please cite those as well. For example, CNNs:
-
-..  code-block:: bibtex
-
-    @article{Aarrestad:2021zos,
-        author = "Aarrestad, Thea and others",
-        title = "{Fast convolutional neural networks on FPGAs with hls4ml}",
-        eprint = "2101.05108",
-        archivePrefix = "arXiv",
-        primaryClass = "cs.LG",
-        reportNumber = "FERMILAB-PUB-21-130-SCD",
-        doi = "10.1088/2632-2153/ac0ea1",
-        journal = "Mach. Learn. Sci. Tech.",
-        volume = "2",
-        number = "4",
-        pages = "045015",
-        year = "2021"
-    }
-    @article{Ghielmetti:2022ndm,
-        author = "Ghielmetti, Nicol\`{o} and others",
-        title = "{Real-time semantic segmentation on FPGAs for autonomous vehicles with hls4ml}",
-        eprint = "2205.07690",
-        archivePrefix = "arXiv",
-        primaryClass = "cs.CV",
-        reportNumber = "FERMILAB-PUB-22-435-PPD",
-        doi = "10.1088/2632-2153/ac9cb5",
-        journal ="Mach. Learn. Sci. Tech.",
-        year = "2022"
-    }
-
-binary/ternary networks:
-
-..  code-block:: bibtex
-
-    @article{Loncar:2020hqp,
-        author = "Ngadiuba, Jennifer and others",
-        title = "{Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML}",
-        eprint = "2003.06308",
-        archivePrefix = "arXiv",
-        primaryClass = "cs.LG",
-        reportNumber = "FERMILAB-PUB-20-167-PPD-SCD",
-        doi = "10.1088/2632-2153/aba042",
-        journal = "Mach. Learn. Sci. Tech.",
-        volume = "2",
-        pages = "015001",
-        year = "2021"
-    }
diff --git a/docs/reference.rst b/docs/reference.rst
index 8216e0157e..aa4443653a 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -1,54 +1,96 @@
 ============================
-Reference and Contributors
+Citation and Contributors
 ============================
 
 
 Citation
-========
-
-If you are using the package please cite:
-
-
-* 
-  .. image:: https://zenodo.org/badge/108329371.svg
-     :target: https://zenodo.org/badge/latestdoi/108329371
-     :alt: DOI
-
-* J. Duarte *et al.*\ , "Fast inference of deep neural networks in FPGAs for particle physics", `JINST 13 P07027 (2018) <https://dx.doi.org/10.1088/1748-0221/13/07/P07027>`_\ , `arXiv:1804.06913 <https://arxiv.org/abs/1804.06913>`_.
-
-If you are using the boosted decision tree implementation, please cite also:   
-
-
-* S. Summers *et al.*\ , "Fast inference of boosted decision trees in FPGAs for particle physics", `arXiv:2002.02534 <https://arxiv.org/abs/2002.02534>`_.
-  If you are using the binary or ternary neural network implementation, please also cite:
-* G. Di Guglielmo *et al.*\ , "Compressing deep neural networks on FPGAs to binary and ternary precision with hls4ml", `arXiv:2003.06308 <https://arxiv.org/abs/2003.06308>`_
-
-Additional Talks and Presentations
-==================================
-
-
-* eScience 2019: `talk <https://escience2019.sched.com/event/Uuiy/machine-learning-on-fpgas-for-low-latency-and-high-throughput-inference?iframe=yes&w=100%&sidebar=yes&bg=no#>`__
-* ACAT 2019: `talk <https://indico.cern.ch/event/708041/contributions/3269690/>`__
-* Zurich Hands-on Course: `course <https://indico.cern.ch/event/769727/>`__
-* TWEPP 2018: `talk <https://indico.cern.ch/event/697988/contributions/3055990/>`__
-* CHEP 2018: `talk <https://indico.cern.ch/event/587955/contributions/2937529/>`__
-* Connecting the Dots 2018: `talk <https://indico.cern.ch/event/658267/contributions/2813688/>`__
-* Fermilab Research Techniques Seminar: `talk <https://indico.fnal.gov/event/16908/>`__
-* CERN EP/IT Data Science Seminar: `talk <https://indico.cern.ch/event/721567/>`__ 
+=================================
+If you use this software in a publication, please cite the software
+
+..  code-block:: bibtex
+
+    @software{vloncar_2021_5680908,
+    author       = {{FastML Team}},
+    title        = {fastmachinelearning/hls4ml},
+    year         = 2021,
+    publisher    = {Zenodo},
+    doi          = {10.5281/zenodo.1201549},
+    url          = {https://github.com/fastmachinelearning/hls4ml}
+    }
+
+and first publication:
+
+..  code-block:: bibtex
+
+    @article{Duarte:2018ite,
+        author = "Duarte, Javier and others",
+        title = "{Fast inference of deep neural networks in FPGAs for particle physics}",
+        eprint = "1804.06913",
+        archivePrefix = "arXiv",
+        primaryClass = "physics.ins-det",
+        reportNumber = "FERMILAB-PUB-18-089-E",
+        doi = "10.1088/1748-0221/13/07/P07027",
+        journal = "JINST",
+        volume = "13",
+        number = "07",
+        pages = "P07027",
+        year = "2018"
+    }
+
+Additionally, if you use specific features developed in later papers, please cite those as well. For example, CNNs:
+
+..  code-block:: bibtex
+
+    @article{Aarrestad:2021zos,
+        author = "Aarrestad, Thea and others",
+        title = "{Fast convolutional neural networks on FPGAs with hls4ml}",
+        eprint = "2101.05108",
+        archivePrefix = "arXiv",
+        primaryClass = "cs.LG",
+        reportNumber = "FERMILAB-PUB-21-130-SCD",
+        doi = "10.1088/2632-2153/ac0ea1",
+        journal = "Mach. Learn. Sci. Tech.",
+        volume = "2",
+        number = "4",
+        pages = "045015",
+        year = "2021"
+    }
+    @article{Ghielmetti:2022ndm,
+        author = "Ghielmetti, Nicol\`{o} and others",
+        title = "{Real-time semantic segmentation on FPGAs for autonomous vehicles with hls4ml}",
+        eprint = "2205.07690",
+        archivePrefix = "arXiv",
+        primaryClass = "cs.CV",
+        reportNumber = "FERMILAB-PUB-22-435-PPD",
+        doi = "10.1088/2632-2153/ac9cb5",
+        journal ="Mach. Learn. Sci. Tech.",
+        year = "2022"
+    }
+
+binary/ternary networks:
+
+..  code-block:: bibtex
+
+    @article{Loncar:2020hqp,
+        author = "Ngadiuba, Jennifer and others",
+        title = "{Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML}",
+        eprint = "2003.06308",
+        archivePrefix = "arXiv",
+        primaryClass = "cs.LG",
+        reportNumber = "FERMILAB-PUB-20-167-PPD-SCD",
+        doi = "10.1088/2632-2153/aba042",
+        journal = "Mach. Learn. Sci. Tech.",
+        volume = "2",
+        pages = "015001",
+        year = "2021"
+    }
 
 Contributors
 ============
 
+Thanks to our contributors!
 
-* Vladimir Loncar, Jennifer Ngadiuba, Maurizio Pierini, Sioni Summers [CERN]
-* Javier Duarte [University of California San Diego]
-* Sergo Jindariani, Benjamin Kreis, Ryan Rivera, Nhan Tran [Fermilab]
-* Edward Kreinar [Hawkeye360]
-* Song Han, Philip Harris, Dylan Rankin [MIT]
-* Zhenbin Wu [University of Illinois at Chicago]
-* Mark Neubauer [University of Illinois Urbana-Champaign]
-* Shih-Chieh Hsu [University of Washington]
-* Giuseppe Di Guglielmo [Columbia University]
-* Duc Hoang [Rhodes College]
-* Noah Paladino [Rutgers University]
-
+..  contributors:: fastmachinelearning/hls4ml
+   :avatars:
+   :limit: 100
+   :order: DESC
diff --git a/docs/release_notes.rst b/docs/release_notes.rst
index 5db2480be9..b0cbe6c3d6 100644
--- a/docs/release_notes.rst
+++ b/docs/release_notes.rst
@@ -6,19 +6,134 @@ See `here <https://github.com/fastmachinelearning/hls4ml/releases>`__ for offici
 
 ----
 
+**v0.7.0 / TBD**
+
+What's changed:
+
+* GarNet and GarNetStack in config.py by @yiiyama in https://github.com/fastmachinelearning/hls4ml/pull/344
+* support ZeroPadding layers by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/480
+* New backend development framework by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/395
+* Register ``ApplyAlpha`` layer templates by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/499
+* Parsing extended by @nicologhielmetti in https://github.com/fastmachinelearning/hls4ml/pull/501
+* Remove intermediate casting in product by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/490
+* Add QKeras as a package dependency by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/511
+* Copy flows from config by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/510
+* VivadoAccelerator backend updates by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/508
+* Optimized look-up table by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/527
+* Upsampling2D test case by @ChiRuiChen in https://github.com/fastmachinelearning/hls4ml/pull/520
+* Support UpSampling1D by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/475
+* RNN support (part 1) by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/521
+* Quartus Custom Matrix Multiplication & Quantization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/523
+* Vivado-equivalent implementation of Softmax on Quartus by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/540
+* Ensure 2 bits for scale in po2 quantizers by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/531
+* Link update by @bkmgit in https://github.com/fastmachinelearning/hls4ml/pull/519
+* Fix removal of nodes ingested by multiple downstream nodes by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/544
+* Enable SeparableConv2d by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/547
+* Extension API by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/528
+* change string ReuseFactor to int by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/416
+* Make the size of bn scale and bias what they really are by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/532
+* Raise runtime error when a layer is named ``input`` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/482
+* fix insertion before a node with multiple inputs + support additional broadcasting by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/551
+* Pointwise conv1d/2d resource by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/471
+* Quartus Embedding Layer by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/548
+* Fix for QActivations passed as an argument by @AdrianAlan in https://github.com/fastmachinelearning/hls4ml/pull/553
+* Don't override precision directly in the QKeras optimizer by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/567
+* Remove the in/out size from top function by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/559
+* Transpose2d, Concatenate2d, and up to 3 Clones for io_stream by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/402
+* Remove io_serial as io_stream and add some more info in docs. by @Duchstf in https://github.com/fastmachinelearning/hls4ml/pull/334
+* Update docs for v0.6.0 by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/453
+* Use correct number of args for multiple outputs by @apfusco in https://github.com/fastmachinelearning/hls4ml/pull/487
+* Fixed a few typos in the documentation by @pitmonticone in https://github.com/fastmachinelearning/hls4ml/pull/467
+* returning integer from _compute_n_samples by @JochiSt in https://github.com/fastmachinelearning/hls4ml/pull/537
+* Providing support for Alveo boards by @selwyn96 in https://github.com/fastmachinelearning/hls4ml/pull/552
+* Make layer names case sensitive in config. by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/577
+* Add issue and PR templates by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/582
+* Vivado Backend GRU/LSTM support by @drankincms in https://github.com/fastmachinelearning/hls4ml/pull/560
+* Update CI template syntax by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/593
+* Update flow dependencies by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/588
+* Fix parsing of ZeroPadding layers by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/595
+* remove cppname by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/562
+* Remove email helpline from the docs by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/601
+* Fixes for GRU/LSTM in Vivado backend by @drankincms in https://github.com/fastmachinelearning/hls4ml/pull/598
+* Remove io_serial by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/609
+* Fix test_graph by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/611
+* Override parent backend optimizer passes with derived backend passes by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/597
+* Enforce function pipelining when using io_parallel with Resource strategy by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/605
+* FIFO depth optimization by @nicologhielmetti in https://github.com/fastmachinelearning/hls4ml/pull/509
+* Add tracing support for the quartus backend by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/583
+* Quartus streaming support for Activations, Dense & Batch Normalization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/557
+* QConv alpha != 1 bug fix by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/612
+* Quartus Stream Embedding by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/625
+* change master to main by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/602
+* Edit order of the optimizers in the flow so that BramFactor is followed by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/621
+* Softmax LUT Optimization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/570
+* Quartus Synthesis Flow Improvement by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/618
+* Quartus Extensions by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/628
+* Quartus GRU by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/596
+* Quartus Merge layers by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/634
+* fix nondefault project name handling by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/626
+* Fix parsing of logic synthesis reports by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/639
+* Fix conv1d stream implementation hls directives by @Jonathan-Shoemaker in https://github.com/fastmachinelearning/hls4ml/pull/635
+* Implementation and optimizations linked to Simple-RNN and LSTM for Quartus by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/575
+* Softsign optimization by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/585
+* Parallel CNNs, Pooling & Image Layers for Quartus Backend by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/561
+* Quartus Streaming Softsign (PR #585 contd.) by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/655
+* Remove final reshapes even for Quartus by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/661
+* Unrolled CNN implementation by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/600
+* the strategy was not propagated in the pytest by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/663
+* Fix keras model loading issue with loading model with KerasH5 by @calad0i in https://github.com/fastmachinelearning/hls4ml/pull/664
+* append applied_flows container before filling instead of after by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/641
+* set version using ``setuptools_scm`` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/479
+* Argmax Softmax by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/627
+* Fix version extraction in Sphinx config by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/669
+* Add requested citations to README by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/615
+* skip BatchNorm fusion when input/output is used multiple times by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/481
+* Use wider accum_t for (average) pooling by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/681
+* Quartus Streaming Conv, Pooling & Image layers by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/656
+* Create branch on PR by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/636
+* Delete ``example-prjs`` directory by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/682
+* Adiabatically turn on ``pre-commit`` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/678
+* Add causal padding by @cgutsche in https://github.com/fastmachinelearning/hls4ml/pull/688
+* Update ``pre-commit`` GitHub Action by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/689
+* New config_from_keras_model by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/690
+* remove obsolete np.int and np.float by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/703
+* Update p-clang-format to work on mac by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/704
+* Fix function call in Alveo tcl script by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/694
+* add readme for contrib by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/706
+* WIP Add custom KL loss layer HLS implementation by @katyagovorkova in https://github.com/fastmachinelearning/hls4ml/pull/606
+* Fix incorrectly linted build() command by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/709
+
+New contributors:
+
+* @nemerchiedde made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/527
+* @ChiRuiChen made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/520
+* @bo3z made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/523
+* @bkmgit made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/519
+* @apfusco made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/487
+* @pitmonticone made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/467
+* @JochiSt made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/537
+* @selwyn96 made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/552
+* @Jonathan-Shoemaker made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/635
+* @calad0i made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/664
+* @cgutsche made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/688
+
+**Full Changelog**: https://github.com/fastmachinelearning/hls4ml/compare/v0.6.0...v0.7.0
+
+----
+
 **v0.6.0 / coris**
 
 What's changed:
 
 * ``VivadoAccelerator`` backend: target ``pynq-z2`` and ``zcu102`` boards directly from hls4ml by @nicologhielmetti
-* Updated ``PyTorch`` and ``ONNX`` converters by @Duchstf 
-* ``line_buffer`` Conv2D implementation for ``io_stream``: reduced resource usage and latency by @Keb-L, @violatingcp, @vloncar 
-* Support ``QConv2DBatchnorm`` layer from ``QKeras`` by @nicologhielmetti 
-* Improved profiling plots - easier to compare original vs ``hls4ml`` converted models by @maksgraczyk 
-* Better derivation of data types for ``QKeras`` models by @jmduarte, @thesps 
+* Updated ``PyTorch`` and ``ONNX`` converters by @Duchstf
+* ``line_buffer`` Conv2D implementation for ``io_stream``: reduced resource usage and latency by @Keb-L, @violatingcp, @vloncar
+* Support ``QConv2DBatchnorm`` layer from ``QKeras`` by @nicologhielmetti
+* Improved profiling plots - easier to compare original vs ``hls4ml`` converted models by @maksgraczyk
+* Better derivation of data types for ``QKeras`` models by @jmduarte, @thesps
 * Improved CI by @thesps
-* More support for models with branches, skip connections, ``Merge`` and ``Concatenate`` layers by @jmduarte, @vloncar 
-* Support for ``Dense`` layers over multi-dimensional tensors by @vloncar 
+* More support for models with branches, skip connections, ``Merge`` and ``Concatenate`` layers by @jmduarte, @vloncar
+* Support for ``Dense`` layers over multi-dimensional tensors by @vloncar
 * Overall improvements by @vloncar, @jmduarte, @thesps, @jmitrevs & others
 
 New contributors:
@@ -133,8 +248,6 @@ Bugfixes:
 **v0.0.2**\ : first alpha release
 
 
-* full translation of DNNs from Keras 
+* full translation of DNNs from Keras
 * an example Conv1D exists
 * parallel mode is supported (serial mode, not yet)
-
-
diff --git a/docs/requirements.txt b/docs/requirements.txt
index e4295d37eb..8ff43b5e0b 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,12 +1,13 @@
-sphinx>=3.2.1
-sphinx_rtd_theme
-toposort>=1.5.0
-numpy
-six
-pyyaml
 h5py
+matplotlib
+numpy
 onnx>=1.4.0
 pandas
+pyyaml
 seaborn
-matplotlib
 setuptools_scm[toml]>=5
+six
+sphinx>=3.2.1
+sphinx_contributors
+sphinx_rtd_theme
+toposort>=1.5.0
diff --git a/docs/status.rst b/docs/status.rst
index e91b605add..b76d08584d 100644
--- a/docs/status.rst
+++ b/docs/status.rst
@@ -5,7 +5,7 @@ Status and Features
 Status
 ========
 
-The latest stable release is :doc:`v0.6.0 <release_notes>`. This release brings the new VivadoAccelerator backend to easily target boards like pynq-z2 and zcu102, with support for more boards like Alveo planned.
+The latest stable release is :doc:`v0.7.0 <release_notes>`.
 
 
 Features
@@ -13,15 +13,24 @@ Features
 
 A list of supported ML codes and architectures, including a summary table is below.  Dependencies are given in the :doc:`Setup <setup>` page.
 
-ML code support: 
+ML code support:
 
-* Keras/Tensorflow/QKeras, PyTorch, Onnx
+* Keras/Tensorflow/QKeras
+* PyTorch (limited)
+* (Q)ONNX (in development)
 
 Neural network architectures:
 
-* Fully Connected NNs (multi-layer perceptron)
-* Convolutional NNs (1D/2D)
-* Recurrent NN/LSTM, in prototyping
+* Fully connected NNs (multilayer perceptron, MLP)
+* Convolutional NNs (1D and 2D)
+* Recurrent NN (LSTM)
+* Graph NN (GarNet)
+
+HLS backends:
+
+* Vivado HLS
+* Vitis HLS (experimental)
+* Intel HLS
 
 A summary of the on-going status of the ``hls4ml`` tool is in the table below.
 
@@ -31,15 +40,15 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below.
    * - Architectures/Toolkits
      - Keras/TensorFlow/QKeras
      - PyTorch
-     - ONNX
+     - (Q)ONNX
    * - MLP
      - ``supported``
      - ``supported``
      - ``supported``
-   * - Conv1D/Conv2D
+   * - CNN
      - ``supported``
      - ``in development``
-     - ``in development`` 
+     - ``in development``
    * - RNN/LSTM
      - ``in development``
      - ``in development``
@@ -48,7 +57,7 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below.
 
 Other feature notes:
 
-* ``hls4ml`` is tested on Linux, and supports Vivado HLS versions 2018.2 to 2020.1. Vitis HLS is not yet supported. Windows and macOS are not supported.
+* ``hls4ml`` is tested on Linux, and supports Vivado HLS versions 2018.2 to 2020.1 and Intel HLS versions 20.1 to 21.4. Vitis HLS is experimentally supported in v0.7.0. Windows and macOS are not supported.
 
 * BDT support has moved to the `Conifer <https://github.com/thesps/conifer>`__ package
 
@@ -56,4 +65,3 @@ Example Models
 ==============
 
 We also provide and documented several example models that have been implemented in ``hls4ml`` in `this Github repository <https://github.com/fastmachinelearning/example-models>`_.
-
diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py
index b1b586f6c4..793a1d24be 100644
--- a/hls4ml/backends/vivado/vivado_backend.py
+++ b/hls4ml/backends/vivado/vivado_backend.py
@@ -189,13 +189,13 @@ def build(
         curr_dir = os.getcwd()
         os.chdir(model.config.get_output_dir())
         vivado_cmd = (
-            f'vivado_hls -f build_prj.tcl "reset={reset}'
-            f'csim={csim}'
-            f'synth={synth}'
-            f'cosim={cosim}'
-            f'validation={validation}'
-            f'export={export}'
-            f'vsynth={vsynth}'
+            f'vivado_hls -f build_prj.tcl "reset={reset} '
+            f'csim={csim} '
+            f'synth={synth} '
+            f'cosim={cosim} '
+            f'validation={validation} '
+            f'export={export} '
+            f'vsynth={vsynth} '
             f'fifo_opt={fifo_opt}"'
         )
         os.system(vivado_cmd)
diff --git a/setup.cfg b/setup.cfg
index bc6dbf643d..9ff049d343 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -8,7 +8,7 @@ author = hls4ml Team
 license = Apache-2.0
 license_file = LICENSE
 classifiers =
-    Development Status :: 3 - Alpha
+    Development Status :: 4 - Beta
     Intended Audience :: Developers
     Intended Audience :: Science/Research
     License :: OSI Approved :: Apache Software License
diff --git a/test/pytest/test_extensions.py b/test/pytest/test_extensions.py
index 1c8e07198a..e97a58d1f7 100644
--- a/test/pytest/test_extensions.py
+++ b/test/pytest/test_extensions.py
@@ -118,7 +118,7 @@ def format(self, node):
 
 
 @pytest.fixture(scope='session', autouse=True)
-def regsister_custom_layer():
+def register_custom_layer():
     # Register the converter for custom Keras layer
     hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer)