From 27ac3e1c2347a8684a7902bc5dfac4bc7dc043da Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Tue, 10 Jan 2023 11:35:17 -0800 Subject: [PATCH 01/12] Fix inference for jittered resize --- keras_cv/layers/preprocessing/jittered_resize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/jittered_resize.py b/keras_cv/layers/preprocessing/jittered_resize.py index 58c695428e..73d58d1a50 100644 --- a/keras_cv/layers/preprocessing/jittered_resize.py +++ b/keras_cv/layers/preprocessing/jittered_resize.py @@ -131,7 +131,7 @@ def call(self, inputs, training=True): else: inputs = self._ensure_inputs_are_compute_dtype(inputs) inputs, meta_data = self._format_inputs(inputs) - output = self._inference_resizing(inputs) + output = = self.inference_resizing(inputs) return self._format_output(output, meta_data) def get_random_transformation(self, image=None, **kwargs): From 3335a6f341d2a0a74f23dedf2665acbe3bfbda3a Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Tue, 10 Jan 2023 11:52:41 -0800 Subject: [PATCH 02/12] Fix jittered resize --- keras_cv/layers/preprocessing/jittered_resize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/jittered_resize.py b/keras_cv/layers/preprocessing/jittered_resize.py index 73d58d1a50..af00f8447f 100644 --- a/keras_cv/layers/preprocessing/jittered_resize.py +++ b/keras_cv/layers/preprocessing/jittered_resize.py @@ -131,7 +131,7 @@ def call(self, inputs, training=True): else: inputs = self._ensure_inputs_are_compute_dtype(inputs) inputs, meta_data = self._format_inputs(inputs) - output = = self.inference_resizing(inputs) + output = self.inference_resizing(inputs) return self._format_output(output, meta_data) def get_random_transformation(self, image=None, **kwargs): From 1432e2dff5b2a1bbc06ed49e9757b4506c871f54 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Tue, 10 Jan 2023 13:09:38 -0800 Subject: [PATCH 03/12] Inference test --- 
keras_cv/layers/preprocessing/jittered_resize_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/jittered_resize_test.py b/keras_cv/layers/preprocessing/jittered_resize_test.py index ef8c5eb448..828a35d36f 100644 --- a/keras_cv/layers/preprocessing/jittered_resize_test.py +++ b/keras_cv/layers/preprocessing/jittered_resize_test.py @@ -175,7 +175,7 @@ def test_augment_inference_mode(self): seed=self.seed, ) output = layer(input, training=False) - expected_output = layer._inference_resizing(output) + expected_output = layer.inference_resizing(output) self.assertAllClose( expected_output["bounding_boxes"]["boxes"], output["bounding_boxes"]["boxes"], From 801d0f5c449b82af6b94f661bd229c946499256d Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Tue, 10 Jan 2023 13:43:11 -0800 Subject: [PATCH 04/12] Inference test --- keras_cv/layers/preprocessing/jittered_resize.py | 2 +- keras_cv/layers/preprocessing/jittered_resize_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras_cv/layers/preprocessing/jittered_resize.py b/keras_cv/layers/preprocessing/jittered_resize.py index af00f8447f..58c695428e 100644 --- a/keras_cv/layers/preprocessing/jittered_resize.py +++ b/keras_cv/layers/preprocessing/jittered_resize.py @@ -131,7 +131,7 @@ def call(self, inputs, training=True): else: inputs = self._ensure_inputs_are_compute_dtype(inputs) inputs, meta_data = self._format_inputs(inputs) - output = self.inference_resizing(inputs) + output = self._inference_resizing(inputs) return self._format_output(output, meta_data) def get_random_transformation(self, image=None, **kwargs): diff --git a/keras_cv/layers/preprocessing/jittered_resize_test.py b/keras_cv/layers/preprocessing/jittered_resize_test.py index 828a35d36f..ef8c5eb448 100644 --- a/keras_cv/layers/preprocessing/jittered_resize_test.py +++ b/keras_cv/layers/preprocessing/jittered_resize_test.py @@ -175,7 +175,7 @@ def test_augment_inference_mode(self): 
seed=self.seed, ) output = layer(input, training=False) - expected_output = layer.inference_resizing(output) + expected_output = layer._inference_resizing(output) self.assertAllClose( expected_output["bounding_boxes"]["boxes"], output["bounding_boxes"]["boxes"], From 15a4578a7a3bc84f900c35aa084e5c62b0f1b426 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 10:32:54 -0800 Subject: [PATCH 05/12] README update --- README.md | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index c951a730d0..28beee1eb5 100644 --- a/README.md +++ b/README.md @@ -4,36 +4,28 @@ ![Tensorflow](https://img.shields.io/badge/tensorflow-v2.9.0+-success.svg) [![Contributions Welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg?style=flat)](https://github.com/keras-team/keras-cv/issues) -# Vision -A computer vision library dedicated for auto-driving, robotics and on device applications. # Mission -KerasCV is a layered repository consisting of core components and modeling components. +KerasCV is a computer vision library of modular computer vision oriented Keras components. +These components consist of models, layers, metrics, losses, callbacks, and utility functions. -On the core components, it is made of modular building blocks (ops, functions, layers, metrics, losses, callbacks) that standardizes APIs for computer vision concepts such as data-augmentation pipeline, bounding boxes, keypoints, point clouds, feature pyramid network, etc, so applied computer vision engineers can leverage to quickly assemble production-grade, state-of-the-art -training and inference pipelines for common tasks such as image classification, object detection and segmentation, image data augmentation, etc. +The goal of the library is to provide standardized Keras native APIs for common computer vision tasks such as data-augmentation, classification, object detection, image generation, and more. 
+Applied computer vision engineers can leverage KerasCV to quickly assemble production-grade, state-of-the-art training and inference pipelines for all of these common tasks. -On the modeling components, it provides the most widely used models for each task such as ResNet family, MobileNet family, transformer-based models, anchor-based and anchor-free meta architectures, unet models, that are built on top of core components, highly composable and compatible with the Keras trainer (`model.fit`). It aims to provide pre-built models that are mixed-precision compatible, QAT compatible, and xla compilable during training, and generic model optimization tools for deployment on devices such as onboard GPUs, mobile, edge chips. - -KerasCV provides the following values for users: -- modular mid-level APIs and composable meta architectures -- mixed-precision and xla enabled components -- highly optimized, quantization aware training (QAT) enabled models, compatible between GPUs and TPUs. -- reproducible training results and leaderboard -- useful tools for evaluation, visualization and explanation. -- source for inference conversion (TFLite, edge devices, TensorRT, etc) and optimization at model level. KerasCV can be understood as a horizontal extension of the Keras API: the components are new first-party Keras objects (layers, metrics, etc) that are too specialized to be added to core Keras, but that receive the same level of polish and backwards compatibility guarantees as the rest of the Keras API and that are maintained by the Keras team itself. +In addition to API consistency, KerasCV components are built to be mixed-precision compatible, QAT compatible, xla compilable, and TPU compatible. +In the near term, we aim to provide pre-trained models for common tasks such as on-device object detection and NSFW classification. +We also aim to provide generic model optimization tools for deployment on devices such as onboard GPUs, mobile, edge chips. 
+ KerasCV's primary goal is to provide a coherent, elegant, and pleasant API to train state of the art computer vision models. Users should be able to train state of the art models using only `Keras`, `KerasCV`, and TensorFlow core (i.e. `tf.data`) components. -Different from Keras IO, this product focus on meta architectures and training scripts to help users reproduce result from open datasets. - To learn more about the future project direction, please check the [roadmap](.github/ROADMAP.md). ## Quick Links From edac57f440723309b54609b09ebbff3e425fe865 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 10:44:48 -0800 Subject: [PATCH 06/12] README update --- README.md | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 28beee1eb5..15eb26bc9a 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ but also for active development for feature delivery. To achieve this, here is t process for how to contribute to this repository: 1) Contributors are always welcome to help us fix an issue, add tests, better documentation. -2) If contributors would like to create a backbone, we usually require a pre-trained weight +2) If contributors would like to create a backbone, we usually require a pre-trained weight set with the model for one dataset as the first PR, and a training script as a follow-up. The training script will preferrably help us reproduce the results claimed from paper. The backbone should be generic but the training script can contain paper specific parameters such as learning rate schedules and weight decays. The training script will be used to produce leaderboard results. Exceptions apply to large transformer-based models which are difficult to train. If this is the case, contributors should let us know so the team can help in training the model or providing GCP resources. @@ -59,14 +59,27 @@ Thank you to all of our wonderful contributors! 
## Pretrained Weights -Many models in KerasCV come with pre-trained weights. With the exception of StableDiffusion, -all of these weights are trained using Keras and KerasCV components and training scripts in this -repository. Models may not be trained with the same parameters or preprocessing pipeline -described in their original papers. Performance metrics for pre-trained weights can be found -in the training history for each task. For example, see ImageNet classification training -history for backbone models [here](examples/training/classification/imagenet/training_history.json). -All results are reproducible using the training scripts in this repository. Pre-trained weights -operate on images that have been rescaled using a simple `1/255` rescaling layer. +Many models in KerasCV come with pre-trained weights. +With the exception of StableDiffusion, all of these weights are trained using Keras and +KerasCV components and training scripts in this repository. +While some models are not be trained with the same parameters or preprocessing pipeline +as defined in their original publications, KerasCV still ensuresstrong performance. +Performance metrics for the provided pre-trained weights can be found +in the training history for each documented task. +An example of this can be found in the ImageNet classification training +[history for backbone models](examples/training/classification/imagenet/training_history.json). +All results are reproducible using the training scripts in this repository. + +Historically, many models have been trained on image datasets rescaled via manually +crafted normalization schemes. +The most common variant of manually crafted normalization scheme is subtraction of the +imagenet mean pixel followed by standard deviation normalization based on the imagenet +pixel standard deviation. 
+This scheme is an artifact of the days of manual feature engineering, but is no longer +required to achieve state of the art scores using modern deep learning architectures. +Due to this, KerasCV is standardized to operate on images that have been rescaled using +a simple `1/255` rescaling layer. +This can be seen in all KerasCV training pipelines and code examples. ## Custom Ops Note that in some the 3D Object Detection layers, custom TF ops are used. The @@ -77,8 +90,8 @@ If you'd like to use these custom ops, you can install from source using the instructions below. ### Installing KerasCV with Custom Ops from Source -Installing from source requires the [Bazel](https://bazel.build/) build system -(version >= 5.4.0). +Installing custom ops from source requires the [Bazel](https://bazel.build/) build +system (version >= 5.4.0). ``` git clone https://github.com/keras-team/keras-cv.git @@ -103,7 +116,8 @@ and Windows. KerasCV provides access to pre-trained models via the `keras_cv.models` API. These pre-trained models are provided on an "as is" basis, without warranties or conditions of any kind. -The following underlying models are provided by third parties, and subject to separate licenses: +The following underlying models are provided by third parties, and subject to separate +licenses: StableDiffusion ## Citing KerasCV From ea1ca8be5e538ebc9b9632279c848a9f4e704a66 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 11:03:13 -0800 Subject: [PATCH 07/12] README update --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 15eb26bc9a..aa2743de25 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Keras objects (layers, metrics, etc) that are too specialized to be added to cor the same level of polish and backwards compatibility guarantees as the rest of the Keras API and that are maintained by the Keras team itself. 
-In addition to API consistency, KerasCV components are built to be mixed-precision compatible, QAT compatible, xla compilable, and TPU compatible. +In addition to API consistency, KerasCV components aim to be mixed-precision compatible, QAT compatible, xla compilable, and TPU compatible. In the near term, we aim to provide pre-trained models for common tasks such as on-device object detection and NSFW classification. We also aim to provide generic model optimization tools for deployment on devices such as onboard GPUs, mobile, edge chips. From 98debde0e5eed7ed9bd20908acb0dad95c2b6c86 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 11:32:51 -0800 Subject: [PATCH 08/12] Ian comments --- README.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index aa2743de25..c42f195abe 100644 --- a/README.md +++ b/README.md @@ -7,24 +7,23 @@ # Mission -KerasCV is a computer vision library of modular computer vision oriented Keras components. -These components consist of models, layers, metrics, losses, callbacks, and utility functions. - -The goal of the library is to provide standardized Keras native APIs for common computer vision tasks such as data-augmentation, classification, object detection, image generation, and more. -Applied computer vision engineers can leverage KerasCV to quickly assemble production-grade, state-of-the-art training and inference pipelines for all of these common tasks. +KerasCV is a library of modular computer vision oriented Keras components. +These components include models, layers, metrics, losses, callbacks, and utility +functions. +KerasCV's primary goal is to provide a coherent, elegant, and pleasant API to train state of the art computer vision models. +Users should be able to train state of the art models using only `Keras`, `KerasCV`, and TensorFlow core (i.e. `tf.data`) components. 
KerasCV can be understood as a horizontal extension of the Keras API: the components are new first-party -Keras objects (layers, metrics, etc) that are too specialized to be added to core Keras, but that receive -the same level of polish and backwards compatibility guarantees as the rest of the Keras API and that -are maintained by the Keras team itself. +Keras objects (layers, metrics, etc) that are too specialized to be added to core Keras. They receive the same level of polish and backwards compatibility guarantees as the core Keras API, and they are maintained by the Keras team. -In addition to API consistency, KerasCV components aim to be mixed-precision compatible, QAT compatible, xla compilable, and TPU compatible. +Our APIs assist in common computer vision tasks such as data-augmentation, classification, object detection, image generation, and more. +Applied computer vision engineers can leverage KerasCV to quickly assemble production-grade, state-of-the-art training and inference pipelines for all of these common tasks. + +In addition to API consistency, KerasCV components aim to be mixed-precision compatible, QAT compatible, XLA compilable, and TPU compatible. In the near term, we aim to provide pre-trained models for common tasks such as on-device object detection and NSFW classification. -We also aim to provide generic model optimization tools for deployment on devices such as onboard GPUs, mobile, edge chips. +We also aim to provide generic model optimization tools for deployment on devices such as onboard GPUs, mobile, and edge chips. -KerasCV's primary goal is to provide a coherent, elegant, and pleasant API to train state of the art computer vision models. -Users should be able to train state of the art models using only `Keras`, `KerasCV`, and TensorFlow core (i.e. `tf.data`) components. To learn more about the future project direction, please check the [roadmap](.github/ROADMAP.md). 
@@ -60,10 +59,10 @@ Thank you to all of our wonderful contributors! ## Pretrained Weights Many models in KerasCV come with pre-trained weights. -With the exception of StableDiffusion, all of these weights are trained using Keras and +With the exception of StableDiffusion and the standard Vision Transformer, all of these weights are trained using Keras and KerasCV components and training scripts in this repository. -While some models are not be trained with the same parameters or preprocessing pipeline -as defined in their original publications, KerasCV still ensuresstrong performance. +While some models are not trained with the same parameters or preprocessing pipeline +as defined in their original publications, the KerasCV team ensures strong numerical performance. Performance metrics for the provided pre-trained weights can be found in the training history for each documented task. An example of this can be found in the ImageNet classification training @@ -116,7 +115,7 @@ and Windows. KerasCV provides access to pre-trained models via the `keras_cv.models` API. These pre-trained models are provided on an "as is" basis, without warranties or conditions of any kind. -The following underlying models are provided by third parties, and subject to separate +The following underlying models are provided by third parties, and are subject to separate licenses: StableDiffusion From 052b3a3d49f3c7c76eef93327087a753ac8d796f Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 13:07:10 -0800 Subject: [PATCH 09/12] Matt comment --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c42f195abe..fa2b16a02f 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ These pre-trained models are provided on an "as is" basis, without warranties or conditions of any kind. 
The following underlying models are provided by third parties, and are subject to separate licenses: -StableDiffusion +StableDiffusion, Vision Transformer ## Citing KerasCV From d064d7251d23eaef4baa29fc4a1f35ba9bf7a0f8 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 26 Jan 2023 13:09:38 -0800 Subject: [PATCH 10/12] Matt comment --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index fa2b16a02f..666d8266a3 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,6 @@ Our APIs assist in common computer vision tasks such as data-augmentation, class Applied computer vision engineers can leverage KerasCV to quickly assemble production-grade, state-of-the-art training and inference pipelines for all of these common tasks. In addition to API consistency, KerasCV components aim to be mixed-precision compatible, QAT compatible, XLA compilable, and TPU compatible. -In the near term, we aim to provide pre-trained models for common tasks such as on-device object detection and NSFW classification. We also aim to provide generic model optimization tools for deployment on devices such as onboard GPUs, mobile, and edge chips. From cd8bfa2c7440cba12bb46a703170b2acaac3a363 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Sat, 28 Jan 2023 15:19:52 -0800 Subject: [PATCH 11/12] Fix haifeng comment --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 666d8266a3..852f0f7e43 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ KerasCV's primary goal is to provide a coherent, elegant, and pleasant API to tr Users should be able to train state of the art models using only `Keras`, `KerasCV`, and TensorFlow core (i.e. `tf.data`) components. KerasCV can be understood as a horizontal extension of the Keras API: the components are new first-party -Keras objects (layers, metrics, etc) that are too specialized to be added to core Keras. 
They receive the same level of polish and backwards compatibility guarantees as the core Keras API, and they are maintained by the Keras team. +Keras objects (layers, metrics, etc.) that are too specialized to be added to core Keras. They receive the same level of polish and backwards compatibility guarantees as the core Keras API, and they are maintained by the Keras team. Our APIs assist in common computer vision tasks such as data-augmentation, classification, object detection, image generation, and more. Applied computer vision engineers can leverage KerasCV to quickly assemble production-grade, state-of-the-art training and inference pipelines for all of these common tasks. @@ -88,8 +88,9 @@ If you'd like to use these custom ops, you can install from source using the instructions below. ### Installing KerasCV with Custom Ops from Source + Installing custom ops from source requires the [Bazel](https://bazel.build/) build -system (version >= 5.4.0). +system (version >= 5.4.0). Steps to install Bazel can be [found here](https://github.com/keras-team/keras/blob/v2.11.0/.devcontainer/Dockerfile#L21-L23). 
``` git clone https://github.com/keras-team/keras-cv.git From 0c04fbe9e549d25a7e4756dfe82421474bdd23c1 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Sat, 28 Jan 2023 15:23:40 -0800 Subject: [PATCH 12/12] Fix Jonathan comment --- README.md | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/README.md b/README.md index 852f0f7e43..9e923f9386 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,61 @@ To learn more about the future project direction, please check the [roadmap](.gi - [Roadmap](.github/ROADMAP.md) - [API Design Guidelines](.github/API_DESIGN.md) +## Quickstart + +Create a preprocessing pipeline: + +```python +import keras_cv +import tensorflow as tf +from tensorflow import keras +import tensorflow_datasets as tfds + +augmenter = keras_cv.layers.Augmenter( + layers=[ + keras_cv.layers.RandomFlip(), + keras_cv.layers.RandAugment(value_range=(0, 255)), + keras_cv.layers.CutMix(), + keras_cv.layers.MixUp() + ] +) + +def augment_data(images, labels): + labels = tf.one_hot(labels, 3) + inputs = {"images": images, "labels": labels} + outputs = augmenter(inputs) + return outputs['images'], outputs['labels'] +``` + +Augment a `tf.data.Dataset`: + +```python +dataset = tfds.load('rock_paper_scissors', as_supervised=True, split='train') +dataset = dataset.batch(64) +dataset = dataset.map(augment_data, num_parallel_calls=tf.data.AUTOTUNE) +``` + +Create a model: + +```python +densenet = keras_cv.models.DenseNet121( + include_rescaling=True, + include_top=True, + classes=3 +) +densenet.compile( + loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy'] +) +``` + +Train your model: + +```python +densenet.fit(dataset) +``` + ## Contributors If you'd like to contribute, please see our [contributing guide](.github/CONTRIBUTING.md).