diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 0000000000..528f30c71c --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 0000000000..048c3d2f04 --- /dev/null +++ b/.dvc/config @@ -0,0 +1,5 @@ +[core] + remote = storage + autostage = true +['remote "storage"'] + url = s3://dvc-public/remote/dvc-org/blogs-media diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 0000000000..5197305523 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.husky/post-checkout b/.husky/post-checkout new file mode 100755 index 0000000000..5b3922fde0 --- /dev/null +++ b/.husky/post-checkout @@ -0,0 +1,2 @@ +#!/bin/sh +exec dvc checkout diff --git a/.husky/pre-push b/.husky/pre-push new file mode 100755 index 0000000000..7fb87365a7 --- /dev/null +++ b/.husky/pre-push @@ -0,0 +1,4 @@ +#!/bin/sh +if [[ `git --no-pager diff --name-only main '*.dvc'` ]]; then + exec dvc push +fi diff --git a/app.json b/app.json index 3fbb194c7b..4f95b5573f 100644 --- a/app.json +++ b/app.json @@ -1,6 +1,9 @@ { "addons": [], "buildpacks": [ + { + "url": "heroku/python" + }, { "url": "heroku/nodejs" } diff --git a/content/.gitignore b/content/.gitignore new file mode 100644 index 0000000000..30c72a71ba --- /dev/null +++ b/content/.gitignore @@ -0,0 +1 @@ +/uploads diff --git a/content/authors/0x2b3bfa0.md b/content/authors/0x2b3bfa0.md new file mode 100644 index 0000000000..2c86905437 --- /dev/null +++ b/content/authors/0x2b3bfa0.md @@ -0,0 +1,9 @@ +--- +name: Helio Machado +avatar: 0x2b3bfa0.jpg +links: + - https://github.com/0x2b3bfa0 +--- + +Evergreen polymath with a taste for security, open technologies and expressive +code. 
diff --git a/content/authors/aguschin.md b/content/authors/aguschin.md new file mode 100644 index 0000000000..a66dddc6f2 --- /dev/null +++ b/content/authors/aguschin.md @@ -0,0 +1,8 @@ +--- +name: Alexander Guschin +avatar: aguschin.jpeg +links: + - https://www.linkedin.com/in/1aguschin/ +--- + +Technical Product Engineer at [Iterative](https://iterative.ai/) diff --git a/content/authors/alex_kim.md b/content/authors/alex_kim.md new file mode 100644 index 0000000000..92b6103cb3 --- /dev/null +++ b/content/authors/alex_kim.md @@ -0,0 +1,12 @@ +--- +name: Alex Kim +avatar: alex_kim.png +links: + - https://www.linkedin.com/in/alex000kim/ + - https://github.com/alex000kim/ +--- + +Independent Consultant, MLOps Engineer, Open-Source Contributor, and Technical +Instructor. + +[alex000kim.com](https://alex000kim.com/) diff --git a/content/authors/batuhan_taskaya.md b/content/authors/batuhan_taskaya.md new file mode 100644 index 0000000000..4b4aed54fa --- /dev/null +++ b/content/authors/batuhan_taskaya.md @@ -0,0 +1,8 @@ +--- +name: Batuhan Taskaya +avatar: batuhan_taskaya.png +links: + - https://twitter.com/isidentical +--- + +Software Engineer at [DVC](https://dvc.org) diff --git a/content/authors/casper_dcl.md b/content/authors/casper_dcl.md new file mode 100644 index 0000000000..eb030e1a6a --- /dev/null +++ b/content/authors/casper_dcl.md @@ -0,0 +1,11 @@ +--- +name: Casper da Costa-Luis +avatar: casper_dcl.jpg +links: + - https://github.com/casperdcl +--- + +Computational Physicist; Python Software Foundation (PSF) voting member; GitHub +OS maintainers member. [tqdm](https://github.com/tqdm/tqdm) primary maintainer. + +Expertise in C++/CUDA/Python/Git/Docker. 
diff --git a/content/authors/daniel_kharitonov.md b/content/authors/daniel_kharitonov.md new file mode 100644 index 0000000000..2e864db956 --- /dev/null +++ b/content/authors/daniel_kharitonov.md @@ -0,0 +1,8 @@ +--- +name: Daniel Kharitonov +avatar: daniel_kharitonov.png +links: + - https://www.linkedin.com/in/danielkharitonov/ +--- + +Technical Product Manager at [DVC.ai](https://dvc.ai) diff --git a/content/authors/dave_berenbaum.md b/content/authors/dave_berenbaum.md new file mode 100644 index 0000000000..ba45d21db5 --- /dev/null +++ b/content/authors/dave_berenbaum.md @@ -0,0 +1,8 @@ +--- +name: Dave Berenbaum +avatar: dave_berenbaum.png +links: + - https://www.linkedin.com/in/david-berenbaum-20b6b424/ +--- + +Technical Product Manager at [Iterative](https://iterative.ai/) diff --git a/content/authors/david_g_ortega.md b/content/authors/david_g_ortega.md new file mode 100644 index 0000000000..12eaa2cab8 --- /dev/null +++ b/content/authors/david_g_ortega.md @@ -0,0 +1,8 @@ +--- +name: David G Ortega +avatar: david_g_ortega.png +links: + - https://github.com/DavidGOrtega +--- + +Founder; Techstars Alumni; Software Engineer; Machine Learning Engineer; diff --git a/content/authors/diglesia.md b/content/authors/diglesia.md new file mode 100644 index 0000000000..d4c4e059b1 --- /dev/null +++ b/content/authors/diglesia.md @@ -0,0 +1,8 @@ +--- +name: David de la Iglesia +avatar: diglesia.jpg +links: + - https://github.com/daavoo +--- + +Software Engineer at [Iterative](https://iterative.ai/) diff --git a/content/authors/dmitry_petrov.md b/content/authors/dmitry_petrov.md new file mode 100644 index 0000000000..ce7c219a33 --- /dev/null +++ b/content/authors/dmitry_petrov.md @@ -0,0 +1,11 @@ +--- +name: Dmitry Petrov +avatar: dmitry_petrov.png +links: + - https://twitter.com/fullstackml + - https://www.linkedin.com/in/dmitryleopetrov +--- + +Creator of [http://dvc.org](http://dvc.org) — Git for ML. Ex-Data Scientist +[@Microsoft](http://twitter.com/Microsoft). PhD in CS. 
Making jokes with a +serious face. diff --git a/content/authors/dom_miketa.md b/content/authors/dom_miketa.md new file mode 100644 index 0000000000..26d233449a --- /dev/null +++ b/content/authors/dom_miketa.md @@ -0,0 +1,9 @@ +--- +name: Dom Miketa +avatar: dom_miketa.jpeg +links: + - https://www.exscientia.ai + - https://www.linkedin.com/in/dom-miketa-1815b7198 +--- + +Senior AI research assistant at [Exscientia](https://www.exscientia.ai/). diff --git a/content/authors/elle_obrien.md b/content/authors/elle_obrien.md new file mode 100644 index 0000000000..c7be16a4e0 --- /dev/null +++ b/content/authors/elle_obrien.md @@ -0,0 +1,8 @@ +--- +name: Elle O'Brien +avatar: elle_obrien.jpg +links: + - https://twitter.com/DrElleOBrien +--- + +Data scientist at [http://dvc.org](http://dvc.org) diff --git a/content/authors/gema_parreno.md b/content/authors/gema_parreno.md new file mode 100644 index 0000000000..098ffee466 --- /dev/null +++ b/content/authors/gema_parreno.md @@ -0,0 +1,8 @@ +--- +name: Gema Parreno +avatar: gema_parreno.jpeg +links: + - https://github.com/SoyGema +--- + +Developer Advocate at [Iterative](https://iterative.ai/) diff --git a/content/authors/george_vyshnya.md b/content/authors/george_vyshnya.md new file mode 100644 index 0000000000..b1c92bd406 --- /dev/null +++ b/content/authors/george_vyshnya.md @@ -0,0 +1,10 @@ +--- +name: George Vyshnya +avatar: george_vyshnya.jpeg +links: + - https://www.linkedin.com/in/gvyshnya +--- + +Seasoned Data Scientist / Software Developer with blended experience in software +development, IT, DevOps, PM and C-level roles. 
CTO at +[http://sbc-group.pl](http://sbc-group.pl) diff --git a/content/authors/guro_bokum.md b/content/authors/guro_bokum.md new file mode 100644 index 0000000000..b641a9f3f1 --- /dev/null +++ b/content/authors/guro_bokum.md @@ -0,0 +1,8 @@ +--- +name: Guro Bokum +avatar: guro_bokum.jpg +links: + - https://www.linkedin.com/in/gurobokum/ +--- + +Senior Software Engineer at [Iterative](https://iterative.ai/) diff --git a/content/authors/jeny_defigueiredo.md b/content/authors/jeny_defigueiredo.md new file mode 100644 index 0000000000..dcf00954ec --- /dev/null +++ b/content/authors/jeny_defigueiredo.md @@ -0,0 +1,8 @@ +--- +name: Jeny De Figueiredo +avatar: jeny_defigueiredo.png +links: + - https://twitter.com/jendefig +--- + +Community Manager at [DVC](https://dvc.org) diff --git a/content/authors/jorge_orpinel.md b/content/authors/jorge_orpinel.md new file mode 100644 index 0000000000..95c8754b96 --- /dev/null +++ b/content/authors/jorge_orpinel.md @@ -0,0 +1,8 @@ +--- +name: Jorge Orpinel Pérez +avatar: jorge.jpg +links: + - https://www.linkedin.com/in/jorgeorpinel +--- + +Technical writer and developer at [dvc.org](http://dvc.org/) diff --git a/content/authors/luis_yanes.md b/content/authors/luis_yanes.md new file mode 100644 index 0000000000..8727a47457 --- /dev/null +++ b/content/authors/luis_yanes.md @@ -0,0 +1,9 @@ +--- +name: Luis Yanes +avatar: luis_yanes.png +links: + - https://www.exscientia.ai + - https://www.linkedin.com/in/ljyanesm +--- + +Senior software engineer at [Exscientia](https://www.exscientia.ai/). 
diff --git a/content/authors/marcel_rd.md b/content/authors/marcel_rd.md new file mode 100644 index 0000000000..66ddada930 --- /dev/null +++ b/content/authors/marcel_rd.md @@ -0,0 +1,16 @@ +--- +name: Marcel Ribeiro-Dantas +avatar: marcel.jpg +links: + - https://twitter.com/mribeirodantas +--- + +Early Stage Researcher at [Institut Curie](https://institut-curie.org/) with +over 10 years of experience in the field of biomedical engineering and health +informatics. Areas of interest include Causal Inference, Artificial +Intelligence, and Data Science. Degrees in Computer and Automation Engineering +(Eng), Big Data (Grad degree), and Bioinformatics (MSc). Currently enrolled in a +Ph.D. at EDITE (Sorbonne Université). + +Twitter: [@mribeirodantas](https://twitter.com/mribeirodantas) Website: +[mribeirodantas.me](http://mribeirodantas.me) diff --git a/content/authors/maria_khalusova.md b/content/authors/maria_khalusova.md new file mode 100644 index 0000000000..910356a225 --- /dev/null +++ b/content/authors/maria_khalusova.md @@ -0,0 +1,8 @@ +--- +name: Maria Khalusova +avatar: maria_khalusova.jpg +links: + - https://twitter.com/mariaKhalusova +--- + +Senior Developer Advocate at [Iterative](https://iterative.ai) diff --git a/content/authors/marija_ilic.md b/content/authors/marija_ilic.md new file mode 100644 index 0000000000..fe23e8ea70 --- /dev/null +++ b/content/authors/marija_ilic.md @@ -0,0 +1,8 @@ +--- +name: Marija Ilić +avatar: marija_ilic.png +links: + - https://www.linkedin.com/in/marija-ili%C4%87-65b8a53 +--- + +Data scientist at Njuškalo, Croatia. 
diff --git a/content/authors/maxim_shmakov.md b/content/authors/maxim_shmakov.md new file mode 100644 index 0000000000..48021520ff --- /dev/null +++ b/content/authors/maxim_shmakov.md @@ -0,0 +1,8 @@ +--- +name: Maxim Shmakov +avatar: maxim-shmakov.jpg +links: + - https://github.com/mvshmakov +--- + +Front End Developer at [Iterative](https://iterative.ai) diff --git a/content/authors/mert_bozkir.md b/content/authors/mert_bozkir.md new file mode 100644 index 0000000000..30da6b4cac --- /dev/null +++ b/content/authors/mert_bozkir.md @@ -0,0 +1,8 @@ +--- +name: Mert Bozkir +avatar: mert_bozkir.jpg +links: + - https://www.linkedin.com/in/mertbozkir/ +--- + +Community Coordinator at [Iterative](https://iterative.ai/) diff --git a/content/authors/mike0sv.md b/content/authors/mike0sv.md new file mode 100644 index 0000000000..d7ea92986b --- /dev/null +++ b/content/authors/mike0sv.md @@ -0,0 +1,8 @@ +--- +name: Mike Sveshnikov +avatar: mike0sv.jpeg +links: + - https://www.linkedin.com/in/mike0sv/ +--- + +MLEM Project Team Lead at [Iterative](https://iterative.ai/) diff --git a/content/authors/mikhail_rozhkov.md b/content/authors/mikhail_rozhkov.md new file mode 100644 index 0000000000..83ea329970 --- /dev/null +++ b/content/authors/mikhail_rozhkov.md @@ -0,0 +1,8 @@ +--- +name: Mikhail Rozhkov +avatar: mikhail_rozhkov.jpeg +links: + - https://www.linkedin.com/in/mnrozhkov/ +--- + +Solutions Engineer at [Iterative](https://iterative.ai/) diff --git a/content/authors/milecia_mcgregor.md b/content/authors/milecia_mcgregor.md new file mode 100644 index 0000000000..15b9762faf --- /dev/null +++ b/content/authors/milecia_mcgregor.md @@ -0,0 +1,8 @@ +--- +name: Milecia McGregor +avatar: milecia_mcgregor.jpg +links: + - https://twitter.com/flippedcoding +--- + +Developer Advocate at [DVC](https://dvc.org) diff --git a/content/authors/peter_rowlands.md b/content/authors/peter_rowlands.md new file mode 100644 index 0000000000..7e71f62fd9 --- /dev/null +++ 
b/content/authors/peter_rowlands.md @@ -0,0 +1,9 @@ +--- +name: Peter Rowlands +avatar: peter_rowlands.jpg +links: + - https://github.com/pmrowla + - https://www.linkedin.com/in/pmrowla +--- + +Software engineer at [dvc.org](https://dvc.org/) diff --git a/content/authors/peter_zikan.md b/content/authors/peter_zikan.md new file mode 100644 index 0000000000..5b4519bd24 --- /dev/null +++ b/content/authors/peter_zikan.md @@ -0,0 +1,8 @@ +--- +name: Petr Zikán +avatar: peter-zikan.jpeg +links: + - https://www.linkedin.com/in/petr-zik%C3%A1n-88054815a/ +--- + +CTO at [PlasmaSolve](https://plasmasolve.com) diff --git a/content/authors/rob_dewit.md b/content/authors/rob_dewit.md new file mode 100644 index 0000000000..ac386c7a6b --- /dev/null +++ b/content/authors/rob_dewit.md @@ -0,0 +1,8 @@ +--- +name: Rob de Wit +avatar: rob_dewit.jpg +links: + - https://www.linkedin.com/in/rcdewit/ +--- + +Developer Advocate at [Iterative](https://iterative.ai) diff --git a/content/authors/ryan.md b/content/authors/ryan.md new file mode 100644 index 0000000000..69532151c1 --- /dev/null +++ b/content/authors/ryan.md @@ -0,0 +1,8 @@ +--- +name: Ryan Turner +avatar: ryan.jpg +links: + - https://twitter.com/otterkoala +--- + +ML Solutions Engineer at [DVC](https://dvc.org) diff --git a/content/authors/svetlana_grinchenko.md b/content/authors/svetlana_grinchenko.md new file mode 100644 index 0000000000..e831a6f66a --- /dev/null +++ b/content/authors/svetlana_grinchenko.md @@ -0,0 +1,8 @@ +--- +name: Svetlana Grinchenko +avatar: svetlana_grinchenko.jpeg +links: + - https://twitter.com/a142hr +--- + +Head of developer relations at [http://dvc.org](http://dvc.org) diff --git a/content/authors/tapa_dipti_sitaula.md b/content/authors/tapa_dipti_sitaula.md new file mode 100644 index 0000000000..a991447775 --- /dev/null +++ b/content/authors/tapa_dipti_sitaula.md @@ -0,0 +1,8 @@ +--- +name: Tapa Dipti Sitaula +avatar: tapa_dipti_sitaula.png +links: + - 
https://www.linkedin.com/in/tapa-dipti-sitaula/ +--- + +Sr Product Engineer at [Iterative](https://iterative.ai/) diff --git a/content/authors/tibor_mach.md b/content/authors/tibor_mach.md new file mode 100644 index 0000000000..addf68ec4c --- /dev/null +++ b/content/authors/tibor_mach.md @@ -0,0 +1,9 @@ +--- +name: Tibor Mach +avatar: tibor_mach.jpeg +links: + - https://github.com/tibor-mach + - https://www.linkedin.com/in/tibor-mach/ +--- + +ML/MLOps engineer, ex data science architect at Atos, PhD in maths, drummer diff --git a/content/blogs/2017-05-15-how-data-scientists-can-improve-their-productivity.md b/content/blogs/2017-05-15-how-data-scientists-can-improve-their-productivity.md new file mode 100644 index 0000000000..720bec196d --- /dev/null +++ b/content/blogs/2017-05-15-how-data-scientists-can-improve-their-productivity.md @@ -0,0 +1,169 @@ +--- +title: How Data Scientists Can Improve Their Productivity +date: 2017-05-15 +description: > + Data science and machine learning are iterative processes. It is never + possible to successfully complete a data science project in a single pass. +descriptionLong: > + The iteration time is a critical parameter in data science process. The + quicker you iterate, the more you can check ideas and build a better model. + The productivity of data scientists can be improved by speeding up iteration + processes and the DVC tool takes care of this. +picture: 2017-05-15/post-image.jpg +author: dmitry_petrov +commentsUrl: https://discuss.dvc.org/t/how-a-data-scientist-can-improve-their-productivity/301 +tags: + - Productivity + - Python + - Tutorial +--- + +Data science and machine learning are iterative processes. It is never possible +to successfully complete a data science project in a single pass. A data +scientist constantly tries new ideas and changes steps of her pipeline: + +1. extract new features and accidentally find noise in the data; + +2. clean up the noise, find one more promising feature; + +3. 
extract the new feature; + +4. rebuild and validate the model, realize that the learning algorithm + parameters are not perfect for the new feature set; + +5. change machine learning algorithm parameters and retrain the model; + +6. find the ineffective feature subset and remove it from the feature set; + +7. try a few more new features; + +8. try another ML algorithm. And then a data format change is required. + +This is only a small episode in a data scientist’s daily life and it is what +makes our job different from a regular engineering job. + +Business context, ML algorithm knowledge and intuition all help you to find a +good model faster. But you never know for sure what ideas will bring you the +best value. + +This is why the iteration time is a critical parameter in data science process. +The quicker you iterate, the more you can check ideas and build a better model. + +> “A well-engineered pipeline gets data scientists iterating much faster, which +> can be a big competitive edge” From +> [Engineering Practices in Data Science](http://blog.untrod.com/2012/10/engineering-practices-in-data-science.html) +> By Chris Clark. + +## A data science iteration tool + +To speed up the iterations in data science projects we have created an open +source tool [data version control](http://dvc.org) or [DVC.org](http://dvc.org). + +DVC takes care of dependencies between commands that you run, generated data +files, and code files and allows you to easily reproduce any steps of your +research with regards to files changes. + +You can think about DVC as a Makefile for a data science project even though you +do not create a file explicitly. DVC tracks dependencies in your data science +projects when you run data processing or modeling code through a special +command: + +```dvc +$ dvc run python code/xml_to_tsv.py \ + data/Posts.xml data/Posts.tsv +``` + +`dvc run` works as a proxy for your commands. 
This allows DVC to track input and +output files, construct the dependency graph +([DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph)), and store the +command and parameters for a future command reproduction. + +The previous command will be automatically piped with the next command because +the file `data/Posts.tsv` is an output for the previous command and the input +for the next one: + +```dvc +# Split training and testing dataset. Two output files. +# 0.33 is the test dataset splitting ratio. +# 20170426 is a seed for randomization. +$ dvc run python code/split_train_test.py \ + data/Posts.tsv 0.33 20170426 \ + data/Posts-train.tsv data/Posts-test.tsv +``` + +DVC derives the dependencies automatically by looking at the list of the +parameters (even if your code ignores the parameters) and noting the file +changes before and after running the command. + +If you change one of your dependencies (data or code) then all the affected +steps of the pipeline will be reproduced: + +```dvc +# Change the data preparation code. +$ vi code/xml_to_tsv.py + +# Reproduce. +$ dvc repro data/Posts-train.tsv +Reproducing run command for data item data/Posts.tsv. +Reproducing run command for data item data/Posts-train.tsv. +``` + +This is a powerful way of quickly iterating through your pipeline. + +The pipeline might have a lot of steps and forms of acyclic dependencies between +the steps. Below is an example of a canonical machine learning pipeline (more +details in [the DVC tutorials](https://dvc.org/doc/tutorials)): + +`gist:dmpetrov/7704a5156bdc32c7379580a61e2fe3b6#dvc_pipeline.sh` + +## Why are regular pipeline tools not enough? + +> “Workflows are expected to be mostly static or slowly changing.” (See +> [Airflow](https://airflow.incubator.apache.org/).) + +Regular pipeline tools like [Airflow](http://airflow.incubator.apache.org) and +[Luigi](https://github.com/spotify/luigi) are good for representing static and +fault tolerant workflows. 
A huge portion of their functionality is created for +monitoring, optimization and fault tolerance. These are very important and +business critical problems. However, these problems are irrelevant to data +scientists’ daily lives. + +Data scientists need a lightweight, dynamic workflow management system. In +contrast to the traditional airflow-like system, DVC reflects the process of +researching and looking for a great model (and pipeline), not optimizing and +monitoring an existing one. This is why DVC is a good fit for iterative machine +learning processes. When a good model was discovered with DVC, the result could +be incorporated into a data engineering pipeline (Luigi or Airflow). + +## Pipelines and data sharing + +In addition to pipeline description, data reproduction and dynamic nature, DVC +has one more important feature. It was designed in accordance with the best +software engineering practices. DVC is based on Git. It keeps code, and stores +DAG in the Git repository which allows you to share your research results. But +it moves the actual file content outside the Git repository (in `.cache` +directory which DVC includes in `.gitignore`) since Git is not designed to +accommodate large data files. + +The data files can be shared between data scientists through cloud storages +using a simple command: + +```dvc +# Data scientists 1 syncs data to the cloud. +$ dvc sync data/ +``` + +![](../uploads/images/2017-05-15/git-server-or-github.jpeg) + +Currently, AWS S3 and GCP storage are supported by DVC. + +## Conclusion + +The productivity of data scientists can be improved by speeding up iteration +processes and the DVC tool takes care of this. + +We are very interested in your opinion and feedback. Please post your comments +here or contact us on Twitter — [FullStackML](https://twitter.com/FullStackML). + +If you found this tool useful, **please “star” the +[DVC Github repository](https://github.com/iterative/dvc)**. 
diff --git a/content/blogs/2017-07-24-r-code-and-reproducible-model-development-with-dvc.md b/content/blogs/2017-07-24-r-code-and-reproducible-model-development-with-dvc.md new file mode 100644 index 0000000000..fd40fcfad6 --- /dev/null +++ b/content/blogs/2017-07-24-r-code-and-reproducible-model-development-with-dvc.md @@ -0,0 +1,225 @@ +--- +title: R code and reproducible model development with DVC +date: 2017-07-24 +description: > + There are a lot of examples on how to use Data Version Control (DVC) with a + Python project. In this document I would like to see how it can be used with a + project in R. +descriptionLong: > + In this document we will briefly explore possibilities of a new open source + tool that could help with achieving code simplicity, readability and faster + model development. + + There are a lot of examples on how to use Data Version Control (DVC) with a + Python project. In this document I would like to see how it can be used with a + project in R. +picture: 2017-07-24/post-image.png +pictureComment: DAG on R example +author: marija_ilic +commentsUrl: https://discuss.dvc.org/t/r-code-and-reproducible-model-development-with-dvc/298 +tags: + - RStats + - R + - Tutorial +--- + +[DVC](https://dvc.org) or Data Version Control tool — its idea is to track +files/data dependencies during model development in order to facilitate +reproducibility and track data files versioning. Most of the +[DVC tutorials](https://dvc.org/doc/tutorials) provide good examples of using +DVC with Python language. However, I realized that DVC is a +[language agnostic](https://en.wikipedia.org/wiki/Language-agnostic) tool and +can be used with any programming language. In this blog post, we will see how to +use DVC in R projects. 
+ +## R coding — keep it simple and readable + +Each development is always a combination of following steps presented below: + +![Model development process](../uploads/images/2017-07-24/development-steps.png) +_Model development process_ + +Because of the specificity of the process — iterative development, it is very +important to improve some coding and organizational skills. For example, instead +of having one big R file with code it is better to split code in several logical +files — each responsible for one small piece of work. It is smart to track +history development with +[git](https://git-scm.com/book/en/v2/Getting-Started-About-Version-Control) +tool. Writing “_reusable code”_ is nice skill to have. Put comments in a code +can make our life easier. + +Beside git, next step in further improvements is to try out and work with DVC. +Every time when a change/commit in some of the codes and data sets is made, DVC +will reproduce new results with just one bash command on a linux (or Win +environment). It memorizes dependencies among files and codes so it can easily +repeat all necessary steps/codes instead of us worrying about the order. + +## R example — data and code clarification + +We’ll take an Python example from +[DVC tutorial](https://dvc.org/doc/tutorials/deep) (written by Dmitry Petrov) +and rewrite that code in R. With an example we’ll show how can DVC help during +development and what are its possibilities. + +Firstly, let’s initialize git and dvc on mentioned example and run our codes for +the first time. After that we will simulate some changes in the codes and see +how DVC works on reproducibility. + +R codes can be downloaded from the +[Github repository](https://github.com/Zoldin/R_AND_DVC). A brief explanation of +the codes is presented below: + +**parsingxml.R** — it takes xml that we downloaded from the web and creates +appropriate csv file. 
+ +`gist:Zoldin/47536af63182a0e8daf37a7b989e2e8d#parsingxml.R` + +**train_test_spliting.R** — stratified sampling by target variable (here we are +creating test and train data set) + +`gist:Zoldin/7591c47ce5988cbe087e0038c9a850b9#train_test_splitting.R` + +**featurization.R** — text mining and tf-idf matrix creation. In this part we +are creating predictive variables. + +`gist:Zoldin/9e79c047fd8ad7aa6596b0682aca83c6#featurization.R` + +**train_model.R** — with created variables we are building logistic regression +(LASSO). + +`gist:Zoldin/1617b39f2acbde3cd486616ac442e7cf#train_model.R` + +**evaluate.R** — with trained model we are predicting target on test data set. +AUC is final output which is used as evaluation metric. + +`gist:Zoldin/bfc2d4ee449098a9ff64b99c3326e61d#evaluate.r` + +Firstly, codes from above we will download into the new folder (clone the +repository): + +```dvc +$ mkdir R_DVC_GITHUB_CODE +$ cd R_DVC_GITHUB_CODE + +$ git clone https://github.com/Zoldin/R_AND_DVC +``` + +## DVC installation and initialization + +On the first site it seemed that DVC will not be compatible to work with R +because of the fact that DVC is written in Python and as that needs/requires +Python packages and pip package manager. Nevertheless, the tool can be used with +any programming language, it is language agnostic and as such is excellent for +working with R. + +Dvc installation: + +```dvc +$ pip3 install dvc +$ dvc init +``` + +With code below 5 R scripts with `dvc run` are executed. Each script is started +with some arguments — input and output file names and other parameters (seed, +splitting ratio etc). It is important to use `dvc run` — with this command R +script are entering pipeline (DAG graph). + +```dvc +$ dvc import https://s3-us-west-2.amazonaws.com/dvc-public/data/tutorial/nlp/25K/Posts.xml.zip \ + data/ + +# Extract XML from the archive. +$ dvc run tar zxf data/Posts.xml.tgz -C data/ + +# Prepare data. 
+$ dvc run Rscript code/parsingxml.R \ + data/Posts.xml \ + data/Posts.csv + +# Split training and testing dataset. Two output files. +# 0.33 is the test dataset splitting ratio. +# 20170426 is a seed for randomization. +$ dvc run Rscript code/train_test_spliting.R \ + data/Posts.csv 0.33 20170426 \ + data/train_post.csv \ + data/test_post.csv + +# Extract features from text data. +# Two TSV inputs and two pickle matrices outputs. +$ dvc run Rscript code/featurization.R \ + data/train_post.csv \ + data/test_post.csv \ + data/matrix_train.txt \ + data/matrix_test.txt + +# Train ML model out of the training dataset. +# 20170426 is another seed value. +$ dvc run Rscript code/train_model.R \ + data/matrix_train.txt 20170426 \ + data/glmnet.Rdata + +# Evaluate the model by the testing dataset. +$ dvc run Rscript code/evaluate.R \ + data/glmnet.Rdata \ + data/matrix_test.txt \ + data/evaluation.txt + +# The result. +$ cat data/evaluation.txt +``` + +## Dependency flow graph on R example + +Dependency graph is shown on picture below: + +![Dependency graph](../uploads/images/2017-07-24/dependency-graph.png)_Dependency +graph_ + +DVC memorizes this dependencies and helps us in each moment to reproduce +results. + +For example, lets say that we are changing our training model — using ridge +penalty instead of lasso penalty (changing alpha parameter to `0`). In that case +will change/modify `train_model.R` job and if we want to repeat model +development with this algorithm we don’t need to repeat all steps from above, +only steps marked red on a picture below: + +![](../uploads/images/2017-07-24/marked-steps.png) + +DVC knows based on DAG graph that changed `train_model.R` file will only change +following files: `Glmnet.RData` and `Evaluation.txt`. If we want to see our new +results we need to execute only `train_model.R` and `evaluate.R job`. It is cool +that we don’t have to think all the time what we need to repeat (which steps). 
+`dvc repro` command will do that instead of us. Here is a code example : + +```dvc +$ vi train_model.R +$ git commit -am "Ridge penalty instead of lasso" +$ dvc repro data/evaluation.txt + +Reproducing run command for data item data/glmnet.Rdata. Args: Rscript code/train_model.R data/matrix_train.txt 20170426 data/glmnet.Rdata +Reproducing run command for data item data/evaluation.txt. Args: Rscript code/evaluate.R data/glmnet.Rdata data/matrix_test.txt data/evaluation.txt + +$ cat data/evaluation.txt +"AUC for the test file is : 0.947697381983095" +``` + +`dvc repro` always re executes steps which are affected with the latest +developer changes. It knows what needs to be reproduced. + +DVC can also work in an _"multi-user environment”_ . Pipelines (dependency +graphs) are visible to others colleagues if we are working in a team and using +git as our version control tool. Data files can be shared if we set up a cloud +and with _dvc sync_ we specify which data can be shared and used for other +users. In that case other users can see the shared data and reproduce results +with those data and their code changes. + +## Summary + +DVC tool improves and accelerates iterative development and helps to keep track +of ML processes and file dependencies in the simple form. On the R example we +saw how DVC memorizes dependency graph and based on that graph re executes only +jobs that are related to the latest changes. It can also work in multi-user +environment where dependency graphs, codes and data can be shared among multiple +users. Because it is language agnostic, DVC allows us to work with multiple +programming languages within a single data science project. 
diff --git a/content/blogs/2017-07-27-data-version-control-in-analytics-devops-paradigm.md b/content/blogs/2017-07-27-data-version-control-in-analytics-devops-paradigm.md new file mode 100644 index 0000000000..b74569e551 --- /dev/null +++ b/content/blogs/2017-07-27-data-version-control-in-analytics-devops-paradigm.md @@ -0,0 +1,191 @@ +--- +title: Data Version Control in Analytics DevOps Paradigm +date: 2017-07-27 +description: > + Why DevOps matters in data science, what specific challenges data scientists + face in the day to day work, and how do we setup a better environment for the + team. +descriptionLong: > + The eternal dream of almost every Data Scientist today is to spend all the + time exploring new datasets, engineering new features, inventing and + validating cool new algorithms and strategies. However, daily routines of a + Data Scientist include raw data pre-processing, dealing with infrastructure, + bringing models to production. That's where good DevOps practices and skills + are essential and will certainly be beneficial for industrial Data Scientists + as they can address the above-mentioned challenges in a self-service manner. +picture: 2017-07-27/post-image.jpeg +author: george_vyshnya +commentsUrl: https://discuss.dvc.org/t/data-version-control-in-analytics-devops-paradigm/297 +tags: + - DevOps + - Company +--- + +## Data Science and DevOps Convergence + +The primary mission of DevOps is to help the teams to resolve various Tech Ops +infrastructure, tools and pipeline issues. + +At the other hand, as mentioned in the conceptual review by +[Forbes](https://www.forbes.com/sites/teradata/2016/11/14/devops-for-data-science-why-analytics-ops-is-key-to-value/) +in November 2016, the industrial analytics is no more going to be driven by data +scientists alone. It requires an investment in DevOps skills, practices and +supporting technology to move analytics out of the lab and into the business. 
+There are even +[voices](https://www.computing.co.uk/ctg/news/2433095/a-lot-of-companies-will-stop-hiring-data-scientists-when-they-realise-that-the-majority-bring-no-value-says-data-scientist) +calling Data Scientists to concentrate on agile methodology and DevOps if they +like to retain their jobs in business in the long run. + +## Why DevOps Matters + +The eternal dream of almost every Data Scientist today is to spend all (well, +almost all) the time in the office exploring new datasets, engineering decisive +new features, inventing and validating cool new algorithms and strategies. +However, reality is often different. One of the unfortunate daily routines of a +Data Scientist work is to do raw data pre-processing. It usually translates to +the challenges to + +1. **Pull all kinds of necessary data from a variety of sources** + + - Internal data sources like ERP, CRM, POS systems, or data from online + e-commerce platforms + + - External data, like weather, public holidays, Google trends etc. + +2. **Extract, transform, and load the data** + + - Relate and join the data sources + + - Aggregate and transform the data + +3. **Avoid technical and performance drawbacks** when everything ends up in + “one big table” at the end + +4. **Facilitate continuous machine learning and decision-making in a + business-ready framework** + + - Utilize historic data to train the machine learning models and algorithms + + - Use the current, up-to-date data for decision-making + + - Export back the resulting decisions/recommendations to review by business + stakeholders, either back into the ERP system or some other data warehouse + +Another big challenge is to organize **collaboration and data/model sharing** +inside and across the boundaries of teams of Data Scientists and Software +Engineers. 
+ +DevOps skills as well as effective instruments will certainly be beneficial for +industrial Data Scientists as they can address the above-mentioned challenges in +a self-service manner. + +## Can DVC Be a Solution? + +[Data Version Control](https://dvc.org) or simply DVC comes to the scene +whenever you start looking for effective DevOps-for-Analytics instruments. + +DVC is an open source tool for data science projects. It makes your data science +projects reproducible by automatically building data dependency graph (DAG). +Your code and the dependencies could be easily shared by Git, and data — through +cloud storage (AWS S3, GCP) in a single DVC environment. + +> Although DVC was created for machine learning developers and data scientists +> [originally](https://dvc.org/doc/understanding-dvc/what-is-dvc), it appeared +> to be useful beyond it. Since it brings proven engineering practices to not +> well defined ML process, I discovered it to have enormous potential as an +> Analytical DevOps instrument. + +It clearly helps to manage a big fraction of DevOps issues in daily Data +Scientist routines + +1. **Pull all kinds of necessary data from a variety of sources**. Once you + configure and script your data extraction jobs with DVC, it will be + persistent and operable across your data and service infrastructure + +2. **Extract, transform, and load the data**. ETL is going to be easy and + repeatable once you configure it with DVC scripting. It will become a solid + pipeline to operate without major supportive effort. Moreover, it will track + all changes and trigger an alert for updates in the pipeline steps via DAG. + +3. **Facilitate continuous machine learning and decision-making.** The part of + the pipeline facilitated through DVC scripting can be jobs to upload data + back to any transactional system (like ERP, ERM, CRM etc.), warehouse or data + mart. It will then be exposed to business stakeholders to make intelligent + data-driven decisions. + +4. 
**Share your algorithms and data**. Machine Learning modeling is an iterative + process and it is extremely important to keep track of your steps, + dependencies between the steps, dependencies between your code and data files + and all code running arguments. This becomes even more important and + complicated in a team environment where data scientists’ collaboration takes + a serious amount of the team’s effort. DVC will be the arm to help you with + it. + +One of the ‘juicy’ features of DVC is ability to support multiple technology +stacks. Whether you prefer R or use promising Python-based implementations for +your industrial data products, DVC will be able to support your pipeline +properly. You can see it in action for both +[Python-based](https://blog.dvc.org/how-data-scientists-can-improve-their-productivity) +and +[R-based](https://blog.dvc.org/r-code-and-reproducible-model-development-with-dvc) +technical stacks. + +As such, DVC is going to be one of the tools you would enjoy to use if/when you +embark on building continual analytical environment for your system or across +your organization. + +## Continual Analytical Environment and DevOps + +Building a production pipeline is quite different from building a +machine-learning prototype on a local laptop. Many teams and companies face the +challenges there. + +At the bare minimum, the following requirements shall be met when you move your +solution into production + +1. Periodic re-training of the models/algorithms + +2. Ease of re-deployment and configuration changes in the system + +3. Efficiency and high performance of real-time scoring the new out-of-sample + observations + +4. Availability of the monitor model performance over time + +5. Adaptive ETL and ability to manage new data feeds and transactional systems + as data sources for AI and machine learning tools + +6. Scaling to really big data operations + +7. Security and Authorized access levels to different areas of the analytical + systems + +8. 
Solid backup and recovery processes/tools + +This goes into the territory traditionally inhabited by DevOps. Data Scientists +should ideally learn to handle the part of those requirements themselves or at +least be informative consultants to classical DevOps gurus. + +DVC can help in many aspects of the production scenario above as it can +orchestrate relevant tools and instruments through its scripting. In such a +setup, DVC scripts will be sharable manifestation (and implementation) of your +production pipeline where each step can be transparently reviewed, easily +maintained, and changed as needed over time. + +## Will DevOps Be Captivating? + +If you are further interested in understanding the ever-proliferating role of +DevOps in the modern Data Science and predictive analytics in business, there +are good resources for your review below + +1. [DevOps For Data Science: Why Analytics Ops Is Key To Value](https://www.forbes.com/sites/teradata/2016/11/14/devops-for-data-science-why-analytics-ops-is-key-to-value/) + (Forbes, Nov 14, 2016) + +2. [Bridging the Gap Between Data Science and DevOps](https://www.packtpub.com/books/content/bridging-gap-between-data-science-and-devops) + +3. [Is DevOps Making Life Better for Data Scientists?](https://devops.com/devops-life-better-data-scientists/) + +By any mean, DVC is going to be a useful instrument to fill the multiple gaps +between the classical in-lab old-school data science practices and growing +demands of business to build solid DevOps processes and workflows to streamline +mature and persistent data analytics. 
diff --git a/content/blogs/2017-08-23-ml-model-ensembling-with-fast-iterations.md b/content/blogs/2017-08-23-ml-model-ensembling-with-fast-iterations.md new file mode 100644 index 0000000000..e225c2e560 --- /dev/null +++ b/content/blogs/2017-08-23-ml-model-ensembling-with-fast-iterations.md @@ -0,0 +1,240 @@ +--- +title: ML Model Ensembling with Fast Iterations +date: 2017-08-23 +description: > + Here we'll talk about tools that help tackling common technical challenges of + building pipelines for the ensemble learning. +descriptionLong: > + In many real-world Machine Learning projects, there is a need to ensemble + complex models as well as maintain pipelines. As we will demonstrate, DVC is a + good tool that helps tackling common technical challenges of building + pipelines for the ensemble learning. +picture: 2017-08-23/post-image.png +author: george_vyshnya +commentsUrl: https://discuss.dvc.org/t/ml-model-ensembling-with-fast-iterations/296 +tags: + - Best Practices + - Model Ensembling + - R + - Tutorial +--- + +In a model ensembling setup, the final prediction is a composite of predictions +from individual machine learning algorithms. To make the best model composite, +you have to try dozens of combinations of weights for the model set. It takes a +lot of time to come up with the best one. That is why the iteration speed is +crucial in the ML model ensembling. We are going to make our research +reproducible by using [Data Version Control](http://dvc.org) tool - +([DVC](http://dvc.org)). It provides the ability to quickly re-run and replicate +the ML prediction result by executing just a single command `dvc repro`. + +As we will demonstrate, DVC is a good tool that helps tackling common technical +challenges of building pipelines for the ensemble learning. 
+ +## Project Overview + +In this case, we will build an R-based solution to attack the +supervised-learning regression problem to predict win sales per +[Predict Wine Sales](https://inclass.kaggle.com/c/pred-411-2016-04-u3-wine/) +Kaggle competition. + +An ensemble prediction methodology will be used in the project. The weighted +ensemble of three models will be implemented, trained, and predicted from +(namely, these are Linear Regression, `GBM`, and `XGBoost`). + +![](../uploads/images/2017-08-23/ensemble-prediction-methodology.png) + +If properly designed and used, ensemble prediction can perform much better then +predictions of individual machine learning models composing the ensemble. + +Prediction results will be delivered in a format of output CSV file that is +specified in the requirements to the +[Predict Wine Sales](https://inclass.kaggle.com/c/pred-411-2016-04-u3-wine/) +Kaggle competition (so called Kaggle submission file). + +## Important Pre-Requisites + +In order to try the materials of this +[repository](https://github.com/gvyshnya/DVC_R_Ensemble) in your environment, +the following software should be installed on your machine + +- **_Python 3_** runtime environment for your OS (it is required to run DVC + commands in the batch files) + +- **_DVC_** itself (you can install it as a python package by simply doing the + standard command in your command line prompt: `pip install dvc`) + +- **_R_** **_3.4.x_** runtime environment for your OS + +- **_git_** command-line client application for your OS + +## Technical Challenges + +The technical challenges of building the ML pipeline for this project were to +meet business requirements below + +- Ability to conditionally trigger execution of 3 different ML prediction models + +- Ability to conditionally trigger model ensemble prediction based on + predictions of those 3 individual models + +- Ability to specify weights of each of the individual model predictions in the + ensemble + +- Quick and fast 
redeployment and re-run of the ML pipeline upon frequent + reconfiguration and model tweaks + +- Reproducibility of the pipeline and forecasting results across the multiple + machines and team members + +The next sections below will explain how these challenges are addressed in the +design of ML pipeline for this project. + +## ML Pipeline + +The ML pipeline for this project is presented in the diagram below + +![](../uploads/images/2017-08-23/ml-pipeline.png) + +As you can see, the essential implementation of the solution is as follows + +- [`preprocessing.R`](https://gist.github.com/gvyshnya/443424775b0150baac774cc6cf3cb1cc) + handles all aspects of data manipulations and pre-processing (reading training + and testing data sets, removing outliers, imputing NAs etc.) as well as stores + refined training and testing set data as new files to reuse by model scripts + +- 3 model scripts implement training and forecasting algorithms for each of the + models selected for this project + ([`LR.R`](https://gist.github.com/gvyshnya/7ec76316c24bc1b4f595ef1256f52d3a), + [`GBM.R`](https://gist.github.com/gvyshnya/50e5ea3efa9771d2e7cc121c2f1a04e4), + [`xgboost.R`](https://gist.github.com/gvyshnya/2e5799863f02fec652c194020da82dd3)) + +- [`ensemble.R`](https://gist.github.com/gvyshnya/84379d6a68fd085fe3a26aabad453e55) + is responsible for the weighted ensemble prediction and the final output of + the Kaggle submission file + +- `config.R` is responsible for all of the conditional logic switches needed in + the pipeline (it is included as a source to all of modeling and ensemble + prediction scripts, to get this done) + +There is a special note about lack of feature engineering for this project. It +was an intended specification related to the specifics of the dataset. The +existing features were quite instrumental to predict the target values ‘as is’. 
+Therefore it had been decided to follow the well-known +[Pareto principle](https://en.wikipedia.org/wiki/Pareto_principle) (interpreted +as “**_20% of efforts address 80% of issues_**”, in this case) and not to spend +more time on it. + +**_Note_**: all `R` and batch files mentioned throughout this blog post are +available online in a separate GitHub +[repository](https://github.com/gvyshnya/DVC_R_Ensemble). You will be also able +to review more details on the implementation of each of the machine learning +prediction models there. + +### Pipeline Configuration Management + +All of the essential tweaks to conditional machine learning pipeline for this +project is managed by a configuration file. For ease of its use across solution, +it was implemented as an R code module (`config.R`), to be included to all model +training and forecasting. Thus the respective parameters (assigned as R +variables) will be retrieved by the runnable scripts, and the conditional logic +there will be triggered respectively. + +This file is not intended to run from a command line (unlike the rest of the R +scripts in the project). + +`gist:gvyshnya/918e94b06ebf222f6bb56ed26a5f44ee#config.R` + +### Why Do We Need DVC? + +As we all know, there is no way to build the ideal ML model with sound +prediction accuracy from the very beginning. You will have to continuously +adjust your algorithm/model implementations based on the cross-validation +appraisal until you yield the blooming results. This is especially true in the +ensemble learning where you have to constantly tweak not only parameters of the +individual prediction models but also the settings of the ensemble itself + +- changing ensemble composition — adding or removing individual prediction + models + +- changing model prediction weights in the resulting ensemble prediction + +Under such a condition, DVC will help you to manage your ensemble ML pipeline in +a really solid manner. 
Let’s consider the following real-world scenario + +- Your team member changes the settings of `GBM` model and resubmit its + implementation to (this is emulated by the commit + [#8604103f0](https://github.com/gvyshnya/DVC_R_Ensemble/commit/27825d0732f72f07e7e4e48548ddb8a8604103f0), + check sum `27825d0`) + +- You rerun the entire ML pipeline on your computer, to get the newest + predictions from `GBM` as well as the updated final ensemble prediction + +- The results of the prediction appeared to be still not optimal thus someone + changes the weights of individual models in the ensemble, assigning `GBM` + higher weight vs. `xgboost` and `LR` + +- After the ensemble setup changes committed (and updated `config.R` appeared in + the repository, as emulated by the commit + [#eb97612ce](https://github.com/gvyshnya/DVC_R_Ensemble/commit/5bcbe115afcb24886abb4734ff2da42eb97612ce), + check sum `5bcbe11`), you re-run the model predictions and the final ensemble + prediction on your machine once again + +All that you need to do to handle the changes above is simply to keep running +your **DVC** commands per the script developed (see the section below). You do +not have to remember or know explicitly the changes being made into the project +codebase or its pipeline configuration. **DVC** will automatically check out +latest changes from the repo as well as make sure it runs only those steps in +the pipeline that were affected by the recent changes in the code modules. + +### Orchestrating the Pipeline : DVC Command File + +After we developed individual R scripts needed by different steps of our Machine +Learning pipeline, we orchestrate it together using DVC. 
+ +Below is a batch file illustrating how DVC manages steps of the machine learning +process for this project + +`gist:gvyshnya/7f1b8262e3eb7a8b3c16dbfd8cf98644#dvc.bat` + +If you then further edit ensemble configuration setup in `code/config.R`, you +can simply leverage the power of DVC as for automatic dependencies resolving and +tracking to rebuild the new ensemble prediction as follows + +`gist:gvyshnya/9d80e51ba3d7aa5bd37d100ed82376ee` + +## Summary + +In this blog post, we worked through the process of building an ensemble +prediction pipeline using DVC. The essential key features of that pipeline were +as follows + +- **_reproducibility_** — everybody on a team can run it on their premise + +- **_separation of data and code_** — this ensured everyone always runs the + latest versions of the pipeline jobs with the most up-to-date ‘golden copy’ of + training and testing data sets + +The helpful side effect of using DVC was you stop keeping in mind what was +changed on every step of modifying your project scripts or in the pipeline +configuration. Due to it maintaining the dependencies graph (DAG) automatically, +it automatically triggered the only steps that were affected by the particular +changes, within the pipeline job setup. It, in turn, provides the capability to +quickly iterate through the entire ML pipeline. + +> As DVC brings proven engineering practices to often suboptimal and messy ML +> processes as well as helps a typical Data Science project team to eliminate a +> big chunk of common +> [DevOps overheads](https://blog.dataversioncontrol.com/data-version-control-in-analytics-devops-paradigm-35a880e99133), +> I found it extremely useful to leverage DVC on the industrial data science and +> predictive analytics projects. + +## Further Reading + +1. [Ensemble Learning and Prediction Introduction](https://en.wikipedia.org/wiki/Ensemble_learning) + +2. 
[Using DVC in Machine Learning projects in Python](https://blog.dataversioncontrol.com/data-version-control-beta-release-iterative-machine-learning-a7faf7c8be67) + +3. [Using DVC in Machine Learning projects in R](https://blog.dataversioncontrol.com/r-code-and-reproducible-model-development-with-dvc-1507a0e3687b) + +4. [Kaggle Ensembling Guide](https://mlwave.com/kaggle-ensembling-guide/) diff --git a/content/blogs/2017-09-26-best-practices-of-orchestrating-python-and-r-code-in-ml-projects.md b/content/blogs/2017-09-26-best-practices-of-orchestrating-python-and-r-code-in-ml-projects.md new file mode 100644 index 0000000000..0322f7d956 --- /dev/null +++ b/content/blogs/2017-09-26-best-practices-of-orchestrating-python-and-r-code-in-ml-projects.md @@ -0,0 +1,262 @@ +--- +title: Best practices of orchestrating Python and R code in ML projects +date: 2017-09-26 +description: > + What is the best way to integrate R and Python languages in one data science + project? What are the best practices? +descriptionLong: > + Today, data scientists are generally divided among two languages — some prefer + R, some prefer Python. I will try to find an answer to a question: “What is + the best way to integrate both languages in one data science project? What are + the best practices?” +picture: 2017-09-26/post-image.jpg +pictureComment: | + Image was taken from + [this](http://intersog.com/blog/r-and-python-for-data-science-worthy-opponents/) + page +author: marija_ilic +commentsUrl: https://discuss.dvc.org/t/best-practices-of-orchestrating-python-and-r-code-in-ml-projects/295 +tags: + - R + - Python + - Tutorial + - Best Practices +--- + +Beside Git and shell scripting additional tools are developed to facilitate the +development of predictive model in a multi-language environments. For fast data +exchange between R and Python let’s use binary data file format +[Feather](https://blog.rstudio.com/2016/03/29/feather/). 
Another language +agnostic tool [DVC](http://dvc.org) can make the research reproducible — let’s +use DVC to orchestrate R and Python code instead of a regular shell scripts. + +## Machine learning with R and Python + +Both R and Python are having powerful libraries/packages used for predictive +modeling. Usually algorithms used for classification or regression are +implemented in both languages and some scientist are using R while some of them +preferring Python. In an example that was explained in previous +[tutorial](https://blog.dataversioncontrol.com/r-code-and-reproducible-model-development-with-dvc-1507a0e3687b) +target variable was binary output and logistic regression was used as a training +algorithm. One of the algorithms that could also be used for prediction is a +popular [Random Forest algorithm](https://en.wikipedia.org/wiki/Random_forest) +which is implemented in both programming languages. Because of performances it +was decided that Random Forest classifier should be implemented in Python (it +shows better performances than random forest package in R). + +## R example used for DVC demo + +We will use the same example from previous blog +[story](https://blog.dataversioncontrol.com/r-code-and-reproducible-model-development-with-dvc-1507a0e3687b), +add some Python codes and explain how Feather and DVC can simplify the +development process in this combined environment. + +Let’s recall briefly the R codes from previous tutorial: + +![R Jobs](../uploads/images/2017-09-26/r-jobs.png)_R Jobs_ + +Input data are StackOverflow posts — an XML file. Predictive variables are +created from text posts — relative importance +[tf-idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) of words among all +available posts is calculated. With tf-idf matrices target is predicted and +lasso logistic regression for predicting binary output is used. AUC is +calculated on the test set and AUC metric is used on evaluation. 
+ +Instead of using logistic regression in R we will write Python jobs in which we +will try to use random forest as training model. Train_model.R and evaluate.R +will be replaced with appropriate Python jobs. + +R codes can be seen +[here](https://blog.dataversioncontrol.com/r-code-and-reproducible-model-development-with-dvc-1507a0e3687b). + +Code for `train_model_Python.py` is presented below: + +`gist:Zoldin/b312897cc492608feef1eaeae7f6eabc#train_model_Python.py` + +Also here we are adding code for `evaluation_python_model.py`: + +`gist:Zoldin/9eef13632d0a9039fe9b0dba376516a4#evaluation_python_model.py` + +Let’s download necessary R and Python codes from above (clone the +[Github](https://github.com/Zoldin/R_AND_DVC) repository): + +```dvc +$ mkdir R_DVC_GITHUB_CODE +$ cd R_DVC_GITHUB_CODE + +$ git clone https://github.com/Zoldin/R_AND_DVC +``` + +Our dependency graph of this data science project look like this: + +![R (marked red) and Python (marked pink) jobs in one project](../uploads/images/2017-09-26/our-dependency-graph.png)_R +(marked red) and Python (marked pink) jobs in one project_ + +Now lets see how it is possible to speed up and simplify process flow with +Feather API and data version control reproducibility. + +## Feather API + +Feather API is designed to improve meta data and data interchange between R and +Python. It provides fast import/export of data frames among both environments +and keeps meta data information which is an improvement over data exchange via +csv/txt file format. In our example Python job will read an input binary file +that was produced in R with Feather api. + +Let’s install Feather library in both environments. + +For Python 3 on linux environment you can use cmd and pip3: + +```dvc +$ sudo pip3 install feather-format +``` + +For R it is necessary to install feather package: + +```R +install.packages(feather) +``` + +After successful installation we can use Feather for data exchange. 
+ +Below is an R syntax for data frame export with Feather (featurization.R): + +```R +library(feather) + +write_feather(dtm_train_tfidf,args[3]) +write_feather(dtm_test_tfidf,args[4]) +print("Two data frame were created with Feather - one for train and one for test data set") +``` + +Python syntax for reading feather input binary files (train_model_python.py): + +```python +import feather as ft + +input = sys.argv[1] +df = ft.read_dataframe(input) +``` + +## Dependency graph with R and Python combined + +The next question what we are asking ourselves is why do we need DVC, why not +just use shell scripting? DVC automatically derives the dependencies between the +steps and builds +[the dependency graph (DAG)](https://en.wikipedia.org/wiki/Directed_acyclic_graph) +transparently to the user. Graph is used for reproducing parts/codes of your +pipeline which were affected by recent changes and we don’t have to think all +the time what we need to repeat (which steps) with the latest changes. + +Firstly, with `dvc run` command we will execute all jobs that are related to our +model development. In that phase DVC creates dependencies that will be used in +the reproducibility phase: + +```dvc +$ dvc import https://s3-us-west-2.amazonaws.com/dvc-public/data/tutorial/nlp/25K/Posts.xml.zip \ + data/ + +$ dvc run tar zxf data/Posts.xml.tgz -C data/ + +$ dvc run Rscript code/parsingxml.R \ + data/Posts.xml data/Posts.csv + +$ dvc run Rscript code/train_test_spliting.R \ + data/Posts.csv 0.33 20170426 \ + data/train_post.csv data/test_post.csv + +$ dvc run Rscript code/featurization.R \ + data/train_post.csv \ + data/test_post.csv data/matrix_train.feather \ + data/matrix_test.feather + +$ dvc run python3 code/train_model_python.py \ + data/matrix_train.feather \ + 20170426 data/model.p + +$ dvc run python3 code/evaluate_python_mdl.py \ + data/model.p data/matrix_test.feather \ + data/evaluation_python.txt +``` + +After this commands jobs are executed and included in DAG graph. 
Result (AUC +metrics) is written in evaluation_python.txt file: + +```dvc +$ cat data/evaluation_python.txt +AUC: 0.741432 +``` + +It is possible to improve our result with random forest algorithm. + +We can increase number of trees in the random forest classifier — from 100 to +500: + +```python +clf = RandomForestClassifier(n_estimators=500, + n_jobs=2, + random_state=seed) +clf.fit(x, labels) +``` + +After commited changes (in `train_model_python.py`) with `dvc repro` command all +necessary jobs for `evaluation_python.txt` reproduction will be re-executed. We +don’t need to worry which jobs to run and in which order. + +```dvc +$ git add . +$ git commit +[master a65f346] Random forest classifier — more trees added + 1 file changed, 1 insertion(+), 1 deletion(-) + +$ dvc repro data/evaluation_python.txt + +Reproducing run command for data item data/model.p. Args: python3 code/train_model_python.py data/matrix_train.txt 20170426 data/model.p +Reproducing run command for data item data/evaluation_python.txt. Args: python3 code/evaluate_python_mdl.py data/model.p data/matrix_test.txt data/evaluation_python.txt +Data item “data/evaluation_python.txt” was reproduced. +``` + +Beside code versioning, DVC also cares about data versioning. For example, if we +change data sets `train_post.csv` and `test_post.csv` (use different splitting +ratio) DVC will know that data sets are changed and `dvc repro` will re-execute +all necessary jobs for evaluation_python.txt. + +```dvc +$ dvc run Rscript code/train_test_spliting.R \ + data/Posts.csv 0.15 20170426 \ + data/train_post.csv \ + data/test_post.csv +``` + +Re-executed jobs are marked with red color: + +![](../uploads/images/2017-09-26/re-executed-jobs.png) + +```dvc +$ dvc run Rscript code/train_test_spliting.R \ + data/Posts.csv 0.15 20170426 \ + data/train_post.csv \ + data/test_post.csv + +$ dvc repro data/evaluation_python.txt + +Reproducing run command for data item data/matrix_train.txt. 
Args: Rscript — vanilla code/featurization.R data/train_post.csv data/test_post.csv data/matrix_train.txt data/matrix_test.txt +Reproducing run command for data item data/model.p. Args: python3 code/train_model_python.py data/matrix_train.txt 20170426 data/model.p +Reproducing run command for data item data/evaluation_python.txt. Args: python3 code/evaluate_python_mdl.py data/model.p data/matrix_test.txt data/evaluation_python.txt + +Data item “data/evaluation_python.txt” was reproduced. + +$ cat data/evaluation_python.txt +AUC: 0.793145 +``` + +New AUC result is 0.793145 which shows an improvement compared to previous +iteration. + +## Summary + +In data science projects it is often used R/Python combined programming. +Additional tools beside git and shell scripting are developed to facilitate the +development of predictive model in a multi-language environments. Using data +version control system for reproducibility and Feather for data interoperability +helps you orchestrate R and Python code in a single environment. diff --git a/content/blogs/2018-10-18-ml-best-practices-in-pytorch-dev-conf-2018.md b/content/blogs/2018-10-18-ml-best-practices-in-pytorch-dev-conf-2018.md new file mode 100644 index 0000000000..cf1cdc194a --- /dev/null +++ b/content/blogs/2018-10-18-ml-best-practices-in-pytorch-dev-conf-2018.md @@ -0,0 +1,157 @@ +--- +title: ML best practices in PyTorch dev conf 2018 +date: 2018-10-18 +description: > + In the Machine Learning (ML) field tools and techniques for best practices are + just starting to be developed. +descriptionLong: > + In the Machine Learning (ML) field tools and techniques for best practices are + just starting to be developed. At the PyTorch developer conference (PTDC-18), + several speakers including **Jerome Pesenti, VP of AI from Facebook** and + **Andrej Karpathy, Director of Tesla AI** spoke about best practices for + machine learning development. 
+picture: 2018-10-18/post-image.jpeg +pictureComment: | + The image source: + [link](https://blog.hubspot.com/customers/bid/109553/5-Homepage-Design-Best-Practices) +author: dmitry_petrov +commentsUrl: https://discuss.dvc.org/t/ml-best-practices-in-pytorch-dev-conf-2018/294 +tags: + - Machine Learning + - Best Practices + - PyTorch + - PTDC-18 + - Company +--- + +The issues discussed included applying traditional software development +techniques like unit testing, CI/CD systems, automated deployment, version +control, and more to the ML field. In this blog post, we will go over the best +practices ideas from PTDC-18 and the future of ML tool developments. + +## 1. Engineering practices from PyTorch developers + +In the PTDC-18 +[keynote speech](https://www.facebook.com/pytorch/videos/482401942168584/), +**Jerome Pesenti** described the motivation and goals of PyTorch project and +what the future of machine learning looks like. + +### 1.1. ML tooling future + +Regarding the future of ML, Jerome envisioned a “streamlined development, more +accessible tools, breakthrough hardware, and more”. Talking about the gap huge +gap between software engineering and ML engineering, Presenti said: + +> Machine learning engineering is where we were in Software Engineering 20 years +> ago. A lot of things still need to be invented. We need to figure out what +> testing means, what CD (continuous delivery) means, we need to develop tools +> and environments that people can develop **robust ML that does not have too +> many biases** and does not overfit. + +In that gap lives many opportunities to develop new tools and services. We in +the ML ecosystem are called upon to implement the future of machine learning +tools. Traditional software engineering has many useful tools and techniques +which can either be repurposed for Machine Learning development or used as a +source for ideas in developing new tools. + +### 1.2. 
PyTorch motivation + +PyTorch 1.0 implements one important engineering principle — “a seamless +transition from AI research to production”. It helps to move AI technology from +research into production as quickly as possible. In order to do that a few +challenges were solved: + +1. **Write code once** — not have to rewrite or re-optimize code to go from + research to prod. + +1. **Performance** — training model on large datasets. + +1. **Other languages** — not only Python which is great for prototyping but also + C++ and other languages. + +1. **Scaling** — deploy PyTorch at scale more easily. + +## 2. Engineering practices for software 2.0 + +### 2.1. Melting of software 2.0 and software 1.0 + +**Andrej Karpathy** from Tesla AI had a +[dedicated talk](https://www.facebook.com/pytorch/videos/169366590639145/) about +best engineering practices in ML. He drew a contrast between traditional +software development (software 1.0) with software utilizing Machine Learning +techniques (software 2.0), saying that + +> “software 2.0 code also has new feature demands, contains bugs, and requires +> iterations.” + +Meaning that ML development has a lifecycle similar to traditional software: + +> “When you are working with these [neural] networks **in production** you are +> doing much more than that [training and measuring models]. You maintaining the +> codebase and that codebase is alive is just like 1.0 code.” + +Machine Learning models need to grow and develop feature-by-feature, bugs need +to be found and fixed, and repeatable processes are a must, as in earlier non-ML +software development practices. + +### 2.2. Software 2.0 best practices + +Karpathy went on to describe how software 1.0 best practices can be used in +software 2.0 (ML modeling): + +1. **Test-driven development** — test/train dataset separation is not enough + since it describes only expected performance. Edge cases have to be tested to + ensure the model performs as required. 
That requires incorporating more + examples in datasets, or changing model architecture, or changing + optimization functions. + +1. **Continues Integration and Continues Delivery** (CI/CD) — Intelligently used + of CI/CD can propel a team into rapid agile development of software systems. + The phases of CI/CD jobs include: 1) ML model auto re-training when code or + dataset changes; 2) running unit-tests; 3) easy access to the last model; 4) + Auto-deployment to test and/or production systems. + +1. **Version Control** — track all the changes in datasets (labels), not only + code. + +1. Train a **single model** from scratch every time without using other + pre-trained models. (External pre-trained models don’t count as far as I + understand.) A chain of fine-tuning models very quickly disintegrates + codebase. In software 1.0 a single **monorepo** is an analog of a single + model which also helps to avoid disintegration. + +This list of best practices shows how serious Tesla AI is about robust software +which is not surprising for self-driving car area. Any company needs these +practices in order to organize a manageable ML development process. + +## 3. Data file-centric tools + +Frameworks and libraries like PyTorch make a significant step in machine +learning tooling and bringing the best practices. However, frameworks and +libraries might be not enough for many of the ML best practices. For example, +dataset versioning, ML model versioning, continuous integration (CI) and +continuous delivery (CD) requires manipulation and transferring data files. +These can be done in a **more efficient and natural way by data management +tools** and storage systems rather than libraries. + +The need for a machine learning artifact manipulation tool with **data +file-centric philosophy** was the major motivation behind open source project +that we created — Data Version Control (DVC) or [DVC.org](http://dvc.org). 
+ +DVC connects Git with data files and machine learning pipelines which helps keep +version control on machine learning models and datasets using familiar Git +semantics coupled with the power of cloud storage systems such as Amazon’s S3, +Google’s GCS, Microsoft’s Azure or bare-metal servers accessed by SSH. + +If PyTorch helps in organizing code inside an ML project then data-centric tools +like DVC help organized different pieces of ML projects into a single workflow. +The machine learning future requires both types of tools — code level and data +file level. + +## Conclusion + +Thus far only the first steps have been taken toward using machine learning +tooling and the best machine learning practices. Mostly large companies are +using these practices because they faced the problems a while ago. Best +practices should be embraced by the entire industry which will help to bring +machine learning to a higher new level. diff --git a/content/blogs/2019-03-05-march-19-dvc-heartbeat.md b/content/blogs/2019-03-05-march-19-dvc-heartbeat.md new file mode 100644 index 0000000000..c2cb188612 --- /dev/null +++ b/content/blogs/2019-03-05-march-19-dvc-heartbeat.md @@ -0,0 +1,164 @@ +--- +title: March ’19 DVC❤️Heartbeat +date: 2019-03-05 +description: > + The very first issue of the DVC Heartbeat! News, links, Discord discussions + from the community. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-03-05/post-image.jpeg +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/march-19-dvc-heartbeat/293 +tags: + - Heartbeat + - Community +--- + +This is the very first issue of the DVC❤️Heartbeat. 
Every month we will be +sharing our news, findings, interesting reads, community takeaways, and +everything along the way. + +Some of those are related to our brainchild [DVC](https://dvc.org) and its +journey. The others are a collection of exciting stories and ideas centered +around ML best practices and workflow. + +## News and links + +We read a ton of articles and posts every day and here are a few that caught our +eye. Well-written, offering a different perspective and definitely worth +checking. + +- **[Data science is different now](https://veekaybee.github.io/2019/02/13/data-science-is-different/) + by [Vicki Boykis](https://veekaybee.github.io/)** + + + +> What is becoming clear is that, in the late stage of the hype cycle, data +> science is asymptotically moving closer to engineering, and the +> [skills that data scientists need](https://www.youtube.com/watch?v=frQeK8xo9Ls) +> moving forward are less visualization and statistics-based, and +> [more in line with traditional computer science curricula](https://tech.trivago.com/2018/12/03/teardown-rebuild-migrating-from-hive-to-pyspark/). + +- **[Data Versioning](https://emilygorcenski.com/post/data-versioning/) by + [Emily F. Gorcenski](https://emilygorcenski.com/)** + + + +> I want to explore how the degrees of freedom in versioning machine learning +> systems poses a unique challenge. I’ll identify four key axes on which machine +> learning systems have a notion of version, along with some brief +> recommendations for how to simplify this a bit. + +- **[Reproducibility in Machine Learning](https://blog.mi.hdm-stuttgart.de/index.php/2019/02/26/reproducibility-in-ml/) + by [Pascal Fecht](https://blog.mi.hdm-stuttgart.de/index.php/author/pf023/)** + + + +> ...the objective of this post is not to philosophize about the dangers and +> dark sides of AI. 
In fact, this post aims to work out common challenges in +> reproducibility for machine learning and shows programming differences to +> other areas of Computer Science. Secondly, we will see practices and workflows +> to create a higher grade of reproducibility in machine learning algorithms. + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We will be sifting through the issues and discussions and share the most +interesting takeaways. + +### Q: [Edit and define DVC files manually, in a Makefile style](https://discordapp.com/channels/485586884165107732/485586884165107734/541622187296161816) + +There is no separate guide for that, but it is very straight forward. See +[DVC file format](https://dvc.org/doc/user-guide/project-structure) description +for how DVC file looks inside in general. All `dvc add` or `dvc run` does is +just computing `md5` fields in it, that is all. You could write your DVC-file +and then run `dvc repro` that will run a command(if any) and compute all needed +checksums,[read more](https://discordapp.com/channels/485586884165107732/485586884165107734/541622187296161816). + +### Q: [Best practices to define the code dependencies](https://discordapp.com/channels/485586884165107732/485586884165107734/547424240677158915) + +There’s a ton of code in that project, and it’s very non-trivial to define the +code dependencies for my training stage — there are a lot of imports going on, +the training code is distributed across many modules, +[read more](https://discordapp.com/channels/485586884165107732/485586884165107734/547424240677158915) + +### Q: [Azure data lake support](https://discordapp.com/channels/485586884165107732/485586884165107734/548495589428428801) + +DVC officially only supports regular Azure blob storage. Gen1 Data Lake should +be accessible by the same interface, so configuring a regular azure remote for +DVC should work. Seems like Gen2 Data Lake +[has disable](https://discordapp.com/channels/485586884165107732/485586884165107734/550546413197590539) +blob API. 
If you know more details about the difference between Gen1 and Gen2, +feel free to join [our community](https://dvc.org/chat) and share this +knowledge. + +### Q: [What licence DVC is released under](https://discordapp.com/channels/485586884165107732/485596304961962003/542390986299539459) + +Apache 2.0. One of the [most common](https://opensource.org/licenses) and +permissible OSS licences. + +### Q: Setting up S3 compatible remote + +([Localstack](https://discordapp.com/channels/485586884165107732/485596304961962003/543445798868746278), +[wasabi](https://discordapp.com/channels/485586884165107732/485596304961962003/541466951474479115)) + +```dvc +$ dvc remote add upstream s3://my-bucket +$ dvc remote modify upstream region REGION_NAME +$ dvc remote modify upstream endpointurl +``` + +Find and click the `S3 API compatible storage` on +[this page](https://dvc.org/doc/commands-reference/remote/add) + +### Q: [Why DVC creates and updates `.gitignore` file?](https://discordapp.com/channels/485586884165107732/485596304961962003/543914550173368332) + +It adds your data files there, that are tracked by DVC, so that you don’t +accidentally add them to git as well you can open it with file editor of your +liking and see your data files listed there. + +### Q: [Managing data and pipelines with DVC on HDFS](https://discordapp.com/channels/485586884165107732/485596304961962003/545562334983356426) + +With DVC, you could connect your data sources from HDFS with your pipeline in +your local project, by simply specifying it as an external dependency. For +example let’s say your script `process.cmd` works on an input file on HDFS and +then downloads a result to your local workspace, then with DVC it could look +something like: + +```dvc +$ dvc run -d hdfs://example.com/home/shared/input \ + -d process.cmd \ + -o output process.cmd +``` + +[read more](https://discordapp.com/channels/485586884165107732/485596304961962003/545562334983356426). + +
+ +If you have any questions, concerns or ideas, let us know +[here](https://dvc.org/support) and our stellar team will get back to you in no +time. diff --git a/content/blogs/2019-04-18-april-19-dvc-heartbeat.md b/content/blogs/2019-04-18-april-19-dvc-heartbeat.md new file mode 100644 index 0000000000..6e76dee5c6 --- /dev/null +++ b/content/blogs/2019-04-18-april-19-dvc-heartbeat.md @@ -0,0 +1,264 @@ +--- +title: April ’19 DVC❤️Heartbeat +date: 2019-04-18 +description: > + DVC creator Dmitry Petrov is giving a talk on PyCon 2019 🎤, new DVC logo + design, new Discord discussions, interesting reads that caught our eye, and + everything along the way. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-04-18/post-image.jpeg +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/april-19-dvc-heartbeat/292 +tags: + - Heartbeat + - Community + - PyCon +--- + +## News and links + +We have some exciting news to share this month! + +DVC is going to [PyCon 2019](https://us.pycon.org/2019/)! It is the first +conference that we attend as a team. When we say ‘team’ — we mean it. Our +engineers are flying from all over the globe to get together offline and catch +up with fellow Pythonistas. + +The [speaker pipeline](https://us.pycon.org/2019/schedule/talks/list/) is +amazing! DVC creator Dmitry Petrov is giving a talk on +[Machine learning model and dataset versioning practices](https://us.pycon.org/2019/schedule/presentation/176/). + +Stop by our booth at the Startup Row on Saturday, May 4, reach out and let us +know that you are willing to chat, or simply find a person with a huge DVC owl +on their shirt! 
+ +Speaking of the owls — DVC has done some rebranding recently and we love our new +logo. Special thanks to [99designs.com](https://99designs.com/) for building a +great platform for finding trusted designers. + +![](../uploads/images/2019-04-18/trusted-designers.png) + +DVC is moving fast (almost as fast as my two-year-old). We do our best to keep +up and totally love all the buzz in our community channels lately! + +Here is a number of interesting reads that caught our eye: + +- **[A walkthrough of DVC](https://blog.codecentric.de/en/2019/03/walkthrough-dvc/) + by [Bert Besser](https://www.linkedin.com/in/bert-besser-284564182/)** + + + +A great article about using DVC with a quite advanced scenario and docker. If +you haven’t had a chance to try [DVC.org](http://dvc.org/) yet — this is a great +comprehensive read on why you should do so right away. + +- **[The state of machine learning operations](https://github.com/EthicalML/state-of-mlops-2019) + by [Alejandro Saucedo](https://www.linkedin.com/in/axsaucedo/)** + + + +A short (only 8 minutes!) and inspiring talk by Alejandro Saucedo at FOSDEM. +Alejandro covers the key trends in machine learning operations, as well as most +recent open source tools and frameworks. Focused on reproducibility, monitoring +and explainability, this lightning talk is a great snapshot of the current state +of ML operations. + +- **[Interview with Kaggle Grandmaster, Senior Computer Vision Engineer at Lyft: Dr. Vladimir I. Iglovikov](https://hackernoon.com/interview-with-kaggle-grandmaster-senior-cv-engineer-at-lyft-dr-vladimir-i-iglovikov-9938e1fc7c) + by [Sanyam Bhutani](https://twitter.com/bhutanisanyam1)** + + + +> There is no way you will become Kaggle Master and not learn how to approach +> anew, the unknown problem in a fast hacking way with a very high number of +> iterations per unit of time. This skill in the world of competitive learning +> is the question of survival + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: [What are the system requirements to install DVC (type of operating system, dependencies of another application (as GIT), memory, cpu, etc).](https://discordapp.com/channels/485586884165107732/485596304961962003/552098155861114891) + +- It supports Windows, Mac, Linux. Python 2 and 3. + +- No specific CPU or RAM requirements — it’s a lightweight command line tool and + should be able run pretty much everywhere you can run Python. + +- It depends on a few Python libraries that it installs as dependencies (they + are specified in the + [`setup.py`](https://github.com/iterative/dvc/blob/master/setup.py)). + +- It does not depend on Git and theoretically could be run without any SCM. + Running it on top of a Git repository however is recommended and gives you an + ability to actually save history of datasets, models, etc (even though it does + not put them into Git directly). + +### Q: [Do I have to buy a server license to run DVC, do you have this?](https://discordapp.com/channels/485586884165107732/485596304961962003/560212552638791706) + +No server licenses for DVC. It is 100% free and open source. + +### Q: [What is the storage limit when using DVC?](https://discordapp.com/channels/485586884165107732/485596304961962003/560154903331340289) + +I am trying to version control datasets and models with >10 GB (Potentially even +bigger). Can DVC handle this? + +There is no limit. None enforced by DVC itself. It depends on the size of your +local or [remote storages](https://dvc.org/doc/commands-reference/remote). You +need to have some space available on S3, your SSH server or other storage you +are using to keep these data files, models and their version, which you would +like to store. 
+ +### Q: [How does DVC know the sequence of stages to run](https://discordapp.com/channels/485586884165107732/485596304961962003/553731815228178433)? + +How does it connect them? Does it see that there is a dependency which is +outputted from the first run? + +DVC figures out the pipeline by looking at the dependencies and outputs of the +stages. For example, having the following: + +`gist:SvetaGr/a2a28fbc9db0a675422785bc5f925e14#heartbeat-dvc-run-2019-04.sh` + +you will end up with two stages: `download.dvc` and `duplicate.dvc`. The +download one will have `joke.txt` as an output . The duplicate one defined +`joke.txt` as a dependency, as it is the same file. DVC detects that and creates +a pipeline by joining those stages. + +You can inspect the content of each stage file +[here](https://dvc.org/doc/user-guide/project-structure) (they are human +readable). + +### Q: [Is it possible to use the same data of a remote in two different repositories?](https://discordapp.com/channels/485586884165107732/485596304961962003/560022999848321026) + +(e.g. in one repo `run dvc pull -r my_remote` to pull some data and running the +same command in a different git repo should also pull the same) + +Yes! It’s a frequent scenario for multiple repos to share remotes and even local +cache. DVC file serves as a link to the actual data. If you add the same DVC +file (e.g. `data.dvc`) to the new repo and do `dvc pull -r remotename data.dvc`- +it will fetch data. You have to use `dvc remote add` first to specify the +coordinates of the remote storage you would like to share in every project. +Alternatively (check out the question below), you could use `--global` to +specify a single default remote (and/or cache dir) per machine. + +### Q: [Could I set a global remote server, instead of config in each project?](https://discordapp.com/channels/485586884165107732/485586884165107734/559653121228275727) + +Use `--global` when you specify the remote settings. 
Then remote will be visible +for all projects on the same machine. `--global` — saves remote configuration to +the global config (e.g. `~/.config/dvc/config`) instead of a per project one — +`.dvc/config`. See more details +[here](https://dvc.org/doc/commands-reference/remote/add). + +### Q: [How do I version a large dataset in S3 or any other storage?](https://discordapp.com/channels/485586884165107732/485596304961962003/554679392823934977) + +We would recommend to skim through our +[get started](https://dvc.org/doc/get-started) tutorial, to summarize the data +versioning process of DVC: + +- You create stage (aka DVC) files by adding, importing files (`dvc add` / + `dvc import`) , or run a command to generate files: + +```dvc +$ dvc run --out file.csv "wget https://example.com/file.csv" +``` + +- This stage files are tracked by `git` + +- You use git to retrieve previous stage files (e.g. `git checkout v1.0`) + +- Then use `dvc checkout` to retrieve all the files related by those stage files + +All your files (with each different version) are stored in a `.dvc/cache` +directory, that you sync with a remote file storage (for example, S3) using the +`dvc push` or `dvc pull` commands (analogous to a `git push` / `git pull`, but +instead of syncing your `.git`, you are syncing your `.dvc` directory) on a +remote repository (let’s say an S3 bucket). + +### Q: [How do I move/rename a DVC-file?](https://discordapp.com/channels/485586884165107732/485596304961962003/558216007684980736) + +If you need to move your dvc file somewhere, it is pretty easy, even if done +manually: + +`gist:SvetaGr/b25a5b45773bf94d36e60d48462502f4#heartbeat-dvc-rename.sh` + +### Q: [I performed `dvc push` of a file to a remote. On the remote there is created a directory called `8f` with a file inside called `2ec34faf91ff15ef64abf3fbffa7ee`. The original CSV file doesn’t appear on the remote. 
Is that expected behaviour?](https://discordapp.com/channels/485586884165107732/485596304961962003/555431645402890255) + +This is an expected behavior. DVC saves files under the name created from their +checksum in order to prevent duplication. If you delete “pushed” file in your +project directory and perform `dvc pull`, DVC will take care of pulling the file +and renaming it to “original” name. + +Below are some details about how DVC cache works, just to illustrate the logic. +When you add a data source: + +`gist:SvetaGr/b69fa8ce36bcce00ecd69e7f2d7ccd2e#heartbeat-remote-file-naming.sh` + +It computes the (md5) checksum of the file and generates a DVC file with related +information: + +`gist:SvetaGr/110ae76df929654ec573ea9e4b1e1980#heartbeat-dvc-file-2019-04.yaml` + +The original file is moved to the cache and a link or copy (depending on your +filesystem) is created to replace it on your working space: + +`gist:SvetaGr/133cb93e5a21c6f21a86f8709ed39ea9#heartbeat-cache-structure-2019-04.sh` + +### Q: [Is it possible to integrate dvc with our in-house tools developed in Python?](https://discordapp.com/channels/485586884165107732/485586884165107734/553570391000481802) + +Absolutely! There are three ways you could interact with DVC: + +1. Use [subprocess](https://docs.python.org/3/library/subprocess.html) to launch + DVC + +2. Use `from dvc.main import main` and use it with regular CLI logic like + `ret = main(‘add’, ‘foo’)` + +3. Use our internal API (see `dvc/repo` and `dvc/command` in our source to get a + grasp of it). It is not officially public yet, and we don’t have any special + docs for it, but it is fairly stable and could definitely be used for a POC. + We’ll add docs and all the official stuff for it in the not-so-distant + future. + +### Q: [Can I still track the linkage between data and model without using `dvc run`](https://discordapp.com/channels/485586884165107732/485586884165107734/555750217522216990) and a graph of tasks? 
Basically, I would like an extremely minimal DVC footprint in my Git repo for an existing machine learning application?
+picture: 2019-04-23/post-image.png +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/dvc-project-ideas-for-google-season-of-docs-2019/291 +tags: + - Google Season of Docs + - Python + - Documentation + - Company +--- + +We strongly believe that well-shaped documentation is key for making the product +truly open. We have been investing lots of time and energy in improving our docs +lately. Being a team of 90% engineers we are eager to welcome the writers into +our team and our community. We are happy to share our experience, introduce them +to the world of open source and machine learning best practices, guide through +the OS contribution process and work together on improving our documentation. + +DVC was started in late 2017 by a data scientist and an engineer. It is now +growing pretty fast and though our in-house team is quite small, we have to +thank our contributors (more than 80 in both code and docs) for developing DVC +with us. When working with DVC the technical writer will not only get lots of +hands-on experience in writing technical docs, but will also immerse into DVC +community — a warm and welcoming gathering of ML and DS enthusiasts and an +invaluable source of inspiration and expertise in ML engineering. + +### About DVC + +DVC is a brainchild of a data scientist and an engineer, that was created to +fill in the gaps in the ML processes tooling and evolved into a successful open +source project. + +ML brings changes in development and research processes. These ML processes +require new tools for data versioning, ML pipeline versioning, resource +management for model training and others that haven’t been formalized. The +traditional software development tools do not fully cover ML team’s needs but +there are no good alternatives. It makes engineers to custom develop a new +toolset to manage data files, keep track of ML experiments and connect data and +source code together. 
The ML process becomes very fragile and requires tons of +tribal knowledge. + +We have been working on [DVC](http://DVC.org) by adopting best ML practices and +turning them into Git-like command line tool. DVC versions multi-gigabyte +datasets and ML models, make them shareable and reproducible. The tool helps to +organize a more rigorous process around datasets and the data derivatives. Your +favorite cloud storage (S3, GCS, or bare metal SSH server) could be used with +DVC as a data file backend. + +If you are interested in learning a little bit more about DVC and its journey, +here is a great interview with DVC creator in the Episode 206 of +Podcast.**init**. Listen to it +[HERE ](https://www.pythonpodcast.com/data-version-control-episode-206/)or read +the transcript +[HERE.](https://towardsdatascience.com/data-version-control-with-dvc-what-do-the-authors-have-to-say-3c3b10f27ee) + +### The state of DVC documentation + +DVC is a pretty young project, developed and maintained solely by engineers. As +many OS projects we started from the bottom and for a long time our +[documentation](https://dvc.org/doc) was a bunch of bits and pieces. Nowadays +improving documentation is one of our top priorities. We moved to the new +in-house built documentation engine and started working with several technical +writers. Certain parts have been tremendously improved recently, e.g. +[Get Started](https://dvc.org/doc/get-started) and +[certain parts of Commands Reference](https://dvc.org/doc/commands-reference/fetch) +. So far most of our documentation has been written majorly by the engineering +team and there is need for improving the overall structure and making some parts +more friendly from a new user perspective. We have mostly complete +[reference documentation](https://dvc.org/doc/commands-reference) for each +command, although some functions are missing good actionable examples. 
We also +have a [User Guide](https://dvc.org/doc/user-guide), however it is not in very +good shape. We strive for making our documentation clear and comprehensive for +users of various backgrounds and proficiency levels and this is where we do need +some fresh perspective. + +### How DVC documentation is built + +We have an open Github Apache-2 licensed repository for the +[DVC website](https://github.com/iterative/dvc.org), the documentation engine +and the [documentation files](https://github.com/iterative/dvc.org). The website +is built with Node.js + React, including the documentation engine (built +in-house). + +Each documentation page is a static Markdown file in the repository, e.g. +[example here](https://github.com/iterative/dvc.org/blob/main/content/docs/command-reference/index.md). +It is rendered dynamically in the browser, no preprocessing is required. It +means that tech writers or contributors need to write/edit a Markdown file, +create a pull request and merge it into the master branch of the +[repository.](https://github.com/iterative/dvc.org) The complete +[documentation contributing guide](https://github.com/iterative/dvc.org/blob/main/README.md#contributing) +describes the directory structure and locations for the different documentation +parts. + +### DVC’s approach to documentation work + +Documentation tasks and issues are maintained on our doc’s GitHub +[issue tracker](https://github.com/iterative/dvc.org/issues). Changes to the +documentation are made via pull requests on GitHub, and go through our standard +review process which is the same for documentation and code. A technical writer +would be trained in working with our current development process. It generally +means that tech writers or contributors need to write/edit a Markdown file, use +git and Github to create a pull request and publish it. 
The documentation +[contributing guide](https://github.com/iterative/dvc.org/blob/main/README.md#contributing) +includes style conventions and other details. Documentation is considered of the +same importance as code. Engineering team has a policy to write or update the +relevant sections if something new is released. If it’s something too involved +engineers may create a ticket and ask for help. There is one maintainer who is +responsible for doing final reviews and merging the changes. In this sense, our +documentation is very similar to any other open source project. + +## Project ideas for GSoD’19 + +We identified a number of ideas to work on and there are two major topics these +ideas fall into. Both topics are pretty broad and we don’t expect we can +completely cover them during this GSoD but hopefully we can make certain +progress. + +First of all, we want to bring more structure and logic to our documentation to +improve user onboarding experience. The goal is for a new user to have a clear +path they can follow and understand what takeaways each part of the +documentation provides. In particular, improving how +[Get Started](https://dvc.org/doc/get-started), +[Tutorials](https://dvc.org/doc/tutorial) and +[Examples](https://dvc.org/doc/tutorials/versioning) relate to each other, +restructuring the existing [User Guide](https://dvc.org/doc/user-guide) to +explain basic concepts, and writing more use cases that resonate with ML +engineers and data scientists. + +The other issue we would like to tackle is improving and expanding the existing +reference docs — commands descriptions, examples, etc. It involves filling in +the gaps and developing new sections, similar to +[this one](https://dvc.org/doc/commands-reference/fetch). We would also love to +see more illustrative materials. 
+ +### Project 1: Improving and expanding User Guide + +**Description and details:** Reviewing, restructuring and filling major gaps in +the User Guide (introductory parts of the basic concepts of DVC), e.g. have a +look at [this ticket](https://github.com/iterative/dvc.org/issues/144) or +[this one](https://github.com/iterative/dvc.org/issues/53). + +**Mentors**: [@shcheklein](https://github.com/shcheklein) and +[@dmpetrov](https://github.com/dmpetrov) + +### Project 2: Expanding and developing new tutorials and use cases. + +**Description and details:** We already have some requests for more tutorials, +e.g. [this ticket](https://github.com/iterative/dvc.org/issues/96). Here is +another good [use case request](https://github.com/iterative/dvc.org/issues/194) +. If you are going to work on this project you would need some domain knowledge, +preferably some basic ML or data science experience. + +**Mentors**: [@shcheklein](https://github.com/shcheklein) and +[@dmpetrov](https://github.com/dmpetrov) + +### Project 3: Improving new user onboarding + +**Description and details:** Analyze and restructure user walkthrough across +[Get started](https://dvc.org/doc/get-started), +[Tutorials](https://dvc.org/doc/tutorial) and +[Examples](https://dvc.org/doc/tutorials/versioning). These three have one thing +in common — hands-on experience with DVC. If you choose this project, we will +work together to come up with a better location for the Examples (to move them +out of the Get Started shadow), and a better location for the Tutorials (to +reference external tutorials that were developed by our community members and +published on different platforms). + +**Mentors**: [@shcheklein](https://github.com/shcheklein) and +[@dmpetrov](https://github.com/dmpetrov) + +### Project 4: Improving commands reference + +**Description and details:** We will work on improving our +[Commands reference](https://dvc.org/doc/commands-reference) section. 
This +includes expanding and filling in the gaps. One of the biggest pain points right +now are Examples. Users want them to be +[easy to run and try](https://github.com/iterative/dvc.org/issues/198) and here +is a lot to be done in terms of improvement. We have a good example of how is +should be done [here](https://dvc.org/doc/commands-reference/fetch). + +**Mentors**: [@shcheklein](https://github.com/shcheklein) and +[@dmpetrov](https://github.com/dmpetrov) + +### Project 5: Describe and integrate “DVC packages” + +**Description and details:** Describe the brand new feature “DVC packages” and +integrate it with the rest of the documentation. We have been working hard to +release a few new commands to help with datasets management (have a look at +[this ticket](https://github.com/iterative/dvc/issues/1487)). It’s a major +feature that deserves its place in the Get Started, Use cases, Commands +Reference, etc. + +**Mentors**: [@shcheklein](https://github.com/shcheklein) and +[@dmpetrov](https://github.com/dmpetrov) + +The ideas we outline above are just an example of what we can work on. We are +open for any other suggestions and would like to work together with the +technical writer to make the contribution experience both useful and enjoyable +for all parties involved. If you have any suggestions or questions we would love +to hear from you => DVC.org/support and our DMs on +[Twitter](https://twitter.com/DVCorg) are always open! + +
+ +Special thanks to the [NumFOCUS](https://numfocus.org/) for the ideas list +inspiration. + +If you are a tech writer — check the +[Technical writer guide](https://developers.google.com/season-of-docs/docs/tech-writer-guide). +From April 30, 2019 you can see the list of participating open source +organizations on the [Season of Docs website](https://g.co/seasonofdocs). The +application period for technical writers opens on **May 29, 2019** and ends on +June 28, 2019. diff --git a/content/blogs/2019-05-21-may-19-dvc-heartbeat.md b/content/blogs/2019-05-21-may-19-dvc-heartbeat.md new file mode 100644 index 0000000000..db4be21e11 --- /dev/null +++ b/content/blogs/2019-05-21-may-19-dvc-heartbeat.md @@ -0,0 +1,301 @@ +--- +title: May ’19 DVC❤️Heartbeat +date: 2019-05-21 +description: > + DVC accepted into Google Season of Docs 🎉, Dmitry's talk at the O’Reilly AI + Conference, new portion of Discord gems, and articles either created or + brought to us by our community. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-05-21/post-image.jpeg +pictureComment: | + Kudos to [StickerMule.com](https://www.stickermule.com) for our amazing + stickers (and great customer service)! +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/may-19-dvc-heartbeat/290 +tags: + - Heartbeat + - Community + - Google Season of Docs +--- + +## News and links + +This section of DVC Heartbeat is growing with every new Issue and this is +already quite a good piece of news! + +One of the most exciting things we want to share this month is acceptance of DVC +into the [Google Season of Docs](https://developers.google.com/season-of-docs/). 
+It is a new and unique program sponsored by Google that pairs technical writers +with open source projects to collaborate and improve the open source project +documentation. You can find the outline of DVC vision and project ideas in +[this dedicated blogpost](https://blog.dataversioncontrol.com/dvc-project-ideas-for-google-summer-of-docs-2019-defe3a73b248) +and check the +[full list of participating open source organizations](https://developers.google.com/season-of-docs/docs/participants/). +Technically the +[program is starting in a few months](https://developers.google.com/season-of-docs/docs/timeline), +but there is already a fantastic increase in the amount of commits and +contributors, and we absolutely love it! + +The other important milestone for us was the first offline meeting with our +distributed remote team. Working side by side and having non-Zoom meetings with +the team was amazing. Joining our forces to prepare for the upcoming conferences +turned out to be the most valuable, educating and uniting experience for the +whole team. + +It’s a shame that our tech lead was unable to join us it due to another visa +denial. We do hope he will finally make it to the USA for the next big +conference. + +![](../uploads/images/2019-05-21/the-world-is-changing.png) + +While we were busy finalizing all the PyCon 2019 prep, our own +[Dmitry Petrov](https://twitter.com/FullStackML) flew to New York to speak at +the +[O’Reilly AI Conference](https://conferences.oreilly.com/artificial-intelligence/ai-ny) +about the +[Open Source tools for Machine Learning Models and Datasets versioning](https://www.oreilly.com/library/view/artificial-intelligence-conference/9781492050544/video324691.html). +Unfortunately the video is available for the registered users only (with a free +trial option) but you can have a look at Dmitry’s slides +[here](https://www.slideshare.net/DmitryPetrov15/dvc-oreilly-artificial-intelligence-conference-2019-new-york). 
+ +![](../uploads/images/2019-05-21/iterative-ai-twitter.png) + +We renamed our Twitter! Our old handle was a bit misleading and we moved from +@Iterativeai to [@DVCorg](https://twitter.com/DVCorg) (yet keep the old one for +future projects). + +Our team is so happy every time we discover an article featuring DVC or +addressing one of the burning ML issues we are trying to solve. Here are some of +our favorite links from the past month: + +- **[Version Control For Your Machine Learning Projects — Episode 206](https://www.pythonpodcast.com/data-version-control-episode-206/)** + by **[Tobias Macey](https://www.linkedin.com/in/tmacey/)** + + + +> Version control has become table stakes for any software team, but for machine +> learning projects there has been no good answer for tracking all of the data +> that goes into building and training models, and the output of the models +> themselves. To address that need Dmitry Petrov built the Data Version Control +> project known as DVC. In this episode he explains how it simplifies +> communication between data scientists, reduces duplicated effort, and +> simplifies concerns around reproducing and rebuilding models at different +> stages of the projects lifecycle. + +- **Here is an + [article](https://towardsdatascience.com/data-version-control-with-dvc-what-do-the-authors-have-to-say-3c3b10f27ee) + by [Favio Vázquez](https://medium.com/@faviovazquez) with a transcript of this + podcast episode.** + + + +- **[Why Git and Git-LFS is not enough to solve the Machine Learning Reproducibility crisis](https://towardsdatascience.com/why-git-and-git-lfs-is-not-enough-to-solve-the-machine-learning-reproducibility-crisis-f733b49e96e8)** + + + +> With Git-LFS your team has better control over the data, because it is now +> version controlled. Does that mean the problem is solved? Earlier we said the +> “_key issue is the training data_”, but that was a lie. Sort of. Yes keeping +> the data under version control is a big improvement. 
But is the lack of +> version control of the data files the entire problem? No. + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: This might be [a favourite gem of ours ](https://discordapp.com/channels/485586884165107732/485598848111083531/572960640122224640) — our engineers are so fast that someone assumed they were bots. + +We feared that too until we met them in person. They appeared to be real (unless +bots also love Ramen now)! + +![](../uploads/images/2019-05-21/bots-also-love-ramen-now.png) + +### Q: [Is this the best way to track data with DVC when code and data are separate?](https://discordapp.com/channels/485586884165107732/485596304961962003/572974117351849997) Having being burned by this a couple of times, i.e accidentally pushing large files to GitHub, I now keep my code and data separate. + +Every time you run `dvc add` to start tracking some data artifact, its path is +automatically added to the `.gitignore` file, as a result it is hard to commit +it to git by mistake — you would need to explicitly modify the `.gitignore` +first. The feature to track some external data is called +[external outputs](https://dvc.org/doc/user-guide/managing-external-data) (if +all you need is to track some data artifacts). Usually it is used when you have +some data on S3 or SSH and don’t want to pull it into your working space, but +it’s working even when your data is located on the same machine outside of the +repository. + +### Q: [How do I wrap a step that downloads a file/directory into a DVC stage?](https://discordapp.com/channels/485586884165107732/485596304961962003/571342592508428289) I want to ensure that it runs only if file has no been downloaded yet + +Use `dvc import` to track and download the remote data first time and next time +when you do dvc repro if data has changed remotely. 
If you don’t want to track +remote changes (lock the data after it was downloaded), use `dvc run` with a +dummy dependency (any text file will do you do not touch) that runs an actual +wget/curl to get the data. + +### Q: [How do I show a pipeline that does not have a default Dvcfile?](https://discordapp.com/channels/485586884165107732/485596304961962003/570943786151313408) (e.g. I assigned all files names manually with `-f` in the `dvc run` command and I just don’t have `Dvcfile` anymore) + +Almost any command in DVC that deals with pipelines (set of DVC-files) accepts a +single stage as a target, for example: + +```dvc +$ dvc pipeline show — ascii model.dvc +``` + +### Q: [DVC hangs or I’m getting `database is locked` issue](https://discordapp.com/channels/485586884165107732/485596304961962003/570843482218823682) + +It’s a well known problem with NFS, CIFS (Azure) — they do not support file +locks properly which is required by the SQLLite engine to operate. The easiest +workaround — don’t create a DVC project on network attached partition. In +certain cases a fix can be made by changing mounting options, check +[this discussion](https://discordapp.com/channels/485586884165107732/485596304961962003/570276668694855690) +for the Azure ML Service. + +### Q: [How do I use DVC if I use a separate drive to store the data and a small/fast SSD to run computations?](https://discordapp.com/channels/485586884165107732/485596304961962003/570091809594671126) I don’t have enough space to bring data to my working space. + +An excellent question! The short answer is: + +```dvc +# To move your data cache to a big partition +$ dvc cache dir --local /path/to/an/external/partition + +# To enable symlinks/harldinks to avoid actual copying +$ dvc config cache.type reflink, hardlink, symlink, copy + +# To protect the cache +$ dvc config cache.protected true +``` + +The last one is highly recommended to make links in your working space read-only +to avoid corrupting the cache. 
Read more about different link types +[here](https://dvc.org/doc/user-guide/large-dataset-optimization). + +To add your data first time to the DVC cache, do a clone of the repository on a +big partition and run `dvc add` to add your data. Then you can do `git pull`, +`dvc pull` on a small partition and DVC will create all the necessary links. + +### Q: [Why I’m getting `Paths for outs overlap` error when I run `dvc add` or `dvc run`?](https://discordapp.com/channels/485586884165107732/485596304961962003/571335064374345749) + +Usually it means that a parent directory of one of the arguments for `dvc add` / +`dvc run` is already tracked. For example, you’ve added the whole datasets +directory already. And now you are trying to add a subdirectory, which is +already tracked as a part of the datasets one. No need to do that. You could +`dvc add datasets` or `dvc repro datasets.dvc` to save changes. + +### Q: [I’m getting `ascii codec can’t encode character` error on DVC commands when I deal with unicode file names](https://discordapp.com/channels/485586884165107732/485596304961962003/567310354766495747) + +[Check the locale settings you have](https://perlgeek.de/en/article/set-up-a-clean-utf8-environment) +(`locale` command in Linux). Python expects a locale that can handle unicode +printing. Usually it’s solved with these commands: `export LC_ALL=en_US.UTF-8` +and `export LANG=en_US.UTF-8`. You can place those exports into `.bashrc` or +other file that defines your environment. + +### Q: [Does DVC use the same logins `aws-cli` has when using an S3 bucket as its repo/remote storage](https://discordapp.com/channels/485586884165107732/485596304961962003/563149775340568576)? + +In short — yes, but it can be also configured. DVC is going to use either your +default profile (from `~/.aws/*`) or your env vars by default. If you need more +flexibility (e.g. 
you need to use different credentials for different projects, +etc) check out +[this guide](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-profiles.html) +to configure custom aws profiles and then you could use them with DVC using +these +[remote options](https://dvc.org/doc/commands-reference/remote/add#options). + +### Q: [How can I output multiple metrics from a single file?](https://discordapp.com/channels/485586884165107732/485596304961962003/566000729505136661) + +Let’s say I have the following in a file: + +```json +{ + “AUC_RATIO”: + { + “train”: 0.8922748258797667, + “valid”: 0.8561602726251776, + “xval”: 0.8843431199314923 + } +} +``` + +How can I show both `train` and `valid` without `xval`? + +You can use `dvc metrics show` command `--xpath` option and provide multiple +attribute names to it: + +```dvc +$ dvc metrics show metrics.json \ + --type json \ + --xpath AUC_RATIO[train,valid] + metrics.json: + 0.89227482588 + 0.856160272625 +``` + +### Q: [What is the quickest way to add a new dependency to a DVC-file?](https://discordapp.com/channels/485586884165107732/485596304961962003/566314479499870211) + +There are a few options to add a new dependency: + +- simply opening a file with your favorite editor and adding a dependency there + without md5. DVC will understand that that stage is changed and will re-run + and re-calculate md5 checksums during the next DVC repro; + +- use `dvc run --no-exec` is another option. It will rewrite the existing file + for you with new parameters. + +### Q: [Is there a way to add a dependency to a python package, so it runs a stage again if it imported the updated library?](https://discordapp.com/channels/485586884165107732/485596304961962003/566315265646788628) + +The only recommended way so far would be to somehow make DVC know about your +package’s version. 
One way to do that would be to create a separate stage that +would be dynamically printing version of that specific package into a file, that +your stage would depend on: + +```dvc +$ dvc run -o mypkgver 'pip show mypkg > mypkgver’ +$ dvc run -d mypkgver -d ... -o .. mycmd +``` + +### Q: [Is there anyway to forcibly recompute the hashes of dependencies in a pipeline DVC-file?](https://discordapp.com/channels/485586884165107732/485596304961962003/564807276146458624) + +E.g. I made some whitespace/comment changes in my code and I want to tell DVC +“it’s ok, you don’t have to recompute everything”. + +Yes, you could `dvc commit -f`. It will save all current checksum without +re-running your commands. + +### Q: [I have projects that use data that’s stored in S3. I never have data locally to use `dvc push`, but I would like to have this data version controlled.](https://discordapp.com/channels/485586884165107732/485596304961962003/563352000281182218) Is there a way to use the features of DVC in this use case? + +Yes! This DVC features is called +[external outputs](https://dvc.org/doc/user-guide/large-dataset-optimization) +and +[external dependencies](https://dvc.org/doc/user-guide/external-dependencies). +You can use one of them or both to track, process, and version your data on a +cloud storage without downloading it locally. + +
+ +If you have any questions, concerns or ideas, let us know +[here](https://dvc.org/support) and our stellar team will get back to you in no +time! diff --git a/content/blogs/2019-06-26-june-19-dvc-heartbeat.md b/content/blogs/2019-06-26-june-19-dvc-heartbeat.md new file mode 100644 index 0000000000..408becc0a0 --- /dev/null +++ b/content/blogs/2019-06-26-june-19-dvc-heartbeat.md @@ -0,0 +1,233 @@ +--- +title: June ’19 DVC❤️Heartbeat +date: 2019-06-26 +description: > + First DVC user survey, sharing our PyCon experience, new portion of Discord + discussions, and articles either created or brought to us by our community. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-06-26/post-image.png +pictureComment: | + Thanks to the amazing [Signaturit Tech](https://twitter.com/SignaturitTech) + team for this + [photo](https://twitter.com/SignaturitTech/status/1127927520140120065?s=20)! +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/june-19-dvc-heartbeat/289 +tags: + - PyCon + - Heartbeat + - Community +--- + +## News and links + +We want to start by saying to our users, contributors, and community members how +grateful we are for the fantastic work you are doing contributing to DVC, giving +talks about DVC, sharing your feedback, use cases and your concerns. A huge +thank you to each of you from the DVC team! + +We would love to give back and support any positive initiative around DVC — just +let us know [here](https://dvc.org/support) and we will send you a bunch of cool +swag, connect to a tech expert or find another way to support your project. Our +[DMs on Twitter](https://twitter.com/DVCorg) are open, too. 
+ +**And if you have 4 minutes to spare, we are conducting out first +[DVC user survey](https://docs.google.com/forms/d/1tmn8YHLUkeSi5AIq4DGJi28iZy9HTazl6DWKe3Hxpnc/edit?ts=5cfc47c2) +and would love to hear from you!** + +Aside from admiring great DVC-related content from our users we have one more +reason to particularly enjoy the past month — DVC team went to Cleveland to +attend [PyCon 2019](https://us.pycon.org/2019/about/) and it was a blast! + +![](../uploads/images/2019-06-26/cleveland-to-attend-pycon-2019.jpeg) _Amazing +[Jennifer](https://github.com/sureL) and her artwork for our +[SupportOpenSource](https://twitter.com/hashtag/SupportOpenSource) contest_ + +We had it all. Running our first ever conference booth, leading an impromptu +unconference discussion and arranging some cool +[#SupportOpenSource](https://twitter.com/hashtag/SupportOpenSource?src=hashtag_click) +activities was great! Last-minute accommodation cancellations, booth equipment +delivery issues, and being late for our very own talk was not so great. Will be +sharing more about it in a separate blogpost soon. + +https://youtu.be/jkfh2PM5Sz8 + +Here is [Dmitry Petrov](https://twitter.com/FullStackML)’s PyCon +[talk](https://www.youtube.com/watch?v=jkfh2PM5Sz8) and +[slides](https://docs.google.com/presentation/d/1CYt0w8WoZAXiQEtVDVDsTnQumzdZx91v32MwEK20R-E/edit) +on Machine learning model and dataset versioning practices. + +We absolutely loved being at PyCon and can’t wait for our next conference! + +
+ +Our team is so happy every time we discover an article featuring DVC or +addressing one of the burning ML issues we are trying to solve. Here are some of +the links that caught our eye past month: + +- **[The Rise of DataOps (from the ashes of Data Governance)](https://towardsdatascience.com/the-rise-of-dataops-from-the-ashes-of-data-governance-da3e0c3ac2c4) + by [Ryan Gross](https://towardsdatascience.com/@ryanwgross).** + +A brilliant comprehensive read on the current data management issues. It might +be the best article we have ever read on this subject. Every word strongly +resonates with our vision and ideas behind DVC. Highly recommended by DVC team! + + + +> Legacy Data Governance is broken in the ML era. Let’s rebuild it as an +> engineering discipline. At the end of the transformation, data governance will +> look a lot more like DevOps, with data stewards, scientists, and engineers +> working closely together to codify the governance policies. + +- **[First Impressions of Data Science Version Control (DVC)](https://medium.com/@christopher.samiullah/first-impressions-of-data-science-version-control-dvc-fe96ab29cdda) + by [Christopher Samiullah](https://christophergs.github.io/)** + + + +> In 2019, we tend to find organizations using a mix of git, Makefiles, ad hoc +> scripts and reference files to try and achieve reproducibility. DVC enters +> this mix offering a cleaner solution, specifically targeting Data Science +> challenges. 
+ +- **[Versioning and Reproducibility with MLV-tools and DVC](https://github.com/peopledoc/mlvtools-tutorial): + [Talk](https://peopledoc.github.io/mlvtools-tutorial/talks/pyData/presentation.html#/) + and + [Tutorial](https://peopledoc.github.io/mlvtools-tutorial/talks/workshop/presentation.html#/) + by [Stéphanie Bracaloni](https://github.com/sbracaloni) and + [Sarah Diot-Girard](https://github.com/SdgJlbl).** + +![](../uploads/images/2019-06-26/versioning-and-reproducibility-with-mlv-tools.png) + +- **[Becoming a machine learning company means investing in foundational technologies](https://www.oreilly.com/ideas/becoming-a-machine-learning-company-means-investing-in-foundational-technologies) + by [Ben Lorica](https://www.oreilly.com/people/4e7ad-ben-lorica)** + + + +> With an eye toward the growing importance of machine learning, we recently +> completed +> [a data infrastructure survey](https://www.oreilly.com/data/free/evolving-data-infrastructure.csp) +> that drew more than 3,200 respondents. + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: [Does DVC support Azure Data Lake Gen1?](https://discordapp.com/channels/485586884165107732/563406153334128681/575655655629651968) + +Azure data lake is HDFS compatible. And DVC supports HDFS remotes. Give it a try +and let us know if you hit any problems [here](https://dvc.org/chat). + +### Q: [An excellent discussion on versioning tabular (SQL) data.](https://discordapp.com/channels/485586884165107732/563406153334128681/575681811401801748) Do you know of any tools that deal better with SQL-specific versioning? + +It’s a wide topic. The actual solution might depend on a specific scenario and +what exactly needs to be versioned. DVC does not provide any special +functionality on top of databases to version their content. + +Depending on your use case, our recommendation would be to run SQL and pull the +result file (CSV/TSV file?) that then can be used to do analysis. This file can +be taken under DVC control. Alternatively, in certain cases source files (that +are used to populate the databases) can be taken under control and we can keep +versions of them, or track incoming updates. + +Read the +[discussion](https://discordapp.com/channels/485586884165107732/563406153334128681/575681811401801748) +to learn more. + +### Q: [How does DVC do the versioning between binary files?](https://discordapp.com/channels/485586884165107732/563406153334128681/575686711821205504) Is there a binary diff, similar to git? Or is every version stored distinctly in full? + +DVC is just saving every file as is, we don’t use binary diffs right now. 
There +won’t be a full directory (if you added just a few files to a 10M files +directory) duplication, though, since we treat every file inside as a separate +entity. + +### Q: [Is there a way to pass parameters from e.g. `dvc repro` to stages?](https://discordapp.com/channels/485586884165107732/563406153334128681/576160840701575169) + +The simplest option is to create a config file — json or whatnot — that your +scripts would read and your stages depend on. + +### Q: [What is the best way to get cached output files from different branches simultaneously?](https://discordapp.com/channels/485586884165107732/563406153334128681/577852740034625576) For example, cached tensorboard files from different branches to compare experiments. + +There is a way to do that through our (still not officially released) API pretty +easily. Here is an +[example script](https://cdn.discordapp.com/attachments/563406153334128681/577894682722304030/dvc_get_output_files.py) +how it could be done. + +### Q: [Docker and DVC.](https://discordapp.com/channels/485586884165107732/563406153334128681/583949033685516299) To being able to push/pull data we need to run a git clone to get DVC-files and remote definitions — but we worry that would make the container quite heavy (since it contains our entire project history). + +You can do `git clone — depth 1`, which will not download any history except the +latest commits. + +### Q: [After DVC pushing the same file, it creates multiple copies of the same file. Is that how it’s supposed to work?](https://discordapp.com/channels/485586884165107732/485596304961962003/574133734136086559) + +If you are pushing the same file, there are no copies pushed or saved in the +cache. DVC is using checksums to identify files, so if you add the same file +once again, it will detect that cache for it is already in the local cache and +wont copy it again to cache. 
Same with dvc push, if it sees that you already +have cache file with that checksum on your remote, it won’t upload it again. + +### Q: [How do I uninstall DVC on Mac (installed via `pkg` installer)?](https://discordapp.com/channels/485586884165107732/485596304961962003/574941227624169492) + +Something like this should work: + +```dvc +$ which dvc +/usr/local/bin/dvc -> /usr/local/lib/dvc/dvc + +$ ls -la /usr/local/bin/dvc +/usr/local/bin/dvc -> /usr/local/lib/dvc/dvc + +$ sudo rm -f /usr/local/bin/dvc +$ sudo rm -rf /usr/local/lib/dvc +$ sudo pkgutil --forget com.iterative.dvc +``` + +### Q: [How do I pull from a public S3 bucket (that contains DVC remote)?](https://discordapp.com/channels/485586884165107732/485596304961962003/575236576309674024) + +Just add public URL of the bucket as an HTTP endpoint. See +[here](https://github.com/iterative/example-get-started/blob/master/.dvc/config) +for an example. +[https://remote.dvc.org/get-started](https://remote.dvc.org/get-started) is made +to redirect to the S3 bucket anyone can read from. + +### Q: [I’m getting the same error over and over about locking:](https://discordapp.com/channels/485586884165107732/485596304961962003/575535709490905101) `ERROR: failed to lock before running a command — cannot perform the cmd since DVC is busy and locked. Please retry the command later.` + +Most likely it happens due to an attempt to run DVC on NFS that has some +configuration problems. There is a +[well known problem with DVC on NFS](https://github.com/iterative/dvc/issues/1918) +— sometimes it hangs on trying to lock a file. The usual workaround for this +problem is to allocate DVC cache on NFS, but run the project (git clone, DVC +metafiles, etc) on the local file system. Read +[this answer](https://discuss.dvc.org/t/share-nas-data-in-server/180/4?u=shcheklein) +to see how it can be setup. + +
+ +If you have any questions, concerns or ideas, let us know in the comments below +or connect with DVC team [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are open, too. diff --git a/content/blogs/2019-08-01-july-19-dvc-heartbeat.md b/content/blogs/2019-08-01-july-19-dvc-heartbeat.md new file mode 100644 index 0000000000..246053a312 --- /dev/null +++ b/content/blogs/2019-08-01-july-19-dvc-heartbeat.md @@ -0,0 +1,212 @@ +--- +title: July ’19 DVC❤️Heartbeat +date: 2019-08-01 +description: > + As we continue to grow DVC together with our fantastic contributors, we enjoy + more and more insights, discussions, and articles either created or brought to + us by our community. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-08-01/post-image.png +pictureComment: | + Special edition + [DVC shirt](https://twitter.com/rkuprieiev/status/1144298339200098306?s=20). + We made this one for [Ruslan](https://github.com/efiop) — DVC maintainer and + the best tech lead. +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/july-19-dvc-heartbeat/288 +tags: + - Heartbeat + - Open Source Summit + - Community +--- + +## News and links + +As we continue to grow DVC together with our fantastic contributors, we enjoy +more and more insights, discussions, and articles either created or brought to +us by our community. We feel it is the right time to start sharing more of your +news, your stories and your discoveries. New Heartbeat is here! + +Speaking of our own news — next month DVC team is going to the +[Open Source North America Summit](https://events.linuxfoundation.org/events/open-source-summit-north-america-2019/). 
+It is taking place in San Diego on August 21–23. +[Dmitry](https://ossna19.sched.com/speaker/dmitry35) and +[Sveta](https://ossna19.sched.com/speaker/svetlanagrinchenko) will be giving +talks and we will run a booth. So looking forward to it! Stop by for a chat and +some cool swag. And if you are in San Diego on those days and want to catch up — +please let us know [here](http://dvc.org/support) or on Twitter! + + + + + +Every month our team is excited to discover new great pieces of content +addressing some of the burning ML issues. Here are some of the links that caught +our eye in June: + +- **[Principled Machine Learning: Practices and Tools for Efficient Collaboration](https://dev.to/robogeek/principled-machine-learning-4eho) + by [David Herron](https://medium.com/@7genblogger)** + + + +> As we’ve seen in this article some tools and practices can be borrowed from +> regular software engineering. However, the needs of machine learning projects +> dictate tools that better fit the purpose. + +- **First + [ML-REPA](http://ml-repa.ru/) [Meetup: Reproducible ML experiments](http://ml-repa.ru/page6697700.html) + hosted by [Raiffeisen DGTL](https://dgtl.raiffeisen.ru/) — check out the video + and slide decks.** + + + +[ML-REPA](http://ml-repa.ru/) is a new fantastic resource for +Russian-speaking folks interested in Reproducibility, Experiments and Pipelines +Automation. Curated by [Mikhail Rozhkov](https://twitter.com/mnrozhkov) and +highly recommended by our team. + +### [How do you manage your machine learning experiments?](https://www.reddit.com/r/MachineLearning/comments/bx0apm/d_how_do_you_manage_your_machine_learning/) discussion on Reddit is full of insights. +
[D] How do you manage your machine learning experiments? from r/MachineLearning
+ +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: I have within one git repository different folders with very different content (basically different projects, or content I want to have different permissions to), and I thought about using different buckets in AWS as remotes. [I’m not sure if it’s possible with DVC to store some files in some remote, and some other files in some other remote, is it?](https://discordapp.com/channels/485586884165107732/485596304961962003/575718048330416158) + +You can definitely add more than one remote (see +[dvc remote add](https://dvc.org/doc/commands-reference/remote/add)) and then +[dvc push](https://dvc.org/doc/commands-reference/push) has a `-R` option to +pick which one to send the cached data files (deps, outs, etc) to. We would not +recommend doing this though. It complicates the commands you have to run — you +will need to remember to specify a remote name for every command that deals with +data — `push`, `pull`, `gc`, `fetch`, `status`, etc. Please, leave a comment in +the relevant issue [here](https://github.com/iterative/dvc/issues/2095) if this +case is important for you. + +### Q: [Is that possible with DVC to have multiple (few) metric files and compare them all at once?](https://discordapp.com/channels/485586884165107732/485596304961962003/578532350221352987) For example, we’d like to consider as metrics the loss of a neural network training process (loss as a `-M` output of a training stage), and also apart knowing the accuracy of the NN on a test set (another `-M` output of eval stage). + +Yes, it is totally fine to use `-M` in different stages. `dvc metrics show` will +just show both metrics. 
+ +### Q: [I have a scenario where an artifacts (data) folder is created by the dvc run command via the `-o` flag. I have manually added another file into or modified the artifacts folder but when I do `dvc push` nothing happens, is there anyway around this?](https://discordapp.com/channels/485586884165107732/485596304961962003/577362750443880449) + +Let’s first do a quick recap on how DVC handles data files (you can definitely +find more information on the [DVC documentation site](http://dvc.org/docs)). + +- When you do `dvc add`, `dvc run` or `dvc import` DVC puts artifacts (in case + of `dvc run` artifacts == outputs produced by the command) into `.dvc/cache` + directory (default cache location). You don’t see this happening because + [DVC keeps links](https://dvc.org/doc/user-guide/large-dataset-optimization) + (or in certain cases creates a copy) to these files/directories. + +- `dvc push` does not move files from the workspace (that is what you see) to the + remote storage, it always moves files/directories that are already in cache + (default is .dvc/cache). + +- So, now you’ve added a file manually, or made some other modifications. But + these files are not in cache yet. The analogy would be `git commit`. You + change the file, you do `git commit`, only after that you can push something + to a Git server (Github/Gitlab, etc). The difference is that DVC is doing commit + (moves files to cache) automatically in certain cases — `dvc add`, `dvc run`, + etc. + +There is an explicit command — `dvc commit` - that you should run if you want to +enforce the change to the output produced by `dvc run`. This command will update +the corresponding DVC-files (.dvc extension) and will move data to cache. After +that you should be able to run `dvc push` to save your data on the external +storage. + +Note, when you do an explicit commit like this you are potentially “breaking”
In a sense that there is no guarantee now that your +directory can be produced by `dvc run`/`dvc repro` — since you changed it +manually. + +### Q: [I’d like to transform my dataset in-place to avoid copying it, but I can’t use `dvc run` to do this because it doesn’t allow the same directory as an output and a dependency.](https://discordapp.com/channels/485586884165107732/485596304961962003/578898899469729796) + +You could do this in one step (one stage). So that getting your data and +modifying it, is one stage. So you don’t depend on the data folder. You just +could depend on your download + modifying script. + +### Q: [Can anyone tell me what this error message is about?](https://discordapp.com/channels/485586884165107732/485596304961962003/579283950778712076) “To avoid unpredictable behavior, rerun command with non overlapping outs paths.” + +Most likely it means that there is a DVC-file that has the same output twice. +Or there are two DVC-files that share the same output file. + +### Q: [I’m getting “No such file or directory” error when I do `dvc run` or `dvc repro`](https://discordapp.com/channels/485586884165107732/485596304961962003/580176327701823498). The command runs fine if I don’t use DVC. + +That happens because dvc run is trying to ensure that your command is the one +creating your output and removes existing outputs before executing the command. +So that when you run `dvc repro` later, it will be able to fully reproduce the +output. So you need to make the script create the directory or file. + +### Q: [I’m implementing a CI/CD and I would like to simplify my CI/CD or even my training code (keeping them cloud agnostic) by using `dvc pull` inside my Docker container when initializing a training job. ](https://discordapp.com/channels/485586884165107732/485596304961962003/581256265234251776) Can DVC be used in this way? + +Yes, it’s definitely a valid case for DVC.
There are different ways of +organizing the storage that training machines are using to access data. From the +very simple — using local storage volume and pulling data from the remote +storage every time — to using NAS or EFS to store a shared DVC cache. + +### Q: [I was able to follow the getting started examples, however now I am trying to push my data to Github, I keep getting the following error: “ERROR: failed to push data to the cloud — upload is not supported by https remote”.](https://discordapp.com/channels/485586884165107732/563406153334128681/598866528984891403) + +HTTP remotes do not support upload yet. Example Get Started repository is using +HTTP to keep it read-only and abstract the actual storage provider we are using +internally. If you actually check the remote URL, you should see that it is an +S3 bucket and AWS provides an HTTP end-point to read data from it. + +### Q: I’m looking to configure AWS S3 as a storage for DVC. I’ve set up the remotes and initialized dvc in the git repository. I tried testing it by pushing a dataset in the form of an excel file. The command completed without any issues but this is what I’m seeing in S3. [DVC seems to have created a subdirectory in the intended directory called “35” where it placed this file with a strange name.](https://discordapp.com/channels/485586884165107732/485596304961962003/585967551708921856) + +This is not an issue, it is an implementation detail. There’s no current way to +upload the files with the original filename (In this case, the S3 bucket will +have the file `data.csv` but with another name `20/893143…`). The reason behind +this decision is because we want to store a file only once no matter how many +dataset versions it’s used in. Also, it’s a reliable way to uniquely identify +the file. You don’t have to be afraid that someone decided to create a file with +the same name (path) but a different content. 
+ +### Q: [Is it possible to only have a shared ‘local’ cache and no remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/587730054893666326) I’m trying to figure out how to use this in a 40 node cluster which already has very fast NFS storage across all the nodes. Not storing everything twice seems desirable. Esp. for the multi-TB input data + +Yes, and it’s one of the very common use cases, actually. All you need to do is to +use the `dvc cache dir` command to set up an external cache. There are a few caveats +though. Please, read +[this link](https://discuss.dvc.org/t/share-nas-data-in-server/180/4?u=shcheklein) +for an example of the workflow. +
+ +If you have any questions, concerns or ideas, let us know in the comments below +or connect with DVC team [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are always open, too. diff --git a/content/blogs/2019-09-26-september-19-dvc-heartbeat.md b/content/blogs/2019-09-26-september-19-dvc-heartbeat.md new file mode 100644 index 0000000000..5b6d4fc030 --- /dev/null +++ b/content/blogs/2019-09-26-september-19-dvc-heartbeat.md @@ -0,0 +1,355 @@ +--- +title: September ’19 DVC❤️Heartbeat +date: 2019-09-26 +description: > + Announcing our first meetup in San Francisco, kicking off Google Season of + Docs program, sharing Open Source Summit experience, and more news, links, and + gems. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-09-26/post-image.jpeg +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/september-19-dvc-heartbeat/287 +tags: + - Community + - Heartbeat + - Meetup + - Open Source Summit +--- + +## News and links + +We are super excited to co-host our very first +**[meetup in San Francisco on October 10](https://www.meetup.com/San-Francisco-Machine-Learning-Meetup/events/264846847/)**! +We will gather at the brand new Dropbox HQ office at 6:30 pm to discuss +open-source tools to version control ML models and experiments. +[Dmitry Petrov](https://twitter.com/FullStackML) is teaming up with +[Daniel Fischetti](https://www.linkedin.com/in/daniel-fischetti-4a6592bb/) from +[Standard Cognition](https://standard.ai/) to discuss best ML practices. 
Join us +and save your spot now: + + + +If you are not in SF on this date and happen to be in Europe — don’t miss the +PyCon DE & PyData Berlin 2019 joint event on October 9–11. We cannot make it to +Berlin this year, but we were thrilled to discover 2 independent talks featuring +DVC by +[Alessia Marcolini](https://pyvideo.org/pydata-berlin-2019/version-control-for-data-science.html) +and +[Katharina Rasch](https://pyvideo.org/pydata-berlin-2019/tools-that-help-you-get-your-experiments-under-control.html). + +Some other highlights of the end of summer: + +- Our users and contributors keep creating fantastic pieces of content around + DVC (sharing some links below, but it’s only a fraction of what we have in + stock — can’t be more happy and humbled about it!). + +- We’ve reached 79 contributors to + [DVC core project](https://github.com/iterative/dvc) and 74 contributors to + [DVC documentation](https://github.com/iterative/dvc.org) (and have something + special in mind to celebrate our 100th contributors). + +- We enjoyed working with all the talented + [Google Season of docs](https://developers.google.com/season-of-docs/) + applicants and now moving to the next stage with our chosen tech writer + [Dashamir Hoxha](http://dashohoxha.fs.al/). + +- We’ve crossed the 3,000 stars mark on Github + ([over 3,500 now](https://github.com/iterative/dvc)). Thank you for your + support! + + https://twitter.com/DVCorg/status/1147220439472545793 + +- We’ve had great time at the + [Open Source Summit](https://events.linuxfoundation.org/events/open-source-summit-north-america-2019/program/) + by Linux foundation in San Diego — speaking on stage, running a booth and + chatting with all the amazing open-source crowd out there. + + https://twitter.com/a142hr/status/1164256520235675648 + +![](../uploads/images/2019-09-26/open-source-summit-by-linux-foundation.jpeg) + +
+ +Here are some of the great pieces of content around DVC and ML ops that we +discovered in July and August: + +- ** Great insightful discussion on Twitter about versioning ML projects started + by [Nathan Benaich](https://medium.com/@NathanBenaich).** + + https://twitter.com/NathanBenaich/status/1151815916512010242 + +- **[Our Machine Learning Workflow: DVC, MLFlow and Training in Docker Containers](https://medium.com/ixorthink/our-machine-learning-workflow-dvc-mlflow-and-training-in-docker-containers-5b9c80cdf804) + by [Ward Van Laer](https://medium.com/@ward.vanlaer).** + +> It is possible to manage your work flow using open-source and free tools. + + + +- **[Using DVC to create an efficient version control system for data projects](https://medium.com/qonto-engineering/using-dvc-to-create-an-efficient-version-control-system-for-data-projects-96efd94355fe) + by [Basile Guerrapin](https://medium.com/@basile_16101).** + +> DVC brought versioning for inputs, intermediate files and algorithm models to +> the VAT auto-detection project and this drastically increased our +> **productivity**. + + + +- **[Managing versioned machine learning datasets in DVC, and easily share ML projects with colleagues](https://techsparx.com/software-development/ai/dvc/versioning-example.html) + by [David Herron](https://twitter.com/7genblogger).** + +> In this tutorial we will go over a simple image classifier. We will learn how +> DVC works in a machine learning project, how it optimizes reproducing results +> when the project is changed, and how to share the project with colleagues. 
+ + + +- **[How to use data version control (dvc) in a machine learning project](https://towardsdatascience.com/how-to-use-data-version-control-dvc-in-a-machine-learning-project-a78245c0185) + by [Matthias Bitzer](https://towardsdatascience.com/@matthiasbitzer94).** + +> To illustrate the use of dvc in a machine learning context, we assume that our +> data is divided into train, test and validation folders by default, with the +> amount of data increasing over time either through an active learning cycle or +> by manually adding new data. + + + +- **[Version Control ML Model](https://towardsdatascience.com/version-control-ml-model-4adb2db5f87c) + by [Tianchen Wu](https://towardsdatascience.com/@TianchenW)** + +> This post presents a solution to version control machine learning models with +> git and dvc ([Data Version Control](https://dvc.org/doc/tutorial)). + + + +- **[Reflinks vs symlinks vs hard links, and how they can help machine learning projects](https://dev.to/robogeek/reflinks-vs-symlinks-vs-hard-links-and-how-they-can-help-machine-learning-projects-1cj4) + by [David Herron](https://medium.com/@7genblogger)** + +> In this blog post we’ll go over the details of using links, some cool new +> stuff in modern file systems (reflinks), and an example of how DVC (Data +> Version Control, [https://dvc.org/](https://dvc.org/)) leverages this. + + + +- **[DVC dependency management — a guide](https://blog.codecentric.de/en/2019/08/dvc-dependency-management/) + by [Bert Besser](https://blog.codecentric.de/en/author/bert-besser/) and + [Veronika Schwan](https://blog.codecentric.de/en/author/veronika-schindler/).** + +> This post is a follow-up to +> [A walkthrough of DVC](https://blog.codecentric.de/en/2019/03/walkthrough-dvc/) +> that deals with managing dependencies between DVC projects. In particular, +> this follow-up is about importing specific versions of an artifact (e.g. a +> trained model or a dataset) from one DVC project into another. 
+ + + +- **[Effective ML Teams — Lessons Learned](https://medium.com/@czeslaw.szubert/effective-ml-teams-lessons-learned-6a6e761bc283) + by [Czeslaw Szubert](https://medium.com/@czeslaw.szubert)** + +> In this post I’ll present lessons learned on how to setup successful ML teams +> and what you need to devise an effective enterprise ML strategy. + + + +- **[Lessons learned from training a German Speech Recognition model](https://www.esentri.com/lessons-learned-from-training-a-german-speech-recognition-model/) + by [David Schönleber](https://www.linkedin.com/in/dschoenleber/).** + +> Setting up a documentation-by-design workflow and using appropriate tools +> where needed, e.g. _MLFlow_ and _dvc,_ can be a real deal-breaker. + + + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: I’m getting an error message while trying to use AWS S3 storage: `ERROR: failed to push data to the cloud — Unable to locate credentials.` [Any ideas what’s happening?](https://discordapp.com/channels/485586884165107732/563406153334128681/587792932061577218) + +Most likely you haven’t configured your S3 credentials/AWS account yet. Please, +read the full documentation on the AWS website. The short version of what should +be done is the following: + +- [Create your AWS account.](https://portal.aws.amazon.com/gp/aws/developer/registration/index.html) + +- Log in to your AWS Management Console. + +- Click on your user name at the top right of the page. + +- Click on the Security Credentials link from the drop-down menu. + +- Find the Access Credentials section, and copy the latest `Access Key ID`. + +- Click on the Show link in the same row, and copy the `Secret Access Key`. + +Follow +[this link](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) +to setup your environment. + +### Q: I added data with `dvc add` or `dvc run` and see that it takes twice what it was before (with `du` command). [Does it mean that DVC copies data that is added under its control? How do I prevent this from happening?](https://discordapp.com/channels/485586884165107732/563406153334128681/595402051203235861) + +To give a short summary — by default, DVC copies the files from your working +directory to the cache (this is for safety reasons, it is better to duplicate +the data). If you have reflinks (copy-on-write) enabled on your file system, DVC +will use that method — which is as safe as copying. 
You can also configure DVC +to use hardlinks/symlinks to save some space and time, but it will require +enabling the protected mode (making data files in workspace read-only). Read +more details [here](https://dvc.org/doc/user-guide/large-dataset-optimization). + +### Q: [How concurrent-friendly is the cache? And different remotes? Is it safe to have several containers/nodes fill the same cache at the same time?](https://discordapp.com/channels/485586884165107732/563406153334128681/599345778703597568) + +It is safe and a very common use case for DVC to have a shared cache. Please, +check [this thread](https://discuss.dvc.org/t/share-nas-data-in-server/180/12), +for example. + +### Q: [What is the proper way to exit the ASCII visualization?](https://discordapp.com/channels/485586884165107732/563406153334128681/603890677176336394) (when you run `dvc pipeline show` command). + +See this +[document](https://dvc.org/doc/commands-reference/pipeline/show#options). To +navigate, use arrows or W, A, S, D keys. To exit, press Q. + +### Q: [Is there an issue if I set my `cache.s3` external cache to my default remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/606197026488844338) I don’t quite understand what an external cache is for other than I have to have it for external outputs. + +The short answer is that we would suggest keeping them separately to avoid possible +checksum overlaps. Checksum on S3 might theoretically overlap with our checksums +(with the content of the file being different), so it could be dangerous. The +chances of losing data are pretty slim, but we would not risk it. Right now, we +are working on making sure there are no possible overlaps. + +### Q: [What’s the right procedure to move a step .dvc file around the project?](https://discordapp.com/channels/485586884165107732/563406153334128681/606425815139221504) + +Assuming the file was created with `dvc run`. There are a few possible ways.
+The obvious one is to delete the file and create a new one with +`dvc run --no-exec -f file/path/and/name.dvc`. Another possibility is to +rename/move and then edit manually. See +[this document](https://dvc.org/doc/user-guide/project-structure) that describes +how DVC-files are organized. No matter what method you use, you can run +`dvc commit file.dvc` to save changes without running the command again. + +### Q: [`dvc status` doesn’t seem to report things that need to be dvc pushed, is that by design?](https://discordapp.com/channels/485586884165107732/563406153334128681/606917839688957952) + +You should try `dvc status --cloud` or `dvc status --remote <name>` +to compare your local cache with a remote one; by default it only compares the +“working directory” with your local cache (to check whether something should be +reproduced and saved or not). + +### Q: [What kind of files can you put into `dvc metrics`?](https://discordapp.com/channels/485586884165107732/563406153334128681/608701494035873792) + +The file could be in any format; `dvc metrics show` will try to interpret the +format and output it in the best possible way. Also, if you are using `csv` or +`json`, you can use the `--xpath` flag to query specific measurements. **In +general, you can make any file a metric file and put any content into it, DVC is +not opinionated about it.** Usually, though, these are files that measure the +performance/accuracy of your model and capture the configuration of experiments. +The idea is to use `dvc metrics show` to display all your metrics across +experiments so you can make decisions about which combination (of features, +parameters, algorithms, architecture, etc.) works the best.
+ +### Q: [Does DVC take into account the timestamp of a file or is the MD5 only depends on the files actual/bits content?](https://discordapp.com/channels/485586884165107732/563406153334128681/613639458000207902) + +DVC takes into account only content (bits) of a file to calculate hashes that +are saved into DVC-files. + +### Q: [Similar to `dvc gc` is there a command to garbage collect from the remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/616421757808541721) + +`dvc gc --remote NAME` is doing this, but you should be extra careful, because +it will remove everything that is not currently “in use” (by the working +directory). Also, please check this +[issue](https://github.com/iterative/dvc/issues/2325) — semantics of this +command might have changed by the time you read this. + +### Q: [How do I use and configure remote storage on IBM Cloud Object Storage?](https://discordapp.com/channels/485586884165107732/485596304961962003/591237578209099786) + +Since it’s S3 compatible, specifying `endpointurl` (exact URL depends on the +[region](https://cloud.ibm.com/docs/services/cloud-object-storage?topic=cloud-object-storage-endpoints)) +is the way to go: + +```dvc +$ dvc remote add -d mybucket s3://path/to/dir +$ dvc remote modify mybucket \ + endpointurl \ + https://s3.eu.cloud-object-storage.appdomain.cloud +``` + +### Q: [How can I push data from client to google cloud bucket using DVC?](https://discordapp.com/channels/485586884165107732/485596304961962003/592958360903483403). Just want to know how can i set the credentials. + +You can do it by setting environment variable pointing to yours credentials +path, like: + +```dvc +$ export GOOGLE_APPLICATION_CREDENTIALS=path/to/credentials +``` + +It is also possible to set this variable via `dvc config`: + +```dvc +$ dvc remote modify myremote credentialpath /path/to/my/creds +``` + +where `myremote` is your remote name. + +
+ +If you have any questions, concerns or ideas, let us know in the comments below +or connect with DVC team [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are always open, too. diff --git a/content/blogs/2019-10-08-dvc-org-for-hacktoberfest-2019.md b/content/blogs/2019-10-08-dvc-org-for-hacktoberfest-2019.md new file mode 100644 index 0000000000..ce3c36d241 --- /dev/null +++ b/content/blogs/2019-10-08-dvc-org-for-hacktoberfest-2019.md @@ -0,0 +1,114 @@ +--- +title: DVC.org for Hacktoberfest 2019 +date: 2019-10-08 +description: > + Our favorite month of the year Hacktoberfest is already in full swing and we + at DVC.org are so excited to be a part of it! +descriptionLong: > + Our favorite month of the year + [Hacktoberfest](https://hacktoberfest.digitalocean.com/) is already in full + swing and we at [DVC.org](https://dvc.org) are so excited to be a part of it! +picture: 2019-10-08/post-image.png +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/dvc-org-for-hacktoberfest-2019/286 +tags: + - Hacktoberfest + - Company +--- + +[Hacktoberfest](https://hacktoberfest.digitalocean.com/) is a monthly-long +program that celebrates open source and encourages you to contribute to open +source projects (and rewards you with stickers and a cool T-shirt!). Whether +you’re a seasoned contributor or looking for projects to contribute to for the +first time, you’re welcome to participate! + +It is the 6th season of Hacktoberfest and the 2d year of participating for +DVC.org team. We really enjoyed it in 2018 and this year we are upping the game +with our own cool stickers, special edition T-shirts and a +[collection of carefully picked tickets](https://github.com/iterative/dvc/labels/hacktoberfest). + +### How to participate? + +If you haven’t started your Hacktoberfest challenge yet, it is just the right +time, you have 3 weeks left to submit PRs and get your swag! 
Here are some +important details: + +- Hacktoberfest is open to everyone in the global community. + +- You can sign up anytime between October 1 and October 31. Make sure to sign up + on the + [official Hacktoberfest website](https://hacktoberfest.digitalocean.com/) for + your PRs to count. + +- To get a shirt, you must make 4 legit pull requests (PRs) between October 1–31 + in any time zone. + +- Pull requests can be made in any public GitHub-hosted repositories/projects, + not just the ones highlighted. + +And the special addition from DVC.org team: + +- Look through the list of + [DVC Hacktoberfest tickets](https://github.com/iterative/dvc/labels/hacktoberfest) + or the list of + [good DVC first issues](https://github.com/iterative/dvc/labels/good%20first%20issue). + +- Make a PR to DVC and get our stickers. + +- Close three issues for DVC and get a special DVC T-shirt. + +### Why contribute to DVC? + +[DVC](http://dvc.org) (Data Version Control) is a relatively young open source +project. It was started in late 2017 by a data scientist and an engineer to fill +in the gaps in the ML processes tooling. Nowadays DVC is growing pretty fast and +though our in-house team is quite small, we have to thank our contributors (more +than 100 in both code and docs) for developing DVC with us. + +DVC is participating in Hacktoberfest for 2 years in a row to bring more people +into open source, to learn from them and to give back by sharing our own +experience. This year we decided to focus on a single important topic for us — +improving UI/UX. + +As our contributors and maintainers were sifting through the feature requests, +bugs, and improvements to create a good +[list of Hacktoberfest tickets](https://github.com/iterative/dvc/labels/hacktoberfest), +we noticed that UI/UX label on Github is popping up again and again. 
DVC is a +command line tool, and improving UI/UX in our case means making decisions on how +to name command options, where and when to use +[confirmation prompts](https://github.com/iterative/dvc/issues/2498) and/or +where to abort execution, what exactly the user would expect to see in the output, how +to test it later, etc. + +Why does improving UI/UX appear to be so important for DVC at this stage? Perhaps +because the project is more mature now and we are ready to spend more time on +polishing it. Or maybe because it is still too engineering-focused and we used +to disregard/de-prioritize all this ‘fancy’ stuff. Or it is because we just lack +experience in creating good CLI UI/UX! + +One way or another, those are great reasons to focus on improving UI (in a broader +sense than just GUI), improving docs, creating a powerful, consistent experience +for our users and increasing accessibility of DVC. + +That’s how +[Heroku’s CLI style guide](https://devcenter.heroku.com/articles/cli-style-guide) +starts: + +> Heroku CLI plugins should provide a clear user experience, targeted primarily +> for human readability and usability, which delights the user, while at the +> same time supporting advanced users and output formats. This article provides +> a clear direction for designing delightful CLI plugins. + +At DVC we are building user experience in line with these principles too, but we +also have our own challenges. And here we turn for help to the global open +source community and all the contributors out there. + +For all of us who have a heart for open source — let’s discuss, contribute, +learn, take the technologies forward and build something great together! + +Happy hacking! +
+ +We are happy to hear from you [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are always open, too! diff --git a/content/blogs/2019-11-05-october-19-dvc-heartbeat.md b/content/blogs/2019-11-05-october-19-dvc-heartbeat.md new file mode 100644 index 0000000000..e050f421c5 --- /dev/null +++ b/content/blogs/2019-11-05-october-19-dvc-heartbeat.md @@ -0,0 +1,270 @@ +--- +title: October ’19 DVC❤️Heartbeat +date: 2019-11-05 +description: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-11-05/post-image.png +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/october-19-dvc-heartbeat/285 +tags: + - Meetup + - Heartbeat + - Hacktoberfest + - Community +--- + +## News and links + +Autumn is a great season for new beginnings and there is so much we love about +it this year. Here are some of the highlights: + +- Co-hosting our + [first ever meetup](https://www.meetup.com/San-Francisco-Machine-Learning-Meetup/events/264846847/)! + Our [Dmitry Petrov](https://twitter.com/FullStackML) partnering with + [Dan Fischetti](https://www.linkedin.com/in/daniel-fischetti-4a6592bb/) from + [Standard Cognition](https://twitter.com/standardAI) to discuss Open-source + tools to version control Machine Learning models and experiments. The + recording is available now here. 
+ + https://youtu.be/RHQXK7EC0jI + +- [Getting ready for the Hacktoberfest](https://blog.dataversioncontrol.com/dvc-org-for-hacktoberfest-2019-ce5320151a0c) + and having the whole team get together to pick up and label nice issues and be + ready to support the contributors. + +- Discovering some really cool blogposts, talks and tutorials from our users all + over the world: check + [this blogpost in French](https://blog.octo.com/mise-en-application-de-dvc-sur-un-projet-de-machine-learning/) + or + [this tutorial in German](https://jupyter-tutorial.readthedocs.io/de/latest/productive/dvc/)! + +- Having a great time working with a + [tech writer](https://github.com/dashohoxha) brought to us by the + [Google Season of Docs](https://developers.google.com/season-of-docs) program. + Check out these + [interactive tutorials](https://dvc.org/doc/tutorials/interactive) we’ve + created together. + +- Having hot internal discussion about Discord vs Slack support/community + channels. If you are on the edge like us, have a look at + [this discussion](https://internals.rust-lang.org/t/exploring-new-communication-channels/7859) + in the Rust community, so helpful. + +- Seeing [Dmitry Petrov](https://twitter.com/FullStackML) being really happy one + day: + + https://twitter.com/FullStackML/status/1169403554290814976 + +
+ +We at [DVC.org](https://dvc.org) are so happy every time we discover an article +featuring DVC or addressing one of the burning ML issues we are trying to solve. +Here are some of the links that caught our eye past month: + +- **Continuous Delivery for Machine Learning by + [Danilo Sato](https://twitter.com/dtsato), + [Arif Wider](https://twitter.com/arifwider), + [Christoph Windheuser](https://twitter.com/intellification) and curated by + [Martin Fowler](https://martinfowler.com/).** + +> As Machine Learning techniques continue to evolve and perform more complex +> tasks, so is evolving our knowledge of how to manage and deliver such +> applications to production. By bringing and extending the principles and +> practices from Continuous Delivery, we can better manage the risks of +> releasing changes to Machine Learning applications in a safe and reliable way. + + + +- **[The Path to Identity Validation](https://medium.com/signaturit-tech-blog/the-path-to-identity-validation-2-3-4f698b2ffae9) + by [Víctor Segura](https://medium.com/@victor.segura).** + +> So, the first question is clear: how to choose the optimal hardware for neural +> networks? Secondly, assuming that we have the appropriate infrastructure, how +> to build the machine learning ecosystem to train our models efficiently and +> not die trying? At **Signaturit**, we have the solution ;) + + + +- **Talk: + [Managing Big Data in Machine Learning projects](https://pretalx.com/pyconuk-2019/talk/GCLBFH/) + by [V Vishnu Anirudh](https://twitter.com/vvasworld) at the + [Pycon UK 2019.](https://2019.pyconuk.org/)** + +> My talk will focus on Version Control Systems (VCS) for big-data projects. +> With the advent of Machine Learning (ML) , the development teams find it +> increasingly difficult to manage and collaborate on projects that deal with +> huge amounts of data and ML models apart from just source code. 
+ +https://youtu.be/4XpHk85_x0E + +- **Podcast: TWIML Talk #295 + [Managing Deep Learning Experiments](https://twimlai.com/twiml-talk-295-managing-deep-learning-experiments-with-lukas-biewald/) + with [Lukas Biewald](https://twitter.com/l2k)** + +> Seeing a need for reproducibility in deep learning experiments, Lukas founded +> Weights & Biases. In this episode we discuss his experiment tracking tool, how +> it works, the components that make it unique in the ML marketplace and the +> open, collaborative culture that Lukas promotes. Listen to Lukas delve into +> how he got his start in deep learning experiments, what his experiment +> tracking used to look like, the current Weights & Biases business success +> strategy, and what his team is working on today. + + + +
+
+## Discord gems
+
+There are lots of hidden gems in our Discord community discussions. Sometimes
+they are scattered all over the channels and hard to track down.
+
+We are sifting through the issues and discussions and share with you the most
+interesting takeaways.
+
+### Q: I’ve just run a `dvc run` step, and realised I forgot to declare an output file. [Is there a way to add an output file without rerunning the (computationally expensive) step/stage?](https://discordapp.com/channels/485586884165107732/485596304961962003/593743448020877323)
+
+If you’ve already run it, you could just open the created DVC-file with an
+editor and add an entry to the outs field. After that, just run
+`dvc commit my.dvc` and it will save the checksums and data without re-running
+your command. `dvc run --no-exec` would also work with commit instead of
+modifying the DVC-file by hand.
+
+### Q: [For metric files do I have to use dvc run to set a metric or can I do it some other way?](https://discordapp.com/channels/485586884165107732/485596304961962003/593869598651318282) Can I use metrics functionality without the need to setup and manage DVC cache and remote storage?
+
+Any file that is under DVC control (e.g. added with `dvc add` or an output in
+`dvc run -o`) can be made a metric file with `dvc metrics add file`.
+Alternatively, the command `dvc run -M file` makes the file a metric without
+caching it. It means `dvc metrics show` can be used while the file is still
+versioned by Git.
+
+### Q: [Is there a way not to add the full (Azure) connection string to the .dvc/config file that is being checked into Git for using dvc remotes](https://discordapp.com/channels/485586884165107732/485596304961962003/595586670498283520)? I think it’s quite unhealthy to have secrets checked in SCM. 
+ +There are two options — use `AZURE_STORAGE_CONNECTION_STRING` environment +variable or use `--local` flag that will put it into the `.dvc/config.local` +that is added to the `.gitignore`, so you don’t track it with it and so won’t +expose secrets. + +### Q: [I would like to know if it is possible to manage files under DVC whilst keeping them in their original locations (e.g. on a network drive in a given folder structure)](https://discordapp.com/channels/485586884165107732/485596304961962003/601068667131920385)? [If I want to add a large file to be tracked by DVC, and it is in a bucket on S3 or GCS, can I do that without downloading it locally?](https://discordapp.com/channels/485586884165107732/485596304961962003/615278138896941101) + +Yes, you are probably looking for external dependencies and outputs. This is the +[link](https://dvc.org/doc/user-guide/managing-external-data) to the +documentation to start. + +### Q: [How do I setup DVC so that NAS (e.g. Synology) acts as a shared DVC cache?](https://discordapp.com/channels/485586884165107732/485596304961962003/606388040377565215) + +Using NAS (e.g. NFS) is a very common scenario for DVC. In short you use +`dvc cache dir` to setup a cache externally. Set cache type to use symlinks and +enable protected mode. We are preparing a +[document](https://github.com/iterative/dvc.org/blob/31c5d424c6530bb793af69c2af578d2b8a374d02/static/docs/use-cases/shared-storage-on-nfs.md) +how to setup the NFS as a shared cache, but I think it can be applied to any +NAS. + +### Q: So I have some data that is in the hundreds of gigs. [If I enable symlink, hardlink strategy and cache protecting, will DVC automatically choose this strategy over copying when trying to use dvc add](https://discordapp.com/channels/485586884165107732/485596304961962003/608013531010301952)? + +Yes, it will! Here is some clarification. 
So when you set those settings like +that, `dvc add` data will move data to your cache and then will create a +hardlink from your cache to your workspace. + +Unless your cache directory and your workspace are on different file systems, +move should be instant. Please, find more information +[here](https://dvc.org/doc/user-guide/large-dataset-optimization). + +### Q: My repo’s DVC is “busy and locked” and I’m not sure how it got that way and how to remove/diagnose the lock. [Any suggestions?](https://discordapp.com/channels/485586884165107732/485596304961962003/608392956679815168) + +DVC uses a lock file to prevent running two commands at the same time. The lock +[file](https://dvc.org/doc/user-guide/dvc-internals) is under the `.dvc` +directory. If no DVC commands running and you are still getting this error it’s +safe to remove this file manually to resolve the issue. + +### Q: [I’m trying to understand how does DVC remote add work in case of a local folder and what is the best workflow when data is outside of your project root?](https://discordapp.com/channels/485586884165107732/485596304961962003/611209851757920266) + +When using DVC, in most cases we assume that your data will be somewhere under +project root. There is an option to use so called +[external dependencies](https://dvc.org/doc/user-guide/managing-external-data), +which is data that is usually too big to be stored under your project root, but +if you operate on data that is of some reasonable size, I would recommend +starting with putting data somewhere under project root. Remotes are usually +places where you store your data, but it is DVC task to move your data around. +But if you want to keep your current setup where you will have data in different +place than your project, you will need to refer to data with full paths. So, for +example: + +1. You are in `/home/gabriel/myproject` and you have initialized dvc and git + repository + +2. 
You have `featurize.py` in your project dir, and want to use data to produce
+   some features and then `train.py` to train a model.
+
+3. Run the command:
+
+```dvc
+$ dvc run -d /research_data/myproject/videos \
+          -o /research_data/myproject/features \
+          python featurize.py
+```
+
+to tell DVC that you use `/research_data/myproject/videos` to featurize, and
+produce output to your features dir. Note that your code should be aware of
+those paths, they can be hardcoded inside `featurize.py`, but the point of
+`dvc run` is just to tell DVC what artifacts belong to the currently defined
+step of the ML pipeline.
+
+### Q: When I run the `du` command to check how much space the DVC project consumes I see that it duplicates/copies data. [It’s very space and time consuming to copy large data files, is there a way to avoid that?](https://discordapp.com/channels/485586884165107732/485596304961962003/613935477896249364) It takes too long to add large files to DVC.
+
+Yes! You don’t have to copy files with DVC. First of all, there are two reasons
+why `du` can show that it takes double the space to store data under DVC
+control. First, `du` can be inaccurate when the underlying file system supports
+reflinks (XFS on Linux, APFS on Mac, etc). This is actually the best scenario
+since no copying is happening and no changes are required to any DVC settings.
+Second, copy semantics may be used by default. It can be turned off by
+providing cache type `symlinks`, `hardlinks`. Please, read more on this
+[here](https://dvc.org/doc/user-guide/large-dataset-optimization#file-link-types-for-the-dvc-cache).
+
+### Q: [How can I detach a file from DVC control?](https://discordapp.com/channels/485586884165107732/485596304961962003/615479227189559323)
+
+Just removing the corresponding DVC-file and running `dvc gc` after that should
+be enough. It’ll stop tracking the data file and clean the local cache that
+might still contain it. Note! 
Don’t forget to run `dvc unprotect` if you use an
+advanced[ DVC setup with symlinks and hardlinks](https://dvc.org/doc/user-guide/large-dataset-optimization)
+(`cache.type` config option is not default). If `dvc gc` behavior is not
+granular enough, you can manually find the file by its checksum (taken from the
+DVC-file) in `.dvc/cache` and in the remote storage. Learn
+[here](https://dvc.org/doc/user-guide/dvc-internals#structure-of-cache-directory)
+how they are organized.
+
+### Q: [I’m trying to understand if DVC is an appropriate solution for storing data under GDPR requirements.](https://discordapp.com/channels/485586884165107732/485596304961962003/621057268145848340) That means that permanent deletion of files with sensitive data needs to be fully supported.
+
+Yes, in this sense DVC is not very different from using bare S3, SSH or any
+other storage where you can go and just delete data. DVC can give a bit of
+overhead to locate a specific file to delete, but otherwise it’s all the same —
+you will be able to delete any file you want. See more details on how you can
+retrospectively edit directories under DVC control
+[here](https://discordapp.com/channels/485586884165107732/485596304961962003/621062105524862987).
+ +If you have any questions, concerns or ideas, let us know in the comments below +or connect with DVC team [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are always open, too. diff --git a/content/blogs/2019-12-14-november-19-dvc-heartbeat.md b/content/blogs/2019-12-14-november-19-dvc-heartbeat.md new file mode 100644 index 0000000000..75f19c0e45 --- /dev/null +++ b/content/blogs/2019-12-14-november-19-dvc-heartbeat.md @@ -0,0 +1,278 @@ +--- +title: November ’19 DVC❤️Heartbeat +date: 2019-12-14 +description: > + Co-hosting our first ever meetup, sharing our Hacktoberfest experience, 4K ⭐, + fresh Discord gems and other news. +descriptionLong: > + Every month we are sharing here our news, findings, interesting reads, + community takeaways, and everything along the way. + + Some of those are related to our brainchild [DVC](https://dvc.org) and its + journey. The others are a collection of exciting stories and ideas centered + around ML best practices and workflow. +picture: 2019-12-14/post-image.jpeg +pictureComment: + How cool is this handmade swag from our community? We were in tears! +author: svetlana_grinchenko +commentsUrl: https://discuss.dvc.org/t/november-19-dvc-heartbeat/284 +tags: + - Meetup + - Heartbeat + - Hacktoberfest + - Community +--- + +The past few months have been so busy and full of great events! We love how +involved our community is and can’t wait to share more with you: + +- We have organized our very first + [meetup](https://www.meetup.com/San-Francisco-Machine-Learning-Meetup/events/264846847/)! + So many great conversations, new use cases and insights! Many thanks to + [Dan Fischetti](https://www.linkedin.com/in/daniel-fischetti-4a6592bb/) from + [Standard Cognition](https://standard.ai/), who joined our Dmitry Petrov on + stage. Watch the recording here. 
+ + https://youtu.be/RHQXK7EC0jI + +- [Hacktoberfest](https://blog.dataversioncontrol.com/dvc-org-for-hacktoberfest-2019-ce5320151a0c) + was a great exercise for DVC team on many levels and we really enjoyed + supporting new contributors. Kudos to + [Nabanita Dash](https://twitter.com/explorer_07) for organizing a cool + DVC-themed hackathon! + + https://twitter.com/psociiit/status/1185150096792535040 + +- We’ve crossed 4k stars mark on [Github](https://github.com/iterative/dvc)! + +- DVC was participating in the + [Devsprints](https://twitter.com/FossMec/status/1192866498324254720) (Thank + you [Kurian Benoy](https://twitter.com/kurianbenoy2) for the intro!) and we + were happy to jump in and help with some mentoring. + + https://twitter.com/FossMec/status/1192866498324254720 + +![](../uploads/images/2019-12-14/devsprints.png)_Devsprints participants on our +[Discord](http://dvc.org/chat) channel_ + +- DVC became part of the default + [Homebrew formulae](https://formulae.brew.sh/formula/dvc)! So now you can + install it as easy as `brew install dvc`! + +- We helped 2 aspiring speakers deliver their very first conference talks. + [Kurian Benoy](https://twitter.com/kurianbenoy2/status/1183427495342694401?s=20) + was speaking at [PyconIndia](https://in.pycon.org/2019/) and + [Aman Sharma](https://www.linkedin.com/in/aman-sharma606/) was speaking at + [SciPyIndia](https://scipy.in/2019#speakers). 
**Supporting speakers is
+  something we are passionate about and if you ever wanted to give a talk on a
+  DVC-related topic — we are here to help, just
+  [let us know](https://dvc.org/support)!**
+
+  https://youtu.be/Ipzf6oQqQpo
+
+- Our own [Dmitry Petrov](https://twitter.com/FullStackML) went to Europe to
+  speak at the
+  [Open Source Summit Europe](https://osseu19.sched.com/speaker/dmitry35) in
+  Lyon, [Highload++](https://www.highload.ru/moscow/2019/abstracts/6032) in
+  Moscow and made a stop in Berlin to co-host a
+  [meetup](https://www.meetup.com/codecentric-Berlin/events/265555810/) with our
+  favourite AI folks from [Codecentric](https://www.codecentric.de/)!
+
+ +Here are some of the great pieces of content around DVC and ML ops that we +discovered in October and November: + +- **[Deploy Machine Learning Models with Django](https://www.deploymachinelearning.com/) + by Piotr Płoński.** + +> …building your ML system has a great advantage — it is tailored to your needs. +> It has all features that are needed in your ML system and can be as complex as +> you wish. This tutorial is for readers who are familiar with ML and would like +> to learn how to build ML web services. + + + +- **[How to Manage Your Machine Learning Workflow with DVC, Weights & Biases, and Docker](https://towardsdatascience.com/how-to-manage-your-machine-learning-workflow-with-dvc-weights-biases-and-docker-5529ea4e59e0) + by [James Le](https://le-james94.medium.com).** + +> In this article, I want to show 3 powerful tools to simplify and scale up +> machine learning development within an organization by making it easy to +> track, reproduce, manage, and deploy models. + + + +- **[Creating a solid Data Science development environment](https://towardsdatascience.com/creating-a-solid-data-science-development-environment-60df14ce3a34) + by + [Gabriel dos Santos Goncalves](https://towardsdatascience.com/@gabrielsgoncalves)** + +> We do believe that Data Science is a field that can become even more mature by +> using best practices in project development and that Conda, Git, DVC, and +> JupyterLab are key components of this new approach + + + +- **[Creating reproducible data science workflows with DVC](https://medium.com/y-data-stories/creating-reproducible-data-science-workflows-with-dvc-3bf058e9797b) + by [Gleb Ivashkevich](https://medium.com/@glib.ivashkevych).** + +> DVC is a powerful tool and we covered only the fundamentals of it. + + + +
+ +## Discord gems + +There are lots of hidden gems in our Discord community discussions. Sometimes +they are scattered all over the channels and hard to track down. + +We are sifting through the issues and discussions and share with you the most +interesting takeaways. + +### Q: When you do a `dvc import` you get the state of the data in the original repo at that moment in time from that repo, right? [The overall state of that repo (e.g. Git `commit id` (hash)) is not preserved upon import, right?](https://discordapp.com/channels/485586884165107732/563406153334128681/618744949277458462) + +On the contrary, DVC relies on Git `commit id` (hash) to determine the state of +the data as well as code. Git `commit id` (hash) is saved in DVC file upon +import, data itself is copied/downloaded into DVC repo cache but would not be +pushed to the remote — DVC does not create duplicates. There is a command to +advance/update it when it’s needed — `dvc update`. Git commit hash saved to +provide reproducibility. Even if the source repo `HEAD` has changed your import +stays the same until you run `dvc update` or redo `dvc import`. + +### Q: I’m trying to understand if DVC is an appropriate solution for storing data under GDPR requirements. [That means that permanent deletion of files with sensitive data needs to be fully supported.](https://discordapp.com/channels/485586884165107732/485596304961962003/621057268145848340) + +Yes, in this sense DVC is not very different from using bare S3, SSH or any +other storage where you can go and just delete data. DVC can give a bit of +overhead to locate a specific file to delete, but otherwise it’s all the same +you will be able to delete any file you want. Read more details in +[this discussion](https://discordapp.com/channels/485586884165107732/485596304961962003/621062105524862987). 
+
+### Q: [Is there any way to get the remote url for specific DVC-files?](https://discordapp.com/channels/485586884165107732/485596304961962003/621591769766821888) Say, I have a DVC-file `foo.png.dvc` — is there a command that will show the remote url, something like `dvc get-remote-url foo.png.dvc` which will return e.g. the Azure url to download.
+
+There is no special command for that, but if you are using Python, you could use
+our API specifically designed for that:
+
+```python
+from dvc.api import get_url
+
+url = get_url(path,
+              repo="https://github.com/user/proj",
+              rev="mybranch")
+```
+
+so, you could as well use this from CLI as a wrapper command.
+
+### Q: [Can DVC be integrated with MS Active Directory (AD) authentication for controlling access?](https://discordapp.com/channels/485586884165107732/563406153334128681/619244714071425035) The GDPR requirements would force me to use such a system to manage access.
+
+Short answer: no (as of the date of publishing this Heartbeat issue). Good news
+— it should be very easy to add, so we would welcome a contribution :) Azure
+has a connection argument for AD — quick googling shows this
+[library](https://github.com/AzureAD/azure-activedirectory-library-for-python),
+which is probably what’s needed.
+
+### Q: [How do I uninstall DVC from Mac installed as a package?](https://discordapp.com/channels/485586884165107732/485596304961962003/625124341201502209)
+
+When installing using `plain.pkg` it is a bit tricky to uninstall, so we usually
+recommend using things like brew cask instead if you really need the binary
+package. Try to run these commands:
+
+```dvc
+$ sudo rm -rf /usr/local/bin/dvc
+$ sudo rm -rf /usr/local/lib/dvc
+$ sudo pkgutil --forget com.iterative.dvc
+```
+
+to uninstall the package. 
+
+### Q: We are using SSH remote to store data, but the problem is that everyone within the project has different username on the remote machine and thus we cannot set it in the config file (that is committed to Git). [Is there a way to add just host and path, without the username?](https://discordapp.com/channels/485586884165107732/563406153334128681/619420070111608848)
+
+Yes, you should use `--local` or `--global` config options to set the user per
+project or per user machine without sharing (committing) them to Git:
+
+```dvc
+$ dvc remote modify myremote --local user myuser
+```
+
+or
+
+```dvc
+$ dvc remote modify myremote --global user myuser
+```
+
+### Q: [I still get the `SSL ERROR` when I try to perform a dvc push with or without `use_ssl = false`](https://discordapp.com/channels/485586884165107732/485596304961962003/628227197592797191)?
+
+A simple environment variable like this:
+
+```dvc
+$ export AWS_CA_BUNDLE=/path/to/cert/cert.crt dvc push
+```
+
+should do the trick for now; we plan to fix the ca_bundle option soon.
+
+### Q: I have just finished a lengthy `dvc repro` and I’m happy with the result. However, I realized that I didn’t specify a dependency which I needed (and obviously is used in the computation). [Can I somehow fix it?](https://discordapp.com/channels/485586884165107732/563406153334128681/620572187841265675)
+
+Add the dependency to the stage file without rerunning/reproducing the stage.
+This is not needed as this additional dependency hasn’t changed.
+
+You would need to edit the DVC-file. In the deps section add:
+
+```yaml
+- path: not/included/file/path
+```
+
+and run `dvc commit file.dvc` to save changes w/o running the pipeline again.
+See an example
+[here](https://discordapp.com/channels/485586884165107732/563406153334128681/620641530075414570). 
+
+### Q: For some reason [we need to always specify the remote name when doing a `dvc push`](https://discordapp.com/channels/485586884165107732/485596304961962003/629704961868955648) e.g., `dvc push -r upstream` as opposed to `dvc push` (mind no additional arguments).
+
+You can mark a “default” remote:
+
+```dvc
+$ dvc remote add -d remote /path/to/my/main/remote
+```
+
+then, `dvc push` (and other commands like `dvc pull`) will know to push to the
+default remote.
+
+### Q: [If I want stage B to run after stage A, but the stage A has no output, can I specify A’s DVC-file as B’s dependency?](https://discordapp.com/channels/485586884165107732/563406153334128681/620715145374466048)
+
+No, at least at the time of publishing this. You could use a phony output
+though. E.g. make the stage A output some dummy file and make B depend on it.
+Please, consider creating or upvoting a relevant issue on our Github if you’d
+like this to be implemented.
+
+### Q: I’m just getting started with DVC, but I’d like to use it for multiple developers to access the data and share models and code. [I do own the server, but I’m not sure how to use DVC with SSH remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/598867829785362452)
+
+Please, refer to
+[this answer](https://discuss.dvc.org/t/how-do-i-use-dvc-with-ssh-remote/279/2)
+on the DVC forum and check the documentation for the
+[`dvc remote add`](https://dvc.org/doc/command-reference/remote/add) and
+[`dvc remote modify`](https://dvc.org/doc/command-reference/remote/modify)
+commands to see more options and details.
+
+ +If you have any questions, concerns or ideas, let us know in the comments below +or connect with DVC team [here](https://dvc.org/support). Our +[DMs on Twitter](https://twitter.com/DVCorg) are always open, too. diff --git a/content/blogs/2020-01-17-january-20-dvc-heartbeat.md b/content/blogs/2020-01-17-january-20-dvc-heartbeat.md new file mode 100644 index 0000000000..ad74dba25f --- /dev/null +++ b/content/blogs/2020-01-17-january-20-dvc-heartbeat.md @@ -0,0 +1,145 @@ +--- +title: January ’20 DVC❤️Heartbeat +date: 2020-01-17 +description: > + Reaching 100 contributors, PyData LA, and more news from the DVC community. +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. Some of those are related to our brainchild + [DVC](https://dvc.org) and its journey. The others are a collection of + exciting stories and ideas centered around ML best practices and workflow. +picture: 2020-01-17/DVC_chalk_donuts.png +pictureComment: We spread the joys of version control and donuts at PyData LA. +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/january-20-dvc-heartbeat/314 +tags: + - Heartbeat + - PyData +--- + +Welcome to the New Year! Time for a recap of the last few weeks of activity in +the DVC community. + +## News + +We were honored to be named a [Project of the Year](https://ods.ai/awards/2019/) +by Open Data Science, Russia's largest community of data scientists and machine +learning practitioners. Check out our ⭐️incredibly shiny trophy⭐️! + +https://twitter.com/DVCorg/status/1209544709930016768 + +DVC hit **100 individual contributors** on Github! To celebrate our +100th contributor, [Vera Sativa](https://github.com/verasativa/), we +sent her \$500 to use on any educational opportunity and her own DeeVee (that's +our rainbow owl). 
We also awarded educational mini-grants to two of DVC's +biggest contributors, [Vít Novotný](https://github.com/witiko), and +[David Příhoda](https://twitter.com/david_prihoda). + +![](../uploads/images/2020-01-17/odd_with_deevee.png)_Vera (center, flashing a +peace sign) thanked us with this lovely picture of DeeVee and her team, +[Odd Industries](https://odd.co). They are making some extremely neat tools for +construction teams using computer vision._ + +**We were at PyData LA!** Our fearless leader +[Dmitry gave a talk](https://www.youtube.com/watch?v=7Wsd6V0k4Oc) and we set up +a busy booth to meet with the Pythonistas of Los Angeles. It was a cold and +blustery day, but visitors kept showing up to our semi-outdoor booth. We're sure +they came for the open source version control and not the donuts. + +![](../uploads/images/2020-01-17/py_data1.jpeg) +![](../uploads/images/2020-01-17/py_data2.jpeg) _The DVC team and PyData +volunteers who heroically staffed our booth in the rain._ + +Our engineer and technical writer Jorge reported: + +> We were super happy to meet all kinds of data professionals and enthusiasts in +> several fields who are learning and adopting DVC with their teams – including +> several working with privacy-sensitive medical records, very cool! + +
+ +## From the community + +Here are some rumblings from the machine learning (ML) and data science +community that got us talking. + +**A machine learning software wishlist.** Computer scientist and writer +[Chip Huyen](https://twitter.com/chipro) tweeted about her ML software wishlist +and kicked off a big community discussion. + +https://twitter.com/chipro/status/1202815757593108480 + +Her tweet resonated with a lot of practitioners, who were eager to discuss the +solutions they'd tried. Among the many thoughtful replies and recommendations, +we were thrilled to see DVC mentioned. + +https://twitter.com/kristijan_ivanc/status/1202879739716870144 + +If you haven't already, definitely check out Chip's +[thread](https://twitter.com/chipro/status/1202815757593108480), and follow her +on Twitter for more excllent, accessible content about ML engineering. We're +thinking hard about these ideas and hope the discussion continues on- and +offline. + +**A gentle intro to DVC for data scientists.** Scientist +[Elle O'Brien](https://twitter.com/andronovhopf) published a code walkthrough +about using DVC to make an image classification project more reproducible. +Specifically, the blog is a case study about version control when a dataset +grows over time. If you're looking for a DVC tutorial geared for data +scientists, this might be up your alley. + + + +**Ideas for data scientists to level up their code** Machine learning engineer +Andrew Greatorex posted a blog called “Down with technical debt! Clean Python +for data scientists.” Andrew highlights something we can easily relate to: the +“science” part of data science, which encourages experimentation and +flexibility, sometimes means less emphasis on readable, shareable code. 
Andrew +writes: + +> "I’m hoping to shed light on some of the ways that more fledgling data +> scientists can write cleaner Python code and better structure small scale +> projects, with the important side effect of reducing the amount of technical +> debt you inadvertently burden on yourself and your team.” + +In this blog, DVC gets a shout-out as Andrew’s preferred data versioning tool, +used in conjunction with Git for versioning Python code. Thanks! + + + +**An introduction to MLOps** Engineer +[Sharif Elfouly](https://twitter.com/elfouly_sharif) wrote an approachable guide +to thinking about MLOps, the growing field around making ML projects run +efficiently from experimentation to production. He summarises why managing ML +projects can be fundamentally different than traditional software development: + +> “The main difference between traditional software and ML is that you don’t +> only have the code. You also have data, models, and experiments. Writing +> traditional software is relatively straightforward but in ML you need to try +> out a lot of different things to find the best and fastest model for your +> use-case. You have a lot of different model types to choose from and every +> single one of them has its specific hyperparameters. Even if you work alone +> this can get out of hand pretty quickly.” + +Sharif gives some recommendations for tools that work especially well for ML, +and he writes that DVC is the “perfect combination for versioning your code and +data.” Thanks, Sharif! We think you’re perfect, too. + + + +That's a wrap for January. We'll see you next month with more updates! 
diff --git a/content/blogs/2020-01-20-january-20-community-gems.md b/content/blogs/2020-01-20-january-20-community-gems.md new file mode 100644 index 0000000000..cab81a49d4 --- /dev/null +++ b/content/blogs/2020-01-20-january-20-community-gems.md @@ -0,0 +1,150 @@ +--- +title: January '20 Community Gems +date: 2020-01-20 +description: > + Great discussions and technical Q&A's from our users. +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. Some of those are related to our brainchild + [DVC](https://dvc.org) and its journey. The others are a collection of + exciting stories and ideas centered around ML best practices and workflow. +picture: 2020-01-20/Community_Gems.png +pictureComment: +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/january-20-community-gems/315 +tags: + - Community Gems +--- + +## Discord gems + +There's a lot of action in our Discord channel these days. Ruslan, DVC's core +maintainer, said it best with a gif. + +https://twitter.com/rkuprieiev/status/1144008869414342658?ref_src=twsrc%5Etfw + +It's a lot to keep up with, so here are some highlights. We think these are +useful, good-to-know, and interesting conversations between DVC developers and +users. + +### Q: [What pros does DVC have compared to Git LFS?](https://discordapp.com/channels/485586884165107732/563406153334128681/657590900754612284) + +For an in-depth answer, check out this +[Stack Overflow discussion](https://stackoverflow.com/questions/58541260/difference-between-git-lfs-and-dvc). +But in brief, with DVC you don't need a special server, and you can use nearly +any kind of storage (S3, Google Cloud Storage, Azure Blobs, your own server, +etc.) without a fuss. There are also no limits on the size of the data that you +can store, unlike with GitHub. With Git LFS, there are some general LFS server +limits, too. 
DVC has additional features for sharing your data (e.g., +`dvc import`) and has pipeline support, so it does much more than LFS. Plus, we +have flexible and quick checkouts, as we utilize different link types (reflinks, +symlinks, and hardlinks). We think there are lots of advantages; of course, the +usefulness will depend on your particular needs. + +### Q: [How do I use DVC with SSH remote storage?](https://discordapp.com/channels/485586884165107732/563406153334128681/656016145119182849) I usually connect with a .pem key file. How do I do the same with DVC? + +DVC is built to work with the SSH protocol to access remote storage (we provide +some +[examples in our official documentation](https://dvc.org/doc/user-guide/external-dependencies#ssh)). +When SSH requires a key file, try this: + +```dvc +$ dvc remote modify myremote keyfile +``` + +### Q: [If you train a TensorFlow model that creates multiple checkpoint files, how do you establish them as dependencies in the DVC pipeline?](https://discordapp.com/channels/485586884165107732/563406153334128681/651098762466426891) + +You can specify a directory as a dependency/output in your DVC pipeline, and +store checkpointed models in that directory. It might look like this: + +```dvc +$ dvc run \ + -f train.dvc \ + -d data \ + -d train.py \ + -o models python code/train.py +``` + +where `models` is a directory created for checkpoint files. If you would like to +preserve your models in the data directory, though, then you would need to +specify them one by one. You can do this with bash: + +```dvc +$ dvc run $(for file in data/*.gz; do echo -n -d $file; done) +``` + +Be careful, though: if you declare checkpoint files to be an output of the DVC +pipeline, you won’t be able to re-run the pipeline using those checkpoint files +to initialize weights for model training. This would introduce circularity, as +your output would become your input. 
+ +Also keep in mind that whenever you re-run a pipeline with `dvc repro`, outputs +are deleted and then regenerated. If you don't wish to automatically delete +outputs, there is a `--persist` flag (see discussion +[here](https://github.com/iterative/dvc/issues/1214) and +[here](https://github.com/iterative/dvc/issues/1884)), although we don't +currently provide technical support for it. + +Finally, remember that setting something as a dependency (`-d`) doesn't mean it +is automatically tracked by DVC. So remember to `dvc add` data files in the +beginning! + +### Q: [Is it possible to use the same cache directory for multiple DVC repos that are used in parallel?](https://discordapp.com/channels/485586884165107732/485596304961962003/655012135973158942) Or do I need external software to prevent potential race conditions? + +This is absolutely possible, and you don't need any external software to safely +use multiple DVC repos in parallel. With DVC, cache operations are atomic. The +only exception is cleaning the cache with `dvc gc`, which you should only run +when no one else is working on a shared project that is referenced in your cache +(and also, be sure to use the `--projects` flag +[as described in our docs](https://dvc.org/doc/command-reference/gc)). For more +about using multiple DVC repos in parallel, check out some discussions +[here](https://discuss.dvc.org/t/setup-dvc-to-work-with-shared-data-on-nas-server/180) +and +[here](https://dvc.org/doc/use-cases/fast-data-caching-hub#example-shared-development-server). + +### Q: [What are some strategies for reproducibility if parts of our model training pipeline are run on our organizations's HPC?](https://discordapp.com/channels/485586884165107732/485596304961962003/652380507832844328) + +Using DVC for version control is entirely compatible with using remote computing +resources, like high performance computing (HPC), in your model training +pipeline. 
We think a great example of using DVC with parallel computing is +provided by [Peter Fogh](http://www.peterfogh.dk/) Take a +[look at his repo](https://github.com/PeterFogh/dvc_dask_use_case) for a +detailed use case. Please keep us posted about how HPC works in your pipeline, +as we'll be eager to pass on any insights to the community. + +### Q: Say I have a Git repository with multiple projets inside (one classification, one object detection, etc.). [Is it possible to tell DVC to just pull data for one particular project?](https://discordapp.com/channels/485586884165107732/563406153334128681/646760832616890408) + +Absolutely, DVC supports pulling data from different DVC files. An example would +be having two project subdirectories in your Git repo, `classification` and +`detection`. You could use `dvc pull -R classification` to only pull files in +that project to your workspace. + +If you prefer to be even more granular, you can `dvc add` files individually. +Then you can use `dvc pull .dvc` to retrieve the outputs specified +only by that file. + +### Q: [Is it possible to set an S3 remote without the use of AWS credentials with DVC?](https://discordapp.com/channels/485586884165107732/563406153334128681/623234659098296348) I want to publicly host a dataset so that everybody who clones my code repo can just run `dvc pull` to fetch the dataset. + +Yes, and we love the idea of publicly hosting a dataset. There are a few ways to +do it with DVC. We use one method in our own DVC project repository on Github. +If you run `git clone https://github.com/iterative/dvc` and then `dvc pull`, +you’ll see that DVC is downloading data from an HTTP repository, which is +actually just an S3 repository that we've granted public HTTP read-access to. + +So you would need to configure two remotes in your config file, each pointing to +the same S3 bucket through different protocols. 
Like this: + +```dvc +$ dvc remote add -d --local myremote s3://bucket/path +$ dvc remote add -d mypublicemote http://s3-external-1.amazonaws.com/bucket/path +``` + +Here's why this works: the `-d` flag sets the default remote, and the `--local` +flag creates a set of configuration preferences that will override the global +settings when DVC commands are run locally and won't be shared through Git (you +can read more about this +[in our docs](https://dvc.org/doc/command-reference/remote/add)). + +This means that even though you and users from the public are accessing the +stored dataset by different protocols (S3 and HTTPS), you'll all run the same +command: `dvc pull`. diff --git a/content/blogs/2020-02-04-gsoc-ideas-2020.md b/content/blogs/2020-02-04-gsoc-ideas-2020.md new file mode 100644 index 0000000000..671b2e72a8 --- /dev/null +++ b/content/blogs/2020-02-04-gsoc-ideas-2020.md @@ -0,0 +1,130 @@ +--- +title: Join DVC for Google Summer of Code 2020 +date: 2020-02-04 +description: > + A call for student applications for Google Summer of Code 2020. +descriptionLong: > + DVC is looking for students to take part in [Google Summer of Code + 2020](https://summerofcode.withgoogle.com/). +picture: 2020-02-04/Summer_of_Code_small.png +pictureComment: +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/join-dvc-for-google-summer-of-code/317 +tags: + - Google Summer of Code + - Students + - Mentoring + - Company +--- + +Announcement, announcement! After a successful experience with +[Google Season of Docs](https://developers.google.com/season-of-docs) in 2019, +we're putting out a call for students to apply to work with DVC as part of +[Google Summer of Code](https://summerofcode.withgoogle.com/). If you want to +make a dent in open source software development with mentorship from our team, +read on. + +## Prerequisites to apply + +Besides the general requirements to apply to Google Summer of Code, there are a +few skills we look for in applicants. + +1. 
**Python experience.** All of our core development is done in Python, so we + prefer candidates that are experienced in Python. However, we will consider + applicants who are very strong in another language and familiar with Python + basics. +2. **Git experience.** Git is also a key part of DVC development, as DVC is + built around Git; that said, for certain projects (rated as “Beginner”) a + surface-level knowledge of Git will be sufficient. +3. **People skills.** Beyond technical fundamentals, we put a high value on + communication skills: the ability to report and document your experiments and + findings, to work kindly with teammates, and explain your goals and work + clearly. + +If you like our mission but aren't sure if you're sufficiently prepared, please +be in touch anyway. We'd love to hear from you. + +## Project ideas + +Below are several project ideas that are an immediate priority for the core DVC +team. Of course,we welcome students to create their own proposals, even if they +differ from our ideas. Projects will be primarily mentored by co-founders +[Dmitry Petrov](https://github.com/dmpetrov) and +[Ivan Shcheklein](https://github.com/shcheklein). + +1. **Migrate to the latest v3 API to improve Google Drive support.** Our + organization is a co-maintainer of the PyDrive library in collaboration with + a team at Google. The PyDrive library is now several years old and still + relies on the v2 protocol. We would like to migrate to v3, which we expect + will boost performance for many DVC use cases (e.g. the ability to filter + fields being retrieved from our API, etc). For this project, we’re looking + for a student to work with us to prepare the next major version of the + PyDrive library, as well as making important changes to the core DVC code to + support it. Because PyDrive is broadly used outside of DVC, this project is a + chance to work on a library of widespread interest to the Python community. +

_Skills required:_ Python, Git, experience with APIs
+ _Difficulty rating:_ Beginner-Medium
+ +2. **Introducing parallelism to DVC.** One of DVC’s features is the ability to + create pipelines, linking data repositories with code to process data, train + models, and evaluate model metrics. Once a DVC pipeline is created, the + pipeline can be shared and re-run in a systematic and entirely reproducible + way. Currently, DVC executes pipelines sequentially, even though some steps + may be run in parallel (such as data preprocessing). We would like to support + parallelization for pipeline steps specified by the user. Furthermore, we’ll + need to support building flags into DVC commands that specify the level of + parallelization (CPU, GPU or memory).

_Skills required:_ + Python, Git. Some experience with parallelization and/or scientific computing + would be helpful but not required.
_Difficulty rating:_ Advanced +
+ +3. **Developing use cases for data registries and ML model zoos.** A new DVC + functionality that we’re particularly excited about is `summon`, a method + that can turn remotely-hosted machine learning artifacts such as datasets, + trained models, and more into objects in the user’s local environment (such + as a Jupyter notebook). This is a foundation for creating data catalogs of + data-frames and machine learning model zoos on top of Git repositories and + cloud storages (like GCS or S3). We need to identify and implement model zoos + (think PyTorch Hub, the Caffe Model Zoo, or the TensorFlow DeepLab Model Zoo) + and data registries for types that are not supported by DVC yet. Currently, + we’ve tested `summon` with PyTorch image segmentation models and Pandas + dataframes. We’re looking for students to explore other possible use cases. +

_Skills required:_ Python, Git, and some machine learning or + data science experience
_Difficulty rating:_ Beginner-Medium
+ +4. **Continuous delivery for JetBrains TeamCity.** Continuous integration and + continuous delivery (CI/CD) for ML projects is an area where we see + [DVC make a big impact](https://martinfowler.com/articles/cd4ml.html)- + specifically, by delivering datasets and ML models into CI/CD pipelines. + While there are many cases when DVC is used inside GitHub Actions and GitLab + CI, you will be transferring this experience to another type of CI/CD system, + [JetBrains TeamCity](https://www.jetbrains.com/teamcity/). We're working to + integrate DVC's model and dataset versioning into TeamCity's CI/CD toolkit. + This project would be ideal for a student looking to explore the growing + field of MLOps, an offshoot of DevOps with the specifics of ML projects at + the center.

_Skills required:_ Python, Git, bash scripting. It + would be nice, but not necessary, to have some experience with CI/CD tools + and developer workflow automation.
_Difficulty rating:_ + Medium-Advanced
+ +5. **DVC performance testing framework.** Performance is a core value of DVC. We + will be creating a performance monitoring and testing framework where new + scenarios (e.g., unit testing)can be populated. The framework should reflect + all performance improvements and degradations for each of the DVC releases. + It would be especially compelling if testing could be integrated with our + GitHub workflow (CI/CD). This is a great opportunity for a student to learn + about DVC and versioning in-depth and contribute to its stability.
+
_Skills required:_ Python, Git, bash scripting.
_Difficulty + rating:_ Medium-Advanced
+ +## If you'd like to apply + +Please refer to the +[Google Summer of Code](https://summerofcode.withgoogle.com/) application guides +for specifics of the program. Students looking to know more about DVC, and our +worldwide community of contributors, will learn most by visiting our +[Discord channel](https://dvc.org/chat), +[GitHub repository](https://github.com/iterative/dvc), and +[Forum](https://discuss.dvc.org/). We are available to discuss project proposals +from interested students and can be reached by [email](mailto:support@dvc.org) +or on our Discord channel. diff --git a/content/blogs/2020-02-10-february-20-dvc-heartbeat.md b/content/blogs/2020-02-10-february-20-dvc-heartbeat.md new file mode 100644 index 0000000000..520c91d271 --- /dev/null +++ b/content/blogs/2020-02-10-february-20-dvc-heartbeat.md @@ -0,0 +1,146 @@ +--- +title: February ’20 DVC❤️Heartbeat +date: 2020-02-10 +description: > + DVC talks around the world, new team members, and full-stack machine learning. +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. + + Look here for updates about [DVC](https://dvc.org), our journey as a startup, + projects by our users and big ideas about best practices in ML and data + science. +picture: 2020-02-10/heartbeat_black.png +pictureComment: + Just in time for Valentine's day, here's a seasonally-relevant DVC pipeline. +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/dvc-heartbeat-feburary-20/318 +tags: + - Heartbeat + - CI/CD +--- + +Welcome to the February Heartbeat! This month's featured image is a DVC pipeline +[created by one of our users](https://medium.com/nlp-trend-and-review-en/use-dvc-to-version-control-ml-dl-models-bef61dbfe477), +which _we_ think resembles a valentine. 
Here are some more highlights from our +team and our community: + +## News + +**Our team is growing!** In early January, DVC gained two new folks: engineer +[Saugat Pachhai](https://github.com/skshetry) and data scientist +[Elle O'Brien](https://twitter.com/andronovhopf). Saugat, based in Nepal, will +be contributing to core DVC. Elle (that's me!), currently in San Francisco, will +be leading data science projects and outreach with DVC. + +We're **gearing up for a spring full of talks** about DVC projects, including +new up-and-coming features for data cataloging and continuous integration. Here +are just a few events that have been added to our schedule: + + + + + + + +-Elle O'Brien was recently accepted to give a keynote at +[Women in Data Science](https://www.widsconference.org/) San Diego on May 9. The +talk is called "Packaging data and machine learning models for sharing." + +-Elle will also be speaking at [Div Ops](https://divops.org/), a new online +conference about (you guessed it) DevOps, on March 27. + +Look out for more conference announcements soon- in our **brand new community +page!** We've [just launched a new hub](https://dvc.org/community) for sharing +events, goings-ons, and ways to contribute to DVC. + +## From the community + +Our users continue to put awesome things on the internet. Like this AI blogger +who isn't afraid to wear his heart on his sleeve. + + + +Musa Atlihan writes: + +> From my experience, whether it is a real-world data science project or it is a +> data science competition, there are two major key components for success. +> Those components are API simplicity and reproducible pipelines. Since data +> science means experimenting a lot in a limited time frame, first, we need +> machine learning tools with simplicity and second, we need +> reliable/reproducible machine learning pipelines. Thanks to tools like Keras, +> LightGBM, and fastai we already have simple yet powerful tools for rapid model +> development. 
And thanks to DVC, we are building large projects with +> reproducible pipelines very easily. + +It's cool how Musa puts DVC in context with libraries for model building. In a +way, the libraries that have made it easier than ever to iterate through +different model architectures have increased the need for reproducibility in +proportion. + +Meanwhile in Germany, superusers Marcel Mikl and Bert Besser wrote +[another](https://blog.codecentric.de/en/2019/03/walkthrough-dvc/) seriously +comprehensive article about DVC for Codecentric. Marcel and Bert walk readers +through the steps to **build a custom machine learning training pipeline with +remote computing resources** like GCP and AWS. It's an excellent guide to +configuring model training with attention to _automation_ and _collaboration_. +We give them 🦉🦉🦉🦉🦉 out of 5. + + + +Here are a few more stories on our radar: + +- **AI Singapore shares their method for AI development and deployment.** This + .. + [blog about how Agile informs their processes](https://makerspace.aisingapore.org/2020/01/agile-ai-engineering-in-aisg/) + for continuous integration and delivery includes data versioning. + +- **Toucan AI dispenses advice for ML engineers.** This .. + [blog for practitioners](https://toucanai.com/blog/post/building-production-ml/) + discusses questions like, "When to work on ML vs. the processes that surround + ML". It covers how DVC is used for model versioning in the exploration stage + of ML. + +- **DVC at the University.** A recent .. + [pre-print from natural language processing researchers at Université Laval](https://arxiv.org/pdf/1912.01706.pdf) + explains how DVC facilitated dataset access for collaborators. + + > "In our case, the original dataset takes up to 6 Gigabytes. The previous way + > of retrieving the dataset over the network with a standard 20 Mbits/sec + > internet connexion took up to an hour to complete (including uncompressing + > the data). 
Using DVC reduced the retrieval time of the dataset to 3 minutes + > over the network with the same internet connexion." + + Thanks for sharing- this is a lovely result. Oh, and last... + +- **DVC is a job requirement**! We celebrated a small milestone when we stumbled + .. across a listing for a data engineer to support R&D at + [Elvie](https://www.elvie.com/en-us/), a maker of tech for women's health + (pretty neat mission). The decorations on the job posting are ours 😎 + +![](../uploads/images/2020-02-10/elvie.png)_A job advertisement featuring DVC._ diff --git a/content/blogs/2020-02-17-a-public-reddit-dataset.md b/content/blogs/2020-02-17-a-public-reddit-dataset.md new file mode 100644 index 0000000000..ff161e825e --- /dev/null +++ b/content/blogs/2020-02-17-a-public-reddit-dataset.md @@ -0,0 +1,327 @@ +--- +title: + AITA for making this? A public dataset of Reddit posts about moral dilemmas +date: 2020-02-17 +description: > + Releasing an open natural language dataset based on r/AmItheAsshole. +descriptionLong: > + Delve into an open natural language dataset of posts about moral dilemmas from + [r/AmItheAsshole](https://www.reddit.com/r/AmItheAsshole/). Use this dataset + for whatever you want- here's how to get it and start playing. +picture: 2020-02-17/post_header_gmoji.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/aita-for-making-this-a-public-dataset-of-reddit-posts-about-moral-dilemmas/323 +tags: + - Project + - Data + - Reddit + - Tutorial +--- + +In data science, we frequently deal with classification problems like, _is this +[Yelp reviewer unhappy](https://www.ics.uci.edu/~vpsaini/) with their brunch? Is +[this email](https://archive.ics.uci.edu/ml/datasets/spambase) begging me to +claim my long-lost inheritance spam? 
Does this +[movie critic](http://ai.stanford.edu/~amaas/data/sentiment/) have a positive +opinion of Cats?_ + +Perhaps we should also consider the fundamental introspective matter of, _am I +maybe being a bit of an asshole?_ + +I want to share a dataset of collected moral dilemmas shared on Reddit, as well +as the judgments handed down by a jury of Redditors. The wellspring of this data +is the [r/AmItheAsshole](https://www.reddit.com/r/AmItheAsshole/) subreddit, one +of the natural wonders of the digital world. In this article, I'll show you +what's in the dataset, how to get it, and some things you can do to move the +frontiers of Asshole research forward. + +## What makes an Asshole? + +r/AmItheAsshole is a semi-structured online forum that’s the internet’s closest +approximation of a judicial system. In this corner of the web, citizens post +situations from their lives and Redditors vote to decide if the writer has acted +as The Asshole or not. For example: + +![](../uploads/images/2020-02-17/aita_sample.png) + +Without bringing any code into the picture, it’s intuitive to think of each new +post as a classification task for the subreddit. Formally, we could think of the +subreddit as executing a function _f_ such that + +![](../uploads/images/2020-02-17/aita_formula.png '=500') + +Of course, finding f won’t be trivial. To be frank, I’m not positive how well we +could hope to forecast the rulings of the subreddit. A lot of posts are not easy +for me to decide- like, + +![](../uploads/images/2020-02-17/aita_llama.png) + +There are also many times I find myself disagreeing with the subreddit’s +verdict. All this is to say, I don’t think it’s obvious how well a given human +would do on the task of predicting whether Redditors find someone an Asshole. +Nor is it clear how well we could ever hope for a machine to do approximating +their judgment. + +It seems fun to try, though. 
It helps that the data is plentiful: because the +subreddit is popular and well-moderated, there’s an especially strong volume of +high-quality content (re: on-topic and appropriately formatted) being posted +daily. + +## Building the dataset + +I pulled content from r/AmITheAsshole dating from the first post in 2012 to +January 1, 2020 using the [pushshift.io](https://pushshift.io/) API to get post +ids and +[scores](https://www.reddit.com/wiki/faq#wiki_how_is_a_submission.27s_score_determined.3F), +followed by Reddit’s API ([praw](https://praw.readthedocs.io/en/latest/)) to get +post content and meta-data. Using a +[similar standard as OpenAI](https://openai.com/blog/better-language-models/) +for trawling Reddit, I collected text from posts with scores of 3 or more only +for quality control. This cut the number of posts from ~355K to ~111K. Each data +point contains an official id code, timestamp, post title, post text, verdict, +score, and comment count; usernames are not included. The scraping and cleaning +code is available +[in the project GitHub repo](https://github.com/iterative/aita_dataset). For +simplicity on the first iteration of this problem, I didn’t scrape post +comments, which can number in the thousands for popular posts. But, should +sufficient interest arise, I’d consider adding them to the dataset in some form. + +To focus on the task of classifying posts, I did some light cleaning: I removed +posts in which the body of the text was redacted (surprisingly common) or blank, +and attempted to remove edits where the author had clearly given away the +verdict (e.g., an edit that says, “Update: You’re right, I was the asshole.”). +There were also verdicts that only occurred once (“cheap asshole”, “Crouching +Liar; hidden asshole”, “the pizza is the asshole”), so I restricted the dataset +to posts with standard verdicts. This left ~63K points. 
Below is a sample of the +resulting dataframe: + +![](../uploads/images/2020-02-17/df_sample.png)_Click to enlarge._ + +The dataset is a snapshot of the subreddit in its current state, but the +subreddit is certain to change over time as new content gets added. In the +interest of having the most comprehensive dataset about being an asshole ever +collected, _I’m planning to update this dataset monthly with new posts._ + +## How to get the dataset + +Since this dataset will be updated regularly, we’re using git and DVC to +package, version, and release it. The data itself is stored in an S3 bucket, and +you can use DVC to import the data to your workspace. If you haven't already +you'll need to [install DVC](https://dvc.org/doc/install); one of the simplest +ways is `pip install dvc`. + +Say you have a directory on your local machine where you plan to build some +analysis scripts. Simply run + +```dvc +$ dvc get https://github.com/iterative/aita_dataset \ + aita_clean.csv +``` + +This will download a .csv dataset into your local directory, corresponding to +the cleaned version. If you wanted the raw dataset, you would substitute +`aita_raw.csv` for `aita_clean.csv`. + +Because the dataset is >100 MB, I’ve created a git branch (called “lightweight”) +with 10,000 randomly sampled (cleaned) data points for quick-and-dirty +experimentation that won’t occupy all your laptop’s memory. To download only +this smaller dataset, run + +```dvc +$ dvc get --rev lightweight \ + https://github.com/iterative/aita_dataset \ + aita_clean.csv +``` + +## A quick look at the data + +Let’s take a flyover look at the dataset so far. The code to make the following +visuals and results is +[available on GitHub](https://github.com/andronovhopf/aita_viz_and_classify). +First, here’s a frequency plot for how common different verdicts are on the +subreddit. In addition to “Asshole” and “Not the Asshole”, there are two +additional rulings: “Everybody Sucks” and “No Assholes Here”. 
+ +![](../uploads/images/2020-02-17/freq_plot.svg) + +In general agreement with an +[analysis by Nathan Cunn](http://www.nathancunn.com/2019-04-04-am-i-the-asshole/), +the majority of posts are deemed “Not the Asshole” or “No Assholes Here”. If you +are posting on r/AmITheAsshole, you are probably not the asshole. + +Next, I attempted a very basic classifier, logistic regression using 1-gram +frequencies (i.e., the frequency of word occurences in post titles and bodies) +as features. This is intended to give a baseline for what kind of performance +any future modeling efforts should beat. Because of the strong class imbalance, +I used +[SMOTE to oversample](https://imbalanced-learn.org/stable/over_sampling.html#smote-variants) +Asshole posts. And, for simplicity, I binarized the category labels: + +| Verdict | Label | +| :--------------: | :---: | +| Asshole | 1 | +| Everyone Sucks | 1 | +| Not the Asshole | 0 | +| No Assholes Here | 0 | + +With 5-fold cross-validation, this classifier performed above-chance but +modestly: accuracy was 62.0% +/- 0.005 (95% confidence interval). Curiously, the +only other classifier attempt I could find online +[reported 61% accuracy on held-out data](https://github.com/amr-amr/am-i-the-asshole) +using the much more powerful BERT architecture. Considering that logistic +regression has zero hidden layers, and our features discard sequential +information entirely, we’re doing quite well! Although I can’t be certain, I’m +curious how much the discrepancy comes down to dataset size: the previous effort +with BERT appears to be trained on ~30K posts. + +Seeing that logistic regression on word counts doesn’t produce total garbage, I +looked at which words were predictive of class using the +[chi-squared test](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.chi2.html). +The top five informative words were mom, wife, mother, edit, and dad (looks like +Assholes go back to edit their posts). 
Since familial relationships featured +prominently, I +[estimated the log odds ratio](https://www.tidytextmining.com/twitter.html#comparing-word-usage) +of being voted Asshole (versus Not the Asshole) if your post mentions a mom, +dad, girlfriend/wife or boyfriend/husband. Roughly, the log odds ratio +represents the difference in probability of a keyword occurring in Asshole posts +compared to Not-Asshole posts. + +![](../uploads/images/2020-02-17/svg_kw2.svg) + +For reference, the log odd ratios are computed with base 2; a score of 1 means +that Asshole posts are twice as likely to contain the keyword as Not the Asshole +posts. So keep in mind that the effect sizes we’re detecting, although almost +certainly non-zero, are still fairly small. + +There seems to be a slight anti-parent trend, with Redditors being more likely +to absolve authors who mention a mom or dad. Only mentioning a female romantic +partner (wife/girlfriend) was associated with a greater likelihood of being +voted the Asshole. This surprised me. My unsubstantiated guess about the gender +difference in mentioning romantic partners is that women may be particularly +likely to question themselves when they act assertively in a relationship. If +this were the case, we might find an especially high proportion of +uncontroversial “Not the Asshole” posts from heterosexual women asking about +situations with their male partners. + +## How to get more data + +As I said earlier, the plan is to grow the dataset over time. I’ve just run a +new scrape for posts from January 1-31, 2020 and am adding them to the public +dataset now. To check for a new release, you can re-run the `dvc get` command +you used to grab the dataset. + +If you’re serious about taking on a project such as, say, building a classifier +that beats our state of the art, word-count-based, logistic regression model, +I’d like to recommend a better way to integrate the dataset into your workflow: +`dvc import`. 
`dvc import` is like `dvc get`, but it preserves a link to the
hosted data set. This is desirable if you might iterate through several
experiments in the search for the right architecture, for example, or think
you’ll want to re-train a model. To get the dataset the first time, you’ll run:

```dvc
$ git init
$ dvc init
$ dvc import https://github.com/iterative/aita_dataset \
  aita_clean.csv
```

Then, because the dataset in your workspace is linked to our dataset repository,
you can update it by simply running:

```dvc
$ dvc update aita_clean.csv
```

An additional benefit of codifying the link between your copy of the dataset and
ours is that you can track the form of the dataset you used at different points
in your project development. You can jump back and forth through the project
history then, not only to previous versions of code but also to versions of
(specifically, links to) data. For example, you could roll back the state of the
project to before you updated the dataset and re-run your classifier:

```dvc
$ git log --oneline
58e28a5 retrain logistic reg
6a44161 update aita dataset
0de4fc3 try logistic regression classifier
a266f15 get aita dataset
55031b0 first commit

$ git checkout 0de4fc3
$ dvc checkout
$ python train_classifier.py
```

Oh, and one more note: you can always use `dvc get` and `dvc import` to grab an
older version of the dataset using the tags associated with each release. The
current release is v.20.1 and the original release is v.20.0- the numeric codes
correspond to the year and month.

```dvc
$ dvc get --rev v.20.0 \
  https://github.com/iterative/aita_dataset aita_clean.csv
```

## What’s next

I hope that sharing this evolving dataset invites some curiosity, because a lot
of questions come to mind:

1. Can you beat our classifier that predicts how the subreddit will rule?
2. Is verdict even the most interesting outcome to predict? 
For example,
   developer Scott Ratigan
   [created a tool to estimate weighted scores](https://github.com/scotteratigan/amitheahole)
   for each post based on the comments (e.g., 75% Asshole, 25% Not the Asshole).
   What metrics might invite deeper questions?
3. Can you identify sentences or phrases that are most informative about the
   verdict Redditors reach?
4. Do voting patterns systematically differ by topic of discussion?
5. How reliable are verdicts? When a very similar situation is posted multiple
   times, do Redditors usually vote the same way?
6. Is the subreddit’s posting and voting behavior changing over time?
7. Can you formulate any testable hypotheses based on
   [this survey of the subreddit’s demographics](https://www.reddit.com/r/AmItheAsshole/comments/dcae07/2019_subscriber_survey_data_dump/?)
8. How often do non-Redditors agree with the subreddit? Under what circumstances
   might they tend to disagree?

I expect that leaning into the particulars of the dataset- thinking about how
the format influences the content, and how a subreddit might select for
participants that don’t fully represent the population at large- will lead to
more interesting questions than, say, aiming to forecast something about
morality in general. To put it another way, the data’s not unbiased- so maybe
try to learn something about those biases.

If you make something with this dataset, please share- perhaps we can form an
international Asshole research collective, or at least keep each other apprised
of findings. And of course, reach out if you encounter any difficulties or
probable errors (you can file issues
[on the GitHub repo](https://github.com/iterative/aita_dataset))!

Lastly, please stay tuned for more releases- there are hundreds of new posts
every day. The biggest asshole may still be out there.

+ +### More resources + +You may want to check out a few more efforts to get at r/AmItheAsshole from a +data-scientific perspective, including +[topic modeling](https://medium.com/@tom.gonda/what-does-reddit-argue-about-28432b11ea26), +[visualizing voting patterns](http://www.nathancunn.com/2019-04-04-am-i-the-asshole/) +and +[growth of the subreddit](https://twitter.com/felipehoffa/status/1223278090958209025), +and +[classification](https://www.informatik.hu-berlin.de/de/forschung/gebiete/wbi/teaching/studienDiplomArbeiten/finished/2019/expose_fletcher.pdf) +with [deep learning](https://github.com/amr-amr/am-i-the-asshole). With a +dataset this rich, there’s much more to be investigated, including continuing to +refine these existing methods. And there’s almost certainly room to push the +state of the art in asshole detection! + +If you're interested in learning more about using Reddit data, check out +[pushshift.io](https://pushshift.io/), a database that contains basically all of +Reddit's content (so why make this dataset? I wanted to remove some of the +barriers to analyzing text from r/AmItheAsshole by providing an +already-processed and cleaned version of the data that can be downloaded with a +line of code; pushshift takes some work). You might use pushshift's API and/or +praw to augment this dataset in some way- perhaps to compare activity in this +subreddit with another, or broader patterns on Reddit. diff --git a/content/blogs/2020-02-19-february-20-community-gems.md b/content/blogs/2020-02-19-february-20-community-gems.md new file mode 100644 index 0000000000..72c272a684 --- /dev/null +++ b/content/blogs/2020-02-19-february-20-community-gems.md @@ -0,0 +1,152 @@ +--- +title: February '20 Community Gems +date: 2020-02-19 +description: > + Great discussions and technical Q&A's from our users. +descriptionLong: > + Look here every month for great discussions and technical Q&A's from our users + and core development team. 
+picture: 2020-02-19/feb20_gems_header_gr.png
author: elle_obrien
commentsUrl: https://discuss.dvc.org/t/feb-20-community-gems/330
tags:
  - Google Drive
  - Azure
  - Community Gems
  - Homebrew
---

## Discord gems

Welcome to the February roundup of useful, intriguing, and good-to-know
discussions going on with DVC users and developers. Let's dive right in with
some questions from our Discord channel.

### Q: [If I have multiple outputs from a DVC pipeline and only want to checkout one, what command would I run?](https://discordapp.com/channels/485586884165107732/563406153334128681/670233820326264843)

By default, `dvc checkout` is written for a
[Git-like experience](https://dvc.org/doc/command-reference/checkout), meaning
that it will sync your local workspace with all the model files, dependencies,
and outputs specified by a project's `.dvc` files. If you only want to access
one artifact from the project, you can do this with
`dvc checkout <target>`. This will deliver the specified file to your
workspace.

If you're interested in sharing specific artifacts (like data files or model
binaries) with other users, you might also consider `dvc get` and `dvc import`.
These functions are ideal for downloading a single file (or a few files) to the
local workspace, instead of the whole project.

### Q: [I have a complicated use case.](https://discordapp.com/channels/485586884165107732/563406153334128681/668773484549242890) We're trying to set up a system where users act as data scientists. They'd select data, which would be cleaned/transformed in the backend, and experiment with model hyperparameters until they're happy with the model result. Then they can "save" the model, including artifacts like the input data used, metrics, and binary model file, placing the experiment under version control. Later they can "load" the model again and select new input data from our database, change parameters, and "update it". 
There might be hundreds of separate models. Can DVC do this?

Most of this functionality is supported by DVC already. We recommend
`dvc import` as a method for giving users access to data in a repository (and
also check out our
[tutorial on data registries](https://dvc.org/doc/use-cases/data-registries)).
For pre-processing data,
[DVC pipelines](https://dvc.org/doc/get-started/pipeline) can automate a
procedure for transforming and cleaning inputs (i.e., you can use bash scripts
to `dvc run` the pipeline whenever a user selects a dataset). Saving the
workspace after experimentation, including model files, metrics, and outputs, is
a core function of DVC (see `dvc add` and `dvc push` functions). We also have a
[Python API](https://dvc.org/doc/use-cases/data-registries#programatic-reusability-of-dvc-data)
so users can load artifacts like datasets and model files into their local
Python session. When they're done experimenting, they can `dvc add` and
`dvc push` their progress. Users can later "pull" a saved workspace and all
associated files using `dvc checkout`.

As for how to organize hundreds of separate experiments, we're still evolving
our strategy and best-practice recommendations. It's conceivable that each
experiment could be carried out and saved on a separate branch of a project
repository. Our thoughts about structuring version control around architecture
search and hyperparameter tuning could fill up a whole blog (and probably will
in the not-so-distant future); check out one of our
[recent conversation threads](https://github.com/iterative/dvc/issues/2799) if
you'd like to see where we're currently at. And please let us know how your use
case goes—at this stage, we'd love to hear what works for you.

### Q: [What's the difference](https://discordapp.com/channels/485586884165107732/563406153334128681/666708671333400599) between `config` and `config.local` files? Is it safe to do git commit without including my config file? 
+
There are indeed two kinds of config files you might come across in your project
directory's `.dvc` folder and `.gitignore` file. The key difference is that
`config` is intended to be committed to Git, while `config.local` is not. You'd
use `config.local` to store sensitive information (like personal credentials for
SSH or another kind of authenticated storage) or settings specific to your local
environment—things you wouldn't want to push to a GitHub repo. DVC only modifies
`config.local` when you explicitly use the `--local` flag in the `dvc config` or
`dvc remote *` commands, so outside of these cases you shouldn't have to worry
about it.

As for using `git commit` without the `config` file, it is safe. _But_ you
should check if there are any settings in `config.local` that you actually want
to save to `config`. This would be rare, since as we mentioned, you'd only have
settings in `config.local` if you expressly called for them with the `--local`
flag.

### Q: I have an Azure storage account container, and the only link I can see in my Azure portal for the container is an `http://` link. But the tutorial on DVC shows Azure storage accessed with the `azure://` protocol. [Which is right?](https://discordapp.com/channels/485586884165107732/563406153334128681/675087897661276169)

What you're describing is exactly as it should be. `azure://` is an internal URL
protocol that tells DVC which API to use to connect to your remote storage, not
the exact address of your Blob. You can use the format
`azure://<container-name>/<path>`. For more details, you can refer to
our documentation about
[supported storage types](https://dvc.org/doc/command-reference/remote/add#supported-storage-types). 
+ +### Q: [I'm using DVC to version my data with Google Drive storage.](https://discordapp.com/channels/485586884165107732/563406153334128681/667198775361536019) If I want a developer to be able to download the data, can I give them my `gdrive_client_id` and `gdrive_client_secret`, or maybe give them permission to access my Google Drive folder? + +For Google Drive, `gdrive_client_id` and `gdrive_client_secret` aren't used to +access a specific user's Google Drive disk; they're predominantly used by +Google's API to +[track usage and set appropriate rate limits](https://rclone.org/drive/#making-your-own-client-id). +So the risk in sharing them is not that your personal files will be vulnerable, +but that your API usage limits could be negatively affected if others are using +it with your credentials. Whether this risk is acceptable is up to you. It's not +unusual for teams and organizations to share a set of credentials, so a +reasonable level of security may mean ensuring that the `config` file for your +project (which typically contains Google Drive credentials) is only visible to +team members. + +Please check out our +[docs about Google Drive](https://dvc.org/doc/user-guide/setup-google-drive-remote), +too, for more about how DVC uses the Google Drive API. + +### Q: I just tried to upgrade DVC via `homebrew` and got a "SHA256 mismatch" error. [What's going on](https://discordapp.com/channels/485586884165107732/485596304961962003/672930535261339669)? + +What most likely happened is that you first installed DVC via +`brew install iterative/homebrew-dvc/dvc`, which is no longer supported—because +DVC is now a core Homebrew formula! Please uninstall and reinstall using +`brew install dvc` for uninterrupted upgrades in the future. 
+
### Q: [I still can't convince myself to version-control the data rather than meta-data.](https://www.reddit.com/r/datascience/comments/aqkg59/does_anyone_use_data_version_control_dvc_thoughts/eq62lkt?utm_source=share&utm_medium=web2x) Can anyone give me a strong argument against version controlling data file paths in config files instead of using DVC?

_This question is from a [Reddit discussion.](https://bit.ly/38HOEcj)_

Versioning the meta-data associated with your dataset is certainly a workable
strategy. You can use prefixes and suffixes to distinguish models trained on
different versions of data, and keep your data files in one `.gitignored`
directory. That may be enough for some projects. In our experience, though,
we've found this comes with a host of complications that don't scale well:

1. You'll have to write custom code to support this configuration, specifying
   filepaths to your dataset with hardcoded links.
2. For files that are outputs of your analysis pipeline, you'll need to agree on
   conventions for suffixes/prefixes for naming to specify which version of the
   dataset was used.
3. Depending on the meta-data you use to version data files, you may not detect
   changes made by users. Even if you can tell a change has occurred, you may
   not be able to track _who_ did it _when_.

We designed DVC to optimize data management from the user's perspective: users
can change the dataset version without changing their code, so organizations
don't have to adhere to explicit filenaming conventions and hardcoded links that
are prone to human error. Furthermore, versioning data similar to how Git
versions code provides a largely immutable record of every change that has
occurred. We think this is important as teams and projects grow in complexity.
And from a systems-level perspective, DVC does more than track data: it
deduplicates files behind the scenes, provides simple interfaces for sharing
datasets (and models!) 
with collaborators and users, and connects specific model +files with the dataset versions they were trained on. + +To summarize, DVC is not the only way to version your data. But we think it's +one way to reduce the overhead of managing data infrastructure when your project +involves experimentation or collaboration. diff --git a/content/blogs/2020-03-11-march-20-dvc-heartbeat.md b/content/blogs/2020-03-11-march-20-dvc-heartbeat.md new file mode 100644 index 0000000000..9d7270d053 --- /dev/null +++ b/content/blogs/2020-03-11-march-20-dvc-heartbeat.md @@ -0,0 +1,138 @@ +--- +title: March ’20 DVC❤️Heartbeat +date: 2020-03-11 +description: > + DVC discussions around the web, our growing team, and recommended reading from + the open source community. +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. + + Look here for updates about [DVC](https://dvc.org), our journey as a startup, + projects by our users and big ideas about best practices in ML and data + science. +picture: 2020-03-11/March_20_HB_header.png +pictureComment: +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/march-20-heartbeat/335 +tags: + - Heartbeat + - CI/CD + - Book + - Monorepo + - New feature +--- + +Welcome to the March Heartbeat! Here are some highlights from our team and +community this past month: + +## News + +**DVC is STILL growing!** In February, Senior Software Engineer +[Guro Bokum](https://www.linkedin.com/in/jiojiajiu/) joined DVC. He's previously +contributed to the core DVC code base and brings several years of full-stack +engineering expertise to the team. Welcome, Guro! + +![](../uploads/images/2020-03-11/hi_guro.png 'Img=500x667')_Welcome, Guro!_ + +**New feature alert.** We've received many requests for +[monorepo](https://en.wikipedia.org/wiki/Monorepo) support in DVC. 
As of DVC +[release 0.87.0](https://github.com/iterative/dvc/releases), users can version +data science projects within a monorepo! The new `dvc init --subdir` +functionality is designed to allow multiple DVC repositories within a single Git +repository. Don't forget to upgrade and +[check out the latest docs](https://dvc.org/doc/command-reference/init). + +## From the community + +First, there's an intriguing +[discussion evolving in the DVC repo](https://github.com/iterative/dvc/issues/3393) +about how machine learning hyperparameters (such as learning rate, number of +layers in a deep neural network, etc.) can be tracked. Right now, +hyperparameters are tracked as source code (i.e., with Git). Could we use some +kind of abstraction to separate hyperparameters from source code in a +DVC-managed project? Read on and feel free to jump into this discussion, largely +helmed by software developer and DVC contributor +[Helge Munk Jacobsen](http://elgehelge.github.io/). + +Another discussion we appreciated happened on Twitter: + + + +Thanks, [@cyberomin](https://twitter.com/cyberomin)! + +Elsewhere on the internet, DVC made the cut in a much-shared blog, +[Five Interesting Data Engineering Projects](https://medium.com/@squarecog/five-interesting-data-engineering-projects-48ffb9c9c501) +by [Dmitry Ryaboy](https://twitter.com/squarecog) (VP of Engineering at biotech +startup Zymergen, and formerly Twitter). Dmitry wrote: + +> To be honest, I’m a bit of a skeptic on “git for data” and various automated +> data / workflow versioning schemes: various approaches I’ve seen in the past +> were either too partial to be useful, or required too drastic a change in how +> data scientists worked to get a realistic chance at adoption. So I ignored, or +> even explicitly avoided, checking DVC out as the buzz grew. I’ve finally +> checked it out and… it looks like maybe this has legs? Metrics tied to +> branches / versions are a great feature. 
Tying the idea of git-like branches
> to training multiple models makes the value prop clear. The implementation,
> using Git for code and datafile index storage, while leveraging scalable data
> stores for data, and trying to reduce overall storage cost by being clever
> about reuse, looks sane. A lot of what they have to say in
> https://dvc.org/doc/understanding-dvc rings true.

Check out the full blog here:


One of the areas that DVC is growing into is continuous integration and
continuous deployment (CI/CD), a part of the nascent field of MLOps. Naturally,
we were thrilled to discover that CI/CD with DVC is taught in a new Packt book,
["Learn Python by Building Data Science Applications"](https://www.packtpub.com/programming/learn-python-by-building-data-science-applications)
by David Katz and Philipp Kats.

In the authors' words, the goal of this book is to teach data scientists and
engineers "not only how to implement Python in data science projects, but also
how to maintain and design them to meet high programming standards." Needless to
say, we are considering starting a book club. Grab a copy here:


Last year in Mexico, DVC contributor Ramón Valles gave a talk about reproducible
machine learning workflows at Data Day Monterrey—and
[a video of his presentation](https://www.youtube.com/watch?v=tAxG-n20Di4) is
now online! In this Spanish-language talk, Ramón gives a thorough look at DVC,
particularly building pipelines for reproducible ML.


Finally, DVC data scientist Elle (that's me!) released a new public dataset of
posts from the Reddit forum
[r/AmItheAsshole](https://reddit.com/r/amitheasshole), and reported some
preliminary analyses. We're inviting anyone and everyone to play with the data,
make some hypotheses and share their findings. 
Check it out here: + + + +That's all for now—thanks for reading, and be in touch on our +[GitHub](https://github.com/iterative/dvc), +[Twitter](https://twitter.com/dvcorg), and +[Discord channel](https://dvc.org/chat). diff --git a/content/blogs/2020-03-24-march-20-community-gems.md b/content/blogs/2020-03-24-march-20-community-gems.md new file mode 100644 index 0000000000..377a1e5fb1 --- /dev/null +++ b/content/blogs/2020-03-24-march-20-community-gems.md @@ -0,0 +1,133 @@ +--- +title: March '20 Community Gems +date: 2020-03-12 +description: > + Great discussions and technical Q&A's from our users. +descriptionLong: > + Look here every month for great discussions and technical Q&A's from our users + and core development team. +picture: 2020-03-12/march_20_header.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/march-20-community-gems/336 +tags: + - Tags + - Community Gems + - Data registry +--- + +## Discord gems + +Here are some Q&A's from our Discord channel that we think are worth sharing. + +### Q: I have several simulations organized with Git tags. I know I can compare the metrics with `dvc metrics diff [a_rev] [b_rev]`, substituting hashes, branches, or tags for [a_rev] and [b_rev]. [But what if I wanted to see the metrics for a list of tags?](https://discordapp.com/channels/485586884165107732/563406153334128681/687634347104403528) + +DVC has a built in function for this! You can use `dvc metrics show` with the +`-T` option: + +```dvc +$ dvc metrics show -T +``` + +to list the metrics for all tagged experiments. + +Also, we have a couple of relevant discussions going on in our GitHub repo about +[handling experiments](https://github.com/iterative/dvc/issues/2799) and +[hyperparameter tuning](https://github.com/iterative/dvc/issues/3393). Feel free +to join the discussion and let us know what kind of support would help you most. 
+ +### Q: [Is there a recommended way to save metadata about the data in a `.dvc` file?](https://discordapp.com/channels/485586884165107732/563406153334128681/685105104340386037) In particular, I'd like to save summary statistics (e.g., mean, minimum, and maximum) about my data. + +One simple way to keep metadata in a `.dvc` file is by using the `meta` field. +Each `meta` entry is a `key:value` pair (for example, `name: Jean-Luc`). The +`meta` field can be manually added or written programmatically, but note that if +the `.dvc` file is overwritten (perhaps by `dvc run`, `dvc add`, or +`dvc import`) these values will not be preserved. You can read more about this +[in our docs](https://dvc.org/doc/user-guide/project-structure). + +Another approach would be to track the statistics of your dataset in a metric +file, just as you might track performance metrics of a model. For a tutorial on +using DVC metrics please +[see our docs](https://dvc.org/doc/command-reference/metrics). + +### Q: My team has been using DVC in production. When we upgraded from DVC version 0.71.0, we started getting an error message: `ERROR: unexpected error - /my-folder is not a git repository`. [What's going on?](https://discordapp.com/channels/485586884165107732/485596304961962003/687403454989467650) + +This is a consequence of new support we've added for monorepos with the +`dvc init --subdir` functionality +([see more here](https://dvc.org/doc/command-reference/init#init)), which lets +there be multiple DVC projects within a single Git repository. Now, if a DVC +repository doesn't contain a `.git` directory, DVC expects the `no_scm` flag to +be present in `.dvc/config` and raises an error if not. For example, one of our +users reported this when using DVC to pull files into a Docker container that +didn't have Git initialized (for more about using DVC without Git, +[see our docs](https://dvc.org/doc/command-reference/init#initializing-dvc-without-git)). 
+
You can fix this by running `dvc config core.no_scm true` (you could include
this command in the script that creates Docker images). Alternately, you could
include `.git` in your Docker container, but this is not advisable for all
situations.

We are currently working to
[add graceful error-handling](https://github.com/iterative/dvc/issues/3474) for
this particular issue so stay tuned.

### Q: [Is there a way to force the pipeline to rerun, even if its dependencies haven't changed?](https://discordapp.com/channels/485586884165107732/563406153334128681/687422002822381609)

Yes, `dvc repro` has a flag that should help here. You can use the `-f` or
`--force` flag to reproduce the pipeline even when no changes in the
dependencies (for example, a training dataset tracked by DVC) have been found. So
if you had a hypothetical DVC pipeline whose final process was `deploy.dvc`,
you could run `dvc repro -f deploy.dvc` to rerun the whole pipeline.

### Q: What's the best way to organize DVC repositories if I have several training datasets shared by several projects? Some projects use only one dataset while others use several. [Can one project have `.dvc` files corresponding to different remotes?](https://discordapp.com/channels/485586884165107732/563406153334128681/670664813973864449)

Yes, one project directory can contain datasets from several different DVC
remotes. Specifically, DVC has functions `dvc import` and `dvc get` that emulate
the experience of using a package manager for grabbing datasets from external
sources. You can use `dvc import` or `dvc get` to access any number of datasets
that are dependencies in a given project. For more on this,
[see our tutorial on data registries](https://dvc.org/doc/use-cases/data-registries). 
+
### Q: [What are the risks of using DVC on confidential data?](https://discordapp.com/channels/485586884165107732/563406153334128681/689848196473684024)

DVC doesn't collect any information about your data (or code, or models, for
that matter). You may have noticed that DVC
[collects Anonymized Usage Analytics](https://dvc.org/doc/user-guide/analytics),
which users may
[opt out of](https://dvc.org/doc/user-guide/analytics#opting-out). The data we
collect is extremely limited and anonymized, as it is collected mainly for the
purpose of prioritizing bugs and feature development based on DVC usage. For
example, we collect info about your operating system, DVC version, and
installation method (the
[complete list of collected features is here](https://dvc.org/doc/user-guide/analytics#what)).

Many of our users work with sensitive or private data, and we've developed DVC
with such scenarios in mind from day one.

### Q: [Can you suggest a reference architecture for using DVC as part of MLOps?](https://discordapp.com/channels/485586884165107732/563406153334128681/683890642631524392)

Increasingly, DVC is being used not just to version and manage machine
learning projects, but as part of MLOps, _practices for combining data science
and software engineering_. As MLOps is a fairly new discipline, standards and
references aren't yet solidified. So while there isn't (_yet_) a standard recipe
for using DVC in MLOps projects, we can point you to a few architectures we
like, and which have been reported in sufficient detail to recreate.

First, DVC can be used to detect events (such as dataset changes) in a CI/CD
system that traditional version control systems might not be able to. An
excellent and thorough
[blog by Danilo Sato et al.](https://martinfowler.com/articles/cd4ml.html)
explores using DVC in this way, as part of a CI/CD system that retrains a model
automatically when changes in the dataset are detected. 
+ +Second, DVC can be used to support model training on cloud GPUs, particularly as +a tool for pushing and pulling files (such as datasets and trained models) +between cloud computing instances, DVC repositories, and other environments. +This architecture was the subject of a +[recent blog by Marcel Mikl and Bert Besser](https://blog.codecentric.de/en/2020/01/remote-training-gitlab-ci-dvc/). +Their report describes the cloud computing setup and continuous integration +pipeline quite well. + +If you develop your own architecture for using DVC in MLOps, please keep us +posted. We'll be eager to learn from your experience. Also, keep an eye on our +blog in the next few months. We're rolling out some new tools with a focus on +MLOps! diff --git a/content/blogs/2020-03-31-reimagining-devops-video.md b/content/blogs/2020-03-31-reimagining-devops-video.md new file mode 100644 index 0000000000..7fab1700f5 --- /dev/null +++ b/content/blogs/2020-03-31-reimagining-devops-video.md @@ -0,0 +1,50 @@ +--- +title: 'New Video! 🎥 Reimagining DevOps for Machine Learning' +date: 2020-03-31 +description: > + A talk about CI/CD with fuzzy animals. +descriptionLong: > + As machine learning matures, we need to find better ways to integrate data + science with software development. In this talk for DivOps, a conference about + the future of DevOps, DVC data scientist Elle O'Brien discusses how CI/CD can + adapt to machine learning. This is MLOps, explained with fuzzy animals. +picture: 2020-03-31/cover_image.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/new-video-reimagining-devops-for-machine-learning/341 +tags: + - CI/CD + - DevOps + - MLOps + - DivOps + - Company +--- + +Last week, DVC was part of [DivOps](https://divops.org/), a fully remote +conference led by women in DevOps. DevOps, to the newly anointed, is a +discipline bringing together strong software engineering practices with speedy +development cycles. 
As machine learning is finding its way into just about +_every_ area of research and development, we're going to need to come up with +some conventions and tools for integrating machine learning and big data with +software development. This growing field is called +[MLOps](https://towardsdatascience.com/the-rise-of-the-term-mlops-3b14d5bd1bdb). + +I gave a lightning talk about how we'll have to rethink our software development +practices in the age of machine learning. It's got a focus on +[CI/CD](https://martinfowler.com/articles/cd4ml.html), a way of structuring +workflows that we think can streamline exchanges between data scientists and +software engineers. And, it's got fuzzy animals. Check it out here: + +https://youtu.be/0MDrZpO_7Q4 + +If you liked this, you'll also want to check out the next talk in the DivOps +playlist by +[Anna Petrovicheva](https://www.linkedin.com/in/anna-petrovicheva-44b24673/), +Founder and CEO of Xperience AI. Anna's talk goes deeper into developing best +practices for software engineering with deep learning. + +https://youtu.be/8nwpCQufeE0 + +All the talks from DivOps are +[available online now](https://www.youtube.com/playlist?list=PLVeJCYrrCemgbA1cWYn3qzdgba20xJS8V), +so please check out the YouTube channel. And stay tuned on our blog for more +CI/CD discussions coming soon... diff --git a/content/blogs/2020-04-06-april-20-dvc-heartbeat.md b/content/blogs/2020-04-06-april-20-dvc-heartbeat.md new file mode 100644 index 0000000000..179a7ce59a --- /dev/null +++ b/content/blogs/2020-04-06-april-20-dvc-heartbeat.md @@ -0,0 +1,171 @@ +--- +title: April ’20 DVC❤️Heartbeat +date: 2020-04-06 +description: > + Catch up on new DVC releases, talks, and projects in our community. This + month, learn what we're up to in MLOps, CI/CD, and the intersection of data + science and software engineering. + +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. 
+ + Look here for updates about [DVC](https://dvc.org), our journey as a startup, + projects by our users and big ideas about best practices in ML and data + science. +picture: 2020-04-06/april_header.png +pictureComment: | + A view from + [Barrancas del Cobre](https://en.wikipedia.org/wiki/Copper_Canyon), shot by + Jorge Orpinel Pérez. Jorge has mastered the art of working on DVC remotely. +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/april-20-heartbeat/347 +tags: + - Heartbeat + - Google Drive + - MLOps + - CI/CD + - Podcast + - DivOps +--- + +Welcome to the April Heartbeat, our +[monthly roundup of cool happenings](https://dvc.org/blog/tags/heartbeat), good +reads and other bright spots in our community. + +## News + +**Adapting to the pandemic.** Although the world seems different than when we +posted last month, the DVC community is steady and strong. As a predominantly +distributed company, we've been developing our infrastructure for remote work +from the get-go. It isn't always _easy_ to schedule an all-hands meeting across +9 time zones but we make it work. This experience has prepared us well for the +COVID-19 pandemic: although there are new challenges (like caring for families +while working from home) we've been able to weather the transition to fully +remote work relatively well. + +![](../uploads/images/2020-04-06/laptop_on_boat.jpeg)_Before social distancing +started, DVC technical writer Jorge Orpinel Pérez has worked from a canoe. Check +out more photos from his workations +[on Instagram](https://www.instagram.com/workationer/)._ + +**DVC sponsors DivOps.** In a time when many conferences are going remote out of +necessity, we were fortunate to be part of an _intentionally_ remote conference +this month! We sponsored [DivOps](https://divops.org/), a fully-online meeting +led by women in DevOps. The DivOps lineup included speakers from GitHub, +DropBox, Gremlin and more. DVC data scientist Elle (that's me!) 
gave a +ten-minute talk about MLOps and CI/CD, so +[please check out the video](https://dvc.org/blog/reimagining-devops-video). +Another very relevant talk was from Anna Petrovicheva, CEO of +[Xperience AI](http://xperience.ai/); Anna +[spoke about her team's development workflow for deep learning projects](https://youtu.be/8nwpCQufeE0) +and gave a clear overivew of how they use DVC. + +**DVC on the airwaves.** In early March, Elle was interviewed on an episode of +[The Data Stream podcast](https://www.interviewquery.com/tag/podcast/) about a +DVC data science project, +[building a public dataset of posts](https://dvc.org/blog/a-public-reddit-dataset) +from the "Am I the Asshole?" subreddit. + + + +## New releases + +This month, DVC has +[released some new features](https://github.com/iterative/dvc/releases) and +updates: + +- Did you know you can use Google Drive for remote storage with DVC? We've been + hard at work delivering the best performance with Google Drive and are + thrilled to invite users to try it out. Brand new + [docs](https://dvc.org/doc/user-guide/setup-google-drive-remote#setup-a-google-drive-dvc-remote) + explain how to get started. +- We're introducing the `metrics diff` functionality, which lets you compare + metrics from different commits side-by-side + ([check out the docs](https://dvc.org/doc/command-reference/metrics/diff) to + learn more) +- Windows users, we are here for you. Contributor + [rxxg](https://github.com/rxxg) helped us get better performance on copy + operations in Windows. + +## From the community + +**DVC and R working together** One of our favorite blogs this month came from +Marcel Ribeiro-Dantas, a developer and PhD student at the +[Institut Curie](https://institut-curie.org/). Marcel wrote about using DVC to +manage projects in R, particularly defining and versioning pipelines of data +processing and analysis that can be reproduced easily. 
While DVC is language +agnostic, much of our user content has been Python-centric, so it's exciting to +see a detailed post for the R-using data scientist (for more about R with DVC, +see +[Marija Ilić's post](https://dvc.org/blog/r-code-and-reproducible-model-development-with-dvc))! + + + +Also, Marcel recently gave an interview on +[The Data Hackers Podcast](https://medium.com/data-hackers/health-data-e-o-coronav%C3%ADrus-data-hackers-podcast-22-2b059d460cb1), +a Portuguese-language show. Listen for a shout-out about DVC! + +**DVC is in another book!** Last month we reported that DVC is part of a Packt +book, +["Learn Python by Building Data Science Applications"](https://www.packtpub.com/programming/learn-python-by-building-data-science-applications). +This month, DVC got a mention in a just-released O'Reilly book, +["Building Machine Learning Pipelines"](https://www.oreilly.com/library/view/building-machine-learning/9781492053187/) +by Hannes Hapke and Catherine Nelson. + + + +**Some more links we like.** Here are a few other discussions that have caught +our attention. + +- **MLOps can be fun.** Jeroen France's blog, "MLOps: Not as boring as it + sounds!", reads like a "coming of age" story about embracing engineering as a + data scientist. It's part-motivational, part tutorial- definitely worth a + read. Here's a sample: + + > No-one wants to baby-sit, maintain, and troubleshoot their own models once + > they are in production. Every data scientist secretly hopes they can pawn + > that job off to an engineering team, or maybe an intern, right? Well, in + > fact MLOps is going to make your data science life a lot better. + +- **Leveling up your Jupyter notebooks.** In a series called + ["How to Use Jupyter Notebooks in 2020"](https://ljvmiranda921.github.io/notebook/2020/03/16/jupyter-notebooks-in-2020-part-2/), + Lj Miranda discusses how to use Jupyter Notebooks in a mature software + development workflow. 
He makes several recommendations for tools, including + DVC. + +- **Reddit discussion about CI/CD** When we shared around our DivOps conference + presentation on Reddit, some + [great discussion happened](https://www.reddit.com/r/MachineLearning/comments/fshh9p/p_a_talk_about_adapting_cicd_systems_for_ml_full/). + We chatted about how CI/CD might work for data scientists, who often begin a + project with a phase of rapid exploration, and what version control for ML + could look like without Git. + +- **Smashing the data monolith.** Engineer Juan López López wrote a blog called + ["A complete guide about how to break the data monolith"](https://medium.com/packlinkeng/a-complete-guide-about-how-to-break-the-data-monolith-caa2ab2d01f6), + which is a neat manifesto about treating infrastructure _and_ data as code. + It's got nice coverage of DVC, code examples, and some deeply enjoyable + artwork. + +![](../uploads/images/2020-04-06/monolith.jpeg)_From Juan Juan López López's +[blog](https://medium.com/packlinkeng/a-complete-guide-about-how-to-break-the-data-monolith-caa2ab2d01f6)._ + +Thanks for reading. As always, let us know what you're making with DVC and what +links are catching your interest in the blog comments, on +[Twitter](https://twitter.com/DVCorg), and our +[Discord channel](https://dvc.org/chat). Be safe and be in touch! diff --git a/content/blogs/2020-04-16-april-20-community-gems.md b/content/blogs/2020-04-16-april-20-community-gems.md new file mode 100644 index 0000000000..a7fed015be --- /dev/null +++ b/content/blogs/2020-04-16-april-20-community-gems.md @@ -0,0 +1,151 @@ +--- +title: April '20 Community Gems +date: 2020-04-16 +description: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + the DVC cache, pipelines, cloud storage options and concurrency. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + the DVC cache, cloud storage options and concurrency. 
+picture: 2020-04-16/DVC_Gems_April_20.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/april-20-community-gems/356 +tags: + - Community Gems + - Pipelines +--- + +## Discord gems + +Here are some Q&A's from our Discord channel that we think are worth sharing. + +### Q: [How can I view and download files that are being tracked by DVC in a repository?](https://discordapp.com/channels/485586884165107732/485596304961962003/698815826870009868) + +To list the files that are currently being tracked in a project repository by +DVC and Git, you can use `dvc list`. This will display the contents of that +repository, including `.dvc` files. To download the contents corresponding to a +particular `.dvc` file, use `dvc get`: + +Let's consider an example using both functions. Assume we're working with DVC's +data registry example repository. To list the files present, run: + +```dvc +$ dvc list -R https://github.com/iterative/dataset-registry +.gitignore +README.md +get-started/.gitignore +get-started/data.xml +get-started/data.xml.dvc +... +``` + +Note that the `-R` flag, which enables `dvc list` to display the contents of +directories inside the repository. Now assume you want to download `data.xml`, +which we can see is being tracked by DVC. To download the dataset to your local +workspace, you would then run + +```dvc +$ dvc get https://github.com/iterative/dataset-registry get-started/data.xml +``` + +For more examples and information, +[see the documents](https://dvc.org/doc/command-reference/list#list) for +`dvc list` and for [`dvc get`](https://dvc.org/doc/command-reference/get). + +### Q: [I'm setting up cloud remote storage for DVC and I'd like to forbid `dvc gc --cloud` so users can't accidently delete files in the remote. 
Will it be sufficient to restrict deletion in the remote's settings?](https://discordapp.com/channels/485586884165107732/563406153334128681/698116671298076672) + +You're right to be careful, because `dvc gc --cloud` can be dangerous in the +wrong hands- it'll remove any unused files in your remote (for more info, +[see our docs](https://dvc.org/doc/command-reference/gc#gc)). To prevent users +from having this power, setting your bucket policy to block object deletions +should do the trick. How to do this will depend on your cloud storage provider- +we found some relevant docs for +[GCP](https://cloud.google.com/iam/docs/understanding-roles#cloud_storage_roles), +[S3](https://docs.aws.amazon.com/AmazonS3/latest/dev/using-with-s3-actions.html), +and +[Azure](https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad). +For the full list of supported remote storage types, +[see here](https://dvc.org/doc/command-reference/remote/add#supported-storage-types). + +### Q: [My team is interested in DVC, and we have all of our data in remote storage. Do we need to install a centralised enterprise version of DVC on a dedicated server? And do we have to also have a GitHub repository?](https://discordapp.com/channels/485586884165107732/563406153334128681/692524884701478992) + +There's no need for a DVC server. Our remote storage works on top of +[most kinds of cloud storage by default](https://dvc.org/doc/command-reference/remote/add#supported-storage-types), +including S3, GCP, Azure, Google Drive, and Aliyun, with no additional +infrastructure required. As for GitHub (or BitBucket, or GitLab, etc.), this is +only needed if you're interested in sharing your project with others over that +channel. We _like_ sharing projects on GitHub, but you don't have to. Any Git +repository, even a local one, will do. 
+ +So a "minimal" DVC project for you might consist of a local workspace with Git +enabled (which you _do_ need), a local Git repository, and your S3 remote +storage. Check out our +[use cases](https://dvc.org/doc/use-cases/versioning-data-and-model-files) to +see some examples of infrastructure and workflow for teams. + +### Q: [Could there be any issues with concurrent `dvc push`-es to the same remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/680053750320332800) + +There are a few ways for concurrency to occur: multiple jobs running in parallel +on the same machine, or different users on different machines. But in any case, +the answer is the same: there's nothing to worry about! When pushing a file to a +DVC remote, all operations are non-destructive and atomic. + +### Q: [How do I only download part of my remote repository? For example, I only need the final output of my pipeline, not the raw data or intermediate steps.](https://discordapp.com/channels/485586884165107732/485596304961962003/696751934777852004) + +We support granular operations on DVC project repositories! Say your project's +DVC remote contains several `.dvc` files corresponding to different stages of +your pipeline: `0_process_data.dvc`, `1_split_test_train.dvc`, and +`2_train_model.dvc`. If you're only interested in the files output by the final +stage of the pipeline (`2_train_model.dvc`), you can run: + +```dvc +$ dvc pull process_data_stage.dvc +``` + +You can also use `dvc pull` at the level of individual files. This might be +needed if your DVC pipeline file creates 10 outputs, for example, and you only +want to pull one (say, `model.pkl`, your trained model) from remote DVC storage. 
+You'd simply run + +```dvc +$ dvc pull model.pkl +``` + +### Q: [How can I remove a `.dvc` file, but keep the associated files in my workspace?](https://discordapp.com/channels/485586884165107732/485596304961962003/689827778358673469) + +Sometimes, you realize you don't want to put a file under DVC tracking after +all. That's okay, easy to fix. Simply remove the `.dvc` file like any other- +`rm .dvc`. DVC will then stop tracking the file, and the associated target +file will still be in your local workspace. Note that the file will still be in +your +[DVC cache](https://dvc.org/doc/user-guide/dvc-internals#structure-of-cache-directory) +unless you clear it with `dvc gc`. + +### Q: [I'm trying to move a stage file with `dvc move`, but I'm getting an error. What's going on?](https://discordapp.com/channels/485586884165107732/563406153334128681/685125650901630996) + +The `dvc move` command is used to rename a file or directory and simultaneously +modify its corresponding DVC file. It's handy so you don't rename a file in your +local workspace that's under DVC tracking without updating DVC to the change +(see an [example here](https://dvc.org/doc/command-reference/move#description)). +The function doesn't work on +["stage files"](https://dvc.org/doc/tutorials/pipelines#define-stages) from DVC +pipelines. There's not currently an easy way to safely move `dvc.yaml` files, +and it's an +[open issue we're working on](https://github.com/iterative/dvc/issues/1489). +Until then, you can manually update `dvc.yaml`, or make a new one in the desired +location. + +### Q: [I just starting using DVC and noticed that when I `dvc push` files to remote cloud storage, the directory in my remote looks like my DVC cache, not my local workspace directory. Is this right?](https://discordapp.com/channels/485586884165107732/485596304961962003/693740598498426930) + +Yep, that's exactly how it should be! 
In order to provide deduplication and some +other optimizations, your DVC remote's directory structure will mirror the DVC +cache (which is by default in your local workspace under `.dvc/cache`). +Effectively, DVC uses your Git repository to store DVC files, which are keys for +cache files on your remote. So looking inside your remote won't be particularly +enlightening if you're looking for human-readable filenames- the file names will +look like hashes (because, well, they are). Luckily, DVC handles all the +conversions between the filenames in your local workspace and these hashes. + +To get some more intuition about this, check out some of our +[docs](https://dvc.org/doc/user-guide/dvc-internals) about how DVC organizes +files. diff --git a/content/blogs/2020-04-30-gsod-ideas-2020.md b/content/blogs/2020-04-30-gsod-ideas-2020.md new file mode 100644 index 0000000000..84ea27400a --- /dev/null +++ b/content/blogs/2020-04-30-gsod-ideas-2020.md @@ -0,0 +1,269 @@ +--- +title: Join DVC for Google Season of Docs 2020 +date: 2020-04-30 +description: > + A call for writer applications for Google Season of Docs 2020. +descriptionLong: > + DVC is looking for technical writers to take part in [Google Season of Docs + 2020](https://developers.google.com/season-of-docs) — a unique program + sponsored by Google that pairs technical writers with open source projects. In + this post we introduce our goals for the program, and specific [project + ideas](#project-ideas) for potential candidates. 
+picture: 2019-04-23/post-image.png +pictureComment: +author: jorge_orpinel +commentsUrl: https://discuss.dvc.org/t/join-dvc-for-google-season-of-docs-2020/375 +tags: + - Google Season of Docs + - Documentation + - Mentoring + - Company +--- + +After a successful experience with the first edition of **Google Season of +Docs** [in 2019](/blog/dvc-project-ideas-for-google-summer-of-docs-2019), we're +putting out a call for writers to apply to work with DVC as part of the +[2020 edition](https://developers.google.com/season-of-docs). If you want to +write open source software documentation with mentorship from our team, read on. + +**TLDR**: Skip to [project ideas](#project-ideas). + +[DVC](https://dvc.org/) has a dedicated docs team and a +[well-defined process](https://dvc.org/doc/user-guide/contributing/docs) for +creating and maintaining our documentation, modeled in part based on our past +GSoD experience. We are happy to share our experience, introduce technical +writers to the world of open source and machine learning best practices, and +work together on improving our documentation. + +## Previous experience + +In last year's Season, we matched with prolific writer +[Dashamir](https://github.com/dashohoxha), who helped us give proper structure +important part of our docs, and address key issues. 
+ +https://twitter.com/DVCorg/status/1205203662827483136 + +Some of our achievements together were: + +- Reorganized our [tutorials](https://github.com/iterative/dvc.org/pull/666) and + core [contribution guide](https://github.com/iterative/dvc.org/pull/726) +- Created [interactive lessons](https://github.com/iterative/dvc.org/issues/546) + on [Katacoda](https://www.katacoda.com/dvc) +- Docs [cleanup](https://github.com/iterative/dvc.org/pull/734) +- Suggested the creation of a + [How To](https://github.com/iterative/dvc.org/issues/563) section for our docs +- Other + [contributions](https://github.com/iterative/dvc.org/pulls?q=is%3Apr+is%3Aclosed+author%3Adashohoxha) + +Another collaborator we connected with via GSoD’19 was an amazing student +intern, [Aman](https://github.com/algomaster99). He helped us address +[dozens of tickets](https://github.com/iterative/dvc.org/pulls?q=is%3Apr+author%3Aalgomaster99+is%3Aclosed) +related to our Node.js docs web app. For example: + +- Contributed to our + [command reference](https://github.com/iterative/dvc.org/pull/315) and + [user guide](https://github.com/iterative/dvc.org/pull/366), and created a + much needed + [documentation contribution](https://github.com/iterative/dvc.org/pull/317) + guide + +- [Formatted](https://github.com/iterative/dvc.org/pull/328) the source code of + our docs and established an + [automated mechanism](https://github.com/iterative/dvc.org/pull/386) to + enforce pretty formatting going forward + +- Implemented super useful hovering tooltips based on a special + [glossary](https://github.com/iterative/dvc.org/pull/431): + + ![](../uploads/images/2020-04-30/tooltip.png) _Toolip in the `dvc remote` + command reference_ + +### Community outreach + +More positive results of the program included talks and meetups organized by our +open source contributors, with our mentorship: + +![](../uploads/images/2020-04-30/SciPy_India_Aman.png) _Our intern Aman took a +several-hour long train ride to 
+[talk](https://static.fossee.in/scipy2019/SciPyTalks/SciPyIndia2019%5FS011%5FStoring%5Fa%5Ffew%5Fversions%5Fof%5Fa%5F5GB%5Ffile%5Fin%5Fa%5Fdata%5Fscience%5Fproject%5F20191130.mp4) +at [SciPy India 2019](https://scipy.in/2019)._ + +Another star contributor who found our project via GSoD, +[Kurian](https://github.com/kurianbenoy), closed +[several tickets](https://github.com/iterative/dvc.org/issues?q=is%3Aissue+kurianbenoy), +produced a DVC intro tutorial in +[Kaggle](https://www.kaggle.com/kurianbenoy/introduction-to-data-version-control-dvc) +and +[Colab](https://colab.research.google.com/drive/1O1XmUZ8Roj1dFxWTrpE55_A7lVkWfG04), +and ended up giving a talk in +[PyCon India](https://in.pycon.org/cfp/2019/proposals/machine-learning-model-and-dataset-versioning~dRqRb/): + +https://www.youtube.com/watch?v=Ipzf6oQqQpo + +He also covered DVC for the +[Devsprints](https://kurianbenoy.github.io/2019-11-03-Devsprints%5Fexperience/) +of [MEC.conf](https://enotice.vtools.ieee.org/public/50448) + +https://twitter.com/FossMec/status/1192866498324254720 + +Yet another outstanding contributor, +[Nabanita](https://twitter.com/explorer_07), ended up organizing a DVC-themed +hackathon later that year: + +https://twitter.com/psociiit/status/1185150096792535040 + +## Prerequisites to apply + +Besides the general requirements to apply to Google Season of Docs, there are a +few skills we look for in applicants. + +1. **Clear English writing.** We strive express the concepts, processes, and + details around DVC clearly, correctly, and completely. We use general and + friendly wording as much as possible and pay close attention to consistency + in our terminology. Our team will help with copy editing. + +1. **Command line experience.** [DVC](https://dvc.org/doc) is a command line + tool that builds on top of [Git](https://git-scm.com/), so being able to play + with it and test the features will be very useful. 
Creating and managing + files, GNU/Linux commands, file and permission administration are desired + skills. + +1. **People skills.** We put a high value on communication: the ability to + discuss ideas, explain your goals, report progress, and work kindly with more + or less technical teammates. + +If you like our mission but aren't sure if you're sufficiently prepared, please +be in touch anyway. We'd love to hear from you. + +## Project ideas + +Below are several project ideas that are an immediate priority for the DVC docs +team. We welcome technical writers to create their own proposals, even if they +differ from our ideas. Most projects will be mentored primarily by our lead +technical writer, [Jorge](https://github.com/jorgeorpinel). + +1. **"How To" section.** Other than our + [use cases](https://dvc.org/doc/use-cases), we still lack a good place to + answer common questions in our docs (think FAQ). We have compiled + [set of topics](https://github.com/iterative/dvc.org/issues/899) that we + think would be best explained in a new **How To** section for this purpose. + + This project would imply relocating bits and pieces of info from existing + docs into new how-tos, as well as writing significant new material to + complete them. Expanding on our + [troubleshooting](https://dvc.org/doc/user-guide/troubleshooting) page would + probably go well as part of this project as well. + + _Difficulty rating:_ Beginner-Medium

+ +1. **DVC 1.0 docs.** We are soon to release DVC 1.0.0! This version brings some + significant changes that for the first time in our + [release history](https://github.com/iterative/dvc/releases) are not + completely backward-compatible. We expect that fully updating all our + previous docs will take a few months, and you could help us with this! The + main new features are listed below. + + > UPDATE: See [post](https://dvc.org/blog/dvc-3-years-and-1-0-release) about + > the release! And corresponding docs + > [epic](https://github.com/iterative/dvc.org/issues/1255) task + + - A + [multi-stage _pipelines file_](https://github.com/iterative/dvc/issues/1871) + that partially substitutes + [DVC files](https://dvc.org/doc/user-guide/dvc-files) + - Separation between + [scalar vs. continuous metrics](https://github.com/iterative/dvc/issues/3409), + and new commands to visualize them, such as `dvc plots` + - A new [run cache](https://github.com/iterative/dvc/issues/1234) that + automatically saves experiment checkpoints between commits + + _Difficulty rating:_ Beginner-Medium

+ +1. **Video tutorials.** Written documentation is great, but other media can also + be important for our organization to reach a wide variety of learners. + Expanding to video is also a core part of our developer advocacy strategy. + + One of DVC's priorities for this year is creating a library of video + tutorials ranging from short explanations of basic DVC functions to more + advanced use cases. You could assist in writing the scripts or even take the + lead producing some videos, so image/video editing skills would come in handy + (optional). + + ![](../uploads/images/2020-04-30/Discord_user_video_tutorials.png) _Video + tutorials are a common request by users in our [chat](https://dvc.org/chat)._ + + **Mentor**: [Elle](https://github.com/elleobrien) + + _Difficulty rating:_ Beginner-Medium

+ +1. **Interactive guides.** Many of our docs include command line examples to + illustrate how DVC works. In some cases these are full guides we want people + to be able to follow by copying commands into their terminals. This has a few + challenges: mainly keeping the rest of the document maintainable, brief, and + easy to read; and supporting people on all platforms (Mac, Windows, Linux). + + So we started extracting some of the command examples into interactive + [Katacoda scenarios](https://www.katacoda.com/dvc) to match certain docs, + however they are in need of maintenance and completion, as well as being + embedded into the corresponding pages per + [this issue](https://github.com/iterative/dvc.org/issues/670). + + This may involve working with our front-end team or, preferably, having some + Javascript coding experience. + + _Difficulty rating:_ Medium-Advanced + +1. **Javascript engine UI/UX.** Our website has custom + [source code](https://github.com/iterative/dvc.org/tree/main/src) we've + developed over the years to host our landing pages, docs, and blog all in a + high-performance, advanced static site (Node.js, Gatsby, React, Typescript). + We have several goals to further improve the usability and structure of our + site, such as: + + - Creating a + [special docs home page](https://github.com/iterative/dvc.org/issues/1073) + - Improving [mobile menus](https://github.com/iterative/dvc.org/issues/808) + - Better navigation sidebar + [highlighting](https://github.com/iterative/dvc.org/issues/753) and + [positioning](https://github.com/iterative/dvc.org/issues/1198) + - Other + [doc-engine](https://github.com/iterative/dvc.org/issues?q=is%3Aopen+is%3Aissue+label%3Adoc-engine) + and + [blog-engine](https://github.com/iterative/dvc.org/issues?q=is%3Aopen+is%3Aissue+label%3Ablog-engine) + issues + + _Difficulty rating:_ Medium-Advanced

+ +1. **SEO/ Site Analytics.** Our current website analytics are somewhat basic. We + will need to have a clear strategy to follow and improve our Search Engine + results (with meta content, media optimization, + [etc.](https://github.com/iterative/dvc.org/issues?q=is%3Aissue+is%3Aopen+seo)), + as well as to understand the behavior of our users to improve their + experience. The specifics of the project are left for the applicant to + suggest! This should be relatively simple for someone with proven experience + in SEO or website QA. + + What tools should we employ? (e.g. Google Analytics, etc.) What trends and + reports do we need to focus on? What kinds of users do we have and what + interaction flows do they each follow? Can we semi-identify these users + and/or cross-examine their data with DVC + [usage analytics](https://dvc.org/doc/user-guide/analytics)? Let's come up + with a plan to answer these and other related questions! + + _Difficulty rating:_ Beginner-Medium

+ +> For more inspiration, feel free to review our +> [epics](https://github.com/iterative/dvc.org/labels/epic) and other open docs +> [issues](https://github.com/iterative/dvc.org/issues?q=is%3Aopen+is%3Aissue+label%3Adoc-content+). + +## If you'd like to apply + +Please refer to the +[Google Season of Docs](https://developers.google.com/season-of-docs) +application guides for specifics of the program. Writers looking to know more +about DVC, and our worldwide community of contributors, will learn most by +visiting our [Discord chat](https://dvc.org/chat), +[GitHub repository](https://github.com/iterative/dvc), and +[Forum](https://discuss.dvc.org/). We are available to discuss project proposals +from interested writers and can be reached by [email](mailto:support@dvc.org) or +on Discord. diff --git a/content/blogs/2020-05-04-dvc-3-years-and-1-0-release.md b/content/blogs/2020-05-04-dvc-3-years-and-1-0-release.md new file mode 100644 index 0000000000..542771d9d8 --- /dev/null +++ b/content/blogs/2020-05-04-dvc-3-years-and-1-0-release.md @@ -0,0 +1,245 @@ +--- +title: DVC 3 Years 🎉 and 1.0 Pre-release 🚀 +date: 2020-05-04 +description: > + Today, we've got three big announcements: 🎉 3rd-year anniversary of DVC, 🚀 + DVC 1.0 pre-release is ready and ⭐ 5000 GitHub starts. + +descriptionLong: | + Today, we've got three big announcements. + + 🎉 3rd-year anniversary of DVC + + 🚀 DVC 1.0 pre-release is ready + + ⭐ DVC has reached 5K GitHub starts (coincidentally on the same day) + + We'll share what we've learned from our journey, how users helped for the new + release and how DVC is growing. +picture: 2020-05-04/owl.png +pictureComment: DVC 3rd-year anniversary +author: dmitry_petrov +commentsUrl: https://discuss.dvc.org/t/dvc-3-years-anniversary-and-1-0-pre-release/374 +tags: + - Release + - MLOps + - DataOps + - CI/CD +--- + +## 3 years anniversary! 
+ +Three years ago on **May 4th, 2017**, I published the +[first blog post about DVC](https://www.kdnuggets.com/2017/05/data-version-control-iterative-machine-learning.html). +[The first DVC discussion on Reddit](https://www.reddit.com/r/Python/comments/698ian/dvc_data_scientists_collaboration_and_iterative/). +Until that point, DVC was a private project between +[myself](https://github.com/dmpetrov) and [Ruslan](https://github.com/efiop). +Today, things look very different. + +Today, DVC gets recognized at professional conferences: people spot our logo, +and sometimes even our faces, and want to chat. There's much more content about +DVC coming from bloggers than from inside our organization. We're seeing more +and more job postings that list DVC as a requirement, and we're showing up in +[data science textbooks](https://www.amazon.com/Learn-Python-Building-Science-Applications/dp/1789535360). +When we find a new place DVC is mentioned, we celebrate in our Slack - we've +come a long way! + +The data science and ML space is fast-paced and vibrant, and we're proud that +DVC is making an impact on discussions about best practices for healthy, +sustainable ML. Every week, we chat with companies and research groups using DVC +to make their teams more productive. We're proud to be part of the growing MLOps +movement: so far, a majority of CI/CD for ML projects are implemented with DVC +under the hood. + +I can confidently say that DVC wouldn't have been possible without a lot of help +from our community. Thank you to everyone who has supported us: + +**DVC core team.** The DVC team has been the force driving our project's +evolution - we've grown from 2 to 12 full-time engineers, developers, and data +scientists. Half of the team is purely focus on DVC while the other half on +related to DVC new projects. We often get feedback about how fast our team +answers user questions - we've been told our user support is one of DVC's +"killer features". 
It's all thanks to this amazing team. + +**DVC contributors.** As of today, the DVC code base has +[126 individual contributors](https://github.com/iterative/dvc/graphs/contributors). +Many of these folks put hours into their code contribution. We're grateful for +their tenacity and generosity. + +![](../uploads/images/2020-05-04/vera-sativa.png)_Vera - 100th DVC contributor +[on GitHub](https://github.com/verasativa/)._ + +**Documentation contributors.** Another +[124 people contributed](https://github.com/iterative/dvc.org/graphs/contributors) +to the [DVC documentation](https://dvc.org/doc) and +[the website](https://dvc.org/). Every time a new person tries out DVC, they +benefit from the hard work that's gone into our docs. + +**Active community members.** Active DVC users help our team understand and +better anticipate their needs and identify priorities for development. They +share bright ideas for new features, locate and investigate bugs in code, and +welcome and support new users. + +**People who give DVC a shot.** Today, there are thousands of data scientists, +ML engineers, and developers using DVC on a regular basis. The number of users +is growing every week. Our [Discord channel](http://dvc.org/chat) has almost two +thousand users. Hundreds more connect with us through email and Twitter. To +everyone willing to try out DVC, thank you for the opportunity. + +## DVC 1.0 is the result of 3 years of learning + +All these contributions, big and small, have a collective impact on DVC's +development. I'm happy (and a bit nervous) to announce that a pre-release of a +brand new DVC 1.0 is ready for public beta testing. + +You can install the 1.0 pre-release from the master branch in our repo +(instruction [here](https://dvc.org/doc/install/pre-release)) or through pip: + +```dvc +$ pip install --upgrade --pre dvc +``` + +The new DVC is inspired by discussions and contributions from our community - +both fresh ideas and bug reports 😅. 
+ +Here are the most significant features we’re excited to be rolling out soon: + +### [Run cache](https://github.com/iterative/dvc/issues/1234) + +_Learnings:_ Forcing users to make Git commits for each ML experiment creates +too much overhead. + +DVC 1.0 has a "long memory" of DVC commands runs. This means it can identify if +a `dvc repro` has already been run and save compute time by returning the cached +result - _even if you didn't Git commit that past run_. + +We added the run-cache with CI/CD systems and other MLOps and DataOps automation +tools in mind. No more auto-commits needed after `dvc repro` in the CI/CD system +side. + +### [Multi-stage DVC files](https://github.com/iterative/dvc/issues/1871) + +_Learnings:_ ML pipelines evolve much faster than data engineering pipelines. + +We redesigned the way DVC records data processing stages with metafiles, to make +pipelines more interpretable and editable. All pipeline stages are now saved in +a single metafile, with all stages stored together instead of in separate files. + +Data hash values are no longer stored in the pipeline metafile. This improves +human-readability. + +```yaml +stages: + process: + cmd: ./process_raw_data raw_data.log users.csv + deps: + - raw_data.log + params: + - process_file + - click_threshold + outs: + - users.csv + train: + cmd: python train.py + deps: + - users.csv + params: + - epochs + - log_file + - dropout + metrics_no_cache: + - summary.json + metrics: + - logs.csv + outs: + - model.pkl +``` + +### [Plots](https://github.com/iterative/dvc/issues/3409) + +_Learnings:_ Versioning metrics and plots are no less important than data +versioning. + +Countless users asked us when we'd support metrics visualizations. Now it's +here: DVC 1.0 introduces metrics file visualization commands, `dvc metrics diff` +and `dvc plots show`. DVC plots are powered by the +[Vega-Lite](https://vega.github.io/vega-lite/) graphic library. 
+
+This function is designed not only for showing visualizations based on the
+current state of your project, but it can also combine multiple plots from your
+Git history in a single chart so you can compare results across commits. Users
+can visualize how, for example, their model accuracy in the latest commit
+differs from another commit (or even multiple commits).
+
+```dvc
+$ dvc plots diff -d logs.csv HEAD HEAD^ d1e4d848 baseline_march
+file:///Users/dmitry/src/plot/logs.csv.html
+$ open logs.csv.html
+```
+
+![](../uploads/images/2020-05-04/dvc-plots.svg)
+
+```dvc
+$ dvc plots diff -d logs.csv HEAD HEAD^ d1e4d848 baseline_march \
+    -x loss --template scatter
+file:///Users/dmitry/src/plot/logs.csv.html
+$ open logs.csv.html
+```
+
+![](../uploads/images/2020-05-04/dvc-plots-scatter.svg)
+
+### [Data transfer optimizations](https://github.com/iterative/dvc/issues/3488)
+
+_Learnings:_ In ML projects, data transfer optimization is still the king.
+
+We've done substantial work on optimizing data management commands, such as
+`dvc pull / push / status -c / gc -c`. Now, based on the amount of data, DVC can
+choose an optimal data remote traversing strategy.
+
+[Mini-indexes](https://github.com/iterative/dvc/issues/2147) were introduced to
+help DVC instantly check data directories instead of iterating over millions of
+files. This also speeds up file adding/removing to large directories.
+
+More optimizations are included in the release based on performance bottlenecks
+we profiled. A more detailed
+[benchmark report](https://gist.github.com/pmrowla/338d9645bd05df966f8aba8366cab308)
+shows how many seconds it takes to run specific commands on a directory of 2M
+images.
+
+![](../uploads/images/2020-05-04/benchmarks.svg)
+
+### [Hyperparameter tracking](https://github.com/iterative/dvc/issues/3393)
+
+_Learnings:_ ML pipeline steps depend only on a subset of the config file. 
+
+This feature was actually released in the last DVC 0.93 version (see
+[params docs](https://dvc.org/doc/command-reference/params)). However, it is an
+important step to support configuration files and ML experiments in a more
+holistic way.
+
+### For more information on the new features...
+
+Each of the big new features and improvements deserves a separate blog post. We
+will be posting more - please stay in touch.
+
+I hope our most active users will find time to check the DVC pre-release
+version and provide their feedback. The installation instructions are
+[on our website](https://dvc.org/doc/install/pre-release).
+
+## 5000 GitHub stars
+
+Activity on our GitHub page has grown organically since the DVC repo went public
+on May 4th, 2017. Coincidentally, today, on the 3rd anniversary, we have
+reached 5000 stars:
+
+![](../uploads/images/2020-05-04/5k_github.png)
+
+## Thank you!
+
+Thank you again to all DVC contributors, community members, and users. Every
+piece of your help is highly appreciated and will bring huge benefits to the
+entire ecosystem of data and ML projects.
+
+Stay healthy and safe, wherever you are in the world. And be in touch on
+[Twitter](https://twitter.com/DVCorg), and our
+[Discord channel](https://dvc.org/chat).
diff --git a/content/blogs/2020-05-08-dvc-ambassador-program-announcement.md b/content/blogs/2020-05-08-dvc-ambassador-program-announcement.md
new file mode 100644
index 0000000000..3383d65e86
--- /dev/null
+++ b/content/blogs/2020-05-08-dvc-ambassador-program-announcement.md
@@ -0,0 +1,190 @@
+---
+title: 'Join the DVC Ambassador Program!'
+date: 2020-05-08
+description: >
+  We're launching our ambassador program for people all around the world to get
+  involved in the DVC community.
+descriptionLong: >
+  We're launching our ambassador program for people all around the world to get
+  involved in the DVC community. 
Our first ambassador, Marcel Ribeiro-Dantas,
+  shares a guest blog about how ambassadors support open source projects through
+  blog writing, public outreach, and code.
+picture: 2020-05-08/Ambassador_Header.png
+author: marcel_rd
+commentsUrl: https://discuss.dvc.org/t/join-the-dvc-ambassador-program/383
+tags:
+  - Ambassador
+  - Volunteer
+  - Meetup
+  - Blogging
+  - Company
+---
+
+DVC's software can be everywhere, but its developers can’t - that’s why
+ambassadors, folks who do outreach and community building around projects they
+love, are a key part of the open source community. DVC is starting an ambassador
+program to help people who are passionate about our mission get involved.
+
+As the first DVC ambassador, and a
+[Fedora ambassador](https://fedoraproject.org/wiki/User:Mribeirodantas) before
+that, I can tell you a bit about the role. As a representative of open source
+projects, I've participated in lots of events, made friends, and traveled. Every
+single time I’ve contributed, I got this nice feeling that it was all worth it.
+I believe that if you agree with the core values of the project, a great
+relationship lies ahead :).
+
+So what are the core values of DVC, exactly? DVC is founded on the principle of
+engineering solutions for making data science and machine learning rigorous and
+reproducible. If this matters to you, too, you might be a good fit for our
+ambassador program!
+
+As an ambassador, you’ll act as a bridge between DVC and your community. There
+are lots of ways to do this, big and small. For example:
+
+- Write a blog post talking about how you use DVC in your projects
+- What about creating a network of DVC users and data scientists in your town?
+  Even though we’re self-isolating now, you can still organize online meetups.
+  [We already did one!](https://tulu.la/events/dvc-virtual-meetup-2020-00032c)
+  We help cover costs to organize meetups.
+- Do you want to talk about DVC at your office, or at a conference? 
We help + speakers develop talks, and we have some discretionary funds for travel on a + case-by-case basis. +- Want to develop a feature for DVC? We welcome contributions to the code base, + even if it’s your first pull request ever. + +Being an ambassador means getting closer to the team in charge of DVC, but at +the same time, it means going farther to reach people outside the organization- +including people who don’t know about DVC yet, people who need some help getting +started, and people who are already excited about our mission and want to find +meaningful ways to pitch in. + +## About Iterative and DVC + +DVC got started in 2017 as a personal project by Dmitry Petrov ( +[we just celebrated our 3rd birthday](https://dvc.org/blog/dvc-3-years-and-1-0-release)). +Previously, Dmitry worked at Microsoft as a data scientist and did a PhD in +Computer Science. In 2018, Dmitry teamed up with his co-founder Ivan Shcheklein +(co-founder of [The Tweeted Times](https://tweetedtimes.com/) and +[Sedna](https://www.sedna.org/) contributor) to incorporate Iterative.ai and +grow the project. Iterative.ai is building enterprise tools for collaboration on +ML projects. Currently, Iterative.ai's open source flagship project is Data +Version Control (DVC), an open source version control system for managing +complex workflows, datasets, and models. + +Development is ongoing in the core DVC project as well as new ventures into +[MLOps and Continuous Integration & Delivery (CI/CD)](https://dvc.org/blog/reimagining-devops-video) +for data science. The team is small-and-mighty, with developers, engineers, and +data scientists on four continents. The open source community is a huge part of +all Iterative.ai projects; currently, DVC has more than +[5,000 stars on GitHub](https://github.com/iterative/dvc) and more than 100 +individual contributors! + +One of DVC’s main principles is adapting existing software engineering practices +to machine learning. 
For example, DVC is built around Git version control: in an +ML project using DVC, each experiment corresponds to a Git commit. When you +check out any commit, you’ll see the source code as it was when you made the +commit- as expected. But, you’ll also see your datasets as they were and the +exact pipeline of commands you ran in that experiment! + +## Why become an ambassador? + +Like any volunteer position, the main benefit is getting to be involved in a +project you believe in. But there are some perks: + +- Establishing a formal relationship with DVC that can go on your CV/resume. + We'll boost your content on our social channels, too. +- Access to support from the DVC team, such as financial resources to organize + your own meetup for local data scientists and ML enthusiasts +- Mentorship about crafting blogs and talks, if desired. DVC team members + regularly help people in the community develop their presentations and blogs + for accuracy and clarity +- Closer relationships with the DVC team means more chances to participate in + conversations that guide our product decisions. + +For students and early career professionals, you can learn a lot by interacting +with us! While you can certainly write a blog post or organize a meetup without +being an ambassador, the program is a way to fast-track your learning- you'll +have the creators of DVC helping you understand it well, and helping you +discover features and best practices you might not have known about. + +If you're already active in the open source or MLOps community, then becoming an +ambassador is a solid way to cement your relationship with DVC. We'd love to +recognize you for the amazing stuff you already do. 
+ +## How to become an ambassador + +If you’re interested in becoming an ambassador, send us an email at +[info@dvc.org](mailto:info@dvc.org) with the subject line “I want to be an +ambassador!” Please tell us: + +- A little about yourself and your professional background +- Any outreach work you’ve done before +- What kind of ambassador activities you’d be most interested in participating + in + +The program is structured to provide a lot of flexibility, so each ambassador +can do outreach in ways that are personally motivating and enjoyable. There are +a few guidelines: + +- We ask for at least one-year commitment +- We ask ambassadors to contribute at least four activities per year, about once + every three months. There's no upper limit to how much you can do! +- For your first contribution, we ask for a blog post- this way, we can + collaborate with you to help get all the technical details right. After that, + it’s up to you! + +## Some ideas to get started + +Our official ambassador program is just starting, but our community already has +a lot of folks making noise. Here are just a few contributions we admire- we +think they’re pretty cool inspirations for future projects. + +### Blogs and tutorials + +Shareable blogs are one of our most effective outreach strategies. They give +visibility to the author _and_ new ways to use DVC, so it's a win-win. 
+
+- [Remote training with GitLab-CI and DVC](https://blog.codecentric.de/en/2020/01/remote-training-gitlab-ci-dvc/),
+  by Marcel Mikl and Bert Besser (Bert has also organized a DVC meetup in
+  Berlin)
+- [Creating a solid Data Science development environment](https://towardsdatascience.com/creating-a-solid-data-science-development-environment-60df14ce3a34),
+  by Gabriel dos Santos Goncalves
+- [Continuous Delivery for Machine Learning](https://martinfowler.com/articles/cd4ml.html),
+  by Danilo Sato, Arif Wider, and Christoph Windheuser
+- [Manage your Data Science Project in R](https://mribeirodantas.xyz/blog/index.php/2020/03/05/r-dvc-and-rmarkdown/)
+  was my first blog post about using DVC in an R project!
+
+### Talks
+
+Community members have presented at events like PyCon, PyData, and local
+meetups.
+
+- [Version control for data science](https://www.slideshare.net/AlessiaMarcolini/version-control-for-data-science),
+  by Alessia Marcolini @ PyCon DE & PyData Berlin
+- [How to easily set up and version control your machine learning pipelines](https://www.youtube.com/watch?v=rUTlqpcmiQw),
+  by Sarah Diot-Girard & Stephanie Bracaloni @ PyData Amsterdam
+- [ML models and dataset versioning](https://speakerdeck.com/kurianbenoy/ml-models-and-dataset-versioning),
+  by Kurian Benoy @ PyCon India
+
+### Code contributions
+
+Our GitHub repository has lots of open discussions about potential features-
+it's a goldmine for ways to pitch in. For example:
+
+- [Helge Munk Jacobsen](https://github.com/elgehelge) took on an open issue in
+  our code base about supporting hyperparameter tracking with DVC and made a
+  pull request to add this feature.
+
+- [Vera Sativa](https://github.com/verasativa/) added directory support to the
+  `dvc import-url` function- and she was our 100th contributor, so she won her
+  own DeeVee the owl. 
+ +![](../uploads/images/2020-01-17/odd_with_deevee.png 'Vera and team =500')_Vera +(center, flashing a peace sign) thanked us with this lovely picture of DeeVee +and her team, [Odd Industries](https://odd.co)._ + +If any of this sounds fun to you, please be in touch over +[email](mailto:info@dvc.org) (and you can also reach us on +[Twitter](https://twitter.com/dvcorg) and our +[Discord Channel](https://discordapp.com/invite/dvwXA2N)). We look forward to +connecting with you! diff --git a/content/blogs/2020-05-14-may-20-dvc-heartbeat.md b/content/blogs/2020-05-14-may-20-dvc-heartbeat.md new file mode 100644 index 0000000000..0ce0514ca6 --- /dev/null +++ b/content/blogs/2020-05-14-may-20-dvc-heartbeat.md @@ -0,0 +1,180 @@ +--- +title: May ’20 DVC❤️Heartbeat +date: 2020-05-14 +description: > + Catch up on new DVC releases, talks, and projects in our community. This + month, learn about new features in the DVC 1.0 release, ways to get involved, + and more from the intersection of data science and software engineering. + +descriptionLong: > + Every month we share news, findings, interesting reads, community takeaways, + and everything else along the way. + + Look here for updates about [DVC](https://dvc.org), our journey as a startup, + projects by our users and big ideas about best practices in ML and data + science. +picture: 2020-05-14/May_20_Heartbeat.png +pictureComment: A big hello from DVC mascot DeeVee. +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/dvc-heartbeat-may-20/391 +tags: + - Heartbeat + - Plots + - MLOps + - Meetup + - Google Season of Docs + - Ambassador +--- + +Welcome to the May Heartbeat, our [monthly roundup of cool happenings](#news), +[new releases](#new-releases), [good reads](#from-the-community) and other +noteworthy developments the DVC community. + +## News + +**DVC turns 3.** On May 4th, we celebrated DVC's third birthday! 
Fearless leader +Dmitry Petrov +[wrote a retrospective](https://dvc.org/blog/dvc-3-years-and-1-0-release) about +how the team has grown and what we've learned from our users, contributors, and +colleagues. Thanks to everyone who celebrated with us! + +**Ambassador program launched.** DVC has just kicked off our ambassador program +with the help of our first ambassador, +[Marcel Ribeiro-Dantas](https://twitter.com/messages/40813700-894970070358564864). +Marcel is an early-stage researcher at the Institut Curie, a veteran +[ambassador of the Fedora Project](https://fedoraproject.org/wiki/User:Mribeirodantas), +and a [data science blogger](http://mribeirodantas.me/). Becoming an ambassador +is a way for folks who are passionate about contributing to the DVC community to +get recognized for their efforts. It's also a way for us to help volunteers with +financial support for meetups and travel, as well as chances to work more +closely with our team. The program is ideal for anyone who already likes +blogging about DVC, contributing code, and hosting get-togethers (virtual or +otherwise), but especially advanced students and early career data scientists +and engineers! +[Learn more about it here.](https://dvc.org/blog/dvc-ambassador-program-announcement) + +**DVC is part of 2020 Google Season of Docs.** Another way to get involved with +DVC is through +[Google Season of Docs](https://developers.google.com/season-of-docs), a program +we're participating in for the second year in a row. This program is for +technical writers to get paid experience working with the DVC team in fall 2020. +Right now, we're accepting proposals from interested writers. 
+
+[Find out more here.](https://dvc.org/blog/gsod-ideas-2020)
+
+**5000 GitHub Stars.** It finally happened- we passed 5,000 stars
+[on our GitHub repo!](https://github.com/iterative/dvc)
+
+![Animated GIF](https://media.giphy.com/media/igWE67cPgTrWwXq4Nz/giphy.gif)
+
+## New releases
+
+Coincident with DVC's 3rd birthday, we shared a pre-release of DVC 1.0. The
+release is expected in a few weeks, but you can experiment with 1.0 now (and
+make [tickets in our project repo](https://github.com/iterative/dvc) if you get
+a bug 🐛). Some major new features include:
+
+- **Run cache**, a cache of pipelines you've reproduced on your local workspace.
+  If you re-run `dvc repro` on a pipeline version that's already been executed,
+  run cache will save you compute time by returning the cached result.
+
+- **Multi-stage DVC files**. Users reported that their DVC pipelines changed a
+  lot, so we've made pipeline `.dvc` files more human-readable and editable for
+  fast redesigns.
+
+- **Plots** We've got plots powered by
+  [Vega-Lite](https://vega.github.io/vega-lite/) for making beautiful
+  visualizations comparing model performance across commits! Developer Paweł
+  Redzyński is hard at work:
+
+https://twitter.com/Paffciu1/status/1260119918525194241
+
+You can read more about the big updates coming in DVC 1.0
+[in our birthday blog](https://dvc.org/blog/dvc-3-years-and-1-0-release#dvc-10-is-the-result-of-3-years-of-learning).
+
+## From the community
+
+Developers weren't the only ones hustling this month...
+
+**First ever virtual DVC Meetup.** Marcel, our new ambassador, led an
+initiative to
+[organize a virtual meetup](https://tulu.la/events/dvc-virtual-meetup-2020-00032c)!
+Marcel shared his latest scientific work about creating a
+[new comprehensive dataset about mobility](https://www.sciencedirect.com/science/article/pii/S2352340920305928?via%3Dihub)
+during the COVID-19 pandemic and then passed off the mic to our two guest
+speakers. 
Data scientist [Elizabeth Hutton](https://github.com/ehutt) spoke how +she was building a workflow for her NLP team with DVC, and +[DAGsHub](https://dagshub.com/) co-founder +[Dean Pleban](https://twitter.com/DeanPlbn) shared his custom remote file system +setup for modeling Reddit post popularity. It was quite well-attended for our +first ever virtual hangout: we logged 40 individual logins to the meetup with +more than 30 people staying the whole time! A video of the meetup is +[on the event page](https://tulu.la/events/dvc-virtual-meetup-2020-00032c), so +you can still check out the talks and discussion we enjoyed. + +https://twitter.com/DeanPlbn/status/1258475031530790916 + +**Some blogs we like.** As usual, there's a lot of share-worthy writing in the +data science and MLOps space: + +- [Tania Allard](https://twitter.com/ixek) wrote an intensely readable, + extremely sharp guide to practical steps anyone can take to improve the + reproducibility of their ML projects. She really nails the complexity of the + workflow and the importance of decoupling code and data (which we obviously + agree with very much 😏). The graphics are also 💯- Tania is a developer + advocate to follow. + + + +- [Vimarsh Karbhari](https://medium.com/@vimarshk) blogged about how teams that + work with data can strategize better about versioning their data and analysis + pipelines. On the opposite end of giving very practical recommendations, + Vimarsh stresses a deliberate and careful approach. He emphasizes how the + team's choices should depend on factors like project maturity and how much + flexibility is going to be needed. It's a solid overview of how to begin + thinking about MLOps at a high level. + + + +- Over at [AutoRegresed](https://www.autoregressed.com/), Jack Pitts shared a + thorough tutorial about using [Pipenv](https://pypi.org/project/pipenv/), DVC + and Git together. 
As a trio, this manages dependencies and versions the + working environment, source code, dataset _and_ trained models. It's not only + a cool use case, but a very clear step-by-step explanation that should be easy + to try at home. Stay till the end for a neat trick about deploying a model as + a web service with Pipenv and DVC. + + + +## Nice tweets + +Last, here are some of our favorite tweets to read this past month: + +https://twitter.com/braaannigan/status/1257918525345234949 + + + +https://twitter.com/tcgarvin/status/1258855168436813826 + +_Thank you, thank you very much._ + +![Thank You Very Much GIF by The Wiggles](https://media.giphy.com/media/gJ2sDSKAQHUCIYUFhx/giphy.gif) + +As always, we want to hear what you're making with DVC and what you're reading. +Tell us in the blog comments, and be in touch on +[Twitter](https://twitter.com/DVCorg) and +[Discord channel](https://dvc.org/chat). Happy coding! diff --git a/content/blogs/2020-05-26-may-20-community-gems.md b/content/blogs/2020-05-26-may-20-community-gems.md new file mode 100644 index 0000000000..78d00dee74 --- /dev/null +++ b/content/blogs/2020-05-26-may-20-community-gems.md @@ -0,0 +1,132 @@ +--- +title: May '20 Community Gems +date: 2020-05-26 +description: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + development best practices, sharing models and data across projects, and using + DVC with teams. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + development best practices, sharing models and data across projects, and using + DVC with teams. +picture: 2020-05-26/May_20_Gems_Header.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/may-20-community-gems/398 +tags: + - Community Gems + - Cache + - Google Cloud Storage + - Import +--- + +## Discord gems + +Here are some Q&A's from our Discord channel that we think are worth sharing. 
+ +### Q: [How do I completely delete a file from DVC?](https://discord.com/channels/485586884165107732/563406153334128681/710546561498873886) + +To stop tracking a file with DVC, you can simply delete the file and its +corresponding `.dvc` file (if there is one) from your project. But, what if you +want to entirely erase a file from DVC? + +After deleting the `.dvc` file, you'll usually want to +[clear your DVC cache](https://dvc.org/doc/command-reference/gc#gc). Ordinarily, +that's done with `dvc gc`. However, if there's any chance the file you wish to +remove might be referenced by another commit (even under a different name), be +sure to use the right flag: `dvc gc --all-commits`. + +If you want to remove a single `.dvc` file without doing a cache cleanup, look +into the `.dvc` file and note the `md5` field inside. Then use this value to +identify the corresponding file in your `.dvc/cache` and delete it. For example: +if your target file has `md5`: 123456, the corresponding file in your cache will +be `.dvc/cache/12/3456`. + +There's one last case worth mentioning: what if you're deleting a file inside a +DVC-tracked folder? For example, say you've previously run + +```dvc +dvc add data_dir +``` + +and now want to remove a single file (say, `image_1.png`) from `data_dir`. When +DVC starts tracking a directory, it creates a corresponding `.dir` file inside +`.dvc/cache` that lists every file and subfolder, as well as an `md5` for each, +in a JSON format. You'll want to locate this `.dir` file in the cache, and then +find the entry corresponding to `image_1.png`. It'll give the `md5` for +`image_1.png`. Finally, go back to `.dvc/cache`, identify the file corresponding +to that `md5`, and delete it. For detailed instructions about `.dir` files, +where to find them and how they're used, +[see our docs about the structure of the cache](https://dvc.org/doc/user-guide/dvc-internals#structure-of-cache-directory). + +Having said all this... 
please know that in the future, we plan to support a
+function like `git rm` that will allow easier deletes from DVC!
+
+### Q: [Is it safe to add a custom file to my DVC remote?](https://discord.com/channels/485586884165107732/563406153334128681/707551737745244230)
+
+Definitely. Some people add additional files to their DVC remote, like a README
+to explain to teammates what the folder is being used for. Having an additional
+file in the remote that isn't part of DVC tracking won't pose any issues. You
+would only encounter problems if you were manually modifying or deleting
+contents of the remote managed by DVC.
+
+### Q: [Are there limits to how many files DVC can handle? My dataset contains ~100,000 files.](https://discord.com/channels/485586884165107732/563406153334128681/706538115048669274)
+
+We ourselves have stored datasets containing up to 2 million files, so 100,000
+is certainly feasible. Of course, the larger your dataset, the more time data
+transfer operations will take. Luckily,
+[DVC 1.0 contains several data transfer optimizations](https://dvc.org/blog/dvc-3-years-and-1-0-release#data-transfer-optimizations)
+to substantially reduce the time needed to `dvc pull / push / status -c / gc -c`
+for very large datasets.
+
+### Q: [Two developers on my team are doing `dvc push` to the same remote. Should they `dvc pull` first?](https://discord.com/channels/485586884165107732/563406153334128681/704211629075857468)
+
+It's safe to push simultaneously, no `dvc pull` needed. While some teams might
+be in the habit of frequently pulling, like in Git flow, there is less risk of
+"merge conflicts" in DVC. That's because DVC remotes store files indexed by
+`md5`s, so there's usually a very low probability of a collision (if two
+developers have two different versions of a file, they'll be stored as two
+separate files in the DVC remote- so no merge conflicts). 
+
+### Q: [What are `*.tmp` files in my DVC remote?](https://discord.com/channels/485586884165107732/563406153334128681/698163554095857745)
+
+Inside your DVC remote, you might see `.tmp` files from incomplete uploads. This
+can happen if a user killed a process like `dvc push`. You can safely remove
+them; for example, if you're using an S3 bucket, `aws s3 rm ... *.tmp` will do
+the trick.
+
+One caveat: before you delete, make sure no one is actively running `dvc push`.
+
+### Q: [I'm using a Google Cloud Platform (GCP) bucket as a DVC remote and getting an error. Any ideas?](https://discord.com/channels/485586884165107732/485596304961962003/705131622537756702)
+
+If you're getting the error,
+
+```
+ERROR: unexpected error - ('invalid_grant: Bad Request', '{\n "error": "invalid_grant",\n "error_description": "Bad Request"\n}')
+```
+
+something is going wrong with your GCP authentication! A few things to check:
+first,
+[check out our docs](https://dvc.org/doc/command-reference/remote/add#supported-storage-types)
+to `dvc remote add` a Google Cloud bucket as your remote. Note that before DVC
+can use this type of remote, you have to configure your credentials through the
+GCP CLI
+([see docs here](https://dvc.org/doc/command-reference/remote/add#supported-storage-types)).
+
+If you're still getting an error, DVC probably can't find the `.json`
+credentials file for your GCP bucket. Try authenticating using
+`gcloud beta auth application-default login`. This command obtains your access
+credentials and places them in a `.json` in your local workspace.
+
+### Q: [I'm working on several projects that all involve the same saved model. One project trains a model and pushes it to cloud storage with `dvc push`, and another takes the model out of cloud storage for use. 
What's the best practice for doing this with DVC?](https://discord.com/channels/485586884165107732/485596304961962003/708318821253120040) + +One of DVC's goals is to make it easy to move models and datasets in and out of +cloud storage. We had this in mind when we designed the function `dvc import` - +it lets you reuse artifacts from one project to another. And you can quickly +synchronize an artifact, like a model or dataset, with its latest version using +`dvc update`. Check out our +[docs about `import`](https://dvc.org/doc/command-reference/import), and also +our [data registry use case](https://dvc.org/doc/use-cases/data-registries) for +an example of sharing artifacts across projects. + +![](../uploads/images/2020-05-26/data-registry.png) _Using DVC for sharing +artifacts like datasets and models across projects and teammates._ diff --git a/content/blogs/2020-06-08-june-20-dvc-heartbeat.md b/content/blogs/2020-06-08-june-20-dvc-heartbeat.md new file mode 100644 index 0000000000..1786c5d2ee --- /dev/null +++ b/content/blogs/2020-06-08-june-20-dvc-heartbeat.md @@ -0,0 +1,214 @@ +--- +title: June ’20 Heartbeat +date: 2020-06-08 +description: > + Catch up on new DVC releases, talks, and projects in our community. This + month, learn about finishing touches on DVC 1.0, DVC in biomedical research, + recommended reading and upcoming MLOps talks. +descriptionLong: > + Catch up on new DVC releases, talks, and projects in our community. This + month, learn about finishing touches on DVC 1.0, DVC in biomedical research, + recommended reading and upcoming MLOps talks. +picture: 2020-06-08/June_20_Heartbeat_small.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/june-20-heartbeat/404 +tags: + - Heartbeat + - Udemy Course + - Pipelines + - Plots + - MLOps +--- + +Welcome to the June Heartbeat, our monthly roundup of cool happenings, +[good reads](#from-the-community) and +[up-and-coming developments](#coming-up-soon) in the DVC community. 
+
+## News
+
+In the beginning of May, we
+[pre-released DVC 1.0](https://dvc.org/blog/dvc-3-years-and-1-0-release). Ever
+since, we've been putting the final touches on 1.0- wrapping up features, fixing
+bugs 🐛, and responding to feedback from intrepid users
+[trying the pre-release](https://dvc.org/doc/install/pre-release). To recap,
+here are some of the big features coming:
+
+- **Plots powered by Vega-Lite** We're building
+  [functions for visualizing metrics](https://dvc.org/doc/command-reference/plots#plots)
+  in your project, as well as comparing metrics across commits. We chose
+  [Vega-Lite plots](https://github.com/vega/vega-lite) because they're
+  high-level, compatible with ML projects written in any language, and beautiful
+  by default.
+
+- **Human readable and writeable pipelines.** We're reworking pipelines so you
+  can modify dependencies, outputs, metrics, plots, and entire stages easily:
+  via manual edits to a `.yaml` pipeline file. This redesign will consolidate
+  pipeline `.dvc` files into a single file (yay, simpler working directory). No
+  worries for pipeline enthusiasts- DVC 1.0 is backwards compatible, so your
+  existing projects won't be interrupted.
+
+- **Run cache.** One of the most exciting features is the run-cache, a local
+  record of pipeline versions that have previously been run and the outputs of
+  those runs. It can seriously cut down on compute time if you find yourself
+  repeating pipeline executions. For our CI/CD users, it also offers a way to
+  save the output of your pipeline- like models or results-
+  [without auto-commits](https://stackoverflow.com/questions/61245284/will-you-automate-git-commit-into-ci-cd-pipline-to-save-dvc-run-experiments).
+
+DVC 1.0 work has been our top priority this past month, and we are _extremely
+close_ to the release. Think 1-2 weeks!
+
+Another neat announcement: DVC moved up on
+[ThoughtWorks Technology Radar](https://www.thoughtworks.com/radar/tools)! 
To +quote ThoughtWorks: + +> In 2018 we mentioned DVC in conjunction with the versioning data for +> reproducible analytics. Since then it has become a favorite tool for managing +> experiments in machine learning (ML) projects. Since it's based on Git, DVC is +> a familiar environment for software developers to bring their engineering +> practices to ML practice. Because it versions the code that processes data +> along with the data itself and tracks stages in a pipeline, it helps bring +> order to the modeling activities without interrupting the analysts’ flow. + +And here we are on the radar, in the Trial zone: + +![](../uploads/images/2020-06-08/radar.png) _Blip, blip, blip!_ + +We are honored. In fact, this was validating in several ways. We field a lot of +questions about our decision to build around Git, rather than creating a +platform. It's awesome to know our approach is resonating with teams at the +intersection of ML and software development. Thanks, ThoughtWorks! + +Last up in company news: you might recall that in early May, we hosted an online +meetup. [Marcel Ribeiro-Dantas](http://mribeirodantas.me) hosted guest talks +from [Elizabeth Hutton](https://github.com/ehutt) and +[Dean Pleban](https://twitter.com/DeanPlbn)- we heard about constructing a new +COVID-19 dataset, using DVC with transformer language models, and building +custom cloud infrastructure for MLOps. There's also Q&A with the DVC team, where +we fielded audience questions. A video of the meetup is available now, so check +it out if you missed the event. + +https://youtu.be/19GMtrFykSU + +## From the community + +As usual, there's a ton of noteworthy action in the DVC community. + +[Derek Haynes](https://twitter.com/dhaynes23), MLOps expert and new +[DVC Ambassador](https://dvc.org/blog/dvc-ambassador-program-announcement)- +wrote an excellent overview of using +[GitHub CodeSpaces](https://github.com/features/codespaces/). 
CodeSpaces is a +new development environment (currently in beta) that we're eagerly watching. As +Derek shows in his blog, it lets you have a Jupyter Notebook experience without +sacrificing on development standards- he uses +[whisk](https://docs.whisk-ml.org/en/latest/) to structure the project and +manage Python package dependencies, and DVC to version the model training +pipeline. + +This use case is telling in the +[battle over Jupyter notebooks](https://towardsdatascience.com/the-case-against-the-jupyter-notebook-d4da17e97243): +we might just be able to have both a notebook _and_ mature project management. +Give Derek's blog a read and tell us what you think. + + + +DVC Ambassador Marcel gave a tutorial about DVC to a bioinformatics student +group, and then an even bigger talk at the Federal University of Rio Grande de +Norte. His talk focused on how to use DVC in the context of scientific +reproducibility- specifically, large biological datasets, which are often +transformed and processed several times before ML models are fit. In my +experience, Git-flow is severely underutilized in life sciences research, so +it's exciting to see Marcel's ideas getting a big audience. + +https://twitter.com/ppgeecufrn/status/1263260554443005954 + +Also, Marcel is the first author of a new scientific paper about mobility data +across 131 countries during the COVID-19 pandemic. The preprocessing pipeline is +versioned with DVC. We don't know how Marcel gets this much done. + + + +Also just released is a scientific paper by Christoph Jansen et al. about a +framework for computational reproducibility in the life sciences that integrates +DVC. The framework is called +[Curious Containers](https://github.com/curious-containers/curious-containers)- +definitely worth checking out for biomedical researchers interested in deep +learning. 
+
+
+
+In other work of vital interest to the good of humanity, this month has seen
+some awesome applications of the
+[public Reddit dataset we released in February](https://dvc.org/blog/a-public-reddit-dataset).
+The dataset is designed for an NLP task of mighty importance: will Redditors
+vote that the poster is an asshole, or not?
+
+Daniele Gentile beat our benchmark classifier (62% accuracy, but not bad for
+logistic regression!) with Doc2Vec embeddings and a 500-neuron network. He got
+71% accuracy on held out data- nice! His blog is a fun read, and code's included
+if you want to follow along.
+
+
+
+Elsewhere on the internet, data scientist Dan Cassin delivered this beautiful
+tweet:
+
+https://twitter.com/Dan_Cassin/status/1256999648901787648
+
+Last, I want to point you to two other excellent blogs.
+[Venelin Valkov](https://github.com/curiousily) released a blog,
+[Reproducible machine learning and experiment tracking pipeline with Python and DVC](https://www.curiousily.com/posts/reproducible-machine-learning-and-experiment-tracking-pipiline-with-python-and-dvc/),
+that contains not only a detailed sample project but a livecoding video!
+
+https://youtu.be/6_kK6wRtzhk
+
+[Matthew McAteer](https://www.linkedin.com/in/matthewmcateer0/) revisited the
+famous 2015
+[Hidden Technical Debt in Machine Learning Systems](https://papers.nips.cc/paper/5656-hidden-technical-debt-in-machine-learning-systems.pdf)
+paper to ask which recommendations still work five years later. It's pretty
+great-
+[please read it](https://matthewmcateer.me/blog/machine-learning-technical-debt/).
+
+![](../uploads/images/2020-06-08/spongebob.png) _Meme by Matthew McAteer. Click
+to enlarge._
+
+## Coming up soon
+
+There are a couple of events to look forward to in the next few weeks. I'll be
+speaking at two conferences: first,
+[MLOps World](https://mlopsworld.com/program/) about CI/CD and ML. 
Next, I'm +[organizing a workshop](https://computationalaudiology.com/the-critical-role-of-computing-infrastructure-in-computational-audiology/) +at the Virtual Conference on Computational Audiology. To get ready, I'm +gathering resources about good computing practices for scientists and biomedical +research labs- +[contributions are welcome](https://github.com/andronovhopf/Lab_Computing_Resources). + +Another talk on our radar is at EuroPython 2020. Engineer +[Hongjoo Lee will be talking about building a CI/CD workflow for ML with DVC](https://ep2020.europython.eu/talks/CXG7TcM-automating-machine-learning-workflow-with-dvc/)- +we're very interested to learn about their approach. + +Lastly, [ML REPA](http://ml-repa.ru/) leader and new DVC Ambassador +[Mikhail Rozhkov](https://twitter.com/mnrozhkov) is working on a Udemy course +about DVC. Look for more updates this summer! + +Thanks for reading this month. As always, we're proud of the ways our community +works for better, more rigorous ML. diff --git a/content/blogs/2020-06-22-dvc-1-0-release.md b/content/blogs/2020-06-22-dvc-1-0-release.md new file mode 100644 index 0000000000..dc58939f72 --- /dev/null +++ b/content/blogs/2020-06-22-dvc-1-0-release.md @@ -0,0 +1,284 @@ +--- +title: 'DVC 1.0 release: new features for MLOps' +date: 2020-06-22 +description: > + Today we're releasing DVC 1.0 with new exciting features that users were + waiting for ❤️. Find all the details in this blog post. +descriptionLong: > + Today we're releasing DVC 1.0. It brings new exciting features that users were + waiting for ❤️. DVC is a more mature product now, with stable release cycles + and benchmarks. Find all the details in this blog post. 
+picture: 2020-06-22/release.png
+pictureComment: DVC 1.0 release
+author: dmitry_petrov
+commentsUrl: https://discuss.dvc.org/t/dvc-1-0-release/412
+tags:
+  - Release
+  - MLOps
+  - DataOps
+  - CI/CD
+---
+
+## Introduction
+
+3 years ago, I was concerned about good engineering standards in data science:
+data versioning, reproducibility, workflow automation — like continuous
+integration and continuous delivery (CI/CD), but for machine learning. I wanted
+there to be a "Git for data" to make all this possible. So I created DVC (Data
+Version Control), which works as version control for data projects.
+
+Technically, DVC codifies your data and machine learning pipelines as text
+metafiles (with pointers to actual data in S3/GCP/Azure/SSH), while you use Git
+for the actual versioning. DevOps folks call this approach GitOps or, more
+specifically, in this case _DataOps_ or _MLOps_.
+
+The new DVC 1.0 is inspired by discussions and contributions from our community
+of data scientists, ML engineers, developers and software engineers.
+
+## DVC 1.0
+
+The new DVC 1.0 is inspired by discussions and contributions from our community
+— both fresh ideas and bug reports 😅. All these contributions, big and small,
+have a collective impact on DVC's development. I'm confident 1.0 wouldn't be
+possible without our community. They tell us what features matter most, which
+approaches work (and which don't!), and what they need from DVC to support their
+ML projects.
+
+A few weeks ago we announced the 1.0 pre-release. After lots of helpful feedback
+from brave users, it's time to go live. Now, DVC 1.0 is available with all the
+standard installation methods including `pip`, `conda`, `brew`, `choco`, and
+system-specific packages: deb, rpm, msi, pkg. See https://dvc.org/doc/install
+for more details.
+
+## New features
+
+It took us 3 years to finalize the requirements for DVC 1.0 and stabilize the
+commands (API) and DVC file formats. 
Below are the major lessons that we have +learned in 3 years of this journey and how these are reflected in the new DVC. + +### [Multi-stage DVC files](https://github.com/iterative/dvc/issues/1871) + +Our users taught us that ML pipelines evolve much faster than data engineering +pipelines with data processing steps. People need to change the commands of the +pipeline often and it was not easy to do this with the old DVC-files. + +In DVC 1.0, the DVC metafile format was changed in three big ways. First, +instead of multiple DVC "stage files" (`*.dvc`), each project has a single +`dvc.yaml` file. By default, all stages go in this single YAML file. + +Second, we made clear connections between the `dvc run` command (a helper to +define pipeline stages), and how stages are defined in `dvc.yaml`. Many of the +options of `dvc run` are mirrored in the metafile. We wanted to make it far less +complicated to edit an existing pipeline by making `dvc.yaml` more human +readable and writable. + +Third, file and directory hash values are no longer stored in the pipeline +metafile. This approach aligns better with the GitOps paradigms and simplifies +the usage of DVC by tremendously improving metafile human-readability: + +```yaml +stages: + process: + cmd: ./process_raw_data raw_data.log users.csv + deps: + - raw_data.log + params: + - process_file + - click_threshold + outs: + - users.csv + train: + cmd: python train.py + deps: + - users.csv + params: + - epochs + - log_file + - dropout + metrics: + - logs.csv + - summary.json: + cache: false + outs: + - model.pkl +``` + +All of the hashes have been moved to a special file, `dvc.lock`, which is a lot +like the old DVC-file format. DVC uses this lock file to define which data files +need to be restored to the workspace from data remotes (cloud storage) and if a +particular pipeline stage needs to be rerun. 
In other words, we're separating +the human-readable parts of the pipeline into `dvc.yaml`, and the auto-generated +"machine" parts into `dvc.lock`. + +Another cool change: the auto-generated part (`dvc.lock`) doesn't necessarily +have to be stored in your Git repository. The new run-cache feature eliminates +the need of storing the lock file in Git repositories. That brings us to our +next big feature: + +### [Run cache](https://github.com/iterative/dvc/issues/1234) + +We built DVC with a workflow in mind: one experiment to one commit. Some users +love it, but this approach gets clunky fast for others (like folks who are +grid-searching a hyperparameter space). Making Git commits for each ML +experiment was a requirement with the old DVC, if you wanted to snapshot your +project or pipelines on each experiment. Moving forward, we want to give users +more flexibility to decide how often they want to commit. + +We had an insight that data remotes (S3, Azure Blob, SSH etc) can be used +instead of Git for storing the codified meta information, not only data. In DVC +1.0, a special structure is implemented, the run-cache, that preserves the state +(including all the hashes). Basically, all the information that is stored in the +new `dvc.lock` file is replicated in the run-cache. + +The advantage of the run-cache is that pipeline runs (and output file versions) +are not directly connected to Git commits anymore. The new DVC can store all the +runs in the run-cache, even if they were never committed to Git. + +This approach gives DVC a "long memory" of DVC stages runs. If a user tries to +run a stage that was previously run (whether committed to Git or not), then DVC +can return the result from the run-cache without rerunning it. It is a useful +feature for a hyperparameter optimization stage — when users return to the +previous sets of the parameters and don't want to wait for ML retraining. 
+
+Another benefit of the run-cache is related to CI/CD systems for ML, which is a
+holy grail of MLOps. The long memory means users don't have to make auto-commits
+in their CI/CD system side - see
+[this Stack Overflow question](https://stackoverflow.com/questions/61245284/will-you-automate-git-commit-into-ci-cd-pipline-to-save-dvc-run-experiments).
+
+### [Plots](https://github.com/iterative/dvc/issues/3409)
+
+Countless users have asked when we'd support metrics visualizations. It became
+clear that metrics and their visualization are an essential part of _DataOps_,
+especially when it comes down to navigation around ML models, datasets and
+experiments. Now it's here: DVC 1.0 introduces metrics file visualization
+commands, `dvc plots diff` and `dvc plots show`. This is brand-new functionality
+in DVC and it's _in experimental mode_ now.
+
+This function is designed not only for visualizing the current state of your
+project, but also for comparing plots across your Git history. Users can
+visualize how, for example, their model accuracy in the latest commit differs
+from another commit (or even multiple commits).
+
+```dvc
+$ dvc plots diff -d logs.csv HEAD HEAD^ d1e4d848 baseline_march
+file:///Users/dmitry/src/plot/logs.csv.html
+$ open logs.csv.html
+```
+
+![](../uploads/images/2020-05-04/dvc-plots.svg)
+
+```dvc
+$ dvc plots diff -d logs.csv HEAD HEAD^ d1e4d848 baseline_march \
+  -x loss --template scatter
+file:///Users/dmitry/src/plot/logs.csv.html
+$ open logs.csv.html
+```
+
+![](../uploads/images/2020-05-04/dvc-plots-scatter.svg)
+
+DVC plots are powered by the
+[Vega-Lite graphic library](https://vega.github.io/vega-lite/). We picked Vega
+because it's high-level to manipulate, compatible with all ML frameworks, and
+looks great out of the box. However, you don't have to know Vega to use DVC
+plots: we've provided default templates for line graphs, scatterplots, and
+confusion matrices, so you can just point DVC plots to your metrics and go. 
+
+### [Data transfer optimizations](https://github.com/iterative/dvc/issues/3488)
+
+In _DataOps_, data transfer speed is hugely important. We've done substantial
+work to optimize data management commands, like
+`dvc pull / push / status -c / gc -c`. Now, based on the amount of data to move,
+DVC can choose the optimal strategy for traversing your data remote.
+
+[Mini-indexes](https://github.com/iterative/dvc/issues/2147) help DVC instantly
+check data directories instead of iterating over millions of files. This also
+speeds up adding/removing files to/from large directories.
+
+More optimizations are included in the release based on our profiling of
+performance bottlenecks. More detailed
+[benchmark reports](https://gist.github.com/pmrowla/338d9645bd05df966f8aba8366cab308)
+show how many seconds it takes to run specific commands on a directory
+containing 2 million images.
+
+![](../uploads/images/2020-05-04/benchmarks.svg)
+
+### [Hyperparameter tracking](https://github.com/iterative/dvc/issues/3393)
+
+This feature was actually released in the last DVC 0.93 version (see the
+[params docs](https://dvc.org/doc/command-reference/params)). However, it is an
+important step to support configuration files and ML experiments in a more
+holistic way.
+
+The parameters are a special type of dependency in the pipelines. This is the
+way of telling DVC that a command depends not on a file (`params.yaml`) but on a
+particular set of values in the file:
+
+```dvc
+$ dvc run -d users.csv -o model.pkl \
+  --params lr,train.epochs,train.layers \
+  python train.py
+```
+
+The `params.yaml` file is the place where the parameters are stored:
+
+```yaml
+lr: 0.0041
+
+train:
+  epochs: 70
+  layers: 9
+
+process:
+  thresh: 0.98
+  bow: 15000
+```
+
+### Stable release cycles
+
+Today, many teams use DVC in their daily job for modeling and as part of their
+production MLOps automation systems. Stability plays an increasingly important
+role. 
+ +We've always prioritized agility and speed in our development process. There +have been weeks with two DVC releases! This approach had a ton of benefits in +terms of learning speed and rapid feedback from users. + +Now we're seeing signs that it's time to shift gears. Our API is stabilized and +version 1.0 is built with our long-term vision in mind. Our user-base has grown +and we have footing with mature teams - teams that are using DVC in +mission-critical systems. That's why we're intentionally going to spend more +time on release testing in the future. It might increase the time between +releases, but the quality of the tool will be more predictable. + +Additionally, we've already implemented a benchmark testing framework to track +performance across potential releases: https://iterative.github.io/dvc-bench/ In +this website, anyone can see the performance improvements and degradations for +every release candidate: + +![](../uploads/images/2020-06-22/dvc-benchmark.png) + +### For more information on the new features... + +Each of these new features has a story that could fill a separate blog post - so +that's what we'll be doing. We'll be posting more soon. +[Peter Rowlands](https://github.com/pmrowla) will be writing a blog post about +the performance optimization in DVC 1.0, +[Paweł Redzyński](https://github.com/pared) about versioning and visualizing +plots, [Saugat Pachhai](https://github.com/skshetry) about the new DVC file +formats and pipelines, and [Ruslan Kuprieiev](https://github.com/efiop) about +run-cache. + +Please stay in touch and subscribe to our newsletter in http://dvc.org. + +## Thank you! + +It's quite a journey to build an open source project in the ML/AI space. We're +fortunate to have a community of DVC users, contributors and cheerleaders. All +these folks tremendously help us to define, test and develop the project. 
We've +reached this significant milestone of version 1.0 together and I hope we'll +continue working on DVC and bringing the best practices of DataOps and MLOps to +the ML/AI space. + +Thank you again! And please be in touch on +[Twitter](https://twitter.com/DVCorg), and our +[Discord channel](https://dvc.org/chat). diff --git a/content/blogs/2020-06-26-scipy-2020-dvc-poster.md b/content/blogs/2020-06-26-scipy-2020-dvc-poster.md new file mode 100644 index 0000000000..b58d3492bc --- /dev/null +++ b/content/blogs/2020-06-26-scipy-2020-dvc-poster.md @@ -0,0 +1,231 @@ +--- +title: 'Packaging data and machine learning models for sharing' +date: 2020-06-26 +description: > + A virtual poster for SciPy 2020 about sharing versioned datasets and ML models + with DVC. +descriptionLong: > + A virtual poster for SciPy 2020 about sharing versioned datasets and ML models + with DVC. +picture: 2020-06-26/SciPy_2020.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/packaging-data-and-machine-learning-models-for-sharing/423 +tags: + - Import + - SciPy + - Python + - Tutorial +--- + +When I was doing my Ph.D., every time I published a paper I shared a public +GitHub repository with my dataset and scripts to reproduce my statistical +analyses. While it took a bit of work to get the repository in good shape for +sharing (cleaning up code, adding documentation), the process was +straightforward: upload everything to the repo! + +But when I started working on deep learning projects, things got considerably +more complicated. For example, in a +[data journalism project I did with The Pudding](https://pudding.cool/2019/11/big-hair/), +I wanted to understand how hair style (particularly size!) changed over the +years. 
There were a lot of moving parts:
+
+- A public dataset of yearbook photos released and maintained by
+  [Ginosar et al.](https://people.eecs.berkeley.edu/~shiry/projects/yearbooks/yearbooks.html)
+- A deep learning model I trained to segment the hair in yearbook photos
+- A derivative dataset of "hair maps" for each photo in the original dataset
+- All the code to train the deep learning model and analyse the derivative
+  dataset
+
+![](../uploads/images/2020-06-26/hairflow.png) _The parts of my big-hair-data
+project: an original public dataset, a model for segmenting the images, a
+derivative dataset of segment maps, and analysis scripts._
+
+How would you share this with a collaborator, or open it up to the public?
+Throwing it all in a GitHub repository was not an option. My model wouldn't fit
+on GitHub because it was over the 100 MB size limit. I also wanted to preserve a
+clear link between my derived dataset and the original- it should be obvious
+exactly how I got the public dataset. And if that public dataset were to ever
+change, I would ideally want it to be clear what version I used for my analyses.
+
+This blog is about several different ways of "releasing" data science projects,
+with an emphasis on preserving meaningful links about the origins of derived
+data and models. I'm not making any strong assumptions about whether project
+materials are released within an organization (only to teammates, for example)
+or to the whole internet.
+
+Let's look at a few methods.
+
+# Method One: artifacts in the cloud
+
+When you work with big models and datasets, you often can't host them in a
+GitHub repo. But you can put them in cloud storage, and then provide a script in
+your GitHub repo to download them. 
For example, in the fantastic `gpt-2-simple` +[project by Max Woolf](https://github.com/minimaxir/gpt-2-simple), Max stores +huge GPT-2 models in Google Drive and provides a script to download a specified +model to a user's local workspace if it isn't already there. + +Likewise, the [Nvidia StyleGAN release](https://github.com/NVlabs/stylegan) +provides a hardcoded URL to their model in Google Drive storage. Both the +`gpt-2-simple` and StyleGAN projects have custom scripts to handle these big +downloads, and largely thanks to the work of the project maintainers, users only +interact with the downloading process at a very high level. + +Considering some pros and cons of this approach: + +| **Pros** | **Cons** | +| :-----------------------------------: | :----------------------------: | +| It's easy to put a model in a bucket | Hardcoded links are brittle | +| Works for pip packages | Need to write custom functions | +| No extra tools, just Python scripting | Downloads aren't versioned | + +# Method Two: Hubs, Catalogs & Zoos + +There are a (growing) number of websites willing to long-term host big models +and datasets, plus relevant meta-data, code, and publications. Some even allow +you to upload several versions of a project- it's not Git, for sure, but even +basic version control is something. + +For example, [PyTorch Hub](https://pytorch.org/hub/) lets researchers publish +trained models developed in the PyTorch framework, along with code and papers. +It's easily searched and browsed, which makes projects discoverable. + +For a dataset analog, Kaggle is similar- they host user-submitted datasets and +help other users find them. Both PyTorch Hub and Kaggle have APIs for +programmatically downloading artifacts. 
+
+| **Pros**                 | **Cons**                |
+| :----------------------: | :---------------------: |
+| Browsable & discoverable | Centrally managed       |
+| Public                   | Public (no granularity) |
+| Good with big models     | Weak versioning support |
+
+# Method Three: Packaging with DVC
+
+[DVC](https://dvc.org), or Data Version Control, is a Python project for
+extending Git version control to large project artifacts like datasets and
+models. It's not a replacement for Git- DVC works _with_ Git!
+
+The basic idea is that your datasets and models are stored in a DVC repository,
+which can be any cloud storage or server of your choice. DVC creates metadata
+about file versions that can be tracked by Git and hosted on GitHub- so you can
+share your datasets and models like any GitHub project, with all the benefits of
+versioning. Let's look at a case study.
+
+## Creating a DVC project
+
+Say I have a project containing a dataset, model training code, and model.
+
+```dvc
+$ ls
+data.csv
+train.py
+model.pkl
+```
+
+Say our model and dataset are large and we want to track them with DVC. For
+remote storage, we want to use a personal S3 bucket. We would run:
+
+```dvc
+$ git init
+$ dvc init
+$ dvc remote add myremote s3://mybucket/myproject
+$ dvc add data.csv model.pkl
+$ dvc push
+```
+
+When I run these commands, I've initialized Git and DVC tracking. Next, I've set
+a DVC repository- my S3 bucket. Then I've added `data.csv` and `model.pkl` to
+DVC tracking. Finally, when I run `dvc push`, the model and dataset are pushed
+to the S3 bucket. On my local machine, two meta-files are created:
+`data.csv.dvc` and `model.pkl.dvc`. These can be tracked with Git!
+
+```dvc
+$ ls
+data.csv.dvc
+train.py
+model.pkl.dvc
+```
+
+So after setting a remote Git repository, `git add`, `commit` and `push` like
+usual (assuming you are a regular Git user, that is):
+
+```dvc
+$ git remote add origin git@github.com:elle/myproject
+$ git add . 
&& git commit -m "first commit"
+$ git push origin master
+```
+
+## Package management with DVC
+
+Now let's say one of my teammates wants to access my work so far- specifically,
+they want to see if another method for constructing features from raw data will
+help model accuracy. I've given them permission to access my GitHub repository.
+On their local machine, they'll run:
+
+```dvc
+$ dvc import https://github.com/elle/myproject data.csv model.pkl
+```
+
+This will download the latest version of the `data.csv` and `model.pkl`
+artifacts to their local machine, as well as the DVC metafiles `data.csv.dvc`
+and `model.pkl.dvc` indicating the precise version and source.
+
+Collaborators can also download artifacts from previous versions, releases, or
+parallel feature branches of a project. For example, if I released a new version
+of my project with a Git tag (say `v.2.0.1`), collaborators can run
+
+```dvc
+$ dvc get --rev v.2.0.1 \
+  https://github.com/elle/myproject data.csv
+```
+
+Lastly, because `dvc import` maintains a link between the downloaded artifacts
+and my repository, collaborators can check for project updates with
+
+```dvc
+$ dvc update data.csv model.pkl
+```
+
+If new versions are detected, DVC automatically syncs the local workspace with
+those versions.
+
+## When should you do this?
+
+In my own experience releasing a large public dataset with DVC, I've seen
+several benefits:
+
+- Within an hour, someone found data points I'd been missing. It was
+  straightforward to make a new release after patching this error.
+- Several people modeled my dataset! Highly rewarding. 
+- Since GitHub is a widely used platform for code sharing, it's a natural fit + for open source scientific projects and has little overhead for potential + collaborators + +To return to the pros and cons table: + +| **Pros** | **Cons** | +| :------------------------------------------------: | :-----------------------------------------------------: | +| Git version your dataset | No GUI access to files in DVC remote | +| Granular sharing permissions | Collaborators need to use DVC | +| DVC abstracts away download scripts/hardcoded URLs | Can be serverless, but you need to manage cloud storage | + +# The bottom line + +Packaging models and datasets is a non-trivial part of the machine learning +workflow. DVC provides a method for giving users a Git-centric experience of +cloning or forking these artifacts, with an emphasis on _versioning artifacts_ +and _abstracting away the processes of uploading, downloading, and storing +artifacts_. For projects with high complexity- like my hair project, which had +some gnarly dependencies and big artifacts- this kind of source control pays +off. If you don't know where your data came from or how it's been transformed, +it's impossible to be scientific. + +Thanks for stopping by our virtual poster! I'm happy to take questions or +comments about how version control fits into the scientific workflow. Leave a +comment, reach out on Twitter, or send an email. + +## Further reading + +_Check out our +[tutorial about creating a data registry](https://dvc.org/doc/use-cases/data-registries) +for more code examples._ diff --git a/content/blogs/2020-06-29-june-20-community-gems.md b/content/blogs/2020-06-29-june-20-community-gems.md new file mode 100644 index 0000000000..8a1d5fe52b --- /dev/null +++ b/content/blogs/2020-06-29-june-20-community-gems.md @@ -0,0 +1,163 @@ +--- +title: June '20 Community Gems +date: 2020-06-29 +description: > + A roundup of technical Q&A's from the DVC community. 
This month, we discuss + migrating to DVC 1.0, the new pipeline format, and our Python API. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + migrating to DVC 1.0, the new pipeline format, and our Python API. +picture: 2020-06-29/Gems_June_20.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/june-20-community-gems/426 +tags: + - Community Gems + - MinIO + - Pipelines + - Python + - Optimization +--- + +## Highlights from Discord + +Here are some Q&A's from our Discord channel that we think are worth sharing. + +### Q: I just upgraded to DVC 1.0. I've got some pipeline stages currently saved as `.dvc` files. [Is there an easy way to convert the old `.dvc` format to the new `dvc.yaml` standard?](https://discord.com/channels/485586884165107732/563406153334128681/725019219930120232) + +Yes! You can easily transfer the stages by hand: `dvc.yaml` is designed for +manual edits in any text editor, so you can type your old stages in and then +delete the old `.dvc` files. We also have a +[migration script](https://gist.github.com/skshetry/07a3e26e6b06783e1ad7a4b6db6479da) +available, although we can't provide long-term support for it. + +Learn more about the `dvc.yaml` format in our +[brand new docs](https://dvc.org/doc/user-guide/dvc-files#dvcyaml-file)! + +![Year Opening GIF](https://media.giphy.com/media/JYpTAnhT0EI2Q/giphy.gif) + +_Just like this but with technical documentation._ + +### Q: After I pushed my local data to remote storage, I noticed the file names are different in my storage repository- they're hash values. [Can I make them more meaningful names?](https://discord.com/channels/485586884165107732/563406153334128681/717737163122540585) + +No, but for a good reason! 
What you're seeing are cached files, and they're +stored with a special naming convention that makes DVC versioning and addressing +possible- these file names are how DVC deduplicates data (to avoid keeping +multiple copies of the same file version) and ensures that each unique version +of a file is immutable. If you manually overwrote those filenames you would risk +breaking Git version control. You can +[read more about how DVC uses this file format in our docs](https://dvc.org/doc/user-guide/dvc-internals#structure-of-cache-directory). + +It sounds like you're looking for ways to interact with DVC-tracked objects at a +high level of abstraction, meaning that you want to interface with the original +filenames and not the machine-generated hashes used by DVC. There are a few +secure and recommended ways to do this: + +- If you want to see a human-readable list of files that are currently tracked + by DVC, try the `dvc list` + command-[read up on it here](https://dvc.org/doc/command-reference/list). +- Check out our + [data registry tutorial](https://dvc.org/doc/use-cases/data-registries#data-registries) + to see how the commands `dvc get` and `dvc import` are used to download and + share DVC-tracked artifacts. The syntax is built for an experience like using + a package manager. +- The [DVC Python API](https://dvc.org/doc/api-reference) gives you programmatic + access to DVC-tracked artifacts, using human-readable filenames. + +### Q: [Is it better practice to `dvc add` data files individually, or to add a directory containing multiple data files?](https://discord.com/channels/485586884165107732/563406153334128681/722141190312689675) + +If the directory you're adding is logically one unit (for example, it is the +whole dataset in your project), we recommend using `dvc add` at the directory +level. Otherwise, add files one-by-one. 
You can +[read more about how DVC versions directories in our docs](https://dvc.org/doc/user-guide/dvc-internals#structure-of-cache-directory). + +### Q: [Do you have any examples of using DVC with MinIO?](https://discord.com/channels/485586884165107732/563406153334128681/722780202844815362) + +We don't have any tutorials for this use case exactly, but it's a very +straightforward modification from +[our basic use cases](https://dvc.org/doc/use-cases). The key difference when +using MinIO or a similar S3-compatible storage (like DigitalOcean Spaces or IBM +Cloud Object Storage) is that in addition to setting remote data storage, you +must set the `endpointurl` too. For example: + +```dvc +$ dvc remote add -d myremote s3://mybucket/path/to/dir +$ dvc remote modify myremote endpointurl https://object-storage.example.com +``` + +Read up on configuring supported storage +[in our docs](https://dvc.org/doc/command-reference/remote/add#supported-storage-types). + +### Q: [If I have a folder containing many data files, is there any advantage to zipping the folder and DVC tracking the `.zip`?](https://discord.com/channels/485586884165107732/563406153334128681/714922184455225445) + +There are a few things to consider: + +- **CPU time.** Even though it can be faster to pull a single file than a + directory (though not in all cases, since we can parallelize directory + downloads), the tradeoff is the time needed to unzip your data. Depending on + your constraints, this can be expensive and undesirable. + +- **Deduplication.** DVC deduplicates on the file level. So if you add one + single file to a directory, DVC will save only that file, not the whole + dataset again. If you use a zipped directory you won't get this benefit. In + the long run, this could be more expensive in terms of storage space for your + DVC cache and remote if the contents of your dataset change frequently. + +Generally, we would recommend first trying a plain unzipped directory. 
DVC is
+designed to work with large numbers of files (on the order of millions) and
+the latest release (DVC 1.0) has
+[optimizations built for this purpose exactly](https://dvc.org/blog/dvc-1-0-release#data-transfer-optimizations).
+
+### [Q: Can I execute a `dvc push` with the DVC Python API inside a Python script?](https://discord.com/channels/485586884165107732/485596304961962003/718419219288686664)
+
+Currently, our [Python API](https://dvc.org/doc/api-reference#python-api)
+doesn't support commands like `dvc push`, `dvc pull`, or `dvc status`. It is
+designed for interfacing with objects tracked by DVC. That said, CLI commands
+are basically calling `dvc.repo.Repo` object methods. So if you want to use
+commands from within Python code, you could try creating a `Repo` object with
+`r = Repo({root_dir})` and then `r.push()`. Please note that we don't officially
+support this use case yet.
+
+Of course, you can also run DVC commands from a Python script using `sys` or a
+similar library for issuing system commands.
+
+### [Q: Does the `dvc pipeline` command for visualizing pipelines still work in DVC 1.0?](https://discord.com/channels/485586884165107732/485596304961962003/717682556203565127)
+
+Most of the `dvc pipeline` functionality- like `dvc pipeline show --ascii` to
+print out an ASCII diagram of your pipeline- has been migrated to a new command,
+`dvc dag`. This function is written for our new pipeline format. Check out
+[our new docs](https://dvc.org/doc/command-reference/dag#dag) for an example.
+
+### [Q: Is there a way to create a DVC pipeline stage without running the commands in that stage?](https://discord.com/channels/485586884165107732/485596304961962003/715271980978405447)
+
+Yes. Say you have a Python script, `train.py`, that takes in a dataset `data`
+and outputs a model `model.pkl`. 
To create a DVC pipeline stage corresponding to +this process, you could do so like this: + +```dvc +$ dvc run -n train + -d train.py -d data + -o model.pkl + python train.py +``` + +However, this would automatically rerun the command `python train.py`, which is +not necessarily desirable if you have recently run it, the process is time +consuming, and the dependencies and outputs haven't changed. You can use the +`--no-exec` flag to get around this: + +```dvc +$ dvc run --no-exec + -n train + -d train.py -d data + -o model.pkl + python train.py +``` + +This flag can also be useful when you want to define the pipeline on your local +machine but plan to run it later on a different machine (perhaps an instance in +the cloud). +[Read more about the `--no-exec` flag in our docs.](https://dvc.org/doc/command-reference/run) + +One other approach worth mentioning is that you can manually edit your +`dvc.yaml` file to add a stage. If you add a stage this way, pipeline commands +won't be executed until you run `dvc repro`. diff --git a/content/blogs/2020-07-07-cml-release.md b/content/blogs/2020-07-07-cml-release.md new file mode 100644 index 0000000000..0181166f01 --- /dev/null +++ b/content/blogs/2020-07-07-cml-release.md @@ -0,0 +1,188 @@ +--- +title: 'New Release: Continuous Machine Learning (CML) is CI/CD for ML' +date: 2020-07-07 +description: > + Today we're launching Continuous Machine Learning (CML), a new open-source + project for CI/CD with ML. Let's bring the power of DevOps to ML or MLOps. +descriptionLong: > + Today we're launching Continuous Machine Learning (CML), a new open-source + project for CI/CD with ML. Use it to automate parts of your ML workflow, + including model training and evaluation, comparing ML experiments across your + project history, and monitoring changing datasets. Let's bring the power of + DevOps to ML or MLOps. 
+picture: 2020-07-07/cover.png
+pictureComment: CML release
+author: dmitry_petrov
+commentsUrl: https://discuss.dvc.org/t/continuous-machine-learning-release/429
+tags:
+  - Release
+  - CI/CD
+  - MLOps
+  - DataOps
+---
+
+## CI/CD for machine learning
+
+Today, the DVC team is releasing a new open-source project called Continuous
+Machine Learning, or CML (https://cml.dev) to mainstream the best engineering
+practices of CI/CD to AI and ML teams. CML helps to organize MLOps
+infrastructure on top of the traditional software engineering stack instead of
+creating separate AI platforms.
+
+Continuous integration and continuous delivery (CI/CD) is a widely-used software
+engineering practice. It's a validated approach to increasing the agility of
+software development without sacrificing stability. **But why haven't CI/CD
+practices taken root in machine learning and data science so far?**
+
+We see three substantial technical barriers to using standard CI systems with
+machine learning projects:
+
+1. **Data dependencies.** In ML, data plays a similar role as code: ML results
+   critically depend on datasets, and changes in data need to trigger feedback
+   just like changes in source code. Furthermore, multi-GB datasets are
+   challenging to manage with Git-centric CI systems.
+2. **Metrics-driven.** The traditional software engineering idea of pass/fail
+   tests does not apply in ML. As an example, `+0.72% accuracy` and
+   `-0.35% precision` does not answer the question if the ML model is good or
+   not. Detailed reports with metrics and plots are needed to make a good/bad
+   model decision.
+3. **CPU/GPU resources**. ML training often requires more resources to train
+   than is typical to have in CI/CD runners. CI/CD must be connected with cloud
+   computing instances or Kubernetes clusters for ML training.
+
+## CI/CD for ML is the next step for the DVC team
+
+Since the beginning, our motivation has been helping ML teams benefit from
+DevOps. 
We started DVC because we knew that data management would be a crucial +bottleneck, and sure enough, DVC was a big step towards making pipelines and +experiments manageable and reproducible. But conversations with our community +have brought us to one conclusion again and again: CI/CD for ML is the holy +grail. + +Over the last 3 years, we've reached some big milestones: + +1. We built DVC to address the ML data management problem. Recently, we + [released DVC 1.0](https://dvc.org/blog/dvc-1-0-release), marking a new and + more stable era for our API. +2. DVC has become a core part of many ML team's daily operations. The latest + [ThoughtWorks Technology Radar](https://www.thoughtworks.com/radar/tools) + says: + + _"... it [DVC] has become a favorite tool for managing experiments in machine + learning (ML) projects. Since it's based on Git, DVC is a familiar + environment for software developers to bring their engineering practices to + ML practice."_ + +3. An extraordinary team and community have emerged around DVC: + - 15 employees in our organization https://iterative.ai + - 100+ open-source contributors to DVC https://github.com/iterative/dvc and + another 100+ open-source contributors to docs + https://github.com/iterative/dvc.org + - 2000+ community members in our Discord https://dvc.org/chat and GitHub + issue tracker https://github.com/iterative/dvc + - 4000+ regular users of DVC + +Now that DVC is maturing, we're ready to take the next step: we want to +revolutionize the ML development processes. We want ML experiments to have +greater visibility to teammates, shorter feedback loops, and more +reproducibility. We want teams to spend less time managing their computing +resources and experiments, and more time building value. The goal is to extend +the amazing results of DevOps from software development to ML and MLOps. 
+
+## _Continuous Machine Learning_ release
+
+Today, we're releasing an open-source project https://cml.dev to close the gap
+between machine learning and software development practices.
+
+CML is a library of functions used inside CI/CD runners to make ML compatible
+with **GitHub Actions** and **GitLab CI**. We've created functions to:
+
+1. Generate informative reports on every Pull/Merge Request with metrics, plots,
+   and hyperparameter changes.
+2. Provision GPU/CPU resources from cloud service providers (**AWS, GCP, Azure,
+   Ali**) and deploy CI runners using
+   [Docker Machine](https://github.com/docker/machine).
+3. Bring datasets from cloud storage to runners (using **DVC**) for model
+   training, as well as save the resulting model in cloud storage.
+
+![Auto-generated metrics-driven report in GitLab Merge Request](../uploads/images/2020-07-07/cml-report-metrics.png)
+
+The workflow and visual reports are customizable by modifying the CI
+configuration file in your GitHub `.github/workflows/*.yaml` or GitLab
+`.gitlab-ci.yml` project. Use CML functions in conjunction with your own ML
+model training and testing scripts to create your own automated workflow and
+reporting system. 
+
+```yaml
+# GitLab workflow in '.gitlab-ci.yml' file
+
+stages:
+  - cml_run
+
+cml:
+  stage: cml_run
+  image: iterativeai/cml:0-dvc2-base1
+  script:
+    - dvc pull data --run-cache
+
+    - pip install -r requirements.txt
+    - dvc repro
+
+    # Compare metrics to master
+    - git fetch --prune
+    - dvc metrics diff --show-md master >> report.md
+
+    # Visualize loss function diff
+    - dvc plots diff --target loss.csv --show-vega master > vega.json
+    - vl2png vega.json > plot.png
+    - cml publish --md plot.png >> report.md
+    - dvc push data --run-cache
+    - cml send-comment report.md
+```
+
+![Hyperparameter change with a result image in GitHub Pull request report](../uploads/images/2020-07-07/cml-report-params.png)
+
+In this example all the CML functions are defined in the **docker image** that
+is used in the workflow - `iterativeai/cml:0-dvc2-base1`. Users can specify any
+docker image. The only restriction is that the CML library needs to be installed
+to enable all the CML commands for the reporting and graphs:
+
+```bash
+npm i @dvcorg/cml
+```
+
+Examples of docker images can be found in the `docker` directory of the CML
+repository: [CML repository](https://github.com/iterative/cml).
+
+As you can see, CML is based on the assumption that MLOps can work with
+traditional engineering tools. It shouldn't require an entirely separate
+platform. We're excited about a world where DevOps practitioners can work
+fluently on both software and ML aspects of a project.
+
+## The relationship between CML and DVC
+
+CML and DVC are related projects under the umbrella of the same team, but will
+have separate websites and independent development. The CML project is hosted on
+a new web site: https://cml.dev. 
The source code and issue tracker is on GitHub: +https://github.com/iterative/cml + +For support and communications, the DVC Discord server is still the place to go: +https://dvc.org/chat We've made a new `#cml` channel there to discuss CML, CI/CD +for ML and other MLOps related questions. + +## Conclusion + +With the rise of AI/ML teams and ML platforms in addition to the software +engineering stack, we believe that the industry needs a single technology stack +to work with software as well as AI projects. A simple layer of a tool is +required to close the gap between AI projects and software projects to fit them +into the existing stack and CML is the way to make it. + +Our philosophy is that ML projects, and MLOps practices, should be built on top +of traditional engineering tools and not as a separate stack. A simple layer of +tools will be required to close the gap, and CML is part of this ecosystem. We +think this is the future of MLOps. + +As always, thanks for reading and for being part of the DVC community. We'd love +to hear what you think about CML. Please be in touch on +[Twitter](https://twitter.com/dvcorg) and [Discord](https://dvc.org/chat)! diff --git a/content/blogs/2020-07-10-july-20-dvc-heartbeat.md b/content/blogs/2020-07-10-july-20-dvc-heartbeat.md new file mode 100644 index 0000000000..791520bc45 --- /dev/null +++ b/content/blogs/2020-07-10-july-20-dvc-heartbeat.md @@ -0,0 +1,195 @@ +--- +title: July ’20 Heartbeat +date: 2020-07-10 +description: > + Catch up on new DVC releases, talks, and projects in our community. This + month, we recap the DVC 1.0 release, making the list of top 20 fastest growing + open-source startups, and interviews galore. Plus: 📣 an invitation to the + next DVC meetup! +descriptionLong: > + Catch up on new DVC releases, talks, and projects in our community. This + month, we recap the DVC 1.0 release, making the list of top 20 fastest growing + open-source startups, and interviews galore. 
Plus: 📣 an invitation to the
+  next DVC meetup!
+picture: 2020-07-10/july_20_heartbeat_header.png
+author: elle_obrien
+commentsUrl: https://discuss.dvc.org/t/july-20-dvc-heartbeat/439
+tags:
+  - Heartbeat
+  - CI/CD
+  - DVC 1.0
+  - SciPy
+  - MLOps
+  - Reproducibility
+  - Meetup
+---
+
+Welcome to the July Heartbeat, our monthly roundup of [new releases](#news),
+[talks](#community-activity), [great articles](#good-reads), and
+[upcoming events](#coming-up-soon) in the DVC community.
+
+## News
+
+### DVC 1.0 release
+
+On June 22, DVC entered a new era: the
+[official release of version 1.0](https://dvc.org/blog/dvc-1-0-release). After
+several weeks of bug-catching with our pre-release, the team has issued DVC 1.0
+for the public! Now when you
+[install DVC through your package manager of choice](https://dvc.org/doc/install),
+you'll get the latest version. Welcome to the future.
+
+To recap, DVC 1.0 has some big new features like:
+
+- Plots powered by Vega-Lite so you can compare metrics across commits
+- New and easier pipeline configuration files- edit your DVC pipeline like a
+  text file!
+- Optimizations for data transfer speed
+
+Read all the [release notes](https://dvc.org/blog/dvc-1-0-release) for more, and
+stop by our [Discord](https://discordapp.com/invite/dvwXA2N) if you need support
+migrating (don't worry, 1.0 is backwards compatible).
+
+### Virtual meetup!
+
+In May, we had our [first ever virtual meetup](/blog/may-20-dvc-heartbeat). We
+had amazing talks from [Dean Pleban](https://twitter.com/DeanPlbn) and
+[Elizabeth Hutton](https://github.com/ehutt), plus time for Q&A with the DVC
+team- you can
+[watch the recording](https://www.youtube.com/watch?v=19GMtrFykSU&list=PLVeJCYrrCemiOc1SS_PIB3Tb3HX0Aqw3j)
+if you missed it!
+
+On Thursday, July 30, we're hosting our second meetup! Ambassador
+[Marcel Ribeiro-Dantas](http://mribeirodantas.me/) is hosting once again. 
We'll +have short talks about causal modeling and CI/CD, plus lots of time for chatting +and catching up. Please RSVP! + +

July DVC Meetup: Data Science & DevOps!

This meetup will be hosted by DVC Ambassador Marcel! AGENDA:We have two 10-minute talks on the agenda:- Causal Modeling with DVC - Marcel- Continuous integration for ML case studies - Elle Following talks, we'll have Q&A with the DVC team and time for community discussion.

+ + +### DVC is in the top 20 fastest-growing open source startups + +Konstantin Vinogradov at [Runa Capital](https://runacap.com/) used the GitHub +API to +[identify the fastest growing public repositories on GitHub](https://medium.com/runacapital/open-source-growth-benchmarks-and-the-20-fastest-growing-oss-startups-d3556a669fe6) +in terms of stars and forks. He used these metrics to estimate the top 20 +fastest growing startups in open source software. And guess what, DVC made the +cut! We're in great company. + +![](../uploads/images/2020-07-10/top20startups.png) + +### New team member + +We have a new teammate-[Maxim Shmakov](https://www.linkedin.com/in/mvshmakov/), +previously of Yandex, is joining us! Maxim is a front-end engineer joining us +from Moscow. Please welcome him to DVC. 👋 + +## Community activity + +We've been busy! Although we are mostly homebound these days, there has been no +shortage of speaking engagements. Here's a recap. + +### Meetings and talks + +- Co-founders Dmitry and Ivan appeared on the HasGeek TV series + [Making Data Science Work](https://hasgeek.com/fifthelephant/making-data-science-work-session-3/) + to discuss engineering for data science with hosts + [Venkata Pingali](https://www.linkedin.com/in/pingali/) and + [Indrayudh Ghoshal](https://www.linkedin.com/in/indrayudhghoshal/). The + livestream is available for viewing on YouTube! + +https://www.youtube.com/watch?v=EWcpALbzZRg + +- Dmitry appeared on the [MLOps.community](https://mlops.community/) meetup to + chat with host [Demetrios Brinkmann](https://www.linkedin.com/in/dpbrinkm/). + They talked about the open source ecosystem, the difference between tools and + platforms, and what it means to codify data. + +https://www.youtube.com/watch?v=ojV1tK9jXH8&t=2295s + +- I (Elle) gave a talk at the + [MLOps Production & Engineering World](https://mlopsworld.com/) meeting, + called "Adapting continuous integration and continuous delivery for ML". 
I + shared an approach to using GitHub Actions with ML projects. Video coming + soon! + +https://twitter.com/TMLS_TO/status/1273693487104503808 + +- Extremely early the next morning, clinician-scientist + [Cris Lanting](https://www.linkedin.com/in/crislanting/?originalSubdomain=nl) + and I co-led a workshop about developing strong computational infrastructure + and practices in research as part of the + [Virtual Conference on Computational Audiology](https://computationalaudiology.com/). + We talked about big ideas for making scientific research reproducible, + manageable, and shareable. For the curious, the workshop is still viewable! + +https://www.youtube.com/watch?v=W4CoptalWw0 + +- DVC has a virtual poster at [SciPy 2020](https://www.scipy2020.scipy.org/)! We + prepared a demo about + [packaging models and datasets like software](https://dvc.org/blog/scipy-2020-dvc-poster) + so they can be widely disseminated via GitHub. + +### Good reads + +Some excellent reading recommendations from the community: + +- Data scientist Déborah Mesquita published a thorough guide to using new DVC + 1.0 pipelines in a sample ML project. It's truly complete, covering data + collection to model evaluation, with detailed code examples. If you are new to + pipelines, do not miss this! + + + +- Caleb Kaiser of [Cortex](https://github.com/cortexlabs/cortex) (another + startup in the Runa Capital's Top 20 list!) shared a thinkpiece about + challenges from software engineering that can inform production ML. We really + agree with what he has to say about reproducibility: + +> You typically hear about “reproducibility” in reference to ML research, +> particularly when a paper doesn’t include enough information to recreate the +> experiment. However, reproducibility also comes up a lot in production ML. +> Think of it this way — you’re on a team staffed with data scientists and +> engineers, and you’re all responsible for an image classification API. 
The +> data scientists are constantly trying new techniques and architectural tweaks +> to improve the model’s baseline performance, while at the same time, the model +> is constantly being retrained on new data. Looking over the APIs performance, +> you see one moment a week ago where the model’s performance dropped +> significantly. What caused that drop? Without knowing exactly how the model +> was trained, and on what data, it’s impossible to know for sure. + + + +- Mukul Sood wrote about the Real World, a place beyond Jupyter notebooks where + data is non-stationary and servers are unreliable! He covers some very real + challenges for taking a data science project into production and introduces + the need for CI/CD practices in healthy, scalable ML applications. + + + +### A nice tweet + +We'll close on a nice tweet from [Russell Jurney](https://datasyndrome.com/): + +https://twitter.com/rjurney/status/1266735603921547264 + +Thanks, we couldn't do it without our community! As always, thanks for joining +us and reading. There are lots of ways to stay in touch and we always love to +hear from you. Follow us on [Twitter](twitter.com/dvcorg), join our +[Discord server](https://discordapp.com/invite/dvwXA2N), or leave a blog +comment. Until next time! 😎 diff --git a/content/blogs/2020-07-16-devops-for-data-scientists.md b/content/blogs/2020-07-16-devops-for-data-scientists.md new file mode 100644 index 0000000000..1ff8173ebe --- /dev/null +++ b/content/blogs/2020-07-16-devops-for-data-scientists.md @@ -0,0 +1,379 @@ +--- +title: What data scientists need to know about DevOps +date: 2020-07-16 +description: > + A philosophical and practical guide to using continuous integration (via + GitHub Actions) to build an automatic model training system. +picture: 2020-07-16/unicorn_floatie.jpg +pictureComment: | + The unicorn! A mythical data scientist who can code, write unit tests + AND resist the lure of a deep neural network when logistic regression + will do. 
+ + Photo by [James Lee](https://unsplash.com/@picsbyjameslee) via + [Unsplash](https://unsplash.com/photos/qSf_4bNsoWc). +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/what-data-scientists-need-to-know-about-devops/447 +tags: + - GitHub Actions + - MLOps + - CI/CD + - Cloud training + - CML + - Company + - Tutorial +--- + +With the rapid evolution of machine learning (ML) in the last few years, it’s +become +[trivially easy to begin ML experiments](https://towardsdatascience.com/deep-learning-isnt-hard-anymore-26db0d4749d7). +Thanks to libraries like [scikit-learn](https://scikit-learn.org/stable/) and +[Keras](https://github.com/keras-team/keras), you can make models with a few +lines of code. + +But it’s harder than ever to turn data science projects into meaningful +applications, like a model that informs team decisions or becomes part of a +product. The typical ML project involves +[so many distinct skill sets](https://ieeexplore.ieee.org/abstract/document/8804457) +that it’s challenging, if not outright impossible, for any one person to master +them all — so hard, the rare data scientist who can also develop quality +software and play engineer is called a unicorn! + +As the field matures, a lot of jobs are going to require a mix of software, +engineering, and mathematical chops. Some say +[they](https://www.anaconda.com/state-of-data-science-2020?utm_medium=press&utm_source=anaconda&utm_campaign=sods-2020&utm_content=report) +[already](http://veekaybee.github.io/2019/02/13/data-science-is-different/) +[do](https://tech.trivago.com/2018/12/03/teardown-rebuild-migrating-from-hive-to-pyspark/). 
+ +To quote the unparalleled data scientist/engineer/critical observer Vicki Boykis +in her blog +[Data science is different now](http://veekaybee.github.io/2019/02/13/data-science-is-different/): + +> What is becoming clear is that, in the late stage of the hype cycle, data +> science is asymptotically moving closer to engineering, and the +> [skills that data scientists need](https://www.youtube.com/watch?v=frQeK8xo9Ls) +> moving forward are less visualization and statistics-based, and +> [more in line with traditional computer science curricula](https://tech.trivago.com/2018/12/03/teardown-rebuild-migrating-from-hive-to-pyspark/). + +## Why data scientists need to know about DevOps + +So which of the many, many engineering and software skills should data +scientists learn? My money is on DevOps. DevOps, a portmanteau of development +and operations, was officially born in 2009 +[at a Belgian conference](https://en.wikipedia.org/wiki/DevOps#History). The +meeting was convened as a response to tensions between two facets of tech +organizations that historically experienced deep divisions. Software developers +needed to move fast and experiment often, while Operations teams prioritized +stability and availability of services (these are the people who keep servers +running day in and day out). Their goals were not only opposing, they were +competing. + +That sounds awfully reminiscent of today’s data science. Data scientists create +value by experiments: new ways of modeling, combining, and transforming data. +Meanwhile, the organizations that employ data scientists are incentivized for +stability. + +The consequences of this division are profound: in the +[latest Anaconda “State of Data Science” report](https://www.globenewswire.com/news-release/2020/06/30/2055578/0/en/Anaconda-Releases-2020-State-of-Data-Science-Survey-Results.html), +“fewer than half (48%) of respondents feel they can demonstrate the impact of +data science” on their organization. 
By some estimates, the vast majority of +[models created by data scientists end up stuck on a shelf](https://venturebeat.com/2019/07/19/why-do-87-of-data-science-projects-never-make-it-into-production/). +We don’t yet have strong practices for passing models between the teams that +create them and the teams that deploy them. Data scientists and the developers +and engineers who implement their work have entirely different tools, +constraints, and skill sets. + +DevOps emerged to combat this sort of deadlock in software, back when it was +developers vs. operations. And it was tremendously successful: +[many](http://engineering.microsoft.com/devops/) +[teams](https://insights.sei.cmu.edu/devops/2015/02/devops-case-study-amazon-aws.html) +have gone from deploying new code every few months to several times a day. Now +that we have machine learning vs. operations, it’s time to think about MLOps — +principles from DevOps that work for data science. + +## Introducing Continuous Integration + +DevOps is both a philosophy and a set of practices, including: + +1. Automate everything you can + +2. Get feedback on new ideas fast + +3. Reduce manual handoffs in your workflow + +In a typical data science project, we can see some applications: + +1. **Automate everything you can.** Automate parts of your data processing, + model training, and model testing that are repetitive and predictable. + +2. **Get feedback on new ideas fast.** When your data, code, or software + environment changes, test it immediately in a production-like environment + (meaning, a machine with the dependencies and constraints you anticipate + having in production). + +3. **Reduce manual handoffs in your workflow.** Find opportunities for data + scientists to test their own models as much as possible. Don’t wait until a + developer is available to see how the model will behave in a production-like + environment. 
+ +The standard DevOps approach for accomplishing these goals is a method called +continuous integration (CI). + +The gist is that when you change a project’s source code (usually, changes are +registered via git commits), your software is automatically built and tested. +Every action triggers feedback. CI is often used with +[Git-flow](https://nvie.com/posts/a-successful-git-branching-model/), a +development architecture in which new features are built on Git branches (need a +Git refresher? +[Try this](https://towardsdatascience.com/why-git-and-how-to-use-git-as-a-data-scientist-4fa2d3bdc197)). +When a feature branch passes the automated tests, it becomes a candidate to be +merged into the master branch. + +![](../uploads/images/2020-07-16/basic_ci_system.png) _Here's what continuous +integration looks like in software development._ + +With this setup, we have automation — code changes trigger an automatic build +followed by testing. We have fast feedback, because we get test results back +quickly, so the developer can keep iterating on their code. And because all this +happens automatically, you don’t need to wait for anyone else to get feedback — +one less handoff! + +_So why don’t we use continuous integration already in ML?_ Some reasons are +cultural, like a low crossover between data science and software engineering +communities. Others are technical- for example, to understand your model’s +performance, you need to look at metrics like accuracy, specificity, and +sensitivity. You might be assisted by data visualizations, like a confusion +matrix or loss plot. So pass/fail tests won’t cut it for feedback. Understanding +if a model is improved requires some domain knowledge about the problem at hand, +so test results need to be reported in an efficient and human-interpretable way. + +![](../uploads/images/2020-07-16/ci_for_data_system.png) _Here's what continuous +integration might look like in a machine learning project. 
Inspected by Data
+Science Doggy._
+
+## How do CI systems work?
+
+Now we’ll get even more practical. Let’s take a look at how a typical CI system
+works. Luckily for learners, the barrier has never been lower thanks to tools
+like GitHub Actions and GitLab CI- they have clear graphical interfaces and
+excellent docs geared for first-time users. Since GitHub Actions is completely
+free for public projects, we’ll use it for this example. It works like this:
+
+1. You create a GitHub repository. You create a directory called
+   `.github/workflows`, and inside, you place a special `.yaml` file with a
+   script you want to run- like,
+
+```dvc
+$ python train.py
+```
+
+2. You change the files in your project repository somehow and Git commit the
+   change. Then, push to your GitHub repository.
+
+```dvc
+# Create a new git branch for experimenting
+$ git checkout -b "experiment"
+$ edit train.py
+
+# git add, commit, and push your changes
+$ git add . && git commit -m "Normalized features"
+$ git push origin experiment
+```
+
+3. As soon as GitHub detects the push, GitHub deploys one of their computers to
+   run the functions in your `.yaml`.
+
+4. GitHub returns a notification if the functions ran successfully or not.
+
+![](../uploads/images/2020-07-16/run_notification.png) _Find this in the Actions
+tab of your GitHub repository._
+
+That’s it! What’s really neat here is that you’re using GitHub’s computers to
+run your code. All you have to do is update your code and push the change to
+your repository, and the workflow happens automatically.
+
+Back to that special `.yaml` file I mentioned in Step 1- let’s take a quick look
+at one. It can have any name you like, as long as the file extension is `.yaml`
+and it’s stored in the directory `.github/workflows`. 
Here’s one:
+
+```yaml
+# .github/workflows/ci.yaml
+name: train-my-model
+on: [push]
+jobs:
+  run:
+    runs-on: [ubuntu-latest]
+    steps:
+      - uses: actions/checkout@v2
+      - name: training
+        run: |
+          pip install -r requirements.txt
+          python train.py
+```
+
+There’s a lot going on, but most of it is the same from Action to Action- you
+can pretty much copy and paste this standard GitHub Actions template, but fill
+in your workflow in the `run` field.
+
+If this file is in your project repo, whenever GitHub detects a change to your
+code (registered via a push), GitHub Actions will deploy an Ubuntu runner and
+attempt to execute your commands to install requirements and run a Python
+script. Be aware that you have to have the files required for your workflow —
+here, `requirements.txt` and `train.py` — in your project repo!
+
+## Get better feedback
+
+As we alluded to earlier, automatic training is pretty cool and all, but it’s
+important to have the results in a format that’s easy to understand. Currently,
+GitHub Actions gives you access to the runner’s logs, which are plain text.
+
+![](../uploads/images/2020-07-16/github_actions_log.png) _An example printout
+from a GitHub Actions log._
+
+But understanding your model’s performance is tricky. Models and data are high
+dimensional and often behave nonlinearly — two things that are especially hard
+to understand without pictures!
+
+I can show you one approach for putting data viz in the CI loop. For the last
+few months, my team at Iterative.ai has been working on a toolkit to help use
+GitHub Actions and GitLab CI for machine learning projects. It’s called
+[Continuous Machine Learning](https://cml.dev) (CML for short), and it’s open
+source and free.
+
+Working from the basic idea of “Let’s use GitHub Actions to train ML models,”
+we’ve built some functions to give more detailed reports than a pass/fail
+notification. 
CML helps you put images and tables in the reports, like this +confusion matrix generated by SciKit-learn: + +![](../uploads/images/2020-07-16/cml_basic_report.png) _This report appears when +you make a Pull Request in GitHub!_ + +To make this report, our GitHub Action executed a Python model training script, +and then used CML functions to write our model accuracy and confusion matrix to +a markdown document. Then CML passed the markdown document to GitHub. + +Our revised `.yaml` file contains the following workflow: + +```yaml +name: train-my-model +on: [push] +jobs: + run: + runs-on: [ubuntu-latest] + container: iterativeai/cml:0-dvc2-base1 + steps: + - uses: actions/checkout@v2 + - name: training + env: + repo_token: ${{ secrets.GITHUB_TOKEN }} + run: | + # train.py outputs metrics.txt and plot.png + pip3 install -r requirements.txt + python train.py + + # copy the contents of metrics.txt to our markdown report + cat metrics.txt >> report.md + + # add our confusion matrix to report.md + cml publish plot.png --md >> report.md + + # send the report to GitHub for display + cml send-comment report.md +``` + +You can see the entire +[project repository here](https://github.com/iterative/cml_base_case). Note that +our .yaml now contains a few more configuration details, like a special Docker +container and an environmental variable, plus some new code to run. The +container and environmental variable details are standard in every CML project, +not something the user needs to manipulate, so focus on the code! + +With the addition of these CML functions to the workflow, we’ve created a more +complete feedback loop in our CI system: + +1. Make a Git branch and change your code on that branch. + +2. Automatically train model and produce metrics (accuracy) and a visualization + (confusion matrix). + +3. Embed those results in a visual report in your Pull Request. 
+
+Now, when you and your teammates are deciding if your changes have a positive
+effect on your modeling goals, you have a dashboard of sorts to review. Plus,
+this report is linked by Git to your exact project version (data and code) AND
+the runner used for training AND the logs from that run. Very thorough! No more
+graphs floating around your workspace that have long ago lost any connection to
+your code!
+
+So that’s the basic idea of CI in a data science project. To be clear, this
+example is among the simplest ways to work with CI. In real life, you’ll likely
+encounter considerably more complex scenarios. CML also has features to help you
+use large datasets stored outside your GitHub repository (using DVC) and train
+on cloud instances, instead of the default GitHub Actions runners. That means
+you can use GPUs and other specialized setups.
+
+For example, I made a project using GitHub Actions to deploy an
+[EC2 GPU and then train a neural style transfer model](https://github.com/iterative/cml_cloud_case).
+Here’s my CML report:
+
+![](../uploads/images/2020-07-16/cloud_report.png) _Training in the cloud!
+Weeeeeee!_
+
+You can also use your own Docker containers, so you can closely emulate the
+environment of a model in production. I’ll be blogging more about these advanced
+use cases in the future.
+
+## Final thoughts on CI for ML
+
+To summarize what we’ve said so far:
+
+**DevOps is not a specific technology, but a philosophy and a set of principles
+and practices for fundamentally restructuring the process of creating
+software.** It’s effective because it **addresses systemic bottlenecks** in how
+teams work and experiment with new code.
+
+As data science matures in the coming years, people who understand how to apply
+DevOps principles to their machine learning projects will be a valuable
+commodity — both in terms of salary and their organizational impact.
Continuous
+integration is a staple of DevOps and one of the most effective known methods
+for building a culture with reliable automation, fast testing, and autonomy for
+teams.
+
+CI can be implemented with systems like
+[GitHub Actions](https://github.com/features/actions) or
+[GitLab CI](https://about.gitlab.com/stages-devops-lifecycle/continuous-integration/),
+and you can use these services to build automatic model training systems. The
+benefits are numerous:
+
+1. Your code, data, models, and training infrastructure (hardware and software
+   environment) are Git versioned.
+
+2. You’re automating work, testing frequently and getting fast feedback (with
+   visual reports if you use CML). In the long run, this will almost certainly
+   speed up your project’s development.
+
+3. CI systems make your work visible to everyone on your team. No one has to
+   search very hard to find the code, data, and model from your best run.
+
+And I promise, once you get into the groove, it is incredibly fun to have your
+model training, recording, and reporting automatically kicked off by a single
+git commit.
+
+You will feel so cool.
+ +![Pixel Illustration GIF by Walter Newton](https://media.giphy.com/media/26AHG5KGFxSkUWw1i/giphy.gif) + +### Further reading + +- [Continuous Integration](https://www.martinfowler.com/articles/continuousIntegration.html), + the seminal Martin Fowler blog on the subject + +- [Continuous Delivery for Machine Learning](https://martinfowler.com/articles/cd4ml.html), + another excellent blog on Martin Fowler’s site about building a continuous + integration & continuous delivery system for ML + +- [The DevOps Handbook](https://www.amazon.com/DevOps-Handbook-Second-World-Class-Organizations/dp/B09L56CT6N), + a beloved guide that is recommended for nearly any organization (ML, software, + or not) + +_**Note:** This article has been cross-posted on Medium._ diff --git a/content/blogs/2020-07-22-july-20-community-gems.md b/content/blogs/2020-07-22-july-20-community-gems.md new file mode 100644 index 0000000000..37a809e055 --- /dev/null +++ b/content/blogs/2020-07-22-july-20-community-gems.md @@ -0,0 +1,192 @@ +--- +title: July '20 Community Gems +date: 2020-07-31 +description: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + getting started with CML, configuring your DVC cache, and how to request a + tutorial video. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + getting started with CML, configuring your DVC cache, and how to request a + tutorial video. +picture: 2020-07-31/Gems_July_20.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/july-20-community-gems/460 +tags: + - Community Gems + - CML + - GCP + - DVC 1.0 +--- + +Here are some of our top Q&A's from around the community. With the launch of +[CML](https://cml.dev) earlier in the month, we've got some new ground to cover! + +## DVC questions + +### [Q: Recently, I set up a global DVC remote. 
Where can I find the config file?](https://discordapp.com/channels/485586884165107732/563406153334128681/717673618217238598)
+
+When you
+[create a global DVC remote](https://dvc.org/doc/command-reference/remote/list#options),
+a config file will be created in `~/.config/dvc/config` instead of your project
+directory (i.e., `.dvc/config`).
+
+Note that on a Windows system, the config file will be created at
+`C:\Users\<username>\AppData\Local\iterative\dvc\config`.
+
+### [Q: I'm working on a collaborative project, and I use `dvc pull` to sync my local workspace with the project repository. Then, I try running `dvc repro`, but get an error: `dvc.yaml does not exist`. No one else on my team is having this issue. Any ideas?](https://discordapp.com/channels/485586884165107732/485596304961962003/731188065078345799)
+
+This error suggests there is no `dvc.yaml` file in your project. Most likely,
+this means your teammates are using DVC version 0.94 or earlier, before the
+`dvc.yaml` standard was introduced. Meanwhile, it sounds like you're using
+version 1.0 or later. You can check by running
+
+```dvc
+$ dvc version
+```
+
+The best solution is for your whole team to upgrade to the latest version- and
+there's an easy
+[migration script to help you make the move](https://towardsdatascience.com/automatically-migrate-your-project-from-dvc-0-94-to-dvc-1-x-416a5b9e837b).
+If for some reason this won't work for your team, you can either downgrade to a
+previous version, or use a workaround:
+
+```dvc
+$ dvc repro <.dvc file>
+```
+
+substituting the appropriate `.dvc` file for your pipeline. DVC 1.0 is backwards
+compatible, so pipelines created with previous versions will still run.
+
+### [Q: Does the DVC installer for Windows also include the dependencies for using cloud storage, like S3 and GCP?](https://discordapp.com/channels/485586884165107732/485596304961962003/715717911574216735)
+
+If you're installing DVC from binary-such as the `dvc.exe`
+[downloadable on the DVC homepage](https://dvc.org/)- all the standard
+dependencies are included. You shouldn't need to use `pip` to install extra
+packages (like `boto` for S3 storage).
+
+### [Q: Is there a way to setup my DVC remote so I can manually download files from it without going through DVC?](https://discordapp.com/channels/485586884165107732/563406153334128681/717458695709130764)
+
+When DVC adds a file to a remote repository (such as an S3 bucket, or an SSH
+file server), there's only one change happening: DVC calculates an md5 for the
+file and renames it with that md5. In technical terms, it's storing files in a
+"content-addressable way". That means if you know the hash of a file, you can
+locate it in your DVC remote and manually download it.
+
+To find the hash for a given file, say `data.csv`, you can look in the
+corresponding DVC file:
+
+```dvc
+$ cat data.csv.dvc
+```
+
+Another approach is using a built-in DVC function:
+
+```dvc
+$ dvc get --show-url . data.csv
+```
+
+You can read more about `dvc get --show-url` in
+[our docs](https://dvc.org/doc/command-reference/get#options). Note that this
+functionality is also part of our Python API, so you can locate the path to a
+file in your remote within a Python environment.
+[Check out our API docs!](https://dvc.org/doc/api-reference/get_url)
+
+### [Q: By default, each DVC project has its own cache in the project repository. To save space, I'm thinking about locally creating a single cache folder and letting multiple project repositories point there.
Will this work?](https://discordapp.com/channels/485586884165107732/563406153334128681/736164141701791815) + +Yes, we hear from many users who have created a +[shared cache](https://dvc.org/doc/user-guide/how-to/share-a-dvc-cache#configure-the-shared-cache). +Because of the way DVC uses content-addressable filenames, you won't encounter +issues like accidentally overwriting files from one project with another. + +A possible issue is that a shared cache will grant all teammates working on a +given project access to the data from all other projects using that cache. If +you have sensitive data, you can create different caches for projects involving +private and public data. + +To learn more about setting your cache directory location, +[see our docs](https://dvc.org/doc/command-reference/cache/dir). + +## CML questions + +### Q: I use Bitbucket. Will CML work for me? + +The first release of CML is compatible with GitHub and GitLab. We've seen +[many requests for Bitbucket support](https://github.com/iterative/cml/issues/140), +and we're actively investigating how to add this. Stay tuned. + +### [Q: I have on-premise GPUs. Can CML use them to execute pipelines?](https://discordapp.com/channels/485586884165107732/728693131557732403/730070747388706867) + +Yep! You can use on-premise compute resources by configuring them as self-hosted +runners. See +[GitHub](https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners) +and [GitLab](https://docs.gitlab.com/runner/)'s official docs for more details +and setup instructions. + +### [Q: I'm building a workflow that deploys a GCP Compute Engine instance, but I can only find examples with AWS EC2 in the CML docs. What do I do?](https://discordapp.com/channels/485586884165107732/728693131557732403/730688592787275806) + +There is a slight difference in the way CML handles credentials for AWS and GCP, +and that means you'll have to modify your workflow file slightly. 
We've added an +example workflow for GCP to our +[project README](https://github.com/iterative/cml#allocating-cloud-resources-with-cml). + +We've updated our +[cloud compute use case repository docs](https://github.com/iterative/cml_cloud_case#using-a-different-cloud-service) +to cover a GCP example. + +Note that for Azure, the workflow will be the same as for AWS. You'll only have +to change the arguments to `docker-machine`. + +### [Q: I don't see any installation instructions in the CML docs. Am I missing something?](https://discordapp.com/channels/485586884165107732/728693131557732403/733659483758133269) + +Nope, there's no installation unless you wish to install CML in your own Docker +image. As long as you are using GitHub Actions or GitLab CI with the CML Docker +images, no other steps are needed. + +If you're creating your own Docker image to be used in a GitHub Action or GitLab +CI pipeline, you can add CML to your image via npm: + +```bash +$ npm i -g @dvcorg/cml +``` + +### [Q: Can I use CML with MLFlow?](https://www.youtube.com/watch?v=9BgIDqAzfuA&lc=Ugw-VxQqAaqi9hmqB3t4AaABAg) + +CML is designed to integrate with lots of tools that ML teams are already +familiar with. For example, we set up a wrapper to use CML with Tensorboard, so +you get a link to your Tensorboard in a PR whenever your model is training +([check out the use case](https://github.com/iterative/cml_tensorboard_case/pull/3)). + +While we haven't yet tried to create a use case with MLFlow in particular, we +think a similar approach could work. We could imagine using MLFlow for +hyperparameter searching, for example, and then checking in your best model with +Git to a CI system for evaluation in a production-like environment. CML could +help you orchestrate compute resources for model evaluation in your custom +environment, pulling the model and any validation data from cloud storage, and +reporting the results in a PR. 
+ +If this is something you're interested in, make an issue on our project +repository to tell us more about your project and needs- that lets us know it's +a priority in the community. + +### Q: Are there more tutorial videos coming? + +Yes! We recently launched +[our first CML tutorial video](https://dvc.org/blog/first-mlops-tutorial), and a +lot of folks let us know they want more. We're aiming to release a new video +every week or so in the coming months. Topics will include: + +- Using DVC to push and pull data from cloud storage to your CI system +- Using CML with your on-premise hardware +- Building a data dashboard in GitHub & GitLab for monitoring changes in dynamic + datasets +- Provisioning cloud compute from your CI system +- Creating a custom Docker container for testing models in a production-like + environment + +We really want to know what use cases, questions, and issues are most important +to you. This will help us make videos that are most relevant to the community! +If you have a suggestion or idea, no matter how small, we want to know. Leave a +[comment on our videos](https://youtu.be/9BgIDqAzfuA), +[reach out on Twitter](https://twitter.com/dvcorg), or +[ping us in Discord](https://discord.gg/bzA6uY7). diff --git a/content/blogs/2020-07-24-first-mlops-tutorial.md b/content/blogs/2020-07-24-first-mlops-tutorial.md new file mode 100644 index 0000000000..abb8ed97c8 --- /dev/null +++ b/content/blogs/2020-07-24-first-mlops-tutorial.md @@ -0,0 +1,67 @@ +--- +title: | + NEW VIDEO! 🎥 MLOps Tutorial #1: + Intro to continuous integration for ML +date: 2020-07-24 +description: > + A video tutorial about using continuous integration in data science and + machine learning projects. This tutorial shows how to use GitHub Actions and + Continuous Machine Learning (CML) to create your own automated model training + and evaluation system. 
+picture: 2020-07-24/blog_header.png
+author: elle_obrien
+commentsUrl: https://discuss.dvc.org/t/new-video-mlops-tutorial-1-intro-to-continuous-integration-for-ml/454
+tags:
+  - CI/CD
+  - DevOps
+  - MLOps
+  - CML
+  - Tutorial
+---
+
+Earlier this month, we launched [CML](https://cml.dev), our latest open-source
+project in the MLOps space. We think it's a step towards establishing powerful
+DevOps practices (like continuous integration) as a regular fixture of machine
+learning and data science projects. But there are plenty of challenges ahead,
+and a big one is _literacy_.
+
+So many data scientists, like developers, are self-taught. Data science degrees
+have only recently emerged on the scene, which means if you polled a handful of
+senior-level data scientists, there'd almost certainly be no universal training
+or certificate among them. Moreover, there's still no widespread agreement about
+what it takes to be a data scientist: is it an engineering role with a little
+bit of Tensorflow sprinkled on top? A title for statisticians who can code?
+We're not expecting an easy resolution to these existential questions anytime
+soon.
+
+In the meantime, we're starting a video series to help data scientists curious
+about DevOps (and developers and engineers curious about data science!) get
+started. Through hands-on coding examples and use cases, we want to give data
+science practitioners the fundamentals to explore, use, and influence MLOps.
+
+The first video in this series uses a lightweight and fairly popular data
+science problem- building a model to predict wine quality ratings- as a
+playground to introduce continuous integration.
+
+The tutorial covers:
+
+- Using Git-flow in a data science project (making a feature branch and pull
+  request)
+- Creating your first GitHub Action to train and evaluate a model
+- Using CML to generate visual reports in your pull request summarizing model
+  performance
+
+It's now up on YouTube!
+ +https://youtu.be/9BgIDqAzfuA + +[Code for the project is available online](https://github.com/andronovhopf/wine) +so you can follow along! We also recommend checking out the +[CML docs](https://github.com/iterative/cml) for more details, tutorials, and +use cases. + +If you have questions, the best way to get in touch is by leaving a comment on +the blog, video, or our [Discord channel](https://discord.gg/bzA6uY7). And, +we're especially interested to hear what use cases you'd like to see covered in +future videos- tell us about your data science project and how you could imagine +using continuous integration, and we might be able to create a video! diff --git a/content/blogs/2020-07-27-shtab-completion-release.md b/content/blogs/2020-07-27-shtab-completion-release.md new file mode 100644 index 0000000000..33246b8ed1 --- /dev/null +++ b/content/blogs/2020-07-27-shtab-completion-release.md @@ -0,0 +1,212 @@ +--- +title: '(Tab) Complete Any Python Application in 1 Minute or Less' +date: 2020-07-27 +description: > + We've made a painless tab-completion script generator for Python applications! + Find out how to take advantage of it in this blog post. +descriptionLong: > + We've made a painless tab-completion script generator for Python applications! + It's called `shtab` and it currently works with `argparse`, `docopt`, and + `argopt` to produce `bash` and `zsh` completion scripts. This tool was + originally created to help `dvc`, but we realised it could be made more + generic and valuable to the world's entire ecosystem of Python CLI + applications. Find out how to take advantage of it in this blog post. +picture: 2020-07-27/tab-py.jpg +pictureComment: Zero Effort Tab Completion for Python Applications +author: casper_dcl +commentsUrl: https://discuss.dvc.org/t/tab-complete-any-python-application-in-1-minute-or-less/455 +tags: + - shtab + - Release + - CLI + - Autocomplete + - Tab + - Completion + - Python +--- + +Command line tools are powerful. 
Things like [`make`] have manual pages +spanning, well, +[pages](https://www.gnu.org/software/make/manual/make.html#Options-Summary), +while just the list of [`git`] subcommands is longer than can fit on a standard +`80 x 24` terminal screen. + +```dvc +$ git +add filter-branch rebase +am format-patch reflog +annotate fsck relink +... +describe prco unassume +--More-- +``` + +Notice the `--More--` at the bottom? That's the joy of pagination. + +Notice the `` at the top? That represents actually pressing the tab key. +Ah, the joy of shell tab completion. + +Tab completion is an indispensable part of writing anything on the command-line. +Personally, I can't imagine trying to `git co` (aliased to `git checkout`) a +branch without `` to do the heavy lifting. +[They say](https://en.wikipedia.org/wiki/Letter_frequency) "E" is the most +common vowel, and "T" the most common consonant. My keyboard use probably looks +more like this: + +![](../uploads/images/2020-07-27/key-frequencies.png 'Yes, I use vim =500')_My +key usage_ + +Now, there's a tool called `dvc` which is like [Git for data](https://dvc.org). +It can be viewed as a cross-platform combination of [`git`] and [`make`] +designed for handling big data and multiple cloud storage repositories, as well +as tracking machine learning experiments. As you can imagine, supporting that +many buzzwords means it also has a large number of subcommands and options. + +_Every time a new feature is added, maintainers and contributors have to update +tab completion scripts for multiple supported shells. At best, it's a pain, and +at worst, error-prone. If you've worked on maintaining CLI applications, you'll +sympathise._ + +Surely the parser code you've written is informative enough to automate tab +completion? Surely you shouldn't have to maintain and synchronise separate tab +completion scripts? + +Good news: [`shtab`] is a new tool which magically does all of this work. 
+ +Any Python CLI application using [`argparse`], [`docopt`], or [`argopt`] can +have tab completion for free! + +Simply hand your parser object to `shtab` (either via the CLI or the Python +API), and a tab completion script will be generated for your preferred shell. +It's as easy as: + +- CLI: `shtab --shell=bash myprogram.main.parser`, or +- Python API: `import shtab; print(shtab.complete(parser, shell="bash"))`. + +### `argparse` example + +Suppose you have some code in a module `hello.main`: + +```python +import argparse + +def get_main_parser(): + parser = argparse.ArgumentParser(prog="hello") + parser.add_argument( + "who", help="good question", nargs="?", default="world") + parser.add_argument( + "--what", help="a better question", default="hello", + choices=["hello", "goodbye"]) + return parser + +if __name__ == "__main__": + parser = get_main_parser() + args = parser.parse_args() + print("{}, {}!".format(args.what, args.who)) +``` + +To get tab completion for `bash`, simply install [`shtab`] and then run: + +```bash +shtab --shell=bash hello.main.get_main_parser \ + | sudo tee "$BASH_COMPLETION_COMPAT_DIR"/hello >/dev/null +``` + +Zsh user? Not a problem. Simply run: + +```bash +shtab --shell=zsh hello.main.get_main_parser \ + | sudo tee /usr/local/share/zsh/site-functions/_hello >/dev/null +# note the underscore `_` prefix in the filename +``` + +Handily you can install `shtab`'s own completions by following the above +examples replacing `hello` with `shtab`. + +![](../uploads/images/2020-07-27/dvc.gif)_`shtab`-driven `dvc` completion in +`bash` and `zsh`_ + +Using `shtab`, here's what +[`dvc`'s completion](https://dvc.org/doc/install/completion) looks like when +installed: + +```dvc +% dvc +Completing dvc commands +add -- Track data files or directories with DVC. +cache -- Manage cache settings. +checkout -- Checkout data files from cache. +commit -- Save changed data to cache and update DVC-files. +completion -- Prints out shell tab completion scripts. 
+At Top: Hit TAB for more, or the character to insert +``` + +All completion suggestions guaranteed in-sync with the code! The maintainers of +`dvc` were very surprised to find no less than +[84 commits](https://github.com/iterative/dvc/commits/main/scripts/completion) +touching their old completion scripts. Such churn is now a thing of the past! + +You might notice one of the subcommands provided by `dvc` is +[`completion`](https://dvc.org/doc/install/completion). Here's a quick example +of how to provide such convenience for users: + +### Integrating library example + +Feeling minimal? How about adding `import shtab` to your application itself for +a cleaner user interface? And let's use [`argopt`] to convert [`docopt`]'s neat +syntax to `argparse` while we're at it. + +```python +"""Greetings and partings. + +Usage: + greeter [options] [] [] + +Options: + -g, --goodbye : Say "goodbye" (instead of "hello") + -b, --print-bash-completion : Output a bash tab-completion script + -z, --print-zsh-completion : Output a zsh tab-completion script + +Arguments: + : Your name [default: Anon] + : My name [default: Casper] +""" +import sys, argopt, shtab + +parser = argopt.argopt(__doc__) +if __name__ == "__main__": + args = parser.parse_args() + if args.print_bash_completion: + print(shtab.complete(parser, shell="bash")) + sys.exit(0) + if args.print_zsh_completion: + print(shtab.complete(parser, shell="zsh")) + sys.exit(0) + + msg = "k thx bai!" if args.goodbye else "hai!" + print("{} says '{}' to {}".format(args.me, msg, args.you)) +``` + +### Try it out + +There are many more options and features. The [documentation][`shtab`] includes +examples of working with custom file completions and providing a `completion` +subcommand when integrating more tightly with existing applications. + +Try it out with `pip install -U shtab` or `conda install -c conda-forge shtab`! + +Is it worth the time? 
+ +![](https://imgs.xkcd.com/comics/is_it_worth_the_time.png)_It's worth it +[xkcd#1205](https://xkcd.com/1205)_ + +[`shtab`] would be on the second row, far left (maybe even off grid). It's worth +spending days to get right yet only takes seconds to install. + +[`argopt`]: https://pypi.org/project/argopt +[`argparse`]: https://docs.python.org/library/argparse +[`docopt`]: https://pypi.org/project/docopt +[`dvc`]: https://github.com/iterative/dvc +[`git`]: https://git-scm.com +[`make`]: https://en.wikipedia.org/wiki/Make_(software) +[`shtab`]: https://github.com/iterative/shtab diff --git a/content/blogs/2020-08-07-cml-self-hosted-runners-on-demand-with-gpus.md b/content/blogs/2020-08-07-cml-self-hosted-runners-on-demand-with-gpus.md new file mode 100644 index 0000000000..c3c2636e29 --- /dev/null +++ b/content/blogs/2020-08-07-cml-self-hosted-runners-on-demand-with-gpus.md @@ -0,0 +1,178 @@ +--- +title: CML self-hosted runners on demand with GPUs +date: 2020-08-07 +description: > + Use your own GPUs with GitHub Actions & GitLab for continuous machine + learning. +descriptionLong: > + Training models often requires special hardware, like extra memory or GPUs. + How can we make a CI/CD pipeline with this hardware? Find out how to set up + your own self-hosted runners on-demand with GPUs for fast training. +picture: 2020-08-07/header.png +author: david_g_ortega +commentsUrl: https://discuss.dvc.org/t/cml-self-hosted-runners-on-demand-with-gpus/462 +tags: + - CML + - CI/CD + - MLOps + - GPUs + - Self-hosted runners + - Reproducibility + - Tutorial +--- + +When creating your CI/CD workflow for a machine learning (ML) project, you might +find that by default, neither GitHub Actions nor GitLab CI provides the +computing capabilities you need- like GPUs, high memory instances, or multiple +cores. + +To overcome this hardware hurdle, one practical approach is to use self-hosted +runners: runners that you manage, but are accessible to your CI/CD system for +executing jobs. 
It could be an EC2 instance or the GPU under your desk. In our +[recently-released project](https://dvc.org/blog/cml-release), Continuous +Machine Learning (CML), our Docker image acts as a thin wrapper over GitLab and +GitHub runners, adding some extra capabilities. + +Here are some benefits of using CML with a self-hosted runner: + +1. **Easy to use.** Working the same way for both GitLab and GitHub. + +2. **Get out of dependency hell.** We tend to install packages (on top of + packages, on top of packages…) while we‘re experimenting with models. In ML + in particular, we can be dependent on drivers AND libraries, and sometimes + precise versions of them (CUDA and TensorFlow, anyone?). Your CI workflow + will install all the dependencies in the containerised runner leaving your + machine always clean. + +3. **Security.** If your repo is public your runners could be accessed by + anyone that could add + [scripts that exploits your machine](https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners#self-hosted-runner-security-with-public-repositories). + With the containerised runner you are restricting the access to your real + machine. + +4. **Gain reproducibility.** One of the biggest technical debts in the ML space + is reproducibility. A few weeks post-experiment, we often discover that + trying to put your model back in shape is a pain. Looking at our repo, it’s + not obvious what data or training infrastructure or dependencies went into a + given result. When you move your ML experiments into a CI/CD system you are + making a contract of the dependencies and hardware used for your experiment. + Having that contract isolated by the containerised runner, your experiment + is perfectly reproducible by anyone in the future. 
+ +## Hands on GPU Self-hosted runners 101 + +### 1) Install nvidia drivers and nvidia-docker in your machine (ubuntu 18.04) + +```dvc +$ curl -s -L https://nvidia.GitHub.io/nvidia-docker/gpgkey | sudo apt-key add - && \ + curl -s -L https://nvidia.GitHub.io/nvidia-docker/ubuntu18.04/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list && \ + sudo apt update && sudo apt install -y ubuntu-drivers-common && \ + sudo ubuntu-drivers autoinstall && \ + sudo apt install -y nvidia-container-toolkit && \ + sudo systemctl restart docker +``` + +You can test that your gpus are up and running with the following command: + +```dvc +$ docker run --gpus all iterativeai/cml:0-dvc2-base1-gpu nvidia-smi +``` + +We should see something like this: +![](../uploads/images/2020-08-07/nvidia-smi-output.png) + +### 2) Start your self-hosted runner + +With CML docker images launching your own self-hosted runner is very easy. These +images have CML and DVC preinstalled (among other perks), plus CUDA drivers. +That's all. You can clone these images and add your own dependencies to better +mimic your own production environment. + +```dvc +$ docker run --name myrunner -d --gpus all \ + -e RUNNER_IDLE_TIMEOUT=1800 \ + -e RUNNER_LABELS=cml,gpu \ + -e RUNNER_REPO=$my_repo_url \ + -e repo_token=$my_repo_token \ + iterativeai/cml:0-dvc2-base1-gpu runner +``` + +where: + +`RUNNER_IDLE_TIMEOUT` is the time in seconds that the runner is going to be idle +at most waiting for jobs to come, if no one comes the runner shuts down and +unregisters from your repo. + +`RUNNER_LABELS` a comma delimited list of labels that we are setting in our +workflow that the jobs will wait for. + +`RUNNER_REPO` is the url of your GitLab or GitHub repo. repo_token is the +personal token generated for your GitHub or GitLab repo. Note that for GitHub +you must check `workflow` along with `repo`. + +If everything went fine we should see a runner registered in our repo. 
+ +![](../uploads/images/2020-08-07/registered-cml-runner-github.png) + +![](../uploads/images/2020-08-07/registered-cml-runner-gitlab.png) + +### 3) Setup your GitHub Actions or GitLab workflow yaml file to use the runner and commit your changes. + +GitLab + +```yaml +train: + tags: + - cml + - gpu + script: + - echo 'Hi from CML!' >> report.md + - cml send-comment report.md +``` + +GitHub + +```yaml +name: train-my-model +on: [push] +jobs: + train: + runs-on: [self-hosted, cml, gpu] + steps: + - uses: actions/checkout@v2 + - name: cml_run + run: | + echo 'Hi from CML!' >> report.md + cml send-comment report.md +``` + +Congrats! At this point you have done all the steps to have your GPUs up and +running with CML. + +# Limitations and future directions + +There are still some limitations to be solved at this stage: + +- GitHub Actions + [can’t run a workflow longer than 72 hours](https://docs.github.com/en/actions/getting-started-with-github-actions/about-github-actions#usage-limits). + +- Self-hosted runners + [don’t behave well when they disconnect from the repo](https://GitLab.com/GitLab-org/GitLab/-/issues/229851#note_390371734), + limiting the possibilities with preemptible instances (also known as spot + instances). + +We’re working on these issues (see issues +[#161](https://github.com/iterative/cml/issues/161), +[#174](https://github.com/iterative/cml/issues/174), and +[#208](https://github.com/iterative/cml/issues/208)) both in terms of CML and +DVC capabilities. So keep watching this space for updates! + +
+ +We started CML to help teams deal with the complexity of ML more effectively- +continuous integration is a proven approach to keeping projects agile even as +the team size, number of experiments, and number of dependencies increase. +Treating experiments like potential new features in a software project opens up +many possibilities for improving our engineering practices. We’re looking +forward to an era when ML experiments can be created, logged, and merged into +production-ready code in minutes, not days or weeks. diff --git a/content/blogs/2020-08-10-august-20-dvc-heartbeat.md b/content/blogs/2020-08-10-august-20-dvc-heartbeat.md new file mode 100644 index 0000000000..66af8f1b93 --- /dev/null +++ b/content/blogs/2020-08-10-august-20-dvc-heartbeat.md @@ -0,0 +1,216 @@ +--- +title: August ’20 Heartbeat +date: 2020-08-10 +description: > + Catch our monthly updates- featuring the CML release, DVC meetup recap, a new + video tutorial series, and the best reading about pipelines and DataOps. +descriptionLong: > + Catch our monthly updates- featuring the CML release, DVC meetup recap, a new + video tutorial series, and the best reading about pipelines and DataOps. +picture: 2020-08-10/header.png +pictureComment: DeeVee avoids the summer sun at Mount Rainier National Park. +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/august-20-heartbeat/465 +tags: + - Heartbeat + - CML + - DVC + - Meetup +--- + +Welcome to our August roundup of cool news, new releases, and recommended +reading in the MLOps world! + +## News + +### CML release + +At the beginning of July, we went live with a new project: +[Continuous Machine Learning, or CML](https://cml.dev) for short. If you +hadn't heard, CML is an open-source toolkit for adapting popular continuous +integration systems like GitHub Actions and GitLab CI for machine learning and +data science. 
This release marks a new stage for our organization: while CML can +work with DVC, and both are built around Git, CML is designed for standalone +use. That means we're supporting TWO projects now! + +![Threaten Ashley Olsen GIF](https://media.giphy.com/media/X5i2BoQeD9kWY/giphy.gif) + +Luckily, we received plenty of encouraging and helpful feedback following the +CML release. CML was on the front page of Hacker News for most of release day! +We also got +[covered on Heise](https://www.heise.de/news/Machine-Learning-CML-schickt-Daten-und-Modelltraining-in-die-Pipeline-4841023.html), +a popular German IT news source. I (Elle, a proud part of the CML team!) also +gave a talk presenting our approach as part of the MLOps World meeting, which is +now available for online viewing. + +https://youtu.be/yp0su5mOeko + +Of course, we're fielding lots of questions too! We've compiled some of the most +common questions (and their answers!) in our last +[Community Gems post](https://dvc.org/blog/july-20-community-gems), and CML +developer [David G. Ortega](https://github.com/DavidGOrtega) has written a +tutorial for a much-asked-for use case: doing +[continuous integration with on-demand GPUs](https://dvc.org/blog/cml-self-hosted-runners-on-demand-with-gpus). + +If you have comments, questions, or feature requests about CML, we _really_ want +to hear from you. A few ways to be in touch: + +- Open an [issue on the project repo](https://github.com/iterative/cml/issues) +- Drop by the [CML Discord channel](https://discord.gg/bzA6uY7) +- Send us [an email](mailto:support@dvc.org) + +### July Meetup + +Last week, we had another meetup! +[DVC Ambassador Marcel](http://mribeirodantas.me/) kicked us off with a short +talk about how he's using DVC as part of his causal modeling approach to +bioinformatics. It's cool stuff. Then, I talked a bit about CML and did some +live-coding. 
The beauty of live-coding is getting to answer questions in +real-time, and if you're totally new to the idea of continuous integration (or +want to understand how CML works with GitHub Actions/GitLab CI) seeing a project +in-action is one of the best ways to learn. + +You can watch a recording of the meetup online now (it's lightly edited to +remove some pesky Zoom trolls), and +[join our Meetup group](https://www.meetup.com/DVC-Community-Virtual-Meetups) to +get updates for the next one. In future meetups, we'd love to support community +members sharing their work, so get in touch if you'd like to present. + +https://youtu.be/tnTPHG5seDs + +### New video series + +We're starting up some new YouTube features! If you haven't seen our channel, +[check it out and consider subscribing](https://www.youtube.com/channel/UC37rp97Go-xIX3aNFVHhXfQ) +for hands-on tutorials and demos. Our +[first video introduced continuous integration and GitHub Actions](https://youtu.be/9BgIDqAzfuA), +and the second showed +[how to use DVC and free Google Drive storage to add external data storage to a GitHub project](https://youtu.be/kZKAuShWF0s). + +In the coming weeks, we'll be covering: + +- Using CML and GitHub Actions with hardware for deep learning, like on-premise + GPUs +- Understanding Vega plots and making data viz part of your CI system +- Some DVC basics to supplement our docs + +## From the community + +### SpaCy + DVC = ❤️ + +We're huge fans of a recent Python Bytes episode featuring +[Ines Montani](https://twitter.com/_inesmontani), founder of Explosion and one +of the makers of the incredible SpaCy library for NLP (seriously, I have the +highest recommendations for SpaCy). + +> My [@PythonBytes](https://twitter.com/pythonbytes) episode is out now! 
+> +> 🎙️ Listen here: [https://t.co/fHLF2hR4cM](https://t.co/fHLF2hR4cM) +> +> My picks of the week are: +> 🐙 TextAttack by @jxmorris12: +> [https://t.co/jySYrtzzp8](https://t.co/jySYrtzzp8) +> 🦉 Data Version Control (DVC) [@DVCorg](https://twitter.com/DVCorg): +> [https://t.co/3610F6kv8v](https://t.co/3610F6kv8v) +> 🐍 Built-in generic types in 3.9 +> +> — Ines Montani 〰️ (@\_inesmontani) +> [July 23, 2020](https://twitter.com/_inesmontani/status/1286222512762871808) + +Ines' episode discussed DVC, and DVC is going to be integrated with SpaCy in +their 3.0 release. SpaCy + DVC is going to be a powerhouse and we can't wait. + +### Take a stab at shtab + +Another cool software project: [Casper da Costa-Luis](https://cdcl.ml), DVC +contributor and creator of the popular +[tqdm library](https://github.com/tqdm/tqdm), has published a tab-completion +script generator for Python applications! `shtab`, as it's called, was +originally designed for DVC, but Casper developed it into a generic tool that +can be used for virtually any Python CLI application. Check out +[`shtab` on GitHub](https://github.com/iterative/shtab) and read the release +blog. + + + +### DVC 1.0 migration script + +Our friends at [DAGsHub](https://dagshub.com/) have released a script to help +DVC users upgrade their pipelines to the new DVC 1.0 format! Says Simon, a +DAGsHub engineer, in his tutorial: + +> In this post, I'll walk you through the process of migrating your existing +> project from DVC ≤ 0.94 to DVC 1.X using a single automated script, and then +> demonstrate a way to check that your migration was successful. + +Read the blog and get migrating (but don't worry if you can't; DVC 1.0 is +backwards compatible). + +### Recommended reading + +Here are some of our favorite blogs from around the internet 🌏. 
+ +- [Déborah Mesquita](https://deborahmesquita.com/), data scientist (and an + excellent writer to follow), published a tutorial about DVC pipelines that is + truly deserving of the moniker "ultimate guide". It's a start-to-finish case + study about a typical machine learning project, with DVC pipelines to automate + everything from grabbing the data to training and evaluating a model. Also, it + comes with a video tutorial if you prefer to watch instead of read! + + + +- Software engineer + [Vaithy Narayanan](https://www.linkedin.com/in/vaithyanathan/) created the + first ever ☝️ CML user blog! Vaithy created a pipeline that covers data + collection to model training and testing, and used CML to automate the + pipeline execution whenever the project's GitHub repository is updated. He + ends with some insightful discussion about the strengths and weaknesses of the + approach. + + + +- [Ryan Gross](https://www.linkedin.com/in/ryan-w-gross/), a VP at Pariveda + Solutions, blogged about the future of data governance and the lessons from + DevOps that might save the day. Honestly, you should probably start reading + for this cover image alone. + + ![](../uploads/images/2020-08-10/dataops.png) _DataOps is accurately depicted + as a badass flaming eagle._ Check out the blog here: + + + +And, there's a +[noteworthy counterpoint](https://locallyoptimistic.com/post/git-for-data-not-a-silver-bullet/?utm_campaign=Data_Elixir&utm_source=Data_Elixir_298) +by +[Michael Kaminsky](https://www.linkedin.com/in/michael-the-data-guy-kaminsky/). +Read them both! + +Thanks everyone, that's it for this month. We hope you're staying safe and +making cool things! 
+ +![Reaction GIF by MOODMAN](https://media.giphy.com/media/35EsMpEfGHkVoHbNTU/giphy.gif) diff --git a/content/blogs/2020-08-27-august-20-community-gems.md b/content/blogs/2020-08-27-august-20-community-gems.md new file mode 100644 index 0000000000..1650a45fda --- /dev/null +++ b/content/blogs/2020-08-27-august-20-community-gems.md @@ -0,0 +1,194 @@ +--- +title: August '20 Community Gems +date: 2020-08-27 +description: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + using CI/CD to validate models, advanced DVC pipeline scenarios, and how CML + adds pictures to your GitHub and GitLab comments. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + using CI/CD to validate models, advanced DVC pipeline scenarios, and how CML + adds pictures to your GitHub and GitLab comments. +picture: 2020-08-27/Gems_Aug_20.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/august-20-community-gems/477 +tags: + - Community Gems + - CML + - Hyperparameters + - Git LFS + - Pipelines +--- + +Here are some of our top Q&A's from around the community. With the launch of +[CML](https://cml.dev) earlier in the month, we've got some new ground to cover! + +## DVC questions + +### [Q: What's the relationship between the DVC remote and cache? If I have an external cache, do I really need a DVC remote?](https://discordapp.com/channels/485586884165107732/563406153334128681/747588572479094866) + +You can think of your DVC remote similar to your Git remote, but for data and +model artifacts- it's a place to backup and share artifacts. It also gives you +methods to push and pull those artifacts to and from your team. + +Your DVC cache (by default, it's located in `.dvc/cache`) serves a similar +purpose to your Git objects database (which is by default located in +`.git/objects`). 
They're both _local_ caches that store files (including various +versions of them) in a content-addressable format, which helps you quickly +checkout different versions to your local workspace. The difference is that +`.dvc/cache` is for data/model artifacts, and `.git/objects` is for code. + +Usually, your DVC remote is a superset of `.dvc/cache`- everything in your cache +is a copy of something in your remote (though there may be files in your DVC +remote that are not in your cache (and vice versa) if you have never attempted +to `push` or `pull` them locally). + +In theory, if you are using an +[external cache](https://dvc.org/doc/use-cases/fast-data-caching-hub#example-shared-development-server)- +meaning a DVC cache configured on a separate volume (like NAS, large HDD, etc.) +outside your project path- and all your projects and all your teammates use that +external cache, and you _know_ that the storage is highly reliable, you don't +need to also have a DVC remote. If you have any doubts about access to your +external cache or its reliability, we'd recommend also keeping a remote. + +### [Q: One of my files is an output of a DVC pipeline, and I want to track this file with Git and store it in my Git repository since it isn't very big. How can I make this work?](https://discordapp.com/channels/485586884165107732/563406153334128681/732308317627613235) + +Yes! There are two approaches. We'll be assuming you have a pipeline stage that +outputs a file, `myfile`. + +- If you haven't declared the pipeline stage with `dvc run` yet, then you'll do + it like this: + +```dvc +$ dvc run -n <stage_name> -d <dependencies> -O myfile <command> +``` + +Note that instead of using the flag `-o` for specifying the output `myfile`, +we're using `-O`- it's shorthand for `--outs-no-cache`. You can +[read about this flag in our docs](https://dvc.org/doc/command-reference/run#options). 
+ +- If you've already created your pipeline stage, go into your `dvc.yaml` and + manually add the field `cache: false` to the stage as follows: + +```yaml +outs: + - myfile: + cache: false +``` + +Please note one special case: if you previously enabled hardlinks or symlinks in +DVC via `dvc config cache`, you may need to run `dvc unprotect myfile` to fully +unlink `myfile` from your DVC cache. If you haven't enabled these types of file +links (and if you're not sure, _you probably didn't!_), this step is unnecessary. +[See our docs for more.](https://dvc.org/doc/command-reference/unprotect) + +### [Q: Can I change my `params.yaml` file to a `.json`?](https://discordapp.com/channels/485586884165107732/563406153334128681/730614265051873370) + +Yes, this is straightforward- you change your `params.yaml` to `params.json` in +your workspace, and then use it in `dvc run`: + +```dvc +$ dvc run -p params.json:myparam ... +``` + +Alternately, if your pipeline stage has already been created, you can manually +edit your `dvc.yaml` file to replace `params.yaml` with `params.json`. + +For more about the `params.yaml` file, +[see our docs](https://dvc.org/doc/start/experiments#defining-parameters). + +### [Q: Is there a guide for migrating from Git-LFS to DVC?](https://discordapp.com/channels/485586884165107732/485596304961962003/743559246599421974) + +We don't know of any published guide. One of our users shared their procedure +for disabling LFS: + +```dvc +$ git lfs uninstall +$ git rm .gitattributes +$ git rm .lfsconfig +``` + +Then you can `dvc add` files you wish to put in DVC tracking, and `dvc push` +them to your remote. After that, `git commit` and you're good! + +Note that, if you're going to delete any LFS files, make sure you're certain the +corresponding data has been transferred to DVC. 
+ +### [Q: Is there a way to use DVC and CML to validate a model in a GitHub Action, without making the validation data available to the user opening the Pull Request?](https://discordapp.com/channels/485586884165107732/485596304961962003/739202123295883325) + +We don't have special support for this use case, and there may be some security +downsides to using a confidential validation dataset with someone else's code +(be sure nothing in their code could expose your data!). But, there are ways to +implement this if you're sure about it. + +One possible approach is to create a separate "data registry" repository using a +private cloud bucket to store your validation dataset +([see our docs about the why and how of data registries](https://dvc.org/doc/use-cases/data-registries#data-registries)). +Your CI system can be setup to have access to the data registry via secrets +(called "variables" in GitLab). Then when you run validation via +`dvc repro validate`, you could use `dvc get` to pull the private data from the +registry. + +The data is never exposed to the user in an interactive setting, only on the +runner- and there it's ephemeral, meaning it does not exist once the runner +shuts down. + +## CML questions + +### [Q: Sometimes when I make a commit on a branch, my CI workflow isn't triggered. What's going on?](https://www.youtube.com/watch?v=9BgIDqAzfuA&lc=UgwKIYsCo194AErdeBJ4AaABAg) + +If your workflow is set to trigger on a push (as in the CML use cases), it isn't +enough to `git commit` locally- you need to push to your GitHub or GitLab +repository. If you want every commit to trigger your workflow, you'll need to +push each one! + +What about if you _don't_ want a push to trigger your workflow? In GitLab, you +can use the +[`[ci skip]` flag](https://docs.gitlab.com/ee/ci/yaml/#skip-pipeline)- make sure +your commit message contains `[ci skip]` or `[skip ci]`, and GitLab CI won't run +the pipeline in your `gitlab-ci.yml` file. 
+ +In GitHub Actions, this flag isn't supported, so you can manually kill any +workflows in the Actions dashboard. For a programmatic fix, +[check out this workaround by Tim Heuer](https://timheuer.com/blog/skipping-ci-github-actions-workflows/). + +### [Q: Can I do the bulk of my model training outside of my CI system, and then share the result with CML?](https://twitter.com/peterkuai/status/1295899690404175872) + +Definitely! This is a desirable workflow in several cases: + +- You have a preferred approach for experiment tracking (for example, DVC or + MLFlow) that you want to keep using +- You don't want to set up a self-hosted runner to connect your computing + resources to GitHub or GitLab +- Training time is on the order of days or more + +CML is very flexible, and one strong use case is for sanity checking and +evaluating a model in a CI system post-training. When you have a model that +you're satisfied with, you can check it into your CI system and use CML to +evaluate the model in a production-like environment (such as a custom Docker +container), report its behavior and informative metrics. Then you can decide if +it's ready to be merged into your main branch. + +### [Q: Can I make a CML report comparing models across different branches of a project?](https://github.com/iterative/cml/issues/188) + +Definitely. This is what `dvc metrics diff` is for- like a `git diff`, but for +model metrics instead of code. We made a video about how to do this in CML! + +https://youtu.be/xPncjKH6SPk + +### [Q: In the function `cml publish`, it looks like you're uploading published files to `https://asset.cml.dev`. Why don't you just save images in the Git repository?](https://discordapp.com/channels/485586884165107732/728693131557732403/745168931521822740) + +If an image file is created as part of your workflow, it's ephemeral- it doesn't +exist outside of your CI runner, and will disappear when your runner is shut +down. 
To include an image in a GitHub or GitLab comment, a link to the image +needs to persist. You could commit the image to your repository, but typically, +[it's undesirable to automatically commit results of a CI workflow](https://stackoverflow.com/questions/61245284/is-it-necessary-to-commit-dvc-files-from-our-ci-pipelines). + +We created a publishing service to help you host files for CML reports. Under +the hood, our service uploads your file to an S3 bucket and uses a key-value +store to share the file with you. + +This covers a lot of cases, but if the files you wish to publish can't be shared +with our service for security or privacy reasons, you can emulate the +`cml publish` function with your own storage. You would push your file to +storage and include a link to its address in your markdown report. diff --git a/content/blogs/2020-09-09-september-20-dvc-heartbeat.md b/content/blogs/2020-09-09-september-20-dvc-heartbeat.md new file mode 100644 index 0000000000..44e8dce7c5 --- /dev/null +++ b/content/blogs/2020-09-09-september-20-dvc-heartbeat.md @@ -0,0 +1,188 @@ +--- +title: September ’20 Heartbeat +date: 2020-09-09 +description: > + This month, catch us on the Software Engineering Daily Podcast, check out our + favorite DVC and CML tutorials and projects, and celebrate 1000 YouTube + subscribers! +descriptionLong: > + This month, catch us on the Software Engineering Daily Podcast, check out our + favorite DVC and CML tutorials and projects, and celebrate 1000 YouTube + subscribers! +picture: 2020-09-09/header.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/september-20-heartbeat/488 +tags: + - Heartbeat + - CML + - DVC + - R + - Meetup + - Videos +--- + +## News + +### Dmitry on Software Engineering Daily + +Our CEO Dmitry Petrov was interviewed on the much-beloved Software Engineering +Daily podcast! 
Host [Jeff Meyerson](https://twitter.com/the_prion) kicked off +the discussion: + +> Code is version controlled through Git, the version control system originally +> built to manage the Linux codebase. For decades, software has been developed +> using git for version control. More recently, data engineering has become an +> unavoidable facet of software development. It is reasonable to ask–why are we +> not version controlling our data? + +For the rest of the episode, listen here! + + + +### Contributor's meetup + +Last week, we held a meetup for contributors to DVC! Core maintainer +[Ruslan Kupriev](https://github.com/efiop) hosted a get-together for folks who +contribute new features, bug fixes, and more to the community. If you missed it, +you can watch it on YouTube. + +https://youtu.be/jUYSTERXxWg + +### New videos + +We've released several new videos to our growing +[YouTube channel](https://www.youtube.com/channel/UC37rp97Go-xIX3aNFVHhXfQ)- and +cool news, we passed 1,000 subscribers! The support has been surprising in the +best way possible. We're seeing a lot of repeat commenters and folks from the +DVC meetups! It's been so rewarding to get positive feedback from the community +and we're planning to build our YouTube presence even more. + +![Happy GIF](https://media.giphy.com/media/ZE0JppdERv8t4jVCAt/giphy.gif) + +_Even Skeletor finds joy in this._ + +We now have 4 tutorials in our MLOps series. In the latest, we cover how to use +your own GPU (on-premise or in the cloud) to run GitHub Actions workflows. Check +it out and give it a try, the code examples are freely available :) + +https://youtu.be/rVq-SCNyxVc + +We also made our first ever "explainer" video to talk through how DVC works in +five minutes. + +https://youtu.be/UbL7VUpv1Bs + +As always, video requests are welcome! Reach out and let us know what topics and +tutorials you want to see covered. And we appreciate any likes, shares, and +subscribes on our growing YouTube channel. 
+ +## From the community + +### A three-part CML series (featuring R!) + +DVC ambassador [Marcel Ribeiro-Dantas](https://twitter.com/mribeirodantas) has +published two of three tutorial blogs in a series on CML! Marcel's use case is +especially cool because he's using R, plus some causal modeling related to his +work in bioinformatics, with GitHub Actions. + +In Part I, Marcel introduces his project and how he uses DVC, CML and GitHub +Actions together (with his custom R library). + + + +In Part II, Marcel takes a deeper dive into Docker. He explains how to create +your own Docker image and test it. This case should be helpful for folks who +want to include the CML library in their own Docker container. + + + +### Real Python talks DVC + +[Kristijan Ivancic](https://twitter.com/kristijan_ivanc) of +[Real Python](https://realpython.com), a library of online Python tutorials and lessons, +created a _seriously_ impressive DVC tutorial (this thing is a beast 🐺- it has +a table of contents!) + +![](../uploads/images/2020-09-09/Real_Python.png)_How cool is this artwork?_ + +And, the Real Python podcast discussed their DVC tutorial (plus the joys of +version control for data!) on a recent episode. + + + +### Recommended reading + +There's a lot of cool stuff happening out there in the data science world 🌏! + +- [Fabiana Clemente](https://twitter.com/fab_clemente), Chief Data Officer of + [YData](https://ydata.ai/), published a blog for The Startup about four + reasons to start using data version control- and, with her expertise in data + privacy, she's especially well-qualified to explain the role of DVC in + compliance and auditing! Check out her blog (it comes with a quick-start + tutorial, too). + + + +- Ryzal Kamis at the [AI Singapore Makerspace](https://makerspace.aisingapore.org) + shared a blog (the first of two!) about creating end-to-end CI/CD workflows + for machine learning. 
In his first blog, Ryzal gives a high-level overview of + the need for data version control and compares several tools in the space. + Then he gives a walkthrough (quite easy to follow!) of how DVC fits in his + workflow. We're eagerly awaiting the second installment of this series, which + promises to bring more advanced automation scenarios and a CI/CD pipeline. + + + +- [Isaac Sacolick](https://www.infoworld.com/author/Isaac-Sacolick/), + contributing editor at InfoWorld, penned an article about the growing field of + MLOps and its role in data-driven businesses. He writes: + +> Too many data and technology implementations start with poor or no problem +> statements and with inadequate time, tools, and subject matter expertise to +> ensure adequate data quality. Organizations must first start with asking smart +> questions about big data, investing in dataops, and then using agile +> methodologies in data science to iterate toward solutions. + +Read the rest here: + + + +Thanks everyone, that's a wrap for this month. Be safe, stay in touch, and get +ready for pumpkin spice latte season 🎃. + +![Cat Fall GIF](https://media.giphy.com/media/EDpVRPFK5bjfq/giphy.gif) diff --git a/content/blogs/2020-09-28-september-20-community-gems.md b/content/blogs/2020-09-28-september-20-community-gems.md new file mode 100644 index 0000000000..24cc5f0a77 --- /dev/null +++ b/content/blogs/2020-09-28-september-20-community-gems.md @@ -0,0 +1,208 @@ +--- +title: September '20 Community Gems +date: 2020-09-28 +description: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + customizing your DVC plots, the difference between external dependencies and + outputs, and how to save models and data in CI. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, we discuss + customizing your DVC plots, the difference between external dependencies and + outputs, and how to save models and data in CI. 
+picture: 2020-09-28/Gems_Sept_20.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/september-20-community-gems/512 +tags: + - Community Gems + - CML + - Hyperparameters + - External Data + - SSH + - Vega +--- + +## DVC questions + +### [Q: When I try to push to my DVC remote, I get an error about my SSH-RSA keys. What's going on?](https://discordapp.com/channels/485586884165107732/485596304961962003/748735263634620518) + +If you're using DVC with an SSH-protected remote, DVC uses a Python library +called `paramiko` to create a connection to your remote. There is a +[known issue](https://stackoverflow.com/questions/51955990/base64-decoding-error-incorrect-padding-when-loading-putty-ppk-private-key-to) +that `paramiko` expects RSA keys in OpenSSH key format, and can throw an error +if the keys are in an alternative format (such as default PuTTY formatted keys). +If this is the case, you'll likely see: + +``` +ERROR: unexpected error - ('... ssh-rsa ...=', Error('Incorrect padding',)) +``` + +To fix this, convert your RSA key to the OpenSSH format. Tools like +[PuTTYgen](https://www.puttygen.com/) and +[MobaKeyGen](https://mobaxterm.mobatek.net/) can help you do this. + +### [Q: Can I have multiple `param.yaml` files in a project?](https://discordapp.com/channels/485586884165107732/563406153334128681/753322309942509578) + +Yes, you can have as many separate parameter files as you'd like. It's only +important that they are correctly specified in your DVC pipeline stages. + +For example, if you have files `params_data_processing.yaml` and +`params_model.yaml` in your project (perhaps to store hyperparameters of your +data processing and model fitting stages, respectively), you'll want to call the +right file at each stage. For example: + +```dvc +$ dvc run -n preprocess \ + -p params_data_processing.yaml:param1,param2,... +``` + +### [Q: Is there a way to automatically produce SVG plots from `dvc plot`? 
I don't like having to click through the Vega-Lite GUI to get an SVG, and my plots look so small when I access them in the browser.](https://discordapp.com/channels/485586884165107732/563406153334128681/750012082149392414) + +If your DVC plots (and by DVC plots, we mean Vega-Lite plots 😉) look small in +your browser, you can modify this programmatically! DVC generates Vega-Lite +plots by way of a few templates that come pre-loaded. The templates are in +`.dvc/plots` (assuming you're in a DVC directory). + +Find the template that corresponds to your plot (if you didn't specify a plot +type in your CLI command, it's probably `default.json`) and modify the `height` +and `width` parameters. Then save your changes. + +For more about how to modify your plot templates, check out the +[Vega docs](https://vega.github.io/vega/docs/specification/). If you're +considering making a whole new template that's custom for your data viz needs, +[we've got docs on that](https://dvc.org/doc/command-reference/plots#custom-templates), +too. + +One last tip: did you know about the +[Vega-Lite CLI](https://anaconda.org/conda-forge/vega-lite-cli)? It provides +functions for converting Vega-Lite plots to `.pdf`,`.png`,`.svg`, and `.vg` +(Vega) formats. To use this approach with DVC, you'll want to use the +`--show-vega` flag to print your plot specification to a `.json` file. + +```dvc +$ dvc plots --show-vega > vega.json +$ vl2svg vega.json +``` + +### [Q: I'm confused about external dependencies and outputs. What's the difference?](https://discordapp.com/channels/485586884165107732/485596304961962003/752478399326453840) + +In short, external outputs and dependencies are files or directories that are +tracked by DVC, but physically reside outside of the local workspace. 
This could +happen for a few reasons: + +- You want to version a dataset in cloud storage that is too large to transfer + to your local workspace efficiently +- Your DVC pipeline writes directly to cloud storage +- Your DVC pipeline depends on a dataset or other file in cloud storage + +An **external output** is declared in two ways: for example, if you have a file +`data.csv` in S3 storage, you can use +`dvc add --external s3://mybucket/data.csv` to begin DVC tracking the file +([there are plenty more details and tips about managing external data in our docs](https://dvc.org/doc/user-guide/managing-external-data)). +You can also declare `data.csv` as an output of a DVC pipeline with +`dvc run -o s3://mybucket/data.csv`. + +An **external dependency** is a dependency of a DVC pipeline that resides in +cloud storage. It's declared with the syntax +`dvc run -d s3://mybucket/data.csv`. + +One other difference to note: DVC doesn't cache external dependencies; it merely +checks if they have changed when you run `dvc repro`. On the other hand, DVC +_does_ cache external outputs. You'll want to set up an +[external cache](https://dvc.org/doc/user-guide/how-to/share-a-dvc-cache#configure-the-shared-cache) +in the same remote location where your files are stored. This is because the +default cache location (in your local workspace) no longer makes sense when the +dataset never "visits" your local workspace! An external cache works largely the +same as a typical cache in your workspace. + +## CML questions + +### [Q: How can I use CML with my own Docker container?](https://discordapp.com/channels/485586884165107732/728693131557732403/757553135840526376) + +In many of our CML docs and videos, we've shown how to get CML on your CI +(continuous integration) runner via a Docker container that comes with +everything installed. But this is not the only way to use CML, especially if you +want workflows to run in your own Docker container. 
+ +You can install CML via `npm`, either in your own Docker container or in your CI +workflow (i.e., in your GitHub Actions `.yaml` or GitLab CI `.yml` workflow +file). + +To install CML as a package, you'll want to run: + +```bash +$ npm i -g @dvcorg/cml +``` + +Note that you may need to install additional dependencies if you want to use DVC +plots and Vega-Lite commands: + +```bash +$ sudo apt-get install -y libcairo2-dev libpango1.0-dev libjpeg-dev libgif-dev \ + librsvg2-dev libfontconfig-dev +$ npm install -g vega-cli vega-lite +``` + +If you're installing CML as part of your workflow, you may need to install Node +first- +[check out our docs](https://github.com/iterative/cml#install-cml-as-a-package) +for how to do this in GitHub Actions and GitLab CI. + +### [Q: After running a GitHub Action workflow that runs a DVC pipeline, I want to save the output of the pipeline. Why doesn't CML automatically save the output?](https://discordapp.com/channels/485586884165107732/728693131557732403/757686601953312988) + +By design, artifacts generated in a CI workflow aren't saved anywhere- they +disappear as soon as the runner shuts down. So a DVC pipeline executed in your +CI system might produce outputs, like transformed datasets and model files, that +will be lost at the end of the run. If you want to save them, there are a few +methods. + +One approach is with auto-commits: a `git commit` at the end of your CI workflow +to commit any new artifacts to your Git repository. However, auto-commits have a +lot of downsides- they don't make sense for a lot of users, and generally, it's +better to re-create outputs as needed than save them forever in your Git repo. + +We created the DVC `run-cache` in part +[to solve this issue](https://stackoverflow.com/questions/61245284/is-it-necessary-to-commit-dvc-files-from-our-ci-pipelines). 
+Here's how it works: you'll set up a DVC remote with access credentials passed to
+your GitHub Action/GitLab CI via CML (see, for example,
+[this workflow](https://github.com/iterative/cml_dvc_case/blob/master/.github/workflows/cml.yaml)).
+Then you'll use the following protocol in your CI workflow (your workflow config
+file in GitHub/GitLab):
+
+```dvc
+$ dvc pull --run-cache
+$ dvc repro
+$ dvc push --run-cache
+```
+
+When you use this design, any artifacts of `dvc repro`, such as models or
+transformed datasets, will be saved in DVC storage and indexed by the pipeline
+version that generated them. You can access them in your local workspace by
+running
+
+```dvc
+$ dvc pull --run-cache
+$ dvc repro
+```
+
+While we think this is ideal for typical data science and machine learning
+workflows, there are other approaches too- if you want to go deeper exploring
+auto-commits, check out the
+[Add & Commit GitHub Action](https://github.com/marketplace/actions/add-commit).
+
+### [Q: What can CML do that Circle CI can't do?](https://www.youtube.com/watch?v=9BgIDqAzfuA&lc=Ugylt6QR5ClmD8uHe4B4AaABAg)
+
+To be clear, CML isn't a competitor to Circle CI. Circle CI is more analogous to
+GitHub Actions or GitLab CI; it's a continuous integration system.
+
+CML is a toolkit that works with a continuous integration system to 1) provide
+big data management (via DVC & cloud storage), 2) help you write model metrics
+and data viz to comments in GitHub/Lab, and 3) orchestrate cloud resources for
+model training and testing. Currently, CML is only available for GitHub Actions
+and GitLab CI.
+
+So to sum it up: CML is not a standalone continuous integration system! It's a
+toolkit that works with existing systems, which in the future could include
+Circle CI, Jenkins, Bamboo, Azure DevOps Pipelines, and Travis CI. 
Feel free to
+[open a feature request ticket](https://github.com/iterative/cml/issues), or
+leave a 👍 on open requests, to "vote" for the integrations you'd like to see
+most.
diff --git a/content/blogs/2020-10-12-october-20-dvc-heartbeat.md b/content/blogs/2020-10-12-october-20-dvc-heartbeat.md
new file mode 100644
index 0000000000..291b075a63
--- /dev/null
+++ b/content/blogs/2020-10-12-october-20-dvc-heartbeat.md
@@ -0,0 +1,164 @@
+---
+title: October ’20 Heartbeat
+date: 2020-10-12
+description: >
+ This month, hear about our international talks, new video docs on our YouTube
+ channel, and the best tutorials from our community.
+descriptionLong: >
+ This month, hear about our international talks, new video docs on our YouTube
+ channel, and the best tutorials from our community.
+picture: 2020-10-12/cover.png
+pictureComment:
+ Double DeeVee! One of these birds is on a layover before heading to Germany.
+author: elle_obrien
+commentsUrl: https://discuss.dvc.org/t/october-20-heartbeat/527
+tags:
+ - Heartbeat
+ - CML
+ - DVC
+ - Tutorial
+ - Conference
+ - Meetup
+ - YouTube
+---
+
+## News
+
+### Paweł gets ready to speak at Poland's largest data science meeting
+
+DVC developer Paweł Redzyński (he's written a lot of the code behind
+`dvc plots`) is giving a talk at the [Data Science Summit](https://dssconf.pl/)
+in Poland! The virtual meeting is on October 16, but talks are available for
+streaming on demand up to a week before. Paweł's talk is part of the DataOps &
+Development track, where he'll be sharing about CML and GitHub Actions (note
+that it'll be delivered in English).
+
+[![](../uploads/images/2020-10-12/dss.png)](https://dssconf.pl)
+
+### Dmitry talks at Data Engineering Melbourne
+
+CEO
+[Dmitry Petrov dropped into the Data Engineering Melbourne meetup](https://www.meetup.com/Data-Engineering-Melbourne/events/267033998/)
+to talk about Data Versioning and DataOps! 
He spoke about the differences +between end-to-end platforms and ecosystems of tools, and how this distinction +informs the development of software like DVC and CML (hint: we picked tools over +platforms). + +Keep an eye on this meetup, which is now accessible to folks on all continents +thanks to the magic of the internet :) + + + +### Elle has talks at PyCon India and PyData Global + +Last week I gave a talk about CML at +[PyCon India](https://in.pycon.org/cfp/2020/proposals/how-to-make-continuous-integration-work-with-machine-learning~avK5b/), +and have another one coming up at +[PyData Global](https://global.pydata.org/talks/321) this November 11-15. + + + +PyData Global has a fantastic lineup of talks spanning science and engineering, +so please consider joining! + +### DVC at DataFest + +DVC Ambassador Mikhail Rozhkov co-hosted the Machine Learning REPA +(Reproducibility, Experiments and Pipelines Automation) track of +[DataFest 2020](https://datafest.ru/), and DVC showed up in full force! There +were talks from Dmitry, ambassador Marcel Ribeiro-Dantas, and myself about all +aspects of MLOps and automation. + +DataFest is over (until next year, anyway), but +[visit the ML-REPA community](http://ml-repa.ru/en#about) for ongoing content +and opportunities for networking. + +### New videos + +Since the summer, we've been building our +[YouTube channel](https://www.youtube.com/channel/UC37rp97Go-xIX3aNFVHhXfQ). +It's going great- we've gotten more than 18,000 views in the last few months and +1,500 subscribers! + +Our latest video in the +[MLOps Tutorials](https://www.youtube.com/playlist?list=PL7WG7YrwYcnDBDuCkFbcyjnZQrdskFsBz) +series introduced using GitHub Actions for model testing- instead of training a +model in continuous integration, the idea is to train locally and "check-in" +your favorite model for testing in a standardized environment. 
This approach
+lets you completely control the environment, infrastructure, and code used to
+evaluate your model, and save the run in a place that's easy to share (GitHub!).
+
+https://youtu.be/bSXUJRnQPPo
+
+We'll be going deeper into the art and craft of testing ML models in the next
+few weeks, so stay tuned. Another big initiative is adding videos to our docs:
+since video seems like a popular format for a lot of learners, we're working to
+supplement our official docs with embedded videos. Check out our first
+installment on the
+[Getting Started with Data Versioning](https://dvc.org/doc/start/data-and-model-versioning).
+
+https://youtu.be/kLKBcPonMYw
+
+## From the community
+
+Our community makes some amazing tutorials. Here are a few on our radar:
+
+Data scientist and full-stack developer
+[Ashutosh Hathidara](https://github.com/ashutosh1919) shared an end-to-end
+machine learning project made with DVC and CML... and released it in video form!
+It's a neat setup and a nice model for folks to study.
+
+https://youtu.be/H1VBsK7XiKs
+
+Another detailed and easy-to-follow tutorial, with a similarly impressive scope,
+appeared on [Heise Online](https://www.heise.de/). This project puts together
+DVC, Cortex, and ONNX to develop and deploy a model trained on the Fashion MNIST
+dataset (note: the article is in German, and I read it with Chrome's English
+translation).
+
+
+
+You'll also want to check out [anno.ai](https://www.anno.ai/)'s tutorial about
+managing large datasets with DVC and S3 storage- it's detailed, but also a
+quick-start guide informed by the team's practical experience.
+
+
+
+Data scientist and mathematician [Khuyen Tran](https://twitter.com/KhuyenTran16)
+blogged about why and how to start using DVC- and her tutorial includes Google
+Drive remote storage, a feature we're especially excited about. Check it out and
+follow along with her code examples!
+
+
+
+And to end on a thoughtful note... 
have you seen this thread by ML Engineer +[Shreya Shankar](https://twitter.com/sh_reya)? She beautifully summarizes many +of the ideas and technical challenges our community thinks about every day. Read +and reflect! + +https://twitter.com/sh_reya/status/1314338372073263112 diff --git a/content/blogs/2020-10-26-october-20-community-gems.md b/content/blogs/2020-10-26-october-20-community-gems.md new file mode 100644 index 0000000000..67298fa0d7 --- /dev/null +++ b/content/blogs/2020-10-26-october-20-community-gems.md @@ -0,0 +1,170 @@ +--- +title: October '20 Community Gems +date: 2020-10-26 +description: > + A roundup of technical Q&A's from the DVC community. This month, learn how DVC + files work, how to use DVC plots for multi-class classification problems, and + how to deal with some spooky error messages 👻. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, learn how DVC + files work, how to use DVC plots for multi-class classification problems, and + how to deal with some spooky error messages 👻. +picture: 2020-10-26/Gems_Oct_20.png +pictureComment: | + Happy Halloween from Pirate DeeVee! +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/october-20-community-gems/535 +tags: + - Community Gems + - CML + - Vega + - Metrics +--- + +## DVC questions + +### [Q: What's in a `.dvc` file, and what would happen if decided not push my `.dvc` files to my Git repo?](https://discordapp.com/channels/485586884165107732/485596304961962003/760920403064520755) + +DVC creates lightweight metafiles (`.dvc` files) that correspond to large +artifacts in your project. These `.dvc` files contain pointers to your artifacts +in remote storage (we use a simple content-based storage scheme). 
Because we use
+content-based storage, the remote storage itself isn't designed for browsing
+(although
+[there are some discussions](https://github.com/iterative/dvc/issues/3621) about
+how to make stored files more "discoverable", and you can always identify them
+manually by their contents and meta-information like timestamps).
+
+Your `.dvc` files help establish meaningful links between human-readable
+filenames and file contents in remote storage, as well as to use Git versioning
+on your stored datasets and models. You can think of your DVC remote storage as
+a _complement_ to your Git repository, not a replacement.
+
+In other words... if you're not Git versioning your `.dvc` files, you're not
+versioning anything in DVC remote storage!
+
+### [Q: Can I limit the number of network connections used by DVC during `dvc pull`?](https://discordapp.com/channels/485586884165107732/485596304961962003/739760523293360182)
+
+Yep- by default, DVC data transfer operations use a number of threads
+proportional to the number of CPUs detected. But, there's a handy flag for
+`dvc pull` and `dvc push` that lets you override the defaults:
+
+```dvc
+-j , --jobs - number of threads to run
+simultaneously to handle the downloading of files from
+the remote. The default value is 4 * cpu_count(). For
+SSH remotes, the default is just 4. Using more jobs may
+improve the total download speed if a combination of small
+and large files are being fetched.
+```
+
+### [Q: I'm working on a multi-class classification task. Can `dvc plots` show multiple precision recall curves- one for each class?](https://discordapp.com/channels/485586884165107732/485596304961962003/765117500530491472)
+
+Currently, `dvc plots` doesn't support multiple linear curves on a single plot
+(except for `dvc plots diff`, of course!). But, you could make one precision
+recall curve per class and display them side-by-side. 
+ +To do this, you'd want to write the precision recall curve values to separate +files for each class (`prc-0.json`,`prc-1.json`, etc.). Then you would run: + +```dvc +$ dvc plots show prc-0.json prc-1.json +``` + +And you'll see two plots side-by-side! A benefit of this approach is that when +you run `dvc plots diff` to compare precision recall curves across Git commits, +you'll get a comparison plotted for each class. + +### [Q: Are you sure I should commit my `.dvc/config` file? It contains my logging credentials for storage, and I'm nervous about adding it to a shared Git repository.](https://discordapp.com/channels/485586884165107732/563406153334128681/768770079596740650) + +This is a common scenario- you don't necessarily want to broadcast your remote +storage credentials to everyone on your team, but you still want to check-in +your DVC setup (meaning, your `.dvc/config` file). In this case, you want to use +a `local` config file! + +You can use the command + +```dvc +$ dvc config --local +``` + +to setup remote credentials that will be stored in `.dvc/config.local`- by +default, this file is in your `.gitignore` so you don't have to worry about +accidentally committing secrets to your Git repository. +[Check out the docs](https://dvc.org/doc/command-reference/config) for more, +including the `--system` and `--global` options for setting your configuration +for multiple projects and users respectively. + +## CML Questions + +### [Q: What's the file size limit for publishing files with `cml publish`?](https://discordapp.com/channels/485586884165107732/728693131557732403/751001285100306502) + +`cml publish` is a service for hosting files that are embedded in CML reports, +like images, audio files, and GIFS. By default, we have a limit of 2 MB per +upload. + +If your files are larger than this (which can happen, depending on the machine +learning problem you're working on!) we recommend using GitLab's artifact +storage. 
+[Based on discussions in the community](https://github.com/iterative/cml/issues/232),
+we recently implemented a CML flag (`--gitlab-uploads`) to streamline the
+process:
+
+```dvc
+$ cml publish movie.mov --md --gitlab-uploads > report.md
+```
+
+Note that we don't currently have an analogous solution for GitHub, because
+GitHub artifacts expire after 90 days (whereas they're permanent in GitLab).
+
+### [Q: I'm getting a mysterious error message, `Failed guessing mime type of file`, when I try to use `cml publish`. What's going on?](https://discordapp.com/channels/485586884165107732/728693131557732403/763840404675756042)
+
+This error message usually means that the target of `cml publish`- for example,
+
+```dvc
+$ cml publish
+```
+
+is not found. Check for typos in the target filename and ensure that the file
+was in fact generated during the run (if it isn't part of your Git repository).
+We've [opened an issue](https://github.com/iterative/cml/issues/308) to add a
+more informative error message in the future.
+
+### [Q: In my GitHub Actions workflow, I use `dvc metrics diff` to compare metrics generated during the run to metrics on the main branch and print a table- but the table isn't showing any of the metrics from `main`. What could be happening?](https://discordapp.com/channels/485586884165107732/728693131557732403/768815157034876929)
+
+When a continuous integration runner won't report metrics from previous versions
+of your project (or other branches), that's usually a sign that the runner
+doesn't have access to the full Git history of your project or your metrics
+themselves. Here are a few things to check for:
+
+1. **Did you fetch your Git working tree in the runner?** Functions like
+ `dvc metrics diff` require the Git history to be accessible- make sure that
+ in your workflow, before you run this function, you've done a `git fetch`. We
+ recommend:
+
+```dvc
+$ git fetch --prune --unshallow
+```
+
+2. 
**Are your metrics in your DVC remote?** If your metrics are _cached_ (which + they are by default when you create a DVC pipeline), your DVC remote should + be accessible to your runner. That means you need to add any credentials as + repository secrets (or variables, in GitLab), and do `dvc pull` in your + workflow before attempting `dvc metrics diff`. + +3. **Are your metrics in your local workspace?** If you are _not_ using a DVC + remote, your metric files must be _uncached_ and committed to your Git + repository. To explore an example, say you have a pipeline stage that creates + `metric.json`: + +```dvc +$ dvc run -n mystage -m metric.json train.py +``` + +By default, `metric.json` is cached and ignored by Git- which means that if you +aren't using a DVC remote in your CI workflow, `metric.json` will effectively be +abandoned on your local machine! You can avoid this by using the `-M` flag +instead of `-m` in `dvc run`, or manually adding the field `cache: false` to +your metric in `dvc.yaml`. Be sure to remove your metrics from any `.gitignore` +files, and commit and push them to your Git repository. + +That's all for this month- Happy Halloween! Watch out for scary bugs. 🐛 diff --git a/content/blogs/2020-11-11-november-20-dvc-heartbeat.md b/content/blogs/2020-11-11-november-20-dvc-heartbeat.md new file mode 100644 index 0000000000..dd1c219ce7 --- /dev/null +++ b/content/blogs/2020-11-11-november-20-dvc-heartbeat.md @@ -0,0 +1,172 @@ +--- +title: November ’20 Heartbeat +date: 2020-11-11 +description: > + Catch our monthly updates- featuring new video docs and talks, new jobs at + DVC, and must-read contributions from the community about MLOps, data science + with R, and ML in production. +descriptionLong: > + Catch our monthly updates- featuring new video docs and talks, new jobs at + DVC, and must-read contributions from the community about MLOps, data science + with R, and ML in production. +picture: 2020-11-08/cover.png +pictureComment: Double DeeVee! 
+author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/november-20-heartbeat/554 +tags: + - Heartbeat + - CML + - DVC + - Tutorial + - Conference + - R + - MLOps +--- + +## News + +Welcome to the November Heartbeat! Let's dive in with some news from the team. + +### DataCouncil interviews Dmitry + +[Data Council](https://twitter.com/DataCouncilAI)'s +[Peter Soderling](https://twitter.com/petesoder?lang=en) interviewed CEO Dmitry! +Check out the recording from Data Council's live event, including Q&A from the +Data Council community, on YouTube. + +https://youtu.be/8dBCgIa7TGE + +### We're hiring + +Did you know we're hiring for two roles in our growing team? We're looking for: + +- A + [**Senior Software Engineer**](https://weworkremotely.com/remote-jobs/iterative-senior-software-engineer-open-source-dev-tools-3) + for the core DVC team- someone with strong Python development skills who can + build and ship essential DVC features. + +- A + [**Developer Advocate**](https://weworkremotely.com/remote-jobs/iterative-developer-advocate) + to lead the community, support contributors and new users, and create new + content like blogs and videos about DVC and CML. + +Here are a few reasons to consider joining us: + +- Your work will be visible and will be used by thousands developers every day! +- We're a small, fully remote team. Work from anywhere! +- Competitive salary and benefits +- Family-friendly benefits, including unlimited PTO + +If you're interested, we'd love to hear from you about either role (and we +welcome referrals if you know a good candidate)! + +### New videos + +We're continuing to develop our video docs, and now half of our "Getting +Started" section has video accompaniments. Check out our latest release on +[data access with DVC](https://dvc.org/doc/start/data-and-model-access): + +https://youtu.be/EE7Gk84OZY8 + +This video covers functions like `dvc get`, `dvc import`, and the DVC Python +API. 
+ +We took a quick break from releasing videos during the US election week, but +look out for a new video on our +[YouTube channel](https://www.youtube.com/channel/UC37rp97Go-xIX3aNFVHhXfQ) +about model testing with continuous integration! Subscribe to get alerts +whenever we have something new :) + +### Workshops and conferences + +As usual, there are plenty of remote meetings on our schedules: + +- [HealthData Bootcamp](http://www.bootcamp.dadosesaude.com/) is a weeklong + intensive for all things biomedical data science. Dmitry and myself (Elle), + plus DVC Ambassadors Mikhail Rozhkov and Marcel Ribeiro-Dantas, will be + presenting lectures and workshops about MLOps throughout the week! + +- I'll be leading a hands-on workshop at the + [Toronto Machine Learning Society Annual Meeting](https://torontomachinelearning.com/). + It'll cover how to get started using + [Continuous Machine Learning](https://cml.dev)(CML) with GitHub Actions- + [register here](https://torontomachinelearning.com/), and be sure to reserve + your spot in the workshop. + +- This week, I have another talk at [PyData Global](https://global.pydata.org/) + about CML. PyData Global is online for the first time ever and promises to be + a great gathering for Python-using data scientists in industry and academic + research alike. + +## From the community + +Here are some of our favorite happenings around the MLOps community this week. + +### A new online course + +[Goku Mohandas](https://twitter.com/GokuMohandas), founder of +[Made with ML](https://twitter.com/madewithml), announced plans to release a new +online course about putting ML in production. The curriculum will cover +everything from experiment tracking to deploying and monitoring models in +production, and you can expect DVC to be included! Keep an eye on Goku and Made +with ML on Twitter for updates. + +https://twitter.com/GokuMohandas/status/1315990996849627136 + +### Our favorite blogs + +[Dr. 
Larysa Visengeriyeva](https://twitter.com/visenger), creator of the +top-notch +["Awesome MLOps" GitHub repo](https://github.com/visenger/awesome-mlops), and +DevOps expert Anja Kammer wrote a must-read essay about CI/CD for ML (note: it's +published in German; I used Chrome's built-in translation to read in English). + +The blog covers key concepts like continuous integration, deployment, and +training with ML, as well as practical approaches and sample architectures. + + + +_Also_, there's some cool art. + +![](../uploads/images/2020-11-08/mlops_diagram.png) + +Another blog on our radar: [Sean Lopp](https://twitter.com/lopp_sean) at +[RStudio](https://twitter.com/rstudio) made the first known blog about a CML +report with a ggplot! Using RStudio's +[GitHub Actions for R](https://github.com/r-lib/actions) and CML, Sean built a +sample data science workflow that runs automatically in GitHub Actions on a +push. He reports on some pros, cons, and areas for future development to make R +language data science easy to automate. + + + +Finally, developer [Petr Stribny](https://twitter.com/stribny) wrote about how +to version big files in a Git project with DVC. It's a short-and-sweet guide to +getting started, and if you're trying to decide if DVC is for you, this is worth +a look. + + + +### A nice tweet + +To wrap it up, here's a kind tweet that we really like. It's always good to be +mentioned in the same tweet as some of our heroes :) + +https://twitter.com/ethanjb/status/1316833012676354048 + +Thanks for reading this month! diff --git a/content/blogs/2020-11-25-november-20-community-gems.md b/content/blogs/2020-11-25-november-20-community-gems.md new file mode 100644 index 0000000000..3ad37077d4 --- /dev/null +++ b/content/blogs/2020-11-25-november-20-community-gems.md @@ -0,0 +1,212 @@ +--- +title: November '20 Community Gems +date: 2020-11-25 +description: > + A roundup of technical Q&A's from the DVC community. 
This month, learn how to + clean your cache and use Git hooks with DVC. And here's an early holiday gift- + new Bitbucket support for CML! +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, learn how to + clean your cache and use Git hooks with DVC. And here's an early holiday gift- + new Bitbucket support for CML! +picture: 2020-11-25/cover.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/november-20-community-gems/566 +tags: + - Community Gems + - CML + - Cache + - Bitbucket +--- + +## DVC questions + +### [Q: If I checkout a different Git branch, how do I synchronize with DVC?](https://discord.com/channels/485586884165107732/485596304961962003/773498570795778058) + +Here's what we recommend: when you checkout a different Git branch in your +project: + +```dvc +$ git checkout -b +``` + +you'll want to next run + +```dvc +$ dvc checkout +``` + +to synchronize your `.dvc` files on that branch. But _did you know_ you can +automate this with a `post-checkout` Git hook? We've got a hook that executes +`dvc checkout` whenever you run `git checkout`, so you'll always have the +correct data file versions. Head to our docs to +[read up on installing Git hooks into your DVC repository](https://dvc.org/doc/command-reference/install#install) +so you never forget to `dvc checkout`! + +### [Q: I have a big, 100 GB directory. I want to know where the contents are located so I can open them with Spark- is there a way to get the location of my files without caching them locally?](https://discord.com/channels/485586884165107732/485596304961962003/771386223403073587) + +For this, we'd recommend the +[DVC Python API](https://dvc.org/doc/api-reference/get_url#dvcapiget_url)'s +`get_url` function. For example, in a Python script you'd write: + +```python +import dvc.api + +resource_url = dvc.api.get_url( + "", + repo="https://github.com/") +) +``` + +This code means the API will return the URL for a file that ends in `.dir`. 
The
+`.dir` file contains a JSON-formatted table of the hashes and relative paths for
+all the files inside ``. You could then parse that file to
+get the relative paths to the files in your remote storage.
+
+The JSON object will look something like this, for a file `foo/bar` in your
+project:
+
+```json
+{ "md5": "abcd123", "relpath": "foo/bar" }
+```
+
+Then you can convert the relative path to `foo/bar` to an absolute path as
+follows:
+
+```dvc
+https:///ab/cd123
+```
+
+To better understand how DVC uses
+[content-addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage)
+in your remote,
+[read up in our docs](https://dvc.org/doc/user-guide/dvc-internals#structure-of-the-cache-directory).
+
+### [Q: Can I have more than one `dvc.yaml` file in my project?](https://discord.com/channels/485586884165107732/563406153334128681/777946398250893333)
+
+By default, DVC pipelines record all your stages (and their inputs and outputs)
+in a single file, `dvc.yaml`. Per directory, you can have one `dvc.yaml` file.
+If you want to run pipelines in a different folder than your project root, you
+could create another `dvc.yaml` in a subdirectory.
+
+However, `dvc.yaml` is intended to be the only file you need to record and
+reproduce pipelines per directory. Pipelines are designed to have all stages
+stored in the same place, and there's currently no method to rename `dvc.yaml`.
+
+### [Q: How can I untrack a file that's being tracked by DVC? I want to remove it from remote storage and my local cache, too.](https://discord.com/channels/485586884165107732/563406153334128681/773277514717462548)
+
+If you want to untrack a file, perhaps something you added to DVC in error, you
+can use `dvc remove` to get rid of the `.dvc` file corresponding to your file,
+and then clear your DVC cache with `dvc gc -w --cloud`. 
+[Check out our docs](https://dvc.org/doc/user-guide/how-to/stop-tracking-data)
+to learn more about `dvc gc` and what its flags mean (you'll want to be sure you
+know what you're doing, since cache cleaning deletes files permanently!).
+
+Alternatively, you can manually find and delete your files:
+
+1. Find the file using its hash from the corresponding `.dvc` file (or, if it's
+ part of a pipeline, the `dvc.lock` file).
+2. Look in your remote storage and remove the file matching the hash.
+3. Look in `.dvc/cache` and remove the file as well. If you'd like to better
+ understand how your cache is organized,
+ [we have docs for that](https://dvc.org/doc/user-guide/dvc-internals#structure-of-the-cache-directory).
+
+Your DVC remote storage and cache are simply storage locations, so once your
+file is gone from there it's gone for good.
+
+### [Q: My DVC cache is getting a bit big. Can I clean it?](https://discord.com/channels/485586884165107732/563406153334128681/771275051382341674)
+
+Definitely. Have you seen the command `dvc gc`? It helps you clean your local
+cache- [read up here](https://dvc.org/doc/command-reference/gc). This function
+lets you get granular about what you're keeping; for example, you can instruct
+`dvc gc` to preserve cache files that are currently used in your local workspace,
+tips of Git branches, tagged Git commits or all Git commits. Everything else
+will be removed.
+
+One word of caution: make sure that when you collect garbage from your cache,
+you don't delete any files that you haven't yet pushed to a remote. If this
+happens, you'll delete them permanently. To be safe, it never hurts to
+`dvc push` your files of interest before cleaning.
+
+## CML questions
+
+### [Q: Does CML support Bitbucket?](https://github.com/iterative/cml/issues/140)
+
+We've just rolled out Bitbucket Cloud support! 
There are brand new docs in the CML
+project repo,
+[so check them out](https://github.com/iterative/cml/wiki/CML-with-Bitbucket-Cloud)
+to get started. A few quick notes to keep in mind:
+
+1. Like GitLab, Bitbucket Cloud requires you to create a token for authorizing
+ CML to write comments. Make sure you don't forget this step (it's in the
+ docs!) or you'll surely hit a permissions error.
+
+2. Bitbucket Cloud uses Bitbucket Pipelines for continuous integration
+ workflows, which
+ [currently doesn't support self-hosted runners](https://jira.atlassian.com/browse/BCLOUD-16995).
+ That means
+ [bringing your own GPUs is not supported](https://community.atlassian.com/t5/Bitbucket-questions/Does-bitbucket-pipe-support-GPUs-yet/qaq-p/1042659).
+ Sorry! But you can still have all the other CML benefits of plots, tables and
+ text in your Pull Request.
+
+3. Bitbucket Server support (with Jenkins and Bamboo) is under active
+ development. Stay tuned!
+
+![](../uploads/images/2020-11-25/bitbucket_cloud_pr.png)_Now your Bitbucket PRs
+can be as pretty as you._
+
+### [Q: Can I use CML with Windows runners?](https://discord.com/channels/485586884165107732/728693131557732403/772519007894765600)
+
+While all our CML tutorials and docs use Ubuntu runners of various flavors,
+there's no problem with using Windows runners. Both
+[GitHub Actions](https://docs.github.com/en/free-pro-team@latest/actions/reference/specifications-for-github-hosted-runners)
+and
+[GitLab CI](https://about.gitlab.com/blog/2020/01/21/windows-shared-runner-beta/)
+have Windows runners up for grabs. And of course, you can set up your own
+Windows machine as a self-hosted runner (see the self-hosted runner docs for
+your CI system to learn more).
+
+What if you have a GPU? If you want to use
+[`nvidia-docker` to put GPU drivers in your container](https://dvc.org/blog/cml-self-hosted-runners-on-demand-with-gpus),
+you'll want to use `nvidia-docker` with the Windows Subsystem for Linux (WSL). 
+That means you'll first install an Ubuntu subsystem on your Windows machine,
+then all your Nvidia drivers, then Docker and `nvidia-docker`. Check out some
+[more docs about CUDA with WSL](https://docs.nvidia.com/cuda/wsl-user-guide/index.html)
+to learn more.
+
+### [Q: I'm using CML to deploy a self-hosted runner with GitLab. I noticed that in your docs, the runner is always set to timeout after 1800 seconds, and then it gets unregistered from GitLab. What if I want to keep my runner registered after the job ends?](https://discord.com/channels/485586884165107732/728693131557732403/779317571354099722)
+
+With CML, we introduced an approach using Docker Machine to provision instances
+in the cloud, and then use `docker run` to register them as self-hosted runners to
+complete your workflow. As this question points out, we like to set runners to
+timeout after 1800 seconds- that's why you'll see this code in our
+[sample "Cloud GPU" workflow](https://github.com/iterative/cml_cloud_case/blob/master/.github/workflows/cml.yaml):
+
+```dvc
+$ sudo docker run --name myrunner -d --gpus all \
+ -e RUNNER_IDLE_TIMEOUT=1800 \
+ -e RUNNER_LABELS=cml,gpu \
+ -e RUNNER_REPO=$CI_SERVER_URL \
+ -e repo_token=$REGISTRATION_TOKEN \
+ -e RUNNER_DRIVER=gitlab \
+ iterativeai/cml:0-dvc2-base1-gpu runner
+```
+
+We did this so you'll avoid running up GPU hours and a big bill. If you're not
+worried about that, though, you can set the environment variable
+`RUNNER_IDLE_TIMEOUT` in the `dvcorg/cml` container to 0. Then, your self-hosted
+runner will stay on forever, or at least until you manually turn it off.
+
+By the way... stay tuned for a big update here. We're currently replacing the
+Docker Machine approach with a method based on Terraform, and we can't wait to
+unveil it. It should make deploying cloud instances on AWS, GCP and Azure work
+with less code than ever.
+
+### Q: What did DeeVee do for Thanksgiving?
+
+She stayed home and made mashed potatoes. 
+ +![](../uploads/images/2020-11-25/deevee_n_taters.png) + +That's all for now, everyone! As always, keep in touch with all your questions +big and small. diff --git a/content/blogs/2020-11-26-dvc-vs-rclone.md b/content/blogs/2020-11-26-dvc-vs-rclone.md new file mode 100644 index 0000000000..043b9b08ab --- /dev/null +++ b/content/blogs/2020-11-26-dvc-vs-rclone.md @@ -0,0 +1,431 @@ +--- +title: 'Cloud Data Sync Methods and Benchmark: DVC vs Rclone' +date: 2020-11-26 +description: > + DVC 1.0 optimized data synchronization to and from remote storage. Here's how + we did it. +descriptionLong: > + Synchronizing data to and from remote storage requires addressing an often + overlooked performance bottleneck: Determining which files to upload and + download. Here we'll outline the general methods used to solve this problem, + and investigate each method's effects on performance by comparing benchmark + results from DVC and rclone. We'll then conclude with a more in-depth + explanation of the optimizations made in DVC 1.0 which enabled us to + outperform both older DVC releases as well as general data sync tools like + rclone. +commentsUrl: https://discuss.dvc.org/t/cloud-data-sync-methods-and-benchmark-dvc-vs-rclone/562 +tags: + - Rclone + - Performance + - Engineering + - Benchmark + - Tutorial +picture: 2020-11-26/header.png +author: peter_rowlands +--- + +Many general-use tools are available for synchronizing data to and from cloud +storage, some widely used options are [rsync](https://rsync.samba.org/), +[rclone](https://rclone.org/) and +[aws sync](https://docs.aws.amazon.com/cli/latest/reference/s3/sync.html), each +with their own advantages and disadvantages. Likewise, in [DVC](/) we provide +the ability to efficiently sync versioned datasets to and from cloud storage +through a git-like push and pull +[interface](https://dvc.org/doc/start/data-management/data-versioning). 
+ +Given that transferring data over a network to and from cloud storage is an +inherently slow operation, it's important for data sync tools to optimize +performance wherever possible. While the data transfer itself may be the most +apparent performance bottleneck in the data sync process, **here we'll cover a +less obvious performance issue: How to determine which files to upload and +download.** + +In this post, we'll outline the general methods used to solve this problem, and +investigate each method's effects on performance by comparing benchmark results +from DVC and rclone. We'll then conclude with a more in-depth explanation of new +optimizations made in DVC 1.0 which enabled us to outperform both older DVC +releases as well as general data sync tools (like rclone). + +_Note: "Cloud storage" and "remote storage" will be used interchangeably +throughout this post. When discussing dataset size in this post, we mean size in +terms of total number of files in a dataset, rather than the total amount of +file data (bytes)._ + +### Outline + +- [Why a "trivial" problem has a not-so-trivial performance impact](#why-a-trivial-problem-has-a-not-so-trivial-performance-impact) +- [Real-world numbers - DVC and rclone performance examples](#real-world-numbers---dvc-and-rclone-performance-examples) +- [How DVC 1.0 speeds things up](#how-dvc-10-speeds-things-up) +- [Conclusion](#conclusion) + +## Why a "trivial" problem has a not-so-trivial performance impact + +At the start of any data sync operation, we must first do the following steps, +in order to determine which files to upload and download between the local +machine and cloud storage: + +1. Determine which files are present locally. +2. Query the cloud storage API to determine which files are present in the + cloud. +3. Compute the difference between the two sets of files. 
+ +Once this difference in file status has been determined, the necessary files can +be copied to or from cloud storage as needed ("file status" meaning file +existence as well as other potential status information, such as modification +time). **While this may seem like a trivial problem, the second step is actually +a significant potential performance bottleneck.** + +In general, cloud storage APIs provide two possible ways to determine what files +are present in cloud storage, and it's up to the data sync tool to select which +method to use. Even for an operation as simple as synchronizing a single local +file to cloud storage, choosing incorrectly between these two options could +actually mean the difference between that "simple" operation taking several +hours to complete instead of just a few seconds. + +_Note: The term "file status query" will be used throughout this post when +referring to this type of cloud storage API query._ + +### Method 1: Query individual files + +The first query method is to individually check whether or not particular files +exist in cloud storage, one at a time. + +_Ex: The S3 API provides the `HeadObject` method.`_ + +When using this method, performance depends on the number of files being +queried - for a single file, it would take a single API request, for 1 million +files, it would take 1 million API requests. In this case, the overall amount of +time it will take to complete the full operation will scale with the number of +files to query. + +One particular advantage to using this method is that it can be easily +parallelized. Overall runtime can be improved by making simultaneous API +requests to query for multiple files at once. + +### Method 2: Query full remote listing + +The second query method is to request the full listing of files present in cloud +storage, all at once. 
+ +_Ex: The S3 API provides the `ListObjects` method._ + +With this method, the overall amount of time it will take to complete the full +operation scales with the total number of files in cloud storage, rather than +the number of files we wish to query. + +It's important to note that when using this method, cloud APIs will only return +a certain number of files at a time (the amount returned varies depending on the +API). This means that for an API which returns 1000 files at a time (such as +S3), retrieving the full listing of a remote containing 1000 files or less would +would only take a single API request. Listing a remote which contains 1 million +files would take 1000 API requests. + +Another important note is that API calls for this method must be made +sequentially and can't be easily parallelized. Using S3 as an example, the first +API call would return files 0 through 999. The next call would return files 1000 +through 1999, and so on. However, the API provides no guarantee of ordering, and +API calls must be made sequentially, until the full list has been retrieved. So +we can't make two simultaneous requests for both "files 1-999" and "files +1000-1999". + +### How selecting one method or the other can drastically improve performance + +Consider an example scenario where a dataset being synchronized contains 100 +local files, and we need to check which of those files exist in cloud storage. +For the purposes of this example, we'll also assume that all individual API +calls take the same amount of time to complete, and that we are not running any +tasks in parallel. Additionally, let's say that our example cloud storage API +returns 1000 files per page when using query method 2. + +In this situation, we know that the first query method will always take a fixed +number of API calls to complete (100). The number of API calls required for the +second query method depends on the total number of files that already exist in +the remote. 
+ +Since we know that the API returns 1000 results per API call, we can say that if +the remote contains less than `1000 * 100 = 100,000` files, fetching the full +remote listing (method 2) will be faster than checking each file individually, +since it will take less than 100 API calls to complete. In the case that the +remote contains 1000 or less files, method 2 would only require a single API +call (potentially outperforming method 1 by 100x). + +However, if the remote contains anything over this 100,000 threshold, method 1 +will be faster than method 2, with the difference in performance between the two +methods scaling linearly as the potential remote size increases. + +**Total API calls required to query 100 local files from S3** +![API calls](../uploads/images/2020-11-26/api_calls_100_local.svg 'API calls required to query 100 local files from S3') + +This example illustrates an important point. Given a (relatively) small set of +files to query and a sufficiently large remote, method 1 will always be faster +than method 2. + +Thinking about it from a different perspective, what happens if we have the +ability to reduce the size of a (relatively) large query set? + +Once our query set is smaller than a certain threshold, we'll be able to use +method 1 rather than method 2. On top of that, we know that the runtime of +method 1 scales with query set size. **In simple terms, by reducing the size of +our query set as much as possible, we can also improve performance.** + +So, as we have shown, choosing the optimal method depends on both: + +- The number of files that we need to query. +- The total number of files in the remote. 
+ +_Note: In terms of real world performance, there are other considerations that +DVC must account for, such as different API calls taking different amounts of +time to complete, parallelization, and the amount of time it takes to run list +comparison operations in Python._ + +## Real-world numbers - DVC and rclone performance examples + +Now let's take a look at some real-world numbers to examine the impact selecting +one query method or the other has on data sync performance in DVC and rclone. +Both tools can utilize either potential query method, with some differences: + +- In rclone, the user can specify the `--no-traverse` option to select the first + query method, otherwise rclone will default to the second method in most + situations (with the exception being cases with very small query set sizes). +- In DVC prior to 1.0, the first query method would be used by default for all + supported cloud storage platforms except Google Drive, and the user could + specify one method or the other via the `no_traverse` configuration option. +- **In DVC 1.0 and later, the optimal query method is selected automatically.** + +In the following scenarios, we are simulating the typical DVC use case in which +a user tracks a local directory containing some number of files using DVC, and +then synchronizes the DVC-tracked directory to cloud storage (S3 in these +examples) using either DVC or rclone. The user would then continually repeat a +process of: + +1. Modify a small subset of files in the directory. +2. Push the updated version of the directory into cloud storage. + +Keep in mind that for DVC's purposes, we are most interested in optimizing +performance for scenarios which are normally very slow to complete. If you +consider an operation which previously took several hours to complete, improving +that runtime down to a few minutes will have a much greater impact for our users +versus shaving a few seconds off of an operation which previously took under a +minute to run. 
+ +_Note: For these benchmarks we are only interested in the amount of time +required to determine file status for this one-way push operation. So the +runtimes in each case are for status queries only (using `dvc status -c` in DVC +and `rclone copy --dry-run` in rclone). No file data was transferred to or from +S3 in any of these scenarios._ + +_Benchmark command usage:_ + +```dvc +$ time dvc status -c -r remote +$ time rclone copy --dry-run --progress --exclude "**/**.unpacked/" .dvc/cache remote:... +``` + +_rclone run with `--no-traverse` where indicated_ + +_Benchmark platform: Python 3.7, macOS Catalina, DVC installed from pip, +dual-core 3.1GHz i7 cpu_ + +**Local directory w/100k total files, S3 bucket w/1M total files (1 file +modified since last sync)** +![benchmarks](../uploads/images/2020-11-26/dvc_rclone_bench.svg 'DVC 1.0 vs rclone performance comparison') + +The previous chart contains benchmarks for a scenario in which the local +directory contains 100,000 files, and the S3 bucket contains approximately 1 +million files. One file in the local directory has been modified since the +directory was last synchronized with the S3 bucket. This scenario tests the +length of time it takes DVC or rclone to determine (and report to the user) that +only the one modified file is missing from the S3 bucket and needs to be +uploaded. + +This illustrates DVC's performance advantage over rclone with regard to +synchronizing iterations of a versioned dataset over time, as well as the DVC +1.0 performance improvements over prior releases. + +_Note: In these examples, the local file count refers to the number of files +inside the original tracked directory. The number of files present in the DVC +cache will differ slightly, since the DVC cache will contain an additional file +representing the tracked directory itself, but the end result is that both DVC +and rclone will both need to query for the same number of files (i.e. 
the number +of files in the cache directory)._ + +**Local directory w/1 file, S3 bucket w/1M total files** +![benchmarks](../uploads/images/2020-11-26/dvc_rclone_bench2.svg 'DVC 1.0 vs rclone performance comparison') + +In this example, we are testing a simple scenario in which the local directory +contains 1 file and the S3 bucket contains approximately 1 million files. + +In this case, in DVC 0.91 we essentially get lucky that our default choice for +S3 happens to be the first query method. If we ran this same scenario with a +Google Drive remote (where the 0.91 default choice is the second query method) +instead of S3, we would see a very long runtime for DVC 0.91. + +Also note that here, rclone is able to determine that with a single local file +to query, it should use the first query method instead of defaulting to the +second method. + +_Note: We are unsure of the reason for the rclone runtime difference with and +without `--no-traverse` for this scenario, but rclone does do some computation +to determine whether or not to default to `no-traverse` behavior for small query +sets. It's likely that specifying `--no-traverse` allows rclone to skip that +overhead entirely in this case._ + +**Local directory w/1M files, Empty S3 bucket** +![benchmarks](../uploads/images/2020-11-26/dvc_rclone_bench3.svg 'DVC 1.0 vs rclone performance comparison') +_Note: DVC 0.91 and rclone with `--no-traverse` both take multiple hours to +complete in this scenario and continue off of the chart._ + +In this example, we are testing a simple scenario in which the local directory +contains approximately 1 million files and the S3 bucket is empty. + +The difference in rclone runtime with or without `--no-traverse` in this +scenario shows the performance impact of selecting the optimal query method for +a given situation. + +This scenario also shows that rclone can outperform DVC with regard to +collecting the list of local files during certain types of sync operations. 
In +this case, rclone simply iterates over whatever files exist in the local +directory without doing any additional steps, since our benchmark uses a one-way +`rclone copy` operation. + +However, in DVC, we have some extra overhead for this step, since we collect the +list of files expected to be present in the current DVC repository revision, and +then verify that those files are present locally. We would then check to see if +any missing files are available to be downloaded from remote storage. + +It should also be noted that in common use cases where the number of files in +cloud storage continues to grow over time (such as in backup solutions or in +dataset versioning), rclone's advantage in this case would only apply for this +initial sync operation. Once the local dataset has been pushed to cloud storage, +DVC's advantage in synchronizing modifications to existing datasets would become +more apparent (as shown in the first example). + +## How DVC 1.0 speeds things up + +So I hope that by now you're curious about DVC, and are planning on using (or +maybe even already are using 😀) it to sync your files. For those who are +wondering where the magic actually happens, let's dive a bit deeper into how DVC +stores files, and how we were able to leverage that storage format to implement +query performance optimzations in DVC 1.0. (This will also be a useful primer +for anyone interested in learning about DVC internals in general.) + +Previously, we have established that: + +- Selecting the right query method will have a significant performance impact. +- Reducing the number of files to query will improve performance. + +In this section, we'll cover the ways in which DVC 1.0 has directly addressed +both of these key points: + +- Automatically selecting the optimal query method for any given sync operation. +- Indexing cloud storage remotes to eliminate the need to query for already + synchronized files. 
+ +### DVC storage structure + +Before continuing, it will be helpful for the reader to understand a few things +about the DVC cache and remote storage structure. + +``` +. +├── 00 +│ ├── 411460f7c92d2124a67ea0f4cb5f85 +│ ├── 6f52e9102a8d3be2fe5614f42ba989 +│ └── ... +├── 01 +├── 02 +├── 03 +├── ... +└── ff +``` + +_Example DVC cache/remote structure_ + +- Files versioned by DVC are identified and stored in subdirectories according + to their [MD5](https://en.wikipedia.org/wiki/MD5) hash (i.e. + [content addressable storage](https://en.wikipedia.org/wiki/Content-addressable_storage)). +- MD5 is an + [evenly distributed](https://michiel.buddingh.eu/distribution-of-hash-values) + hash function, so the DVC cache (and DVC remote storage) will be evenly + distributed (i.e. given a large enough dataset, each remote subdirectory will + contain an approximately equal number of files) + +### How DVC 1.0 automatically selects a query method + +In DVC, the number of files we need to query is just the number of files for a +given project revision. So, as long as we can estimate the number of files in a +DVC remote, we can programmatically choose the optimal query method for a remote +operation. + +In DVC 1.0, we accomplish this by taking advantage of the DVC remote structure. +The over/under remote size threshold only depends on the number of files being +queried (i.e. the number of files in our DVC versioned dataset). And as we have +already established, a DVC remote will be evenly distributed. Therefore, if we +know the number of files contained in a subset of the remote, we can then +estimate the number of files contained in the entire remote. + +For example, if we know that the remote subdirectory `00/` contains 10 files, we +can estimate that the remote contains roughly `256 * 10 = 2,560` files in total. 
+So, by requesting a list of one subdirectory at a time (rather than the full +remote) via the cloud storage API, we can calculate a running estimate of the +total remote size. If the running estimated total size goes over the threshold +value, DVC will stop fetching the contains of the remote subdirectory, and +switch to querying each file in our dataset individually. If DVC reaches the end +of the subdirectory without the estimated size going over the threshold, it will +continue to fetch the full listing for the rest of the remote. + +By estimating remote size in DVC 1.0, we can ensure that we always use the +optimal method when querying remote status. + +### How DVC 1.0 uses indices to reduce the number of files to query + +A common DVC use case is +[versioning](https://dvc.org/doc/use-cases/versioning-data-and-model-files) the +contents of a large directory. As the contents of the directory changes over +time, DVC will be used to push each updated version of the directory into cloud +storage. In many cases, only a small number of files within that directory will +be modified between project iterations. + +So after the first version of a project is pushed into cloud storage, for +subsequent versions, only the small subset of changed files actually needs to be +synchronized with cloud storage. + +Consider a case where a user has an existing directory with 1 million files +which has been versioned and pushed to a remote with DVC. In the next iteration +of the project, only a single file in the directory has been modified. We can +obviously see that everything other than the one modified file will already +exist in cloud storage. Ideally, we should only need to query for the single +modified file. + +However, in DVC releases prior to 1.0, DVC would always need to query for every +file in the directory, regardless of whether or not a given file had changed +since the last time it was pushed to remote storage. 
+ +But in DVC 1.0, we now keep an index of directories which have already been +versioned and pushed into remote storage. By referencing this index, DVC will +"remember" which files already exist in a remote, and will remove them from our +query set at the start of a data sync operation (before we choose a query +method, and before we make any cloud storage API requests). + +_Note: This optimization only applies to DVC versioned directories. Individually +versioned files (including those added with `dvc add -R`) are not indexed in DVC +1.0, and will always be queried during remote operations._ + +## Conclusion + +By utilizing a storage structure that allows for optimized status queries, DVC +makes data synchronization incredibly fast. Coupled with the ability to quickly +identify which files remain unchanged between sync operations, DVC 1.0 is a +powerful data management tool. + +Whether you are upgrading from a prior DVC release, or trying DVC for the first +time, we hope that all of our users are able to benefit from these new +optimizations. DVC performance is an important issue, and our team is looking +forward to working on further +[performance optimizations](https://github.com/iterative/dvc/labels/performance) +in the future - across all areas in DVC, not just remotes. + +As always, if you have any questions, comments or suggestions regarding DVC +performance, please feel free to connect with the DVC community on +[Discourse](https://discuss.dvc.org/), [Discord](https://dvc.org/chat) and +[GitHub](https://github.com/iterative/dvc). 
diff --git a/content/blogs/2020-12-18-december-20-dvc-heartbeat.md b/content/blogs/2020-12-18-december-20-dvc-heartbeat.md new file mode 100644 index 0000000000..c4dd7c611d --- /dev/null +++ b/content/blogs/2020-12-18-december-20-dvc-heartbeat.md @@ -0,0 +1,170 @@ +--- +title: December ’20 Heartbeat +date: 2020-12-18 +description: > + Monthly updates are here- read all about our brand new video docs, the DVC + Udemy course, open jobs with our team, and essential reading about Git-flow + with DVC. +descriptionLong: > + Monthly updates are here- read all about our brand new video docs, the DVC + Udemy course, open jobs with our team, and essential reading about Git-flow + with DVC. +picture: 2020-12-18/cover.png +pictureComment: | + This holiday season, show your loved ones + you care with our new shirt. + +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/december-20-heartbeat/585 +tags: + - Heartbeat + - CML + - DVC + - Udemy + - MLOps +--- + +## News + +Welcome to the December Heartbeat! Let's dive in with some news from the team. + +### We're still hiring + +Our search continues for two roles: + +- A + [**Senior Software Engineer**](https://weworkremotely.com/remote-jobs/iterative-senior-software-engineer-open-source-dev-tools-3) + for the core DVC team- someone with strong Python development skills who can + build and ship essential DVC features. + +- A + [**Developer Advocate**](https://weworkremotely.com/remote-jobs/iterative-developer-advocate) + to support and inspire developers by creating new content like blogs, + tutorials, and videos- plus lead outreach through meetups and conferences. + +Does this sound like you or someone you know? Be in touch! + +### Video docs complete! + +As you may have heard +[last month](https://dvc.org/blog/november-20-dvc-heartbeat), we've been working +on adding complete video docs to the "Getting Started" section of the DVC site. +We now have 100% coverage! 
We have videos that mirror the tutorials for: + +- [Data versioning](https://dvc.org/doc/start/data-and-model-versioning) - how + to use Git and DVC together to track different versions of a dataset + +- [Data access](https://dvc.org/doc/start/data-and-model-access) - how to share + models and datasets across projects and environments + +- [Pipelines](https://dvc.org/doc/start/data-pipelines) - how to create + reproducible pipelines to transform datasets to features to models + +- [Experiments](https://dvc.org/doc/start/experiments) - how to do a `git diff` + for models that compares and visualizes metrics + +![Mission Accomplished GIF by memecandy](https://media.giphy.com/media/L4ZZNbDpOCfiX8uYSd/giphy.gif) + +The +[full playlist is on our YouTube channel](https://www.youtube.com/playlist?list=PL7WG7YrwYcnDb0qdPl9-KEStsL-3oaEjg)- +where, by the way, we've recently passed 2,000 subscribers! Thanks so much for +your support. There's much more coming up soon. + +### Collaboration with GitLab + +We recently released a new blog with GitLab all about using [CML](cml.dev) with +GitLab CI. + +https://twitter.com/gitlab/status/1334631001956487171 + +You may notice that the tweet spelled our name differently, and since Twitter +doesn't have an edit button, I think that means we're "Interative" now. +[Hurry up and get your merch!](https://www.zazzle.com/t_shirt-235920696568133954) + +![](../uploads/images/2020-12-18/newname.png) + +### Workshops + +We gave a workshop at a virtual meetup held by the +[Toronto Machine Learning Society](https://mlopsworld.com/about-us/), and you +can catch a video recording if you missed it. This workshop was all about +getting started with GitHub Actions and CML! It starts with some high-level +overview and then gets into live-coding. 
+ +https://youtu.be/51H13lfHdMw + +## From the community + +There's no shortage of cool things to report from the community: + +### The DVC Udemy Course + +Now you can learn the fundamentals of machine learning engineering, from +experiment tracking to data management to continuous integration, with DVC and +Udemy! Data scientists/DVC ambassadors +[Mikhail Rozhkov](https://www.udemy.com/user/mnrozhkov/) and +[Marcel Ribeiro-Dantas](https://www.udemy.com/user/marcel-da-camara-ribeiro-dantas/) +created a course full of +[practical tips and tricks for learners of all levels](https://www.udemy.com/course/machine-learning-experiments-and-engineering-with-dvc/?referralCode=68BEB2A7E246A54E5E35). + + + +### A proposal for Git-flow with DVC + +[Fabian Rabe](https://www.uni-augsburg.de/en/fakultaet/fai/informatik/prof/swtpvs/team/fabian-rabe/) +at [Universität Augsburg](https://www.uni-augsburg.de/en/) wrote a killer doc +about his team's tried-and-true approach to creating a workflow for a DVC +project. He writes, + +> Over the past couple of months we have started using DVC in our small team. +> With a handful of developers all coding, training models & committing in the +> same repository, we soon realized the need for a workflow. + +The post outlines three strategies his team adopted: + +1. Create a "debugging dataset" containing a subset of your data, with which you + can test your complete DVC pipeline locally on a developer's machine + +2. Use CI-Runners to execute the DVC pipeline on the full dataset + +3. Adopt a naming convention for Git branches that correspond to machine + learning experiments, in addition to the usual feature branches + +Agree? Disagree? Fabian is actively soliciting feedback on his proposal (and +possible solutions for some unresolved issues), so please read and +[chime in on our discussion board](https://discuss.dvc.org/t/git-flow-for-dvc/578/6). 
+ + + +### Channel 9 talks Machine Learning and Python + +[The AI Show on Channel 9](https://channel9.msdn.com/Shows/AI-Show), part of the +Microsoft DevRel universe, put out an episode all about ML and scientific +computing with Python featuring [Tania Allard](https://twitter.com/ixek) and +[Seth Juarez](https://twitter.com/sethjuarez). Their episode includes how DVC +can fit in this development toolkit, so check it out! + + + +### A nice tweet + +We'll end on a tweet we love: + +https://twitter.com/iamjoyheron/status/1336698583689596929 + +This beautiful diagram, made by [Joy Heron](https://twitter.com/iamjoyheron) in +response to a talk by [Dr. Larysa Visengeriyeva](https://twitter.com/visenger) +about MLOps, is a wonderful encapsulation of the many considerations (at many +scales) that go into ML engineering. Do you see DVC in there? 🕵️ + +Thank you for reading, and happy holidays to you! ❄️ 🎁 ☃️ diff --git a/content/blogs/2020-12-30-december-20-community-gems.md b/content/blogs/2020-12-30-december-20-community-gems.md new file mode 100644 index 0000000000..7a34f5c178 --- /dev/null +++ b/content/blogs/2020-12-30-december-20-community-gems.md @@ -0,0 +1,196 @@ +--- +title: December '20 Community Gems +date: 2020-12-30 +description: > + A roundup of technical Q&A's from the DVC community. This month, read about + custom DVC plots, teamwork with DVC, CML without Docker, and maintaining + several pipelines in parallel! +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month, read about + custom DVC plots, teamwork with DVC, CML without Docker, and maintaining + several pipelines in parallel! 
+picture: 2020-12-30/cover.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/december-20-gems/606 +tags: + - Community Gems + - CML + - Plots + - Pipelines + - Docker +--- + +## DVC questions + +### [Q: Is there a way to plot all columns in a `.csv` file on a single graph using `dvc plot`?](https://discord.com/channels/485586884165107732/563406153334128681/768689062314770442) + +By default, `dvc plot` graphs one or two columns from the metric file of your +choice (use the `-x` and `-y` flags to specify which columns). + +However, there's nothing special about the way DVC makes plots. The plot +function is a wrapper for the [Vega-Lite](https://vega.github.io/vega-lite-v1/) +grammar, which can make pretty much any kind of plot you can imagine. If you +check inside `.dvc/plots/`, you'll see a few Vega-Lite template files- that's +where the plotting instructions are stored! + +You can create your own, or modify the existing templates, by +[following the instructions in our docs](https://dvc.org/doc/command-reference/plots#plot-templates). +In short, you'll create a new template and then run +`dvc plot show -t ` to use it! + +Vega-Lite has an +[interactive template editor online](https://vega.github.io/editor/#/), which +might help you test out ideas. Happy creating, and if you come up with a +template you'd like to share with the DVC community, +[consider opening a pull request!](https://github.com/iterative/dvc) + +### [Q: My teammate and I are having some issues keeping our workplaces synced. We're tracking some folders with DVC, and he recently added a new file to each of these folders. How does he update the tracked folder and push the new contents so I can access them, too?](https://discord.com/channels/485586884165107732/563406153334128681/785965719367843860) + +Your partner should first run + +```dvc +$ dvc add +$ dvc push +``` + +to update DVC about the new file and then push its contents to remote storage. 
+Next, they'll run: + +```dvc +$ git commit .dvc +$ git push +``` + +to update your shared Git repository. Then you can do a `git pull` and +`dvc pull` to sync the changes with your local workspace! + +### [Q: I forgot to declare a metric output in my `dvc.yaml` file, so one of my metrics is currently untracked. How can I fix this without rerunning the stage? It takes a long time to run.](https://discord.com/channels/485586884165107732/485596304961962003/781643749050155009) + +No problem- what you'll want to do is edit your `dvc.yaml` case and then run +`dvc commit dvc.yaml` to store the change. + +`dvc commit` is a helpful function that updates your `dvc.lock` file and `.dvc` +files as needed, which forces DVC to accept any modifications to tracked data +currently in your workspace. That should cover the case where you have a metric +file from your last pipeline run in your workspace, but forgot to add it to the +`dvc.yaml` as an output! + +[Check out the docs](https://dvc.org/doc/command-reference/commit#commit) for +more about `dvc commit` and how it can help you edit pipeline dependencies as +you work. + +### [Q: Can I have multiple `dvc.yaml` files?](https://discord.com/channels/485586884165107732/485596304961962003/784083794583486496) + +Yes. The catch is that they have to be in separate directories. For example, you +can define independent pipelines in a `dvc.yaml` file each. It's also possible +to spread a single pipeline into more than one `dvc.yaml` file. DVC analyzes all +of them to rebuild the DAG(s), for example during `dvc repro`. + +### [Q: I want to work on my DVC pipeline on a different computer than usual. 
For the stage I'm developing, I don't need access to all the data dependencies of the earlier stages- is there a way to download only what I need?](https://discord.com/channels/485586884165107732/563406153334128681/788068487246512158) + +Say for example that you have a pipeline like this: + +``` ++----------+ +| data.dvc | ++----------+ + * + * + * + +----+ + | s1 | + +----+ + * + * + * + +----+ + | s2 | + +----+ + * + * + * + +----+ + | s3 | + +----+ +``` + +where stage `s2` is frozen (meaning, its dependencies will not change and we can +be reasonably sure the outputs of `s2` are static). + +To work on stage `s3` in a new workspace, you could run: + +```dvc +$ dvc pull s2 +$ dvc repro s3 +``` + +This set of commands will pull only the targeted stage (not the data +corresponding to `data.dvc`), and then execute the final stage of your pipeline +only. + +## CML questions + +### [Q: Why do you need Docker to run CML?](https://www.youtube.com/watch?v=rVq-SCNyxVc&lc=UgzohiMVxO1GKB30bad4AaABAg) + +Even though we use Docker in many of our tutorials, you technically _don't_ need +it at all! Here's what's going on: + +We use a custom Docker container that comes with the CML functions installed (as +well as some useful data science tools like Python, Vega-Lite, and CUDA +drivers). If you want to use your own Docker container, that's fine too- just +make sure you install the CML library of functions on your runner. + +To install CML as an `npm` package on your runner, we recommend: + +```dvc +npm i -g @dvcorg/cml +``` + +Once this is done, you should be able to execute functions like `cml publish` +and `cml send-comment` on your runner. + +For more tips about using CML without Docker, +[see our docs](https://github.com/iterative/cml#install-cml-as-a-package). + +### [Q: I'm using CML to print a `dvc metrics diff` to my pull request in GitHub, but I'm getting an error: `token not found`. 
What does that mean?](https://discord.com/channels/485586884165107732/728693131557732403/786382971706933258) + +Generally, `token` refers to an authorization token that grants your runner +certain permissions with the GitHub API- such as the ability to post a comment +on your pull request. If you're working in GitHub, you don't have to follow any +manual steps to create a token. But you _do_ need to make sure your +environmental variables in the workflow are named properly. + +Make sure you've specified the following field in your workflow file: + +```yaml +env: + repo_token: ${{ secrets.GITHUB_TOKEN }} +``` + +The variable must be called `repo_token` for CML to recognize it! + +A few other pointers: + +- In GitLab, you have to set a variable in your repository called `repo_token` + whose value is Personal Access token. We have + [step-by-step instructions in our docs](https://github.com/iterative/cml/wiki/CML-with-GitLab#variables). + Forgetting to set this is the #1 issue we see with first-time GitLab CI users! +- In BitBucket Cloud, you need to set a variable in your repository called + `repo_token` whose value is your API credentials. We have + [detailed docs for creating this token](https://github.com/iterative/cml/wiki/CML-with-Bitbucket-Cloud#repository-variables), + too. +- Need to see more sample workflows to get a feel for it? We have plenty + [of case studies](https://dvc.org/doc/cml#case-studies) to examine. + +### [Q: Is there any reason why an experimental DVC feature wouldn't work on the CML Docker container?](https://discord.com/channels/485586884165107732/728693131557732403/788512890394247178) + +Generally, no- the container `dvcorg/cml:latest` should have the latest DVC +release and the latest CML release (you can see where DVC and CML are installed +from in our +[Dockerfile](https://github.com/iterative/cml/blob/master/Dockerfile)). 
So +besides the time it takes for releases to be published on various package +managers, there shouldn't be any lag. That means experimental features are ready +to play on your runner! + +Note that you can also install pre-release versions of DVC- check out our +[docs about installing the latest stable version ahead of official releases](https://dvc.org/doc/install/pre-release). diff --git a/content/blogs/2021-01-20-january-21-dvc-heartbeat.md b/content/blogs/2021-01-20-january-21-dvc-heartbeat.md new file mode 100644 index 0000000000..5267801455 --- /dev/null +++ b/content/blogs/2021-01-20-january-21-dvc-heartbeat.md @@ -0,0 +1,162 @@ +--- +title: January ’21 Heartbeat +date: 2021-01-20 +description: > + Monthly updates are here! read all about our new R language tutorial, putting + DVC to work on an image segmentation pipeline, and a new fast way to setup + your DVC remote. +descriptionLong: > + Monthly updates are here! read all about our new R language tutorial, putting + DVC to work on an image segmentation pipeline, and a new fast way to setup + your DVC remote. +picture: 2021-01-20/cover.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/dvc-heartbeat-jan-21/632 +tags: + - Heartbeat + - CML + - DVC + - DAGsHub + - R + - MLOps +--- + +## News + +Welcome to the first Heartbeat of 2021! Here's some new year news. + +### We're still hiring + +Our search continues for a +[**Developer Advocate**](https://weworkremotely.com/remote-jobs/iterative-developer-advocate) +to support and inspire developers by creating new content like blogs, tutorials, +and videos- plus lead outreach through meetups and conferences. + +Does this sound like you or someone you know? Be in touch! + +### 7000 stars on GitHub + +We recently passed 7000 stars on the +[DVC GitHub repository](https://github.com/iterative/dvc)! We crossed the 7k +mark extremely close to midnight on New Year's Eve, so we probably hit it in +time for the new year in at least one time zone. 
Anyway, it made for a very +suspenseful countdown to midnight. Woot woot! + +![Make Countdown GIF](https://media.giphy.com/media/QAPFLCrpfalPi/giphy.gif) + +The repo is HQ for DVC development, meaning- if you have an issue to report, a +feature to request, or a pull request to offer, this is where you should start! + +### New video for R users + +A lot of our videos about GitHub Actions have used Python scripts, but there's +no reason to restrict [Continuous Machine Learning](https://cml.dev) to one +language. We've just released our first-ever R language video, which covers + +- How to install R on a GitHub Actions runner +- How to manage R package dependencies for continuous integration (teaser: CRAN + binaries are amazing) +- Putting a `ggplot` or a `kable` table in your pull request + +Watch and follow along! If you make something based on this approach, or if you +think there's a better way, please tell us- we're eager to see what the R +community thinks. + +https://youtu.be/NwUijrm2U2w + +### Workshops and talks + +On Friday, January 24, I (Elle) spoke with +[Alexey Grigorev](https://twitter.com/Al_Grigor) (author of a +[Data Science Bookcamp](https://mlbookcamp.com/)), on his podcast about being a +developer advocate in the machine learning space! If you're curious about what +the role entails, or what to look for when hiring a developer advocate for your +machine learning project, please come by. The event is up on YouTube, and will +soon be available as a podcast for your listening pleasure 🎧 + +https://youtu.be/jv5W4jXk4P4 + +## From the community + +As ever, we have much to share from the great citizens of the DVC community. + +### Where's Baby Yoda? + +There's a brand new blog post we love, and only half of that has to do with its +impressive collection of Baby Yoda pics. 
+[Simon Lousky](https://dagshub.com/blog/author/simon/), developer at +[DAGsHub](https://dagshub.com), published a blog provocatively titled +[_Datasets should behave like git repositories_](https://dagshub.com/blog/datasets-should-behave-like-git-repositories/). +He writes: + +> While data versioning solves the problem of managing data in the context of +> your machine learning project, it brings with it a new approach to managing +> datasets. This approach, also described as data registries here, consists of +> creating a git repository entirely dedicated to managing a dataset. This means +> that instead of training models on frozen datasets - something researchers, +> students, kagglers, and open source machine learning contributors often do - +> you could link your project to a dataset (or to any file for that matter), and +> treat it as a dependency. After all, data can and should be treated as code, +> and follow through a review process. + +We agree! Lousky goes on to show us a brilliant code example wherein he segments +instances of Baby Yoda out of frames from The Mandalorian. DVC plays a key role +in keeping track of all the Baby Yodas, which is pretty much the most important +use case we could've imagined. + +![](../uploads/images/2021-01-20/bb_yoda.png)_Found them!_ + +There's also a +[lively discussion about the post on Reddit](https://www.reddit.com/r/MachineLearning/comments/l0l0oc/p_datasets_should_behave_like_git_repositories/). +Check it out and consider contributing your own Baby Yoda image annotations to +grow the dataset! + +### Data Version Control Explained + +Researcher [Nimra Ejaz](https://blog.crowdbotics.com/author/nimra/) published a +fantastically detailed introduction to DVC. She even included a "History of DVC" +section, which is pretty cool for us- this might be a first! + +Her blog covers not only the key features of DVC, but a thoughtful pros-and-cons +list _and_ a case study about using DVC in an image classification project. 
If +you want an up-to-date, high-level overview of DVC and some help deciding if it +fits your needs, I couldn't recommend Nimra's blog more. + + + +### One more thing from DAGsHub + +[Dean Pleban](https://twitter.com/DeanPlbn), CEO of DAGsHub, shared an important +update: they now offer FREE dataset and model hosting for DVC projects (up to 10 +GB per user and project, with flexibility for public projects)! And with no +configuration! + +That means you don't have to configure your DVC remote to use DVC with model and +data storage in the cloud- DAGsHub will handle _all_ of it. Your DVC remote can +be added as easily as a Git remote, in other words. Read the announcement, and +then dig into their +[basic tutorial](https://dagshub.com/docs/experiment-tutorial/overview/) to get +started. + + + +### A nice tweet + +[Bilgin Ibryam](https://twitter.com/bibryam), author of the +[Kubernetes Patterns](https://www.redhat.com/en/engage/kubernetes-containers-architecture-s-201910240918) +book, gave us a shoutout for being an interesting data engineering project +(according to a list by another expert we trust, +[Dmitry Ryabov](https://twitter.com/squarecog)). Thanks Bilgin and Dmitry, we +think you're very interesting too! + +https://twitter.com/bibryam/status/1341777034448650242 diff --git a/content/blogs/2021-01-26-january-21-community-gems.md b/content/blogs/2021-01-26-january-21-community-gems.md new file mode 100644 index 0000000000..5da60d717b --- /dev/null +++ b/content/blogs/2021-01-26-january-21-community-gems.md @@ -0,0 +1,180 @@ +--- +title: January '21 Community Gems +date: 2021-01-26 +description: > + A roundup of technical Q&A's from the DVC community. This month: parallelize + your data transfer, compressed datasets, and DVC pipelines in CI/CD. +descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month: parallelize + your data transfer, compressed datasets, and DVC pipelines in CI/CD. 
+picture: 2021-01-28/gems-cover.png
+author: elle_obrien
+commentsUrl: https://discuss.dvc.org/t/january-21-community-gems/645
+tags:
+  - Community Gems
+  - CML
+  - Plots
+  - Pipelines
+  - Docker
+---
+
+## DVC questions
+
+### [Q: Is there an equivalent of `git restore <filename>` for DVC?](https://discord.com/channels/485586884165107732/563406153334128681/799598181310267392)
+
+Yes! You'll want `dvc checkout`. It restores the corresponding version of your
+DVC-tracked file or directory from
+[the cache](https://dvc.org/doc/user-guide/dvc-internals#structure-of-the-cache-directory)
+to your local workspace.
+[Read up in our docs for more info!](https://dvc.org/doc/command-reference/checkout#checkout)
+
+### [Q: My dataset is made of more than _a million_ small files. Can I use an archive format, like `tar.gz` with DVC?](https://discord.com/channels/485586884165107732/485596304961962003/798983422965841920)
+
+There are some downsides to using archive formats, and often we discourage it-
+but let's review some factors to consider, so you can make the best choice for
+your project.
+
+- If your `tar.gz` file changes at all- perhaps because you changed a single
+  file before zipping- you'll end up with an entirely new copy of the archive
+  every time you commit! This is not very space efficient, but if space isn't an
+  issue it might not be a dealbreaker.
+- Because of the way we optimize data transfer, you'll end up transferring the
+  whole archive anytime you modify a single file and `dvc push`/`dvc pull`.
+- In general, archives don't play nice with the concept of diffs. Looking back
+  at your git history, it can be challenging to log how files were deleted,
+  modified, or added when you're versioning archives.
+
+While we can't do much about the general issues that archives present for
+version control systems, DVC does have some options that might help you achieve
+better data transfer speeds.
We recommend exploring DVC's built-in parallelism-
+data transfer functions like `dvc push` and `dvc pull` have a flag (`-j`) for
+increasing the number of jobs run simultaneously.
+[Check out the docs for more details](https://dvc.org/doc/command-reference/push#options).
+
+In summary, the advantage of using an archive format will depend on both how
+often you modify your dataset and how often you need to push and pull data. You
+might consider exploring both approaches (with and without compression) and run
+some speed tests for your use case. We'd love to know what you find!
+
+### [Q: My DVC remote is a server with a self-signed certificate. When I push data, DVC is giving me an SSL verification error- how can I get around this?](https://discord.com/channels/485586884165107732/563406153334128681/800707271502856222)
+
+On S3 or S3-compatible storage, you can configure your AWS CLI to use a custom
+certificate path.
+[As suggested by their docs](https://docs.aws.amazon.com/credref/latest/refdocs/setting-global-ca_bundle.html),
+you can also set the environment variable `AWS_CA_BUNDLE` to your `.pem` file.
+
+Similarly, on HTTP and Webdav remotes, there's `REQUESTS_CA_BUNDLE` environment
+variable that you can set your self-signed certificate file to.
+
+Then, when DVC tries to access your storage, you should be able to get past SSL
+verification!
+
+### [Q: I want to be able to make my own plots in Python with data points from my `dvc plots`, including older versions of those plots. What do you recommend to get the raw historical data?](https://discord.com/channels/485586884165107732/563406153334128681/799617584336338954)
+
+We suggest
+
+```python
+from dvc.repo import Repo
+
+revs = Repo().plots.collect(revs=revs)
+```
+
+Then you can plot the data contained in `revs` to your heart's content!
+
+### [Q: Is it safe to share a DVC remote between two projects or registries?](https://discord.com/channels/485586884165107732/563406153334128681/799216349405904896)
+
+You can share a remote with as many projects as you like. Because DVC uses
+content-addressable storage, you'll still get benefits like file deduplication
+over every project that uses the remote. This can be useful if you're likely to
+have many shared files across projects.
+
+One big thing to watch out for: you have to be very careful with clearing the
+DVC cache. Make sure you don't remove files associated with another project when
+running `dvc gc` by using the `--projects` flag.
+[Read up in the docs!](https://dvc.org/doc/command-reference/gc#options)
+
+### [Q: Can I throttle the number of simultaneous uploads to remote storage with DVC?](https://discord.com/channels/485586884165107732/563406153334128681/802099863076208662)
+
+Yep! That'll be the `-j/--jobs` flag, for example:
+
+```dvc
+$ dvc push -j <number>
+```
+
+will control the number of simultaneous uploads DVC attempts when pushing files
+to your remote storage
+([see more in our docs](https://dvc.org/doc/command-reference/push#push)).
+
+## CML questions
+
+### [Q: I have a DVC pipeline that I want to run in CI/CD. Specifically, I only want to reproduce the stages that have changed since my last commit. What do I do?](https://discord.com/channels/485586884165107732/728693131557732403/796185815574511616)
+
+DVC pipelines, like makefiles, will only reproduce stages that DVC detects have
+changed since the last commit. So to do this in CI/CD systems like GitHub
+Actions or GitLab CI, you'll want to make sure the workflow a) syncs the runner
+with the latest version of your pipeline, including all inputs and dependencies,
+and b) reruns your DVC pipeline.
+
+In practice, your workflow needs to include these two commands:
+
+```dvc
+$ dvc pull
+$ dvc repro
+```
+
+You pull the latest version of your pipeline, inputs and dependencies from cloud
+storage with `dvc pull`, and then `dvc repro` intelligently reproduces the
+pipeline (meaning, it should avoid rerunning stages that haven't changed since
+the last commit).
+
+Check out an
+[example workflow here](https://github.com/iterative/cml_dvc_case/blob/master/.github/workflows/cml.yaml).
+
+### [Q: I'm using DVC and CML to pull data from cloud storage, then train a model. I want to push the trained model into cloud storage when I'm done, what should I do?](https://discord.com/channels/485586884165107732/728693131557732403/801553810618187796)
+
+One approach is to add
+
+```dvc
+$ dvc add <model>
+$ dvc push
+```
+
+to the end of your workflow. This will push the model file, but there's a
+downside: it won't keep a strong link between the pipeline (meaning, the command
+you used to generate the model and any code/data dependencies) and the model
+file.
+
+What we recommend is that you create a
+[DVC pipeline](https://dvc.org/doc/start/data-pipelines#get-started-data-pipelines)
+with one stage- training your model- and declaring your model file as an output.
+Then, your workflow can look like this:
+
+```dvc
+# get data
+$ dvc pull --run-cache
+
+# run the pipeline
+$ dvc repro
+
+# push to remote storage
+$ dvc push --run-cache
+```
+
+When you do this workflow with the `--run-cache` flags, you'll be able to save
+all the results of the pipeline in the cloud
+([read more here](https://dvc.org/doc/command-reference/push#options)). When the
+run has completed, you can go to your local workspace and run:
+
+```dvc
+$ dvc pull --run-cache
+$ dvc repro
+```
+
+This will put your model in your local workspace! And, you get an immutable link
+between the code version, data version and model you end up with.
+
+We recommend this approach so you don't lose track of how model files relate to
+the data and code that produced them. It's a little more work to set up, but
+Future You will thank you!
+
+![Tim Robinson Reaction GIF by The Lonely Island](https://media.giphy.com/media/l0LEIXSRRuv9QQIRNI/giphy.gif)
diff --git a/content/blogs/2021-02-16-february-21-dvc-heartbeat.md b/content/blogs/2021-02-16-february-21-dvc-heartbeat.md
new file mode 100644
index 0000000000..295165733c
--- /dev/null
+++ b/content/blogs/2021-02-16-february-21-dvc-heartbeat.md
@@ -0,0 +1,177 @@
+---
+title: February ’21 Heartbeat
+date: 2021-02-16
+description: >
+  Monthly updates are here! Read all about our growing team, our CEO's interview
+  on The New Stack, integration with spaCy and more!
+descriptionLong: >
+  Monthly updates are here! Read all about our growing team, our CEO's interview
+  on The New Stack, integration with spaCy and more!
+picture: 2021-02-16/feb21cover.png
+author: jeny_defigueiredo
+commentsUrl: https://discuss.dvc.org/t/february-21-heartbeat/669
+tags:
+  - Heartbeat
+  - CML
+  - DVC
+  - DAGsHub
+  - spaCy
+  - ML Summit 2021
+  - Spell
+  - MLOps
+---
+
+## News
+
+Happy February! Here's all the news to keep you up to date.
+
+## We've hired and are still hiring!
+
+We have four new team members this month!
+
+[**Dave Berenbaum**](https://www.linkedin.com/in/david-berenbaum-20b6b424/) came
+to Iterative.ai by way of a
+[previous contribution](https://github.com/iterative/dvc/pull/2107) to our open
+source products while working as a Data Science Manager at Capital One. He joins
+the team as a Technical Product Manager. We are thrilled he's here!
+
+[**Batuhan Taskaya**](https://www.linkedin.com/in/batuhan-osman-taskaya-7803b61a0/)
+joins us as a DVC Software Engineer working on the Python core. Batuhan is
+excited to work on open source full time and we are excited to have him do so!
+ +[**Jeny De Figueiredo**](https://www.linkedin.com/in/jenifer-de-figueiredo/) is +involved in the Seattle area data science community at Data Circles and is a +WiDS Puget Sound Ambassador. She joins us as our new Community Manager and is +looking forward to further building and engaging the community in MLOps! (Hi! +This is me. 🙋🏻‍♀️ I'll be writing Heartbeat!) + +[**Roger Parent**](https://www.linkedin.com/in/rogermparent/) has already been a +big part of building DVC and [CML](https://cml.dev/). He has been a primary +developer of a UI that interfaces with the DVC Python application to provide an +interface with the Experiments feature that's coming out with DVC 2.0. We are so +excited to have him joining us full time as Software Engineer. + +![Search](https://media.giphy.com/media/vAvWgk3NCFXTa/giphy.gif) + +## Open Positions + +We are on the hunt for a +[TypeScript Front-End Engineer](https://docs.google.com/document/d/1aT5HZYt4kAUxXqD4JNTe3jPDlVUwSmnEWDPR2QoKdvo/edit) +to build SaaS and a VS Code UI for our popular machine learning tools: DVC and +CML. The ML tools ecosystem is what JS space was 10 years ago. Come join us on +this exciting project! + +Our search continues for a +[Developer Advocate](https://weworkremotely.com/remote-jobs/iterative-developer-advocate) +to support and inspire developers by creating new content like blogs, tutorials, +and videos - plus lead outreach through meetups and conferences. + +Does this sound like you or someone you know? Be in touch! + +## Iterative.ai Featured on The New Stack + +[Susan Hall](https://thenewstack.io/author/susanhall/) of +[The New Stack.io](https://thenewstack.io/) interviewed our very own CEO, +[Dmitry Petrov](https://twitter.com/fullstackml), discussing the needs of ML +engineers and how Iterative.ai makes tools to enable version control and CI/CD +for versioning data and ML models. + +> "ML engineers, they still need collaboration. 
They need GitHub for +> collaboration, they need this CI/CD system to resolve [issues] between each +> other, between the team and productions system." - Dmitry Petrov + + + +## Workshops and Talks + +### Developer Advocacy for Data Science + +So you saw the post further up. 👆🏽 Curious about developer advocacy or what to +look for in a hire for this position? +[Elle O'Brien](https://twitter.com/drelleobrien) dove into this recently with +[Alexey Grigorev](https://twitter.com/Al_Grigor) (author of a +[Data Science Bookcamp](https://mlbookcamp.com/)) +[in this podcast](https://www.youtube.com/watch?v=jv5W4jXk4P4) on +[DataTalks.club](http://datatalks.club/) You can watch it here below. 👇🏼 + +https://www.youtube.com/watch?v=jv5W4jXk4P4 + +## From the Community + +As ever, we have much to share from the great citizens of the DVC community. + +### spaCy and DVC Integration + +If your NLP team uses spaCy to manage your projects, with spaCy's release of +v3.0, you can now enjoy DVC integration to manage your workflow like Git! Check +out the [documentation here](https://spacy.io/usage/projects#integrations) to +streamline and track your process! 🏆 + + + +### DagsHub and DVC Integrations + +This month two great articles came out regarding the integration of DAGsHub and +DVC. First, this article: [Datasets Should Behave Like Git Repo walks you +through the steps to use DVC in your data versioning. The following image shows +the dependencies and how you simply need to do a `dvc update` each time your +dataset or model changes to track the process. + + + +### Did you say "Works Out of the Box?" + +Also from DAGsHub, by CEO [Dean Pleban](https://twitter.com/DeanPlbn), +[Free Dataset & Model Hosting with Zero Configuration - Launching DAGsHub Storage](https://dagshub.com/blog/dagshub-storage-zero-configuration-dataset-model-hosting/) +tells how their new DAGsHub storage is a DVC remote that requires zero +configuration (!) 
and will allow for team and organization access controls as +well as easy visibility. + +![Friends](https://media.giphy.com/media/Ftz07proVX6Rq/giphy.gif) + +### Model Management and ML Workflow Orchestration with DVC and Apache Airflow 🇩🇪 ❗️ + +We're really excited about a German language workshop led by +[Matthias Niehoff](https://twitter.com/matthiasniehoff)! The workshop will be a +part of the ML Summit 2021 taking place April 19-21st, but registration closes +February 18th. So time is ticking. ⏰ The Conference is online, but will be in +German. For more info, head here 👉🏽 for the +[Workshop Details](https://ml-summit.de/machine-learing/modellmanagement-und-ml-workflow-orchestrierung-mit-dvc-und-apache-airflow/). + +### "_The_ most popular 'N+1' tool used by teams on Spell" + +[Using DVC as a Lightweight Feature Store on Spell](https://spell.ml/blog/using-dvc-with-spell-YBHOChEAACgAaSmV) +by [Aleksey Bilogur](https://twitter.com/ResidentMario) , reviews the process of +using DVC with Spell for managing changing datasets, enabling team-wide data +reproducibility and why Spell fans are DVC fans, and vice versa. 🔄 + +![Fans](https://media.giphy.com/media/GM8PrUsm92hRC/giphy.gif) + +## Tweet Love ❤️ + +https://twitter.com/mihail_eric/status/1357014486377324547?s=20 + +You're all caught up! See you at the next Community Gems 💎! + +--- + +_Do you have any use case questions or need support? Join us in +[Discord](https://discord.com/invite/dvwXA2N)!_ + +_Head to the [DVC Forum](https://discuss.dvc.org/) to discuss your ideas and +best practices._ diff --git a/content/blogs/2021-02-18-dvc-2-0-pre-release.md b/content/blogs/2021-02-18-dvc-2-0-pre-release.md new file mode 100644 index 0000000000..2752f229f1 --- /dev/null +++ b/content/blogs/2021-02-18-dvc-2-0-pre-release.md @@ -0,0 +1,574 @@ +--- +title: DVC 2.0 Pre-Release +date: 2021-02-17 +description: > + Today, we're announcing DVC 2.0 pre-release. 
We'll share lessons from our + journey and how these will be reflected in the coming release. +descriptionLong: > + The new release is a result of our learning from our users. There are four + major features coming: + + 🔗 ML pipeline templating and iterative foreach stages + + 🧪 Lightweight ML experiments + + 📍 ML model checkpoints + + 📈 Dvc-live - new open-source library for metrics logging +picture: 2021-02-18/dvc-2-0-pre-release.png +pictureComment: DVC 2.0 Pre-Release +author: dmitry_petrov +commentsUrl: https://discuss.dvc.org/t/dvc-2-0-pre-release/681 +tags: + - Release + - MLOps + - DataOps +--- + +## Install + +First things first. You can install the 2.0 pre-release from the master branch +in our repo (instruction [here](https://dvc.org/doc/install/pre-release)) or +through pip: + +```dvc +$ pip install --upgrade --pre dvc +``` + +## ML pipelines parameterization and foreach stages + +After introducing the multi-stage pipeline file `dvc.yaml`, it was quickly +adopted among our users. The DVC team got tons of positive feedback from them, +as well as feature requests. + +### Pipeline parameters from `vars` + +The most requested feature was the ability to use parameters in `dvc.yaml`. For +example. So, you can pass the same seed value or filename to multiple stages in +the pipeline. + +```yaml +vars: + train_matrix: train.pkl + test_matrix: test.pkl + seed: 20210215 + +... + +stages: + process: + cmd: python process.py \ + --seed ${seed} \ + --train ${train_matrix} \ + --test ${test_matrix} + outs: + - ${test_matrix} + - ${train_matrix} + + ... + + train: + cmd: python train.py ${train_matrix} --seed ${seed} + deps: + - ${train_matrix} +``` + +Also, it gives an ability to localize all the important parameters in a single +`vars` block and play with them. This is a natural thing to do for scenarios +like NLP or when hyperparameter optimization is happening not only in the model +training code but in the data processing as well. 
+ +### Pipeline parameters from params files + +It is quite common to define pipeline parameters in a config file or a +parameters file (like `params.yaml`) instead of in the pipeline file `dvc.yaml` +itself. These parameters defined in `params.yaml` can also be used in +`dvc.yaml`. + +```yaml +# params.yaml +models: + us: + thresh: 10 + filename: 'model-us.hdf5' +``` + +```yaml +# dvc.yaml +stages: + build-us: + cmd: >- + python script.py + --out ${models.us.filename} + --thresh ${models.us.thresh} + outs: + - ${models.us.filename} +``` + +DVC properly tracks params dependencies for each stage starting from the +previous DVC version 1.0. See the +[`--params` option](https://dvc.org/doc/command-reference/run#for-displaying-and-comparing-data-science-experiments) +of `dvc run` for more details. + +### Iterating over params with foreach stages + +Iterating over params was a frequently requested feature. Now users can define +multiple similar stages with a templatized command. + +```yaml +stages: + build: + foreach: + gb: + thresh: 15 + filename: 'model-gb.hdf5' + us: + thresh: 10 + filename: 'model-us.hdf5' + do: + cmd: >- + python script.py --out ${item.filename} --thresh ${item.thresh} + outs: + - ${item.filename} +``` + +## Lightweight ML experiments + +DVC uses Git versioning as the basis for ML experiments. This solid foundation +makes each experiment reproducible and accessible from the project's history. +This Git-based approach works very well for ML projects with mature models when +only a few new experiments per day are run. + +However, in more active development, when dozens or hundreds of experiments need +to be run in a single day, Git creates overhead — each experiment run requires +additional Git commands `git add/commit`, and comparing all experiments is +difficult. + +We introduce lightweight experiments in DVC 2.0! This is how you can auto-track +ML experiments without any overhead from ML engineers. 
+ +⚠️ Note, our new ML experiment features (`dvc exp`) are experimental in the +coming release. This means that the commands might change a bit in the following +minor releases. + +`dvc exp run` can run an ML experiment with a new hyperparameter from +`params.yaml` while `dvc exp diff` shows metrics and params difference: + +```dvc +$ dvc exp run --set-param featurize.max_features=3000 + +Reproduced experiment(s): exp-bb55c +Experiment results have been applied to your workspace. + +$ dvc exp diff +Path Metric Value Change +scores.json auc 0.57462 0.0072197 + +Path Param Value Change +params.yaml featurize.max_features 3000 1500 +``` + +More experiments: + +```dvc +$ dvc exp run --set-param featurize.max_features=4000 +Reproduced experiment(s): exp-9bf22 +Experiment results have been applied to your workspace. + +$ dvc exp run --set-param featurize.max_features=5000 +Reproduced experiment(s): exp-63ee0 +Experiment results have been applied to your workspace. + +$ dvc exp run --set-param featurize.max_features=5000 \ + --set-param featurize.ngrams=3 +Reproduced experiment(s): exp-80655 +Experiment results have been applied to your workspace. +``` + +In the examples above, hyperparameters were changed with the `--set-param` +option, but you can make these changes by modifying the params file instead. In +fact _any code or data files can be changed_ and `dvc exp run` will capture the +variations. 
+ +See all the runs: + +```dvc +$ dvc exp show --no-pager --no-timestamp \ + --include-params featurize.max_features,featurize.ngrams +``` + +```dvctable + ───────────────────────────────────────────────────────────────────── + **Experiment** **auc** **featurize.max_features** **featurize.ngrams** + ───────────────────────────────────────────────────────────────────── + workspace 0.56359 5000 3 + master 0.5674 1500 2 + ├── exp-80655 0.56359 5000 3 + ├── exp-63ee0 0.5515 5000 2 + ├── exp-9bf22 0.56448 4000 2 + └── exp-bb55c 0.57462 3000 2 + ───────────────────────────────────────────────────────────────────── +``` + +Under the hood, DVC uses Git to store the experiments' meta-information. A +straight-forward implementation would create visible branches and auto-commit in +them, but that approach would over-pollute the branch namespace very quickly. To +avoid this issue, we introduced custom Git references `exps`, the same way as +GitHub uses custom references `pulls` to track pull requests (this is an +interesting technical topic that deserves a separate blog post). Below you can +see how it works. + +No artificial branches, only custom references `exps` (do not worry if you don't +understand this part - it is an implementation detail): + +```dvc +$ git branch +* master + +$ git show-ref +5649f62d845fdc29e28ea6f7672dd729d3946940 refs/exps/exec/EXEC_APPLY +5649f62d845fdc29e28ea6f7672dd729d3946940 refs/exps/exec/EXEC_BRANCH +5649f62d845fdc29e28ea6f7672dd729d3946940 refs/exps/71/67904d89e116f28daf7a6e4c0878268117c893/exp-80655 +f16e7b7c804cf52d91d1d11850c15963fb2a8d7b refs/exps/97/d69af70c6fb4bc59aefb9a87437dcd28b3bde4/exp-63ee0 +0566d42cddb3a8c4eb533f31027f0febccbbc2dd refs/exps/91/94265d5acd847e1c439dd859aa74b1fc3d73ad/exp-bb55c +9bb067559583990a8c5d499d7435c35a7c9417b7 refs/exps/49/5c835cd36772123e82e812d96eabcce320f7ec/exp-9bf22 +``` + +The best experiment can be promoted to the workspace and committed to Git. + +```dvc +$ dvc exp apply exp-bb55c +$ git add . 
+$ git commit -m 'optimize max feature size' +``` + +Alternatively, an experiment can be promoted to a branch (`big_fr_size` branch +in this case): + +```dvc +$ dvc exp branch exp-80655 big_fr_size +Git branch 'big_fr_size' has been created from experiment 'exp-c695f'. +To switch to the new branch run: + + git checkout big_fr_size +``` + +Remove all the experiments that were not used: + +```dvc +$ dvc exp gc --workspace --force +``` + +## Model checkpoints + +ML model checkpoints are an essential part of deep learning. ML engineers prefer +to save the model files (or weights) at checkpoints during a training process +and return back when metrics start diverging or learning is not fast enough. + +The checkpoints create a different dynamic around ML modeling process and need a +special support from the toolset: + +1. Track and save model checkpoints (DVC outputs) periodically, not only the + final result or training epoch. +2. Save metrics corresponding to each of the checkpoints. +3. Reuse checkpoints - warm-start training with an existing model file, + corresponding code, dataset version and metrics. + +This new behavior is supported in DVC 2.0. Now, DVC can version all your +checkpoints with corresponding code and data. It brings the reproducibility of +DL processes to the next level - every checkpoint is reproducible. + +This is how you define checkpoints with live-metrics: + +```dvc +$ dvc stage add -n train \ + -d users.csv -d train.py \ + -p dropout,epochs,lr,process \ + --checkpoint model.h5 \ + --live logs \ + python train.py + +Creating 'dvc.yaml' +Adding stage 'train' in 'dvc.yaml' +``` + +Note, we use `dvc stage add` command instead of `dvc run`. Starting from DVC 2.0 +we begin extracting all stage specific functionality under `dvc stage` umbrella. +`dvc run` is still working, but will be deprecated in the following major DVC +version (most likely in 3.0). 
+ +Start the training process and interrupt it after 5 epochs: + +```dvc +$ dvc exp run +'users.csv.dvc' didn't change, skipping +Running stage 'train': +> python train.py +... +^CTraceback (most recent call last): +... +KeyboardInterrupt +``` + +Navigate in checkpoints: + +```dvc +$ dvc exp show --no-pager --no-timestamp +``` + +```dvctable + ────────────────────────────────────────────────────────────────────── + **Experiment** **step** **loss** **accuracy** **val_loss** **…** **epochs** **…** + ────────────────────────────────────────────────────────────────────── + workspace 4 2.0702 0.30388 2.025 … 5 … + master - - - - … 5 … + │ ╓ exp-e15bc 4 2.0702 0.30388 2.025 … 5 … + │ ╟ 5ea8327 4 2.0702 0.30388 2.025 … 5 … + │ ╟ bc0cf02 3 2.1338 0.23988 2.0883 … 5 … + │ ╟ f8cf03f 2 2.1989 0.17932 2.1542 … 5 … + │ ╟ 7575a44 1 2.2694 0.12833 2.223 … 5 … + ├─╨ a72c526 0 2.3416 0.0959 2.2955 … 5 … + ────────────────────────────────────────────────────────────────────── +``` + +Each of the checkpoints above is a separate experiment with all data, code, +paramaters and metrics. You can use the same `dvc exp apply` command to extract +any of these. + +Another run continues this process. You can see how accuracy metrics are +increasing - DVC does not remove the model/checkpoint and training code trains +on top of it: + +```dvc +$ dvc exp run +Existing checkpoint experiment 'exp-e15bc' will be resumed +... 
+^C +KeyboardInterrupt + +$ dvc exp show --no-pager --no-timestamp +``` + +```dvctable + ────────────────────────────────────────────────────────────────────── + **Experiment** **step** **loss** **accuracy** **val_loss** **…** **epochs** **…** + ────────────────────────────────────────────────────────────────────── + workspace 9 1.7845 0.58125 1.7381 … 5 … + master - - - - … 5 … + │ ╓ exp-e15bc 9 1.7845 0.58125 1.7381 … 5 … + │ ╟ 205a8d3 9 1.7845 0.58125 1.7381 … 5 … + │ ╟ dd23d96 8 1.8369 0.54173 1.7919 … 5 … + │ ╟ 5bb3a1f 7 1.8929 0.49108 1.8474 … 5 … + │ ╟ 6dc5610 6 1.951 0.43433 1.9046 … 5 … + │ ╟ a79cf29 5 2.0088 0.36837 1.9637 … 5 … + │ ╟ 5ea8327 4 2.0702 0.30388 2.025 … 5 … + │ ╟ bc0cf02 3 2.1338 0.23988 2.0883 … 5 … + │ ╟ f8cf03f 2 2.1989 0.17932 2.1542 … 5 … + │ ╟ 7575a44 1 2.2694 0.12833 2.223 … 5 … + ├─╨ a72c526 0 2.3416 0.0959 2.2955 … 5 … + ────────────────────────────────────────────────────────────────────── +``` + +After modifying the code, data, or params, the same process can be resumed. DVC +recognizes the change and shows it (see experiment `b363267`): + +```dvc +$ vi train.py # modify code +$ vi params.yaml # modify params + +$ dvc exp run +Modified checkpoint experiment based on 'exp-e15bc' will be created +... 
+ +$ dvc exp show --no-pager --no-timestamp +``` + +```dvctable + ────────────────────────────────────────────────────────────────────────────── + **Experiment** **step** **loss** **accuracy** **val_loss** **…** **epochs** **…** + ────────────────────────────────────────────────────────────────────────────── + workspace 13 1.5841 0.69262 1.5381 … 15 … + master - - - - … 5 … + │ ╓ exp-7ff06 13 1.5841 0.69262 1.5381 … 15 … + │ ╟ 6c62fec 12 1.6325 0.67248 1.5857 … 15 … + │ ╟ 4baca3c 11 1.6817 0.64855 1.6349 … 15 … + │ ╟ b363267 (2b06de7) 10 1.7323 0.61925 1.6857 … 15 … + │ ╓ 2b06de7 9 1.7845 0.58125 1.7381 … 5 … + │ ╟ 205a8d3 9 1.7845 0.58125 1.7381 … 5 … + │ ╟ dd23d96 8 1.8369 0.54173 1.7919 … 5 … + │ ╟ 5bb3a1f 7 1.8929 0.49108 1.8474 … 5 … + │ ╟ 6dc5610 6 1.951 0.43433 1.9046 … 5 … + │ ╟ a79cf29 5 2.0088 0.36837 1.9637 … 5 … + │ ╟ 5ea8327 4 2.0702 0.30388 2.025 … 5 … + │ ╟ bc0cf02 3 2.1338 0.23988 2.0883 … 5 … + │ ╟ f8cf03f 2 2.1989 0.17932 2.1542 … 5 … + │ ╟ 7575a44 1 2.2694 0.12833 2.223 … 5 … + ├─╨ a72c526 0 2.3416 0.0959 2.2955 … 5 … + ────────────────────────────────────────────────────────────────────────────── +``` + +Sometimes you might need to train the model from scratch. The reset option +removes the checkpoint file before training: `dvc exp run --reset`. + +## Metrics logging + +Continuously logging ML metrics is a very common practice in the ML world. +Instead of a simple command-line output with the metrics values, many ML +engineers prefer visuals and plots. These plots can be organized in a "database" +of ML experiments to keep track of a project. There are many special solutions +for metrics collecting and experiment tracking such as sacred, mlflow, weight +and biases, neptune.ai, or others. 
+ +With DVC 2.0, we are releasing a new open-source library +[DVC-Live](https://github.com/iterative/dvclive) that provides functionality for +tracking model metrics and organizing metrics in simple text files in a way that +DVC can visualize the metrics with navigation in Git history. So, DVC can show +you a metrics difference between the current model and a model in `master` or +any other branch. + +This approach is similar to the other metrics tracking tools with the difference +that Git becomes a "database" or of ML experiments. + +### Generate metrics file + +Install the library: + +```dvc +$ pip install dvclive +``` + +Instrument your code: + +```python +import dvclive +from dvclive.keras import DvcLiveCallback + +dvclive.init("logs") #, summarize=True) + +... + +model.fit(... + # Set up DVC-Live callback: + callbacks=[ DvcLiveCallback() ] + ) + +``` + +During the training you will see the metrics files that are continuously +populated each epochs: + +```dvc +$ ls logs/ +accuracy.tsv loss.tsv val_accuracy.tsv val_loss.tsv + +$ head logs/accuracy.tsv +timestamp step accuracy +1613645582716 0 0.7360000014305115 +1613645585478 1 0.8349999785423279 +1613645587322 2 0.8830000162124634 +1613645589125 3 0.9049999713897705 +1613645590891 4 0.9070000052452087 +1613645592681 5 0.9279999732971191 +1613645594490 6 0.9430000185966492 +1613645596232 7 0.9369999766349792 +1613645598034 8 0.9430000185966492 +``` + +In addition to the continuous metrics files, you will see the summary metrics +file and HTML file with the same file prefix. 
The summary file contains the +result of the latest epoch: + +```dvc +$ cat logs.json | python -m json.tool +{ + "step": 41, + "loss": 0.015958430245518684, + "accuracy": 0.9950000047683716, + "val_loss": 13.705962181091309, + "val_accuracy": 0.5149999856948853 +} +``` + +The HTML file contains all the visuals for continuous metrics as well as the +summary metrics on a single page: + +![](../uploads/images/2021-02-18/dvclive-html.png) + +Note, the HTML and the summary metrics files are generating automatically for +each. So, you can monitor model performance in realtime. + +### Git-navigation with the metrics file + +DVC repository is NOT required to use the live metrics functionality from the +above. It works independently from DVC. + +DVC repository becomes useful when the metrics and plots are committed in your +Git repository, and you need navigation around the metrics. + +Metrics difference between workspace and the last Git commit: + +```dvc +$ git status -s + M logs.json + M logs/accuracy.tsv + M logs/loss.tsv + M logs/val_accuracy.tsv + M logs/val_loss.tsv + M train.py +?? 
model.h5 + +$ dvc metrics diff --target logs.json +Path Metric Old New Change +logs.json accuracy 0.995 0.99 -0.005 +logs.json loss 0.01596 0.03036 0.0144 +logs.json step 41 36 -5 +logs.json val_accuracy 0.515 0.5175 0.0025 +logs.json val_loss 13.70596 3.29033 -10.41563 +``` + +The difference between a particular commit/branch/tag or between two commits: + +```dvc +$ dvc metrics diff --target logs.json HEAD^ 47b85c +Path Metric Old New Change +logs.json accuracy 0.995 0.998 0.003 +logs.json loss 0.01596 0.01951 0.00355 +logs.json step 41 82 41 +logs.json val_accuracy 0.515 0.51 -0.005 +logs.json val_loss 13.70596 5.83056 -7.8754 +``` + +The same Git-navigation works with the plots: + +```dvc +$ dvc plots diff --target logs +file:///Users/dmitry/src/exp-dc/plots.html +``` + +![](../uploads/images/2021-02-18/dvclive-diff-html.png) + +Another nice thing about the live metrics - they work across ML experiments and +checkpoints, if properly set up in dvc stages. To set up live metrics, you need +to specify the metrics directory in the `live` section of a stage: + +```yaml +stages: + train: + cmd: python train.py + live: + logs: + cache: false + summary: true + report: true + deps: + - data +``` + +## Thank you! + +I'd like to thank all of you DVC community members for the feedback that we are +constantly getting. This feedback helps us build new functionalities in DVC and +make it more stable. + +Please be in touch with us on [Twitter](https://twitter.com/DVCorg) and our +[Discord channel](https://dvc.org/chat). 
diff --git a/content/blogs/2021-02-22-cml-runner-prerelease.md b/content/blogs/2021-02-22-cml-runner-prerelease.md new file mode 100644 index 0000000000..2793f9c63e --- /dev/null +++ b/content/blogs/2021-02-22-cml-runner-prerelease.md @@ -0,0 +1,177 @@ +--- +title: + 'CML Pre-Release Notes: Automatically Train Models in the Cloud with CML 0.3.0' +date: 2021-02-22 +description: > + New features are here to make launching cloud compute for continuous + integration workflows shorter, sweeter and easier than ever. Plus, a new + GitHub Action to setup CML means more ways to use CML without our Docker + container. +descriptionLong: > + New features are here to make launching cloud compute for continuous + integration workflows shorter, sweeter and easier than ever. Plus, a new + GitHub Action to setup CML means more ways to use CML without our Docker + container. +picture: 2021-02-22/cover.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/cml-0-3-0-pre-release/685 +tags: + - CML + - GitHub Actions + - GitLab CI + - Terraform + - Release +--- + +Today, we're pre-releasing some new features in Continuous Machine Learning, or +[CML](https://cml.dev)—our open source project to adapt popular continuous +integration (CI) systems like GitHub Actions and GitLab CI for data science. CML +has become a popular tool for auto-generating ML model reports right in a GitHub +Pull Request and orchestrating resources for training models in the cloud. + +Here's what's in today's pre-release: + +## Brand new method to provision cloud compute for your CI workflows + +After the initial CML release, we found ways to significantly simplify the +process of allocating resources in CI/CD. 
We developed a brand new CML command +`cml runner` that hides much of the complexity of configuring and provisioning +an instance, keeping your workflows free of `bash` scripting clutter (until the +official release, docs are +[in development here](https://github.com/iterative/cml/blob/c2b96c461011f01ab2476e1542fb89d7229d150d/README.md)). +The new approach uses Terraform provider under the hood instead of Docker +Machine, as in the first version. + +Check out this example workflow to launch an EC2 instance from a GitHub Action +workflow and then train a model. We hope you'll agree it's shorter, sweeter, and +more powerful than ever! + +```yaml +name: 'Train in the cloud' +on: [push] + +jobs: + deploy-runner: + runs-on: [ubuntu-latest] + steps: + - uses: iterative/setup-cml@v1 + - uses: actions/checkout@v2 + - name: deploy + shell: bash + env: + repo_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + run: | + cml runner \ + --cloud aws \ + --cloud-region us-west \ + --cloud-type=t2.micro \ + --labels=cml-runner + train-model: + needs: deploy-runner + runs-on: [self-hosted, cml-runner] + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: 'Train my model' + run: | + pip install -r requirements.txt + python train.py +``` + +If you use CML functions in the `train-model` step, you can go even further and +get a closed loop—sending model training results from the EC2 instance to your +pull request or merge request! 
For example, if we expand the `train-model` step +to incorporate functions like `cml publish` and `cml send-comment`: + +```yaml +train-model: + needs: deploy-runner + runs-on: [self-hosted, cml-runner] + container: docker://dvcorg/cml + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: 'Train a model' + env: + repo_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }} + run: | + pip install -r requirements.txt + python train.py + + echo "## Report from your EC2 Instance" > report.md + cat metrics.txt >> report.md + cml publish "plot.png" --md >> report.md + cml send-comment report.md +``` + +You'll get a pull request that looks something like this: + +![](../uploads/images/2021-02-22/sample_pr.png) + +All the code to replicate this example is up on a +[brand new demo repository](https://github.com/iterative/cml-runner-base-case). + +### Our favorite details + +The new `cml runner` function lets you turn on instances, including GPU, +high-memory and spot instances, and kick off a new workflow using the hardware +and environment of your choice—and of course, it'll turn _off_ those instances +after a configurable timeout! In the first CML release, this took +[more than 30 lines of code](https://github.com/iterative/cml_cloud_case/blob/master/.github/workflows/cml.yaml) +to configure. Now it's just one function. + +Another highlight: you can use whatever Docker container you'd like on your +instance. In the above example, we use our +[custom CML Docker container](https://github.com/iterative/cml/blob/master/Dockerfile) +(because we like it!)—but you certainly don't have to! Whatever image you +choose, we highly recommend containerizing your environment for ultimate +reproducibility and security with CML. + +You can also use the new `cml runner` function to set up a +[local self-hosted runner](https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners). 
+On your local machine or on-premise GPU cluster, you'll install CML as a package +and then run: + +```bash +$ cml runner \ + --repo $your_project_repository_url \ + --token=$personal_access_token \ + --labels tf \ + --idle-timeout 180 +``` + +Now your machine will be listening for workflows from your project repository. + +## A New GitHub Action + +One more thing: you might've noticed in our example workflow above that there's +a [new CML GitHub Action](https://github.com/iterative/setup-cml)! The new +Action helps you setup CML, giving you one more way to mix and match the CML +suite of functions with your preferred environment. + +The new Action is designed to be a straightforward, all-in-one install that +gives you immediate use of functions like `cml publish` and `cml runner`. You'll +add this step to your workflow: + +```yaml +steps: + - uses: actions/checkout@v2 + - uses: iterative/setup-cml@v1 +``` + +[More details are in the docs!](https://github.com/iterative/setup-cml) + +## Get ready for the release + +We're inviting our community members to explore these new features in +anticipation of our upcoming, _official_ release. As always, feedback is welcome +by opening an issue on the +[CML GitHub repository](https://github.com/iterative/cml), as a comment here or +via our [Discord channel](https://discord.gg/bzA6uY7). We're excited to hear +what you think! diff --git a/content/blogs/2021-02-26-february-21-community-gems.md b/content/blogs/2021-02-26-february-21-community-gems.md new file mode 100644 index 0000000000..022be9b61a --- /dev/null +++ b/content/blogs/2021-02-26-february-21-community-gems.md @@ -0,0 +1,175 @@ +--- +title: February '21 Community Gems +date: 2021-02-26 +description: > + A roundup of technical Q&A's from the DVC community. This month: best + practices for config files, pipeline dependency management,and caching data + for CI/CD. Plus a new CML feature to launch cloud compute with Terraform! 
+descriptionLong: > + A roundup of technical Q&A's from the DVC community. This month: best + practices for config files, pipeline dependency management,and caching data + for CI/CD. Plus a new CML feature to launch cloud compute with Terraform! +picture: 2021-02-26/feb-gems-cover.png +author: elle_obrien +commentsUrl: https://discuss.dvc.org/t/february-21-community-gems/686 +tags: + - Community Gems + - CML + - Pipelines + - Terraform + - Conda +--- + +## DVC Questions + +### [Q: I noticed I have a DVC `config` file and a `config.local` file. What's best practice for committing these to my Git repository?](https://discord.com/channels/485586884165107732/563406153334128681/666708671333400599) + +DVC uses the `config` and `config.local` files to link your remote data +repository to your project. `config` is intended to be committed to Git, while +`config.local` is not - it's a file that you use to store sensitive information +(e.g. your personal credentials - username, password, access keys, etc. for +remote storage) or settings that are specific to your local environment. + +Usually, you don't have to worry about ensuring your `config.local` file is +being ignored by Git- the only way to create a `config.local` file is using the +`--local` flag explicitly in functions like `dvc remote` and `dvc config` +commands, so you'll know you've made one! And your `config.local` file is +`.gitignored` by default. If you're concerned, take a look and make sure there +are no settings in your `config.local` file that you actually want in your +regular `config` file. + +To learn more about `config` and `config.local`, +[read up in our docs](https://dvc.org/doc/command-reference/remote#example-add-a-default-local-remote). + +### [Q: What's the best way to install the new version of DVC in a Conda environment? 
I'm concerned about the `paramiko` dependency.](https://discord.com/channels/485586884165107732/563406153334128681/669173874247729165) + +When you install DVC via `conda`, it will come with dependencies like +`paramiko`. + +The only exception when installing DVC as a Python library is with `pip`: you +might want to specify the kind of remote storage you need to make sure all +dependencies are present (like `boto` for S3). You can run +`pip install "dvc[