dcirne
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎CITATION.cff‎
Lines changed: 41 additions & 0 deletions b/‎CITATION.cff‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 31 additions & 0 deletions b/‎Dockerfile‎
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,4 @@
+*.swp
+__pycache__
+.DS_Store
+.ipynb_checkpoints
@@ -0,0 +1,41 @@
+# This CITATION.cff file was generated with cffinit.
+# Visit https://bit.ly/cffinit to generate yours today!
+
+cff-version: 1.2.0
+title: RAG Fundamentals and Semantic Chunking
+message: >-
+  If you reference this repository, please cite it as
+  indicated.
+type: software
+authors:
+  - given-names: Dalmo
+    family-names: Cirne
+    orcid: 'https://orcid.org/0009-0005-6354-0041'
+repository-code: 'https://github.com/dcirne/rag_fundamentals'
+repository: 'https://dalmocirne.com'
+abstract: >-
+  The material in this repository was initially prepared for
+  a lecture I gave at The 2024 IARIA Annual Congress on
+  Frontiers in Science, Technology, Services, and
+  Applications, on the topics of Retrieval-Augmented
+  Generation (RAG) and Semantic Chunking.
+
+
+  RAG is a technique used to optimize the output of a Large
+  Language Model (LLM). The expectation is that In-Context
+  Learning (ICL) takes place, leading the LLM to produce
+  better results.
+
+
+  RAG can be more effective when semantic chunking is used.
+  The basic idea is to retrieve and compile small "chunks"
+  of data to augment the prompt to be sent to an LLM, rather
+  than inserting entire documents that contain the topic of
+  interest, but also information that is not relevant to the
+  user interaction.
+keywords:
+  - rag
+  - semantic-chunking
+license: Apache-2.0
+version: 1.0.0
+date-released: '2024-06-18'
@@ -0,0 +1,31 @@
+FROM python:3.11-bookworm
+
+# Set bash as the shell for the RUN instructions below
+SHELL ["/bin/bash", "-c"]
+
+# Working directory (WORKDIR creates /workspace when it does not exist)
+WORKDIR /workspace
+
+# Update the system and install the build toolchain in a single layer, so a
+# cached "apt-get update" is never paired with a newer install step, and drop
+# the package index in that same layer to keep it out of the image
+RUN set -eux && \
+    apt-get update && \
+    apt-get -y upgrade && \
+    apt-get install -y --no-install-recommends \
+        build-essential && \
+    rm -rf /var/lib/apt/lists/*
+
+# Copy only the dependency manifest; the layers above stay cached when it changes
+COPY requirements.txt /workspace/
+
+# Upgrade pip and install the Python packages listed in requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+
+# Document the port the notebook server listens on (EXPOSE does not publish it)
+EXPOSE 8024
+
+# Run the Jupyter Notebook on all interfaces
+# NOTE(review): no USER is set, so this runs as root (hence --allow-root) - confirm
+CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8024", "--no-browser", "--allow-root"]