96 changes: 91 additions & 5 deletions README.md
@@ -2,19 +2,77 @@

A Python package for building RAG (Retrieval-Augmented Generation) applications using PDFs, ChromaDB, and Ollama.

## Project Structure

```
.
├── pdf_rag
│   ├── document_processor.py
│   ├── __init__.py
│   ├── llm_interface.py
│   ├── main.py
│   └── vector_store.py
├── README.md
├── requirements.txt
├── setup.py
├── test_package.py
└── test.py

2 directories, 10 files
```

## Installation

1. **Create and activate a virtual environment:**

```bash
# Create a virtual environment
python -m venv venv

# Activate the virtual environment
# On Windows
venv\Scripts\activate
# On Unix or macOS
source venv/bin/activate
```

2. **Install the package:**

```bash
pip install -e .
```

3. **Install Ollama on Linux:**

Follow the steps below to install Ollama on a Linux system.

```bash
# Download and run the Ollama install script
curl -fsSL https://ollama.com/install.sh | sh

# Verify the installation
ollama --version
```

4. **Download models in Ollama:**

To download specific models such as `llama3` and `deepseek-r1`, use the following commands:

```bash
# Download the llama3 model
ollama pull llama3

# Download the deepseek-r1 model
ollama pull deepseek-r1
```

## Basic Usage

```python
from pdf_rag import PDFRAGApplication

# Initialize the application
rag = PDFRAGApplication(model_name="deepseek-r1")

# Load a PDF
rag.load_pdf("your_document.pdf")
@@ -23,3 +81,31 @@ rag.load_pdf("your_document.pdf")
response = rag.query("What is this document about?")
print(response)
```
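
The `query()` call above follows the standard retrieve-then-generate pattern: the question is matched against the stored PDF chunks, and the best-matching text becomes grounding context in the model's prompt. A minimal pure-Python sketch of that flow (the helper names below are illustrative only and are not part of the `pdf_rag` API):

```python
# Illustrative sketch of retrieve-then-generate; these helpers are
# hypothetical and do not exist in pdf_rag.

def retrieve(chunks, question):
    # Toy retrieval: pick the chunk sharing the most words with the question.
    q_words = set(question.lower().split())
    return max(chunks, key=lambda c: len(q_words & set(c.lower().split())))

def build_prompt(context, question):
    # The retrieved chunk becomes grounding context for the LLM.
    return f"Answer using only this context:\n{context}\n\nQuestion: {question}"

chunks = [
    "The report covers quarterly revenue growth.",
    "Appendix B lists all contributing authors.",
]
question = "How did revenue grow?"
prompt = build_prompt(retrieve(chunks, question), question)
print(prompt)
```

In the real package, the word-overlap step is replaced by ChromaDB's embedding search, and the prompt is sent to the Ollama model.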

## Testing

Run the `test.py` script to see how the module works with ChromaDB:

```python
import chromadb
chroma_client = chromadb.Client()

# switch `create_collection` to `get_or_create_collection` to avoid creating a new collection every time
collection = chroma_client.get_or_create_collection(name="my_collection")

# switch `add` to `upsert` to avoid adding the same documents every time
collection.upsert(
    documents=[
        "This is a document about pineapple",
        "This is a document about oranges"
    ],
    ids=["id1", "id2"]
)

results = collection.query(
    query_texts=["This is a query document about hawaii"],  # Chroma will embed this for you
    n_results=2  # how many results to return
)

print(results)
```
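
Behind `collection.query`, Chroma embeds the query text and ranks the stored documents by vector similarity. The idea can be sketched with cosine similarity over toy 2-D vectors (real embeddings have hundreds of dimensions; these numbers are made up for illustration):

```python
import math

def cosine(a, b):
    # Cosine similarity: dot product normalized by the vector lengths.
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm

# Toy 2-D "embeddings": one per topic.
docs = {
    "pineapple": [1.0, 0.0],
    "oranges":   [0.0, 1.0],
    "hawaii":    [0.7, 0.7],
}
query_vec = [0.6, 0.8]  # pretend embedding of "a query document about hawaii"

ranked = sorted(docs, key=lambda name: cosine(query_vec, docs[name]), reverse=True)
print(ranked)  # most similar topic first
```

The `n_results` parameter in the Chroma call above simply truncates this ranking to the top matches.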
Binary file removed pdf_rag/__pycache__/__init__.cpython-310.pyc
Binary file removed pdf_rag/__pycache__/llm_interface.cpython-310.pyc
Binary file removed pdf_rag/__pycache__/main.cpython-310.pyc
Binary file removed pdf_rag/__pycache__/vector_store.cpython-310.pyc
2 changes: 0 additions & 2 deletions setup.py
@@ -9,8 +9,6 @@
         "chromadb",
         "requests"
     ],
-    author="Your Name",
-    author_email="your.email@example.com",
     description="A RAG application for PDF documents using ChromaDB and Ollama",
     long_description=open("README.md").read(),
     long_description_content_type="text/markdown",
19 changes: 11 additions & 8 deletions test_package.py
@@ -1,12 +1,15 @@
-## Usage
 from pdf_rag import PDFRAGApplication

-# Initialize the application
-rag = PDFRAGApplication(model_name="llama3")
+def test_pdf_rag():
+    # Initialize the application
+    rag = PDFRAGApplication(model_name="deepseek-r1")

-# Load a PDF
-rag.load_pdf("resume.pdf")
+    # Load a PDF
+    rag.load_pdf("your_document.pdf")

-# Query the system
-response = rag.query("What is this document about?")
-print(response)
+    # Query the system
+    response = rag.query("What is this document about?")
+    print(response)
+
+if __name__ == "__main__":
+    test_pdf_rag()