Skip to content

Commit ee2c593

Browse files
setup data dir
1 parent 37f1465 commit ee2c593

File tree

5 files changed

+18
-10
lines changed

5 files changed

+18
-10
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ This application demonstrates how to deploy a Flask-based Retrieval-Augmented Ge
4141
## Configuration
4242

4343
- The knowledge base consists of all the markdown files in the Defang docs [website](https://docs.defang.io/docs/intro). The parsing logic can be found in `./app/get_knowledge_base.py`.
44-
- The file `get_knowledge_base.py` parses every webpage as specified into paragraphs and writes to `knowledge_base.json` for the RAG retrieval.
44+
- The file `get_knowledge_base.py` parses every webpage as specified into paragraphs and writes to `./data/knowledge_base.json` for the RAG retrieval.
4545
- To obtain your own knowledge base, please feel free to implement your own parsing scheme.
4646
- For local development, please use the `compose.dev.yaml` file, whereas for production, please use the `compose.yaml` file.
4747

app/app.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from flask import Flask, request, jsonify, render_template, Response, stream_with_context, session
1+
from flask import Flask, request, jsonify, render_template, Response, stream_with_context, session, send_from_directory
22
from flask_wtf.csrf import CSRFProtect
33
from rag_system import rag_system
44
import hashlib
@@ -116,6 +116,12 @@ def trigger_rebuild():
116116
print(f"Error in /trigger-rebuild endpoint: {e}")
117117
return jsonify({"error": "Internal Server Error"}), 500
118118

119+
@app.route("/data/<path:name>")
120+
def download_file(name):
121+
return send_from_directory(
122+
"data", name, as_attachment=True
123+
)
124+
119125
if os.getenv('DEBUG') == '1':
120126
@app.route('/ask/debug', methods=['POST'])
121127
def debug_context():
File renamed without changes.

app/get_knowledge_base.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
import json
66
from git import Repo
77

8+
kb_file_path = './data/knowledge_base.json'
9+
810
def clean_tmp(dir_path):
911
""" Clears out all contents of the specified directory except for prebuild.sh """
1012
for item in os.listdir(dir_path):
@@ -80,14 +82,14 @@ def parse_markdown():
8082

8183
def reset_knowledge_base():
8284
""" Resets or initializes the knowledge base JSON file. """
83-
with open('./knowledge_base.json', 'w') as output_file:
85+
with open(kb_file_path, 'w') as output_file:
8486
json.dump([], output_file)
8587

8688
def parse_markdown_file_to_json(file_path):
8789
""" Parses individual markdown file and adds its content to JSON """
8890
try:
8991
# Load existing content if the file exists
90-
with open('./knowledge_base.json', 'r') as existing_file:
92+
with open(kb_file_path, 'r') as existing_file:
9193
json_output = json.load(existing_file)
9294
current_id = len(json_output) + 1 # Start ID from the next available number
9395
except (FileNotFoundError, json.JSONDecodeError):
@@ -147,15 +149,15 @@ def parse_markdown_file_to_json(file_path):
147149
})
148150
current_id += 1
149151

150-
# Write the augmented JSON output to knowledge_base.json
151-
with open('./knowledge_base.json', 'w', encoding='utf-8') as output_file:
152+
# Write the augmented JSON output to ./data/knowledge_base.json
153+
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
152154
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
153155

154156
def parse_cli_markdown(file_path):
155157
""" Parses CLI-specific markdown files """
156158
try:
157159
# Load existing content if the file exists
158-
with open('./knowledge_base.json', 'r') as existing_file:
160+
with open(kb_file_path, 'r') as existing_file:
159161
json_output = json.load(existing_file)
160162
current_id = len(json_output) + 1 # Start ID from the next available number
161163
except (FileNotFoundError, json.JSONDecodeError):
@@ -187,8 +189,8 @@ def parse_cli_markdown(file_path):
187189
})
188190
current_id += 1
189191

190-
# Write the augmented JSON output to knowledge_base.json
191-
with open('./knowledge_base.json', 'w', encoding='utf-8') as output_file:
192+
# Write the augmented JSON output to ./data/knowledge_base.json
193+
with open(kb_file_path, 'w', encoding='utf-8') as output_file:
192194
json.dump(json_output, output_file, indent=2, ensure_ascii=False)
193195

194196
def recursive_parse_directory(root_dir):

app/rag_system.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
openai.api_key = os.getenv("OPENAI_API_KEY")
1313

1414
class RAGSystem:
15-
def __init__(self, knowledge_base_path='knowledge_base.json'):
15+
def __init__(self, knowledge_base_path='./data/knowledge_base.json'):
1616
self.knowledge_base_path = knowledge_base_path
1717
self.knowledge_base = self.load_knowledge_base()
1818
self.model = SentenceTransformer('all-MiniLM-L6-v2')

0 commit comments

Comments
 (0)