Skip to content

Commit 7a7ebb0

Browse files
authored
Merge pull request #57 from data-exp-lab/fe-rag
GraphRAG Implementation
2 parents f329d92 + 975165e commit 7a7ebb0

File tree

16 files changed

+3325
-61
lines changed

16 files changed

+3325
-61
lines changed

.gitignore

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,60 @@ dist
1111
*temp
1212
*repo_metadata.json
1313
__pycache__
14+
15+
# Database files
1416
*.duckdb
1517
*.gexf
16-
*.gexf.gz
18+
*.gexf.gz
19+
20+
# GraphRAG specific
21+
backend/app/kuzu/
22+
backend/app/cache/
23+
rag/kuzu_db_*
24+
*.kuzu
25+
26+
# Python
27+
__pycache__/
28+
*.py[cod]
29+
*$py.class
30+
*.so
31+
.Python
32+
build/
33+
develop-eggs/
34+
dist/
35+
downloads/
36+
eggs/
37+
.eggs/
38+
lib/
39+
lib64/
40+
parts/
41+
sdist/
42+
var/
43+
wheels/
44+
*.egg-info/
45+
.installed.cfg
46+
*.egg
47+
MANIFEST
48+
49+
# Virtual environments
50+
venv/
51+
env/
52+
ENV/
53+
env.bak/
54+
venv.bak/
55+
56+
# IDE
57+
.vscode/
58+
.idea/
59+
*.swp
60+
*.swo
61+
*~
62+
63+
# OS
64+
.DS_Store
65+
.DS_Store?
66+
._*
67+
.Spotlight-V100
68+
.Trashes
69+
ehthumbs.db
70+
Thumbs.db

backend/app/main.py

Lines changed: 192 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1-
from flask import Flask, jsonify, request, send_file, url_for
1+
from flask import Flask, jsonify, request, send_file, url_for, Response, stream_template
22
from flask_cors import CORS
33
from services.topic_service import TopicService
44
from services.ai_service import AITopicProcessor
55
from services.gexf_node_service import GexfNodeGenerator
66
from services.edge_generation_service import EdgeGenerationService
7+
from services.graphrag_service import graphrag_service
78
import os
89
import asyncio
910
import re
1011
import json
12+
import time
1113

1214
app = Flask(__name__, static_folder='gexf', static_url_path='/gexf')
1315
CORS(
@@ -26,6 +28,43 @@
2628
gexf_node_service = GexfNodeGenerator()
2729
edge_generation_service = EdgeGenerationService()
2830

31+
# ---------------------------------------------------------------------------
# GraphRAG state shared across requests
# ---------------------------------------------------------------------------

# Progress snapshot polled by the /api/graphrag-progress SSE stream.
# "status" cycles through: idle -> running -> completed | error.
graphrag_progress = {
    "current_step": "",
    "current": 0,
    "total": 0,
    "message": "",
    "status": "idle",
}

# Flipped to True once /api/graphrag-setup finishes successfully.
graphrag_ready = False
@app.route("/api/graphrag-health", methods=["GET"])
def graphrag_health():
    """Check if GraphRAG backend is ready and set up.

    Returns 200 with ready=True once setup has completed,
    503 while it has not, and 500 on an unexpected error.
    """
    global graphrag_ready
    try:
        if graphrag_ready:
            payload = {
                "success": True,
                "ready": True,
                "message": "GraphRAG backend is ready",
            }
            return jsonify(payload)

        payload = {
            "success": True,
            "ready": False,
            "message": "GraphRAG backend is not set up",
        }
        return jsonify(payload), 503
    except Exception as e:
        payload = {
            "success": False,
            "ready": False,
            "error": str(e),
            "message": "Error checking GraphRAG health",
        }
        return jsonify(payload), 500
3069
@app.route("/api/process-topics", methods=["GET", "POST"])
3170
def process_topics():
@@ -524,15 +563,16 @@ def create_edges_on_graph():
524563
"shared_organization_enabled": false,
525564
"common_stargazers_enabled": true,
526565
"stargazer_threshold": 5,
527-
"use_and_logic": false,
528-
"strict_and_logic": true
529-
}
566+
"use_and_logic": false
567+
},
568+
"filtered_node_ids": ["node1", "node2", ...] // Optional: only consider these nodes for edge creation
530569
}
531570
"""
532571
try:
533572
data = request.get_json()
534573
gexf_content = data.get("gexfContent", "")
535574
criteria_config = data.get("criteria_config", {})
575+
filtered_node_ids = data.get("filtered_node_ids", None) # Get filtered node IDs
536576

537577
if not gexf_content:
538578
return jsonify({
@@ -582,7 +622,7 @@ def create_edges_on_graph():
582622
edge_service = EdgeGenerationService()
583623

584624
# Create edges based on the criteria
585-
edges_created = edge_service.create_edges_on_existing_graph(G, criteria_config)
625+
edges_created = edge_service.create_edges_on_existing_graph(G, criteria_config, filtered_node_ids)
586626

587627
# Save the updated graph
588628
import hashlib
@@ -627,6 +667,153 @@ def create_edges_on_graph():
627667
}), 500
628668

629669

670+
@app.route("/api/graphrag-reset-progress", methods=["POST", "OPTIONS"])
def graphrag_reset_progress_endpoint():
    """Reset GraphRAG progress status to initial state."""
    # CORS preflight: answer immediately with an empty 200.
    if request.method == "OPTIONS":
        return "", 200

    global graphrag_progress
    graphrag_progress = dict(
        current_step="Initializing...",
        current=0,
        total=100,
        message="Preparing GraphRAG setup",
        status="running",
    )
    return jsonify({"success": True, "message": "Progress reset"})
@app.route("/api/graphrag-progress", methods=["GET"])
def graphrag_progress_endpoint():
    """Server-Sent Events endpoint for GraphRAG progress updates."""

    def generate():
        # Stream snapshots of the shared progress dict until the setup
        # worker marks it finished, then close the stream.
        while True:
            yield f"data: {json.dumps(graphrag_progress)}\n\n"
            if graphrag_progress["status"] in ("completed", "error"):
                break
            # Poll twice a second for responsive client-side updates.
            time.sleep(0.5)

    return Response(generate(), mimetype="text/event-stream")
@app.route("/api/graphrag-setup", methods=["POST"])
def graphrag_setup_endpoint():
    """GraphRAG setup endpoint with progress tracking.

    Expects a JSON body with:
        provider:  LLM provider name (default "openai")
        apiKeys:   dict of credentials; must include "githubToken"
        graphFile: GEXF graph content to build the database from

    Returns 200 on success, 400 for missing parameters, 500 on failure.
    Progress is published through the module-level ``graphrag_progress``
    dict, which /api/graphrag-progress streams to clients.
    """
    global graphrag_progress, graphrag_ready

    try:
        # silent=True returns None (instead of raising) on a missing or
        # malformed JSON body, so bad requests get a 400 below, not a 500.
        data = request.get_json(silent=True) or {}

        # Extract parameters
        provider = data.get("provider", "openai")
        api_keys = data.get("apiKeys", {})
        graph_file = data.get("graphFile", "")

        if not graph_file:
            return jsonify({
                "success": False,
                "error": "Graph file is required",
                "message": "Please provide a graph file"
            }), 400

        github_token = api_keys.get("githubToken", "")
        if not github_token:
            return jsonify({
                "success": False,
                "error": "GitHub token is required",
                "message": "Please provide a GitHub personal access token"
            }), 400

        # A re-run invalidates any previous setup: clear the ready flag so
        # /api/graphrag-health cannot report stale readiness if this
        # attempt fails partway through.
        graphrag_ready = False

        # Reset progress
        graphrag_progress = {
            "current_step": "Starting setup...",
            "current": 0,
            "total": 100,
            "message": "Initializing GraphRAG setup",
            "status": "running"
        }

        # Setup database from GEXF content with progress updates
        setup_result = graphrag_service.setup_database_from_gexf_with_progress(
            graph_file, github_token, graphrag_progress
        )
        if not setup_result["success"]:
            graphrag_progress["status"] = "error"
            graphrag_progress["message"] = setup_result.get("error", "Setup failed")
            return jsonify(setup_result), 500

        # Initialize GraphRAG with the selected provider
        graphrag_progress["current_step"] = "Initializing AI system..."
        graphrag_progress["current"] = 90
        graphrag_progress["message"] = "Setting up AI analysis system"

        init_result = graphrag_service.initialize_graphrag(provider, api_keys)
        if not init_result["success"]:
            graphrag_progress["status"] = "error"
            graphrag_progress["message"] = init_result.get("error", "AI initialization failed")
            return jsonify(init_result), 500

        # Mark as completed and set ready flag
        graphrag_progress["status"] = "completed"
        graphrag_progress["current"] = 100
        graphrag_progress["message"] = "GraphRAG setup completed successfully!"
        graphrag_ready = True

        return jsonify({
            "success": True,
            "message": "GraphRAG setup completed successfully",
            "ready": True
        })

    except Exception as e:
        graphrag_progress["status"] = "error"
        graphrag_progress["message"] = str(e)
        return jsonify({
            "success": False,
            "error": str(e),
            "message": "An error occurred during GraphRAG setup"
        }), 500
@app.route("/api/graphrag", methods=["POST"])
def graphrag_endpoint():
    """GraphRAG endpoint for AI-powered graph analysis.

    Expects a JSON body with a non-empty "query" string; forwards it to
    the already-initialized GraphRAG service and returns its result.
    Returns 200 on success, 400 for a missing query, 500 on failure.
    """
    try:
        # silent=True returns None on a missing/malformed body, so the
        # client gets the 400 "Query is required" response, not a 500.
        data = request.get_json(silent=True) or {}

        # NOTE(review): "provider"/"apiKeys" used to be read here but were
        # never forwarded — query_graphrag only takes the query. The fields
        # are still accepted in the body for backward compatibility.
        query = data.get("query", "")

        if not query:
            return jsonify({
                "success": False,
                "error": "Query is required",
                "message": "Please provide a query"
            }), 400

        # Execute the query against the configured GraphRAG backend.
        query_result = graphrag_service.query_graphrag(query)
        if not query_result["success"]:
            return jsonify(query_result), 500

        return jsonify({
            "success": True,
            "result": query_result["result"]
        })

    except Exception as e:
        return jsonify({
            "success": False,
            "error": str(e),
            "message": "An error occurred while processing the GraphRAG query"
        }), 500
@app.route("/")
def home():
    """Root route."""
    return "Hello World!"

backend/app/services/edge_generation_service.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -148,13 +148,14 @@ def generate_edges_with_criteria(
148148

149149
return G, edge_stats
150150

151-
def create_edges_on_existing_graph(self, G: nx.Graph, criteria_config: Dict[str, any]) -> Dict[str, any]:
151+
def create_edges_on_existing_graph(self, G: nx.Graph, criteria_config: Dict[str, any], filtered_node_ids: List[str] = None) -> Dict[str, any]:
152152
"""
153153
Create edges on an existing graph based on specified criteria.
154154
155155
Args:
156156
G: Existing NetworkX graph with nodes
157157
criteria_config: Configuration for edge generation criteria
158+
filtered_node_ids: Optional list of node IDs to consider for edge creation (only these nodes will be used)
158159
159160
Returns:
160161
Dictionary with statistics about created edges
@@ -184,10 +185,27 @@ def create_edges_on_existing_graph(self, G: nx.Graph, criteria_config: Dict[str,
184185
# Remove all existing edges first
185186
G.remove_edges_from(list(G.edges()))
186187

187-
# Get all nodes
188-
nodes = list(G.nodes())
189-
if len(nodes) < 2:
190-
return {'message': 'Not enough nodes to create edges'}
188+
# Use filtered nodes if provided, otherwise use all nodes
189+
if filtered_node_ids is not None:
190+
# Validate that all filtered node IDs exist in the graph
191+
existing_nodes = set(G.nodes())
192+
valid_filtered_nodes = [node_id for node_id in filtered_node_ids if node_id in existing_nodes]
193+
194+
if len(valid_filtered_nodes) < 2:
195+
return {
196+
'message': f'Not enough valid filtered nodes to create edges. Found {len(valid_filtered_nodes)} valid nodes out of {len(filtered_node_ids)} provided.',
197+
'total_edges': 0,
198+
'total_nodes': len(valid_filtered_nodes)
199+
}
200+
201+
nodes = valid_filtered_nodes
202+
print(f"Using {len(nodes)} filtered nodes for edge creation out of {len(existing_nodes)} total nodes")
203+
else:
204+
# Use all nodes if no filtering is applied
205+
nodes = list(G.nodes())
206+
if len(nodes) < 2:
207+
return {'message': 'Not enough nodes to create edges'}
208+
print(f"Using all {len(nodes)} nodes for edge creation")
191209

192210
# Generate edges based on enabled criteria
193211
edge_stats = {}

0 commit comments

Comments
 (0)