Skip to content

Commit 33815bf

Browse files
committed
lint 4x
1 parent 9179ac1 commit 33815bf

File tree

1 file changed

+27
-11
lines changed
  • supporting-blog-content/github-assistant

1 file changed

+27
-11
lines changed

supporting-blog-content/github-assistant/index.py

Lines changed: 27 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,16 @@
1-
from llama_index.core import Document, Settings, SimpleDirectoryReader, StorageContext, VectorStoreIndex
2-
from llama_index.core.node_parser import SentenceSplitter, CodeSplitter, MarkdownNodeParser, JSONNodeParser
1+
from llama_index.core import (
2+
Document,
3+
Settings,
4+
SimpleDirectoryReader,
5+
StorageContext,
6+
VectorStoreIndex,
7+
)
8+
from llama_index.core.node_parser import (
9+
SentenceSplitter,
10+
CodeSplitter,
11+
MarkdownNodeParser,
12+
JSONNodeParser,
13+
)
314
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
415
from dotenv import load_dotenv
516
from llama_index.embeddings.openai import OpenAIEmbedding
@@ -17,9 +28,6 @@
1728
import glob
1829
import os
1930

20-
#logging.basicConfig(stream=sys.stdout, level=logging.INFO)
21-
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
22-
#logging.getLogger("elasticsearch").setLevel(logging.DEBUG)
2331

2432
nest_asyncio.apply()
2533

@@ -38,16 +46,19 @@ def clone_repository(owner, repo, branch, base_path="/tmp"):
3846
raise ValueError(
3947
"Branch is not provided and GITHUB_BRANCH environment variable is not set."
4048
)
41-
49+
50+
4251
local_repo_path = os.path.join(base_path, owner, repo)
4352
clone_url = f"https://github.com/{owner}/{repo}.git"
44-
53+
54+
4555
if os.path.exists(local_repo_path):
4656
print(f"Repository already exists at {local_repo_path}. Skipping clone.")
4757
return local_repo_path
4858

4959
attempts = 3
50-
60+
61+
5162
for attempt in range(attempts):
5263
try:
5364
os.makedirs(local_repo_path, exist_ok=True)
@@ -65,6 +76,7 @@ def clone_repository(owner, repo, branch, base_path="/tmp"):
6576
else:
6677
raise Exception("Failed to clone repository after multiple attempts")
6778

79+
6880
def print_docs_and_nodes(docs, nodes):
6981
print("\n=== Documents ===\n")
7082
for doc in docs:
@@ -76,11 +88,13 @@ def print_docs_and_nodes(docs, nodes):
7688
print(f"Node ID: {node.id_}")
7789
print(f"Node Content:\n{node.text}\n\n---\n")
7890

91+
7992
def collect_and_print_file_summary(file_summary):
8093
print("\n=== File Summary ===\n")
8194
for summary in file_summary:
8295
print(summary)
8396

97+
8498
def parse_documents():
8599
owner = os.getenv("GITHUB_OWNER")
86100
repo = os.getenv("GITHUB_REPO")
@@ -91,7 +105,8 @@ def parse_documents():
91105
raise ValueError(
92106
"GITHUB_OWNER and GITHUB_REPO environment variables must be set."
93107
)
94-
108+
109+
95110
local_repo_path = clone_repository(owner, repo, branch, base_path)
96111

97112
nodes = []
@@ -127,7 +142,8 @@ def parse_documents():
127142
file_summary.append(
128143
f"Found {len(matching_files)} {extension_list} files in the repository."
129144
)
130-
145+
146+
131147
loader = SimpleDirectoryReader(
132148
input_dir=local_repo_path, required_exts=extensions, recursive=True
133149
)
@@ -166,7 +182,7 @@ def get_es_vector_store():
166182
return es_vector_store
167183
except elastic_transport.ConnectionTimeout:
168184
print(f"Connection attempt {attempt + 1}/{retries} timed out. Retrying...")
169-
time.sleep(10)
185+
time.sleep(10)
170186
raise Exception("Failed to initialize Elasticsearch store after multiple attempts")
171187

172188

0 commit comments

Comments
 (0)