Skip to content

Commit 056f817

Browse files
committed
fmt
1 parent 74572f1 commit 056f817

File tree

3 files changed

+84
-57
lines changed

3 files changed

+84
-57
lines changed

supporting-blog-content/github-assistant/evaluation.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,29 @@
2020
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
2121
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
2222

23-
parser = argparse.ArgumentParser(description='Process documents and questions for evaluation.')
24-
parser.add_argument('--num_documents', type=int, default=None,
25-
help='Number of documents to process (default: all)')
26-
parser.add_argument('--skip_documents', type=int, default=0,
27-
help='Number of documents to skip at the beginning (default: 0)')
28-
parser.add_argument('--num_questions', type=int, default=None,
29-
help='Number of questions to process (default: all)')
30-
parser.add_argument('--skip_questions', type=int, default=0,
31-
help='Number of questions to skip at the beginning (default: 0)')
32-
parser.add_argument('--process_last_questions', action='store_true',
33-
help='Process last N questions instead of first N')
23+
parser = argparse.ArgumentParser(description="Process documents and questions for evaluation.")
24+
parser.add_argument("--num_documents",
25+
type=int,
26+
default=None,
27+
help="Number of documents to process (default: all)")
28+
parser.add_argument("--skip_documents",
29+
type=int,
30+
default=0,
31+
help="Number of documents to skip at the beginning (default: 0)")
32+
parser.add_argument("--num_questions",
33+
type=int,
34+
default=None,
35+
help="Number of questions to process (default: all)")
36+
parser.add_argument("--skip_questions",
37+
type=int,
38+
default=0,
39+
help="Number of questions to skip at the beginning (default: 0)")
40+
parser.add_argument("--process_last_questions",
41+
action="store_true",
42+
help="Process last N questions instead of first N")
3443
args = parser.parse_args()
3544

36-
load_dotenv('.env')
45+
load_dotenv(".env")
3746

3847
reader = SimpleDirectoryReader("/tmp/elastic/production-readiness-review")
3948
documents = reader.load_data()
@@ -43,10 +52,10 @@
4352

4453

4554
if args.skip_documents > 0:
46-
documents = documents[args.skip_documents:]
55+
documents = documents[args.skip_documents :]
4756

4857
if args.num_documents is not None:
49-
documents = documents[:args.num_documents]
58+
documents = documents[: args.num_documents]
5059

5160
print(f"Number of documents loaded: {len(documents)}")
5261

@@ -57,7 +66,7 @@
5766
try:
5867
eval_questions = data_generator.generate_questions_from_nodes()
5968
if isinstance(eval_questions, str):
60-
eval_questions_list = eval_questions.strip().split('\n')
69+
eval_questions_list = eval_questions.strip().split("\n")
6170
else:
6271
eval_questions_list = eval_questions
6372
eval_questions_list = [q for q in eval_questions_list if q.strip()]
@@ -100,32 +109,32 @@ def display_eval_df(
100109
eval_result_relevancy: EvaluationResult,
101110
eval_result_faith: EvaluationResult,
102111
) -> None:
103-
relevancy_feedback = getattr(eval_result_relevancy, 'feedback', '')
104-
relevancy_passing = getattr(eval_result_relevancy, 'passing', False)
105-
relevancy_passing_str = 'Pass' if relevancy_passing else 'Fail'
112+
relevancy_feedback = getattr(eval_result_relevancy, "feedback", "")
113+
relevancy_passing = getattr(eval_result_relevancy, "passing", False)
114+
relevancy_passing_str = "Pass" if relevancy_passing else "Fail"
106115

107116
relevancy_score = 1.0 if relevancy_passing else 0.0
108117

109-
faithfulness_feedback = getattr(eval_result_faith, 'feedback', '')
110-
faithfulness_passing_bool = getattr(eval_result_faith, 'passing', False)
111-
faithfulness_passing = 'Pass' if faithfulness_passing_bool else 'Fail'
118+
faithfulness_feedback = getattr(eval_result_faith, "feedback", "")
119+
faithfulness_passing_bool = getattr(eval_result_faith, "passing", False)
120+
faithfulness_passing = "Pass" if faithfulness_passing_bool else "Fail"
112121

113122
def wrap_text(text, width=50):
114123
if text is None:
115-
return ''
124+
return ""
116125
text = str(text)
117-
text = text.replace('\r', '')
118-
lines = text.split('\n')
126+
text = text.replace("\r", "")
127+
lines = text.split("\n")
119128
wrapped_lines = []
120129
for line in lines:
121130
wrapped_lines.extend(textwrap.wrap(line, width=width))
122-
wrapped_lines.append('')
123-
return '\n'.join(wrapped_lines)
131+
wrapped_lines.append("")
132+
return "\n".join(wrapped_lines)
124133

125134
if response.source_nodes:
126135
source_content = wrap_text(response.source_nodes[0].node.get_content())
127136
else:
128-
source_content = ''
137+
source_content = ""
129138

130139
eval_data = {
131140
"Query": wrap_text(query),
@@ -141,7 +150,7 @@ def wrap_text(text, width=50):
141150
eval_df = pd.DataFrame([eval_data])
142151

143152
print("\nEvaluation Result:")
144-
print(tabulate(eval_df, headers='keys', tablefmt='grid', showindex=False, stralign='left'))
153+
print(tabulate(eval_df, headers="keys", tablefmt="grid", showindex=False, stralign="left"))
145154

146155
query_engine = vector_index.as_query_engine(llm=llm)
147156

supporting-blog-content/github-assistant/index.py

Lines changed: 40 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919

2020
#logging.basicConfig(stream=sys.stdout, level=logging.INFO)
2121
#logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
22-
#logging.getLogger('elasticsearch').setLevel(logging.DEBUG)
22+
#logging.getLogger("elasticsearch").setLevel(logging.DEBUG)
2323

2424
nest_asyncio.apply()
2525

26-
load_dotenv('.env')
26+
load_dotenv(".env")
2727

2828
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
2929
Settings.chunk_lines = 1024
@@ -35,7 +35,9 @@
3535
def clone_repository(owner, repo, branch, base_path="/tmp"):
3636
branch = branch or os.getenv("GITHUB_BRANCH")
3737
if not branch:
38-
raise ValueError("Branch is not provided and GITHUB_BRANCH environment variable is not set.")
38+
raise ValueError(
39+
"Branch is not provided and GITHUB_BRANCH environment variable is not set."
40+
)
3941

4042
local_repo_path = os.path.join(base_path, owner, repo)
4143
clone_url = f"https://github.com/{owner}/{repo}.git"
@@ -50,7 +52,9 @@ def clone_repository(owner, repo, branch, base_path="/tmp"):
5052
try:
5153
os.makedirs(local_repo_path, exist_ok=True)
5254
print(f"Attempting to clone repository... Attempt {attempt + 1}")
53-
subprocess.run(["git", "clone", "-b", branch, clone_url, local_repo_path], check=True)
55+
subprocess.run(
56+
["git", "clone", "-b", branch, clone_url, local_repo_path], check=True
57+
)
5458
print(f"Repository cloned into {local_repo_path}.")
5559
return local_repo_path
5660
except subprocess.CalledProcessError:
@@ -78,53 +82,63 @@ def collect_and_print_file_summary(file_summary):
7882
print(summary)
7983

8084
def parse_documents():
81-
owner = os.getenv('GITHUB_OWNER')
82-
repo = os.getenv('GITHUB_REPO')
83-
branch = os.getenv('GITHUB_BRANCH')
84-
base_path = os.getenv('BASE_PATH', "/tmp")
85+
owner = os.getenv("GITHUB_OWNER")
86+
repo = os.getenv("GITHUB_REPO")
87+
branch = os.getenv("GITHUB_BRANCH")
88+
base_path = os.getenv("BASE_PATH", "/tmp")
8589

8690
if not owner or not repo:
87-
raise ValueError("GITHUB_OWNER and GITHUB_REPO environment variables must be set.")
91+
raise ValueError(
92+
"GITHUB_OWNER and GITHUB_REPO environment variables must be set."
93+
)
8894

8995
local_repo_path = clone_repository(owner, repo, branch, base_path)
9096

9197
nodes = []
9298
file_summary = []
9399

94-
ts_parser = get_parser('typescript')
95-
py_parser = get_parser('python')
96-
go_parser = get_parser('go')
97-
js_parser = get_parser('javascript')
98-
bash_parser = get_parser('bash')
99-
yaml_parser = get_parser('yaml')
100+
ts_parser = get_parser("typescript")
101+
py_parser = get_parser("python")
102+
go_parser = get_parser("go")
103+
js_parser = get_parser("javascript")
104+
bash_parser = get_parser("bash")
105+
yaml_parser = get_parser("yaml")
100106

101107
parsers_and_extensions = [
102108
(SentenceSplitter(), [".md"]),
103-
(CodeSplitter(language='python', parser=py_parser), [".py", ".ipynb"]),
104-
(CodeSplitter(language='typescript', parser=ts_parser), [".ts"]),
105-
(CodeSplitter(language='go', parser=go_parser), [".go"]),
106-
(CodeSplitter(language='javascript', parser=js_parser), [".js"]),
107-
(CodeSplitter(language='bash', parser=bash_parser), [".bash", ",sh"]),
108-
(CodeSplitter(language='yaml', parser=yaml_parser), [".yaml", ".yml"]),
109+
(CodeSplitter(language="python", parser=py_parser), [".py", ".ipynb"]),
110+
(CodeSplitter(language="typescript", parser=ts_parser), [".ts"]),
111+
(CodeSplitter(language="go", parser=go_parser), [".go"]),
112+
(CodeSplitter(language="javascript", parser=js_parser), [".js"]),
113+
(CodeSplitter(language="bash", parser=bash_parser), [".bash", ",sh"]),
114+
(CodeSplitter(language="yaml", parser=yaml_parser), [".yaml", ".yml"]),
109115
(JSONNodeParser(), [".json"]),
110116
]
111117

112118
for parser, extensions in parsers_and_extensions:
113119
matching_files = []
114120
for ext in extensions:
115-
matching_files.extend(glob.glob(f"{local_repo_path}/**/*{ext}", recursive=True))
121+
matching_files.extend(
122+
glob.glob(f"{local_repo_path}/**/*{ext}", recursive=True)
123+
)
116124

117125
if len(matching_files) > 0:
118-
file_summary.append(f"Found {len(matching_files)} {', '.join(extensions)} files in the repository.")
119-
loader = SimpleDirectoryReader(input_dir=local_repo_path, required_exts=extensions, recursive=True)
126+
file_summary.append(
127+
f"Found {len(matching_files)} {', '.join(extensions)} files in the repository."
128+
)
129+
loader = SimpleDirectoryReader(
130+
input_dir=local_repo_path, required_exts=extensions, recursive=True
131+
)
120132
docs = loader.load_data()
121133
parsed_nodes = parser.get_nodes_from_documents(docs)
122134

123135
print_docs_and_nodes(docs, parsed_nodes)
124136

125137
nodes.extend(parsed_nodes)
126138
else:
127-
file_summary.append(f"No {', '.join(extensions)} files found in the repository.")
139+
file_summary.append(
140+
f"No {', '.join(extensions)} files found in the repository."
141+
)
128142

129143
collect_and_print_file_summary(file_summary)
130144
print("\n")
@@ -144,7 +158,7 @@ def get_es_vector_store():
144158
es_cloud_id=es_cloud_id,
145159
es_user=es_user,
146160
es_password=es_password,
147-
batch_size=100
161+
batch_size=100,
148162
)
149163
print("Elasticsearch store initialized.")
150164
return es_vector_store

supporting-blog-content/github-assistant/query.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def run_query_sync():
2121
llm=openai_llm,
2222
similarity_top_k=3,
2323
streaming=False,
24-
response_mode="tree_summarize"
24+
response_mode="tree_summarize",
2525
)
2626

2727
bundle = QueryBundle(query, embedding=embed_model.get_query_embedding(query))
@@ -31,9 +31,13 @@ def run_query_sync():
3131
except Exception as e:
3232
print(f"An error occurred while running the query: {e}")
3333
finally:
34-
if hasattr(openai_llm, 'client') and isinstance(openai_llm.client, httpx.Client):
34+
if hasattr(openai_llm, "client") and isinstance(
35+
openai_llm.client, httpx.Client
36+
):
3537
openai_llm.client.close()
36-
if hasattr(embed_model, 'client') and isinstance(embed_model.client, httpx.Client):
38+
if hasattr(embed_model, "client") and isinstance(
39+
embed_model.client, httpx.Client
40+
):
3741
embed_model.client.close()
3842
if hasattr(es_vector_store, "close"):
3943
es_vector_store.close()

0 commit comments

Comments (0)