Skip to content

Commit e674baf

Browse files
Merge pull request #4874 from yuwenma/promote-wip
llm-eval: run root Gemini CLI (and config) with different KCC repos
2 parents f032ab8 + 50b3617 commit e674baf

File tree

13 files changed

+115
-117
lines changed

13 files changed

+115
-117
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,4 @@ experiments/mcp/**/__pycache__
176176
experiments/mcp/**/*.egg-info
177177

178178
# MCP eval
179-
experiments/mcp-eval/**/__pycache__
179+
experiments/**/__pycache__/

experiments/llm-eval/evaluator.py

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,19 @@
2424

2525
STOP_TOKEN="soapoirejwpgoijrepoiqjt"
2626
class MCPEvaluator:
27-
def __init__(self, gemini_cli_path="gemini", mcp_config_path="~/.gemini/settings.json", use_mcp=True, log_path=None):
27+
def __init__(self, gemini_cli_path="gemini", src_mcp_config_path="~/.gemini/settings.json", use_mcp=True, log_path=None):
2828
self.gemini_cli_path = gemini_cli_path
29-
self.mcp_config_path = os.path.expanduser(mcp_config_path)
3029
self.use_mcp = use_mcp
30+
31+
if self.use_mcp:
32+
config_to_write = {}
33+
if src_mcp_config_path:
34+
config_path = os.path.expanduser(src_mcp_config_path)
35+
with open(config_path, 'r') as f:
36+
config_to_write = json.load(f)
37+
38+
self.mcp_config = config_to_write
39+
3140
self.log_path = log_path
3241
self.test_results = [] # To store detailed results
3342
self.metrics = defaultdict(float) # For aggregated metrics
@@ -37,22 +46,6 @@ def __init__(self, gemini_cli_path="gemini", mcp_config_path="~/.gemini/settings
3746
with open(self.log_path, 'w') as f:
3847
f.write("--- Evaluation Log ---\n\n")
3948

40-
def setup_mcp_config(self, config_data=None):
41-
"""
42-
Writes MCP server configuration to the Gemini CLI settings.json.
43-
If use_mcp is False, an empty config is written.
44-
"""
45-
expanded_path = os.path.expanduser(self.mcp_config_path)
46-
os.makedirs(os.path.dirname(expanded_path), exist_ok=True)
47-
48-
config_to_write = {}
49-
if self.use_mcp and config_data:
50-
config_to_write = config_data
51-
52-
with open(expanded_path, 'w') as f:
53-
json.dump(config_to_write, f, indent=4)
54-
print(f"MCP configuration written to: {expanded_path}")
55-
5649
def _get_git_root(self):
5750
try:
5851
return subprocess.check_output(["git", "rev-parse", "--show-toplevel"], text=True).strip()
@@ -91,7 +84,7 @@ def run_test_case(self, name, prompt, verifier_script=None, cleanup_script=None,
9184
effective_cwd = task_dir
9285
# Run setup script
9386
if setup_script:
94-
success, stdout, stderr = self._run_script(setup_script, task_dir)
87+
success, stdout, stderr = self._run_script(setup_script, effective_cwd)
9588
if not success:
9689
print(f"Setup script {setup_script} failed. Skipping test.")
9790
print(f" Stdout:\n{stdout}")
@@ -110,12 +103,22 @@ def run_test_case(self, name, prompt, verifier_script=None, cleanup_script=None,
110103
}
111104
self.test_results.append(result)
112105
return
113-
effective_cwd = os.path.join(task_dir, stdout.strip())
114106

115107
print(f"--- Running LLM: {prompt} ---")
116-
command = "/mcp" if self.use_mcp else None
108+
print(f"--- effective_cwd: {effective_cwd} ---")
109+
gemini_working_dir = os.path.join(effective_cwd, stdout.strip())
110+
print(f"--- LLM response: {stdout} ---")
111+
112+
if self.use_mcp:
113+
# Write MCP configuration to the .gemini/settings.json file
114+
mcp_config_path = os.path.join(gemini_working_dir, ".gemini", "settings.json")
115+
os.makedirs(os.path.dirname(mcp_config_path), exist_ok=True)
116+
with open(mcp_config_path, 'w') as f:
117+
json.dump(self.mcp_config, f, indent=4)
118+
print(f"MCP configuration written to: {mcp_config_path}")
119+
117120
start_time = time.time()
118-
stdout, stderr, returncode, llm_requests = self._run_gemini_command_internal(command, prompt, cwd=effective_cwd)
121+
stdout, stderr, returncode, llm_requests = self._run_gemini_command_internal(prompt, cwd=gemini_working_dir)
119122
end_time = time.time()
120123
latency = (end_time - start_time) * 1000 # in ms
121124
print(f"--- LLM response: {stdout} ---")
@@ -239,31 +242,30 @@ def get_summary(self):
239242
)
240243
return summary
241244

242-
def _run_gemini_command_internal(self, command, prompt, cwd=None):
245+
def _run_gemini_command_internal(self, prompt, cwd=None):
243246
"""
244247
Runs a Gemini CLI command and captures its output and error, printing it in real-time.
245248
The command is terminated when 'User notified.' is detected in the output.
246249
247250
Args:
248-
command (str): The Gemini CLI command (e.g., "/mcp", "ask").
249251
prompt (str): The prompt to send to Gemini CLI.
250252
cwd (str): The working directory to run the command in.
251253
252254
Returns:
253255
tuple: A tuple containing (stdout, stderr, returncode, llm_requests_count).
254256
"""
255-
gemini_cli_path = self.gemini_cli_path
256-
prompt=f"cd {cwd}\n" + prompt +f"\nOnce you are done. return {STOP_TOKEN}"
257-
args = [gemini_cli_path, "-d", "-p", prompt, "-y"] # Use -p for non-interactive mode
258-
259-
if command:
260-
args.insert(1, command) # Insert command after 'gemini'
261-
262257
env = os.environ.copy()
263258
# mcp is under a different python virtual env.
264259
env.pop('VIRTUAL_ENV', None)
260+
261+
effective_cwd = cwd if cwd else self.git_root
262+
env['MCPWorkDir'] = effective_cwd
265263

266-
effective_cwd = self.git_root
264+
gemini_cli_path = self.gemini_cli_path
265+
# Append the completion instruction: ask the LLM to return STOP_TOKEN once it is done.
266+
prompt_with_context = prompt + f"\nOnce you are done. return {STOP_TOKEN}"
267+
args = [gemini_cli_path, "-d", "-p", prompt_with_context, "-y"] # Use -p for non-interactive mode
268+
267269
try:
268270
process = subprocess.Popen(
269271
args,
@@ -279,8 +281,8 @@ def _run_gemini_command_internal(self, command, prompt, cwd=None):
279281
stdout_output = []
280282
stderr_output = []
281283

282-
streams = [process.stdout, process.stderr]
283-
terminated = False
284+
streams = [process.stdout, process.stderr]
285+
terminated = False
284286
while streams:
285287
readable, _, _ = select.select(streams, [], [], 1) # 1s timeout
286288
if not readable:
@@ -324,10 +326,10 @@ def _run_gemini_command_internal(self, command, prompt, cwd=None):
324326
llm_requests = len(re.findall(r"LLM API request sent", stderr))
325327
return stdout, stderr, process.returncode, llm_requests
326328

327-
except FileNotFoundError:
329+
except FileNotFoundError as e:
328330
print(f"Error: Gemini CLI not found at '{gemini_cli_path}'. "
329331
"Please ensure it's installed and in your system's PATH.")
330-
return "", f"Gemini CLI not found at '{gemini_cli_path}'", 127, 0
332+
return "", f"Gemini CLI not found at '{gemini_cli_path}': {e}", 127, 0
331333
except Exception as e:
332334
print(f"An error occurred: {e}")
333335
return "", str(e), 1, 0

experiments/llm-eval/main.py

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,6 @@ def compare_reports(mcp_df, no_mcp_df):
156156
# --- Run with MCP Disabled ---
157157
print("\n--- Starting Evaluation with MCP Disabled ---")
158158
no_mcp_evaluator = MCPEvaluator(gemini_cli_path=args.gemini_cli_path, use_mcp=False, log_path=args.log)
159-
no_mcp_evaluator.setup_mcp_config()
160159
for test in test_cases:
161160
no_mcp_evaluator.run_test_case(**test)
162161
no_mcp_results_df = no_mcp_evaluator.generate_report()
@@ -166,23 +165,7 @@ def compare_reports(mcp_df, no_mcp_df):
166165
else:
167166
# --- Run with MCP Enabled ---
168167
print("--- Starting Evaluation with MCP Enabled ---")
169-
mcp_config = {}
170-
if args.config_path:
171-
config_path = os.path.expanduser(args.config_path)
172-
if not os.path.isabs(config_path):
173-
config_path = os.path.join(git_root, config_path)
174-
175-
with open(config_path, 'r') as f:
176-
mcp_config = json.load(f)
177-
178-
# Make server directories absolute
179-
if "mcp_servers" in mcp_config:
180-
for server in mcp_config["mcp_servers"]:
181-
if "directory" in server and not os.path.isabs(server["directory"]):
182-
server["directory"] = os.path.join(git_root, server["directory"])
183-
184-
mcp_evaluator = MCPEvaluator(gemini_cli_path=args.gemini_cli_path, use_mcp=True, log_path=args.log)
185-
mcp_evaluator.setup_mcp_config(mcp_config)
168+
mcp_evaluator = MCPEvaluator(gemini_cli_path=args.gemini_cli_path, use_mcp=True, src_mcp_config_path=args.config_path, log_path=args.log)
186169
for test in test_cases:
187170
mcp_evaluator.run_test_case(**test)
188171
mcp_results_df = mcp_evaluator.generate_report()
@@ -194,7 +177,6 @@ def compare_reports(mcp_df, no_mcp_df):
194177
if not args.task:
195178
print("\n--- Starting Evaluation with MCP Disabled ---")
196179
no_mcp_evaluator = MCPEvaluator(gemini_cli_path=args.gemini_cli_path, use_mcp=False, log_path=args.log)
197-
no_mcp_evaluator.setup_mcp_config()
198180
for test in test_cases:
199181
no_mcp_evaluator.run_test_case(**test)
200182
no_mcp_results_df = no_mcp_evaluator.generate_report()

experiments/llm-eval/tasks/beta-promote/BigQueryReservationReservation-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="BigQueryReservationReservation-promote-bigqueryreservation"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/llm-eval/tasks/beta-promote/KMSImportJob-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="KMSImportJob-promote-kms"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/llm-eval/tasks/beta-promote/LoggingLink-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="LoggingLink-promote-logging"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/llm-eval/tasks/beta-promote/MetastoreBackup-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="MetastoreBackup-promote-metastore"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/llm-eval/tasks/beta-promote/MetastoreFederation-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="MetastoreFederation-promote-metastore"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/llm-eval/tasks/beta-promote/VMwareEngineExternalAddress-promote/setup.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,7 @@
1616
set -e
1717
SUBDIR="VMwareEngineExternalAddress-promote-vmwareengine"
1818
git clone git@github.com:GoogleCloudPlatform/k8s-config-connector.git $SUBDIR
19-
echo $SUBDIR
19+
rm -rf $SUBDIR/.gemini # Avoid using git cloned .gemini.
20+
21+
export MCPWorkDir=${SUBDIR} # Placeholder: the evaluator runs setup.sh in a temporary shell, so this export does not persist once the script exits.
22+
echo ${MCPWorkDir}

experiments/mcp/criteria/apis.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,6 @@
2828
"**Completeness**: The YAML must include all required fields as defined in the CRD.",
2929
"**Correctness**: User-provided configurations must be placed at the correct paths within the YAML structure.",
3030
"**Validity**: All values, especially names and labels, must adhere to Kubernetes syntax and naming conventions.",
31-
"**Minimalism**: The generated YAML should not include optional fields unless they are explicitly requested or required for a specific configuration.",
31+
"**Minimalism**: The generated YAML should not include optional fields unless they are explicitly requested or required for a specific configuration."
3232
]
3333
}

0 commit comments

Comments
 (0)