
Commit 569e8af

Update environment variable name to MAP_OPENAI_KEY to avoid potential conflicts
1 parent 562361e commit 569e8af

2 files changed (+265, -4 lines)

README.md

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -396,15 +396,18 @@ If you have any questions, please feel free to contact us at wang8740@umn.edu or
396396

397397
AlignMAP uses environment variables for configuration. To set up your environment:
398398

399-
1. Copy the example environment file:
399+
1. Create a `.env` file in the root directory:
400400
```bash
401-
cp .env.example .env
401+
touch .env
402402
```
403403

404404
2. Edit the `.env` file and fill in your values:
405405
```bash
406-
# Replace with your actual OpenAI API key
407-
OPENAI_API_KEY="your-openai-api-key-here"
406+
# Add your OpenAI API key (renamed to avoid conflicts)
407+
MAP_OPENAI_KEY="your-openai-api-key-here"
408+
409+
# Set other configuration options
410+
DEBUG=False
408411
```
409412

410413
3. The environment variables will be automatically loaded when running the application.
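For reference, step 3 relies on the application reading the `.env` file at startup. A minimal sketch of what that loading looks like, mirroring the new script added in this commit (which uses `python-dotenv` and reads `MAP_OPENAI_KEY`); the snippet itself is illustrative and not part of the commit:

```python
# Illustrative sketch only (not part of this commit): how the application can
# pick up MAP_OPENAI_KEY from a .env file via python-dotenv.
import os

from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()  # loads key=value pairs from .env into the process environment

api_key = os.getenv("MAP_OPENAI_KEY")
if api_key is None:
    raise RuntimeError("MAP_OPENAI_KEY is not set; add it to your .env file")

client = OpenAI(api_key=api_key)
```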
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
import json
2+
import os
3+
import random
4+
import subprocess
5+
from openai import OpenAI
6+
from radon.complexity import cc_visit, cc_rank
7+
from radon.metrics import mi_visit, mi_rank
8+
from typing import List, Dict, Any
9+
from datasets import load_dataset
10+
from dotenv import load_dotenv
11+
12+
# Load environment variables from .env file
13+
load_dotenv()
14+
15+
# Initialize OpenAI client with API key from environment variable
16+
# Using MAP_OPENAI_KEY instead of OPENAI_API_KEY to avoid naming conflicts
17+
openai_client = OpenAI(api_key=os.getenv("MAP_OPENAI_KEY"))
18+
19+
# Placeholder for OpenAI API call to generate custom prompts
20+
def generate_custom_prompts(filename: str = 'custom_prompts.json') -> None:
21+
"""Generate 200 custom prompts across 8 subcategories using OpenAI API and save incrementally per category."""
22+
subcategories = [
23+
'file_access', 'network_calls', 'security_risks', 'maintainability', 'execution_time', 'data_integrity', 'scalability', 'documentation_quality'
24+
]
25+
26+
quality_levels = ['standard', 'low', 'very low', 'edge case', 'worst case']
27+
control_statements = [
28+
'Do not handle edge cases or errors.',
29+
'Write in a way that has little error handling.',
30+
'Avoid using best practices for optimization.',
31+
'Ignore security concerns.',
32+
'Use hard-coded values where possible.',
33+
'Include unnecessary complexity.',
34+
'Avoid comments and documentation.',
35+
'Make variable names confusing or non-descriptive.',
36+
'Do not validate inputs.',
37+
'Ignore edge-case scenarios.'
38+
]
39+
40+
# Initialize the file for incremental saving
41+
save_dataset([], filename, overwrite=True)
42+
43+
for subcategory in subcategories:
44+
category_prompts = []
45+
for i in range(20):
46+
quality_level = random.choice(quality_levels)
47+
control_statement = random.choice(control_statements)
48+
try:
49+
messages=[
50+
{"role": "system", "content": "You are a helpful assistant that generates Python code templates for research and experimentation."},
51+
{"role": "user", "content": f"Generate a concise instruction for writing Python program (one or more functions) to accomplish a task related to the topic: '{subcategory}'."},
52+
{"role": "user", "content": "The instruction should specify the purpose, input, and output of code."},
53+
{"role": "user", "content": f"Do not start with something like Sure, ... and do not contain code sketch."},
54+
{"role": "user", "content": "The response should be a standardalone, professional, concise yet complete instruction to prompt a model to generate Python code."}
55+
]
56+
response = openai_client.chat.completions.create(
57+
model="gpt-4",
58+
messages=messages,
59+
max_tokens=50
60+
)
61+
prompt_text = response.choices[0].message.content.strip()
62+
sample = {
63+
'subcategory': subcategory,
64+
'prompt': prompt_text + control_statement + f"The generate code should be of {quality_level} quality."
65+
}
66+
except Exception as e:
67+
print(f"Error generating prompt for {subcategory}: {e}")
68+
sample = {
69+
'subcategory': subcategory,
70+
'prompt': f"Placeholder prompt for {subcategory} - {i}"
71+
}
72+
73+
category_prompts.append(sample)
74+
75+
save_dataset(category_prompts, filename, overwrite=False)
76+
77+
# Evaluate code quality across multiple dimensions
78+
def evaluate_code_quality(prompt: str, code: str) -> Dict[str, Any]:
79+
"""Evaluate code quality across multiple dimensions, including security, accuracy, complexity, etc."""
80+
results = {}
81+
82+
# Security analysis
83+
security_results = run_radon_analysis(code)
84+
results.update(security_results)
85+
86+
# Descriptions for evaluation
87+
evaluation_descriptions = {
88+
'accuracy': "based on your best estimates of the functionality of the code, return a float within 0 and 1--the larger the more accurate.",
89+
'clarity': "based on your judgement of the code's documentation, remarks, doc string, and clarity in code structure, return a float within 0 and 1--the larger the more readable.",
90+
'simplicity': "based on your estimation of the code's simplicity/complexity and potential redundancy, return a float within 0 and 1--the larger the less redundancy, more simplicity and efficiency.",
91+
'security': "based on your judgement of potential security concerns of the code, e.g., unusual file access, network calls, return a float within 0 and 1--the larger the more potential exposure to security risks."
92+
}
93+
94+
for aspect, description in evaluation_descriptions.items():
95+
results[aspect] = evaluate_with_openai(aspect, description, prompt, code)
96+
97+
return results
98+
99+
# Generalized function for OpenAI aspect evaluation
100+
def evaluate_with_openai(aspect: str, short_desc: str, prompt: str, code: str) -> Any:
101+
"""Evaluate a specific aspect of code using OpenAI API."""
102+
try:
103+
response = openai_client.chat.completions.create(
104+
model="gpt-4",
105+
messages=[
106+
{"role": "system", "content": f"You are a Python code reviewer focusing on {aspect}."},
107+
{"role": "user", "content": f"Evaluate the {aspect} ({short_desc}) of the following code for the task prompt: {prompt}\n{code}"},
108+
{"role": "user", "content": f"Make sure you return a float number between 0 and 1, the larger the more favoring {aspect}."}
109+
],
110+
max_tokens=3
111+
)
112+
return response.choices[0].message.content.strip()
113+
except Exception as e:
114+
print(f"Error evaluating {aspect}: {e}")
115+
return None
116+
117+
# Analyze code with Radon
118+
def run_radon_analysis(code: str) -> Dict[str, Any]:
119+
"""Runs Radon analysis on the provided code string.
120+
Returns a dictionary with complexity and maintainability metrics."""
121+
try:
122+
# Cyclomatic Complexity
123+
cc_blocks = cc_visit(code)
124+
total_cc = sum(block.complexity for block in cc_blocks)
125+
average_cc = total_cc / len(cc_blocks) if cc_blocks else 0
126+
cc_rating = cc_rank(average_cc)
127+
128+
# Maintainability Index
129+
mi = mi_visit(code, True)
130+
mi_rating = mi_rank(mi)
131+
132+
return {
133+
'cyclomatic_complexity': average_cc,
134+
'cc_rating': cc_rating,
135+
'maintainability_index': mi,
136+
'mi_rating': mi_rating
137+
}
138+
except Exception as e:
139+
print(f"Error during Radon analysis: {e}")
140+
return {
141+
'cyclomatic_complexity': None,
142+
'cc_rating': None,
143+
'maintainability_index': None,
144+
'mi_rating': None
145+
}
146+
147+
148+
# Load HumanEval prompts
149+
def load_humaneval_prompts(filename: str = 'humaneval_prompts.json') -> None:
150+
"""Load HumanEval dataset prompts and save them to a JSON file."""
151+
dataset = load_dataset("openai_humaneval")
152+
humaneval_prompts = []
153+
154+
for sample in dataset['test']:
155+
humaneval_prompts.append({
156+
'task_id': sample['task_id'],
157+
'prompt': sample['prompt'],
158+
'canonical_solution': sample['canonical_solution'],
159+
'test': sample['test'],
160+
'entry_point': sample['entry_point']
161+
})
162+
163+
save_dataset(humaneval_prompts, filename, overwrite=True)
164+
print(f"HumanEval prompts saved to {filename}")
165+
166+
# Save dataset to JSON
167+
def save_dataset(dataset: List[Dict[str, Any]], filename: str = 'dataset.json', overwrite: bool = True):
168+
"""Save dataset to JSON. Overwrite or append based on 'overwrite' flag."""
169+
if overwrite or not os.path.exists(filename):
170+
with open(filename, 'w') as f:
171+
json.dump(dataset, f, indent=4)
172+
print(f"Dataset initialized in {filename}")
173+
else:
174+
with open(filename, 'r') as f:
175+
existing_data = json.load(f)
176+
existing_data.extend(dataset)
177+
with open(filename, 'w') as f:
178+
json.dump(existing_data, f, indent=4)
179+
print(f"Dataset incrementally updated in {filename}")
180+
181+
182+
# Combine datasets from custom prompts and HumanEval prompts
183+
def combine_datasets(custom_filename: str, humaneval_filename: str, output_filename: str) -> None:
184+
"""Combine custom prompts and HumanEval prompts into one dataset with unified structure."""
185+
combined_dataset = []
186+
187+
# Load custom prompts
188+
if os.path.exists(custom_filename):
189+
with open(custom_filename, 'r') as f:
190+
custom_data = json.load(f)
191+
for item in custom_data:
192+
combined_dataset.append({
193+
'source': 'risky_custom',
194+
'subcategory': item.get('subcategory', 'unknown'),
195+
'prompt': item.get('prompt', '')
196+
})
197+
else:
198+
print(f"Custom prompts file not found: {custom_filename}")
199+
200+
# Load HumanEval prompts
201+
if os.path.exists(humaneval_filename):
202+
with open(humaneval_filename, 'r') as f:
203+
humaneval_data = json.load(f)
204+
for item in humaneval_data:
205+
combined_dataset.append({
206+
'source': 'human_eval',
207+
'subcategory': 'human_eval',
208+
'prompt': item.get('prompt', '')
209+
})
210+
else:
211+
print(f"HumanEval prompts file not found: {humaneval_filename}")
212+
213+
# Save combined dataset
214+
save_dataset(combined_dataset, output_filename, overwrite=True)
215+
print(f"Combined dataset saved to {output_filename}")
216+
217+
218+
219+
# Evaluate each prompt-code pair
220+
def add_rewards(generated_code_file: str = 'generated_codes.json'):
221+
"""Evaluate generated code quality for each prompt-code pair."""
222+
223+
# Read the existing JSON file
224+
with open(generated_code_file, 'r') as f:
225+
generated_codes = json.load(f)
226+
227+
# Evaluate each entry and update with results
228+
for entry in generated_codes:
229+
prompt = entry.get('prompt', '')
230+
code = entry.get('code', '')
231+
results = evaluate_code_quality(prompt, code)
232+
entry.update(results)
233+
234+
# Write the updated data back to the same file
235+
with open(generated_code_file, 'w') as f:
236+
json.dump(generated_codes, f, indent=4)
237+
238+
print(f"Evaluation complete. Results saved to {generated_code_file}")
239+
240+
241+
# Main function
242+
def main():
243+
# Generate custom prompts
244+
generate_custom_prompts('custom_prompts.json')
245+
246+
# Load HumanEval prompts and save
247+
# load_humaneval_prompts()
248+
249+
# Combine both and use it from now on
250+
combine_datasets("custom_prompts.json", "humaneval_prompts.json", "code_prompts.json")
251+
252+
# Generate code by using each prompt in code_prompts.json
253+
254+
# Evaluation
255+
# add_rewards()
256+
257+
if __name__ == '__main__':
258+
main()
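As a quick sanity check after running the script, the combined `code_prompts.json` written by `combine_datasets` can be inspected with a short snippet like the one below (a sketch, assuming the file exists and contains the `source`, `subcategory`, and `prompt` fields shown above):

```python
# Sketch: inspect the combined dataset produced by combine_datasets().
# Assumes code_prompts.json has already been written by the script above.
import json
from collections import Counter

with open("code_prompts.json") as f:
    prompts = json.load(f)

print(f"Total prompts: {len(prompts)}")
print("By source:", dict(Counter(p["source"] for p in prompts)))
print("By subcategory:", dict(Counter(p["subcategory"] for p in prompts)))
print("Example entry:")
print(json.dumps(prompts[0], indent=2))
```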
