README.md: 3 changes (2 additions & 1 deletion)
@@ -70,7 +70,8 @@ Install pre-commit with uv (not required if you already have pre-commit installed)

```
uv tool install pre-commit --with pre-commit-uv
-pre-commit --version
+pre-commit --version # check proper installation
+pre-commit install # activate hooks before each commit
```

Run the pre-commit hooks manually:
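(The command itself is collapsed in this diff; the usual invocation is `pre-commit run --all-files`, which runs every configured hook across the whole repository rather than only on staged files.)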
Binary file not shown.


Binary file not shown.

This file was deleted.

@@ -51,7 +51,7 @@ def suppress_output():
with open("saved_dspy_model/best_policy_extraction_model/program.pkl", "rb") as f:
policy_program = pickle.load(f)

with open("saved_dspy_model/best_geo_impact_extraction_model/program.pkl", "rb") as f:
with open("saved_dspy_model/best_geo_extraction_model/program.pkl", "rb") as f:
geo_program = pickle.load(f)

# --------------------------------------------------
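As a side note, these `program.pkl` files are what DSPy writes when a program is saved with `save_program=True` (as the training script below does). A minimal sketch of the equivalent load through DSPy's own API, reusing the same directories as above:

```python
import dspy

# Loads the architecture plus optimized prompts/demos from the saved directory
# (the directory containing program.pkl and metadata.json).
policy_program = dspy.load("saved_dspy_model/best_policy_extraction_model/")
geo_program = dspy.load("saved_dspy_model/best_geo_extraction_model/")
```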
@@ -123,12 +123,7 @@ def suppress_output():
"policy_list": policy_list,
"regional_group": geo_dict.get("regional_group"),
"geographical_scopes": geo_dict.get("geographical_scopes", []),
"main_country_focus": geo_dict.get("main_country_focus", []),
"human_needs": geo_dict.get("human_needs", []),
"natural_resources": geo_dict.get("natural_ressource", []),
"wellbeing": geo_dict.get("wellbeing", []),
"justice": geo_dict.get("justice_consideration", []),
"planetary_boundaries": geo_dict.get("planetary_boundaries", []),
"main_country_focus": geo_dict.get("main_country_focus", [])
}

buffered_records.append(record)
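Net effect of this hunk: the buffered record now carries only the policy and geography fields; the taxonomy fields (human_needs, natural_resources, wellbeing, justice, planetary_boundaries) are dropped from the output schema.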
@@ -14,44 +14,44 @@

# 1. Configuration
### Scaleway API
"""

lm = dspy.LM(
model="mistral/mistral-small-3.2-24b-instruct-2506:fp8",
model="mistral/mistral-small-3.2-24b-instruct-2506",
api_key=os.getenv('SCALEWAY_API_KEY'),
api_base="https://c1b66caa-347e-448c-a54c-d3fb43889a62.ifr.fr-par.scaleway.com"
api_base="https://api.scaleway.ai/a2dc0d31-c47f-47f1-b0b9-9877dd4eb2b5/v1"
)
"""

### OpenAI API
lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv('OPENAI_API_KEY'))
#lm = dspy.LM("openai/gpt-4o-mini", api_key=os.getenv('OPENAI_API_KEY'))


dspy.configure(lm=lm)
model_used = lm.model.replace("/","_")
# 2. Data Loading
golden_dataset = []

-if os.path.exists('model_training_data/conclusions&pollitiques_gold.jsonl'):
-    with open('model_training_data/conclusions&pollitiques_gold.jsonl', 'r', encoding='utf-8') as f:
+if os.path.exists('dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl'):
+    with open('dspy_policies_and_taxonomy_extraction/model_training_data/gold_policy.jsonl', 'r', encoding='utf-8') as f:
        for line in f:
            data = json.loads(line)
            example = dspy.Example(question=data['question'], response=data['response'])
            golden_dataset.append(example.with_inputs('question'))
else:
-    exit ("Data file 'model_training_data/conclusions&pollitiques_gold.jsonl' not found.")
+    exit ("Data file 'model_training_data/gold_policy.jsonl' not found.")

syntetic_dataset = []
-if os.path.exists('model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl'):
-    with open('model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl', 'r', encoding='utf-8') as f:
+if os.path.exists('dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl'):
+    with open('dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl', 'r', encoding='utf-8') as f:
        for line in f:
            data = json.loads(line)
            example = dspy.Example(question=data['question'], response=data['response'])
            syntetic_dataset.append(example.with_inputs('question'))
else:
-    exit ("Data file 'model_training_data/conclusions&pollitiques_synthetiques_diversifies.jsonl' not found.")
+    exit ("Data file 'dspy_policies_and_taxonomy_extraction/model_training_data/synthetic_policy.jsonl' not found.")

# Best score with a small synthetic dataset
-trainset = syntetic_dataset
-devset = golden_dataset
+trainset = golden_dataset[40:]
+devset = golden_dataset[:40]

print(f"Training examples: {len(trainset)}, Validation examples: {len(devset)}")

@@ -106,7 +106,7 @@ def __call__(self,example, pred, trace=None):
print("Starting optimization...")

optimizer = MIPROv2(metric=metric_fn
-    #,auto="heavy"
+    ,auto="heavy"
)

compiled_program = optimizer.compile(MonProgramme(), trainset=trainset)
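Enabling `auto="heavy"` selects MIPROv2's largest built-in budget preset (the alternatives are `"light"` and `"medium"`), so this run explores more instruction and demo candidates than before. The metric passed in must be callable as `(example, pred, trace=None)`; a minimal stand-in, assuming a simple exact-match score (the PR's actual metric class is only partially visible in this diff):

```python
def exact_match_metric(example, pred, trace=None):
    # Score 1.0 when the predicted response matches the gold response exactly;
    # MIPROv2 maximizes the mean of this score over the training set.
    return float(example.response.strip() == pred.response.strip())
```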
@@ -121,7 +121,7 @@ def __call__(self,example, pred, trace=None):
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")


optimized_score = optimized_evaluator(compiled_program,save_as_json=f"saved_dspy_model/policy/{model_used}{timestamp}.json")
optimized_score = optimized_evaluator(compiled_program,save_as_json=f"dspy_policies_and_taxonomy_extraction/saved_dspy_model/policy/{timestamp}.json")
print(optimized_score)

score_str = f"{round(optimized_score.score,2)}".replace(".", "_")
@@ -134,6 +134,6 @@ def __call__(self,example, pred, trace=None):
print(f"Final Score on Validation Set (optimized): {optimized_score}%")

# --- Saving the optimized model ---
model_path = f"saved_dspy_model/policy/{score_str}"
model_path = "saved_dspy_model/policy_model/"
compiled_program.save(model_path,save_program=True)
print(f"Optimized model saved to {model_path}")