Added preferences and tests

Michael · Michael · commit a06fa2ade5d3 · 2025-07-03T17:55:04.000+10:00
diff --git a/.gitignore b/.gitignore
@@ -35,3 +35,6 @@ var/
 .installed.cfg
 *.egg
 MANIFES
+
+# Test files
+*.xlsx
diff --git a/team_former/make_teams.py b/team_former/make_teams.py
@@ -7,12 +7,51 @@
 from ortools.sat.python import cp_model
 
 
+def parse_preferences(df):
+    """Parse positive and negative preferences from the DataFrame columns.
+
+    Args:
+        df: DataFrame with a ``Student_ID`` column and, optionally,
+            ``Prefer_With`` / ``Prefer_Not_With`` cells of comma-separated IDs.
+
+    Returns:
+        ``(positive_prefs, negative_prefs)``: lists of ``(chooser, target)``
+        pairs, each student given as the index label of their row in ``df``.
+
+    Raises:
+        KeyError: If a listed ID matches no ``Student_ID`` (message names it).
+    """
+    # Strip the keys as well: every lookup below uses a stripped ID.
+    id_to_index = {str(sid).strip(): idx for idx, sid in df["Student_ID"].items()}
+
+    def collect(column):
+        """Return (chooser, target) index pairs for one preference column."""
+        pairs = []
+        if column not in df.columns:
+            return pairs
+        for sid, cell in zip(df["Student_ID"], df[column]):
+            if pd.isna(cell):
+                continue
+            chooser = id_to_index[str(sid).strip()]
+            for target in (part.strip() for part in str(cell).split(",")):
+                if not target:
+                    continue
+                if target not in id_to_index:
+                    raise KeyError(f"Unknown Student_ID {target!r} in {column}")
+                pairs.append((chooser, id_to_index[target]))
+        return pairs
+
+    return collect("Prefer_With"), collect("Prefer_Not_With")
+
+
 def allocate_teams(
     *,
     input_file="students.xlsx",
     sheet_name=0,
     output_file="class_teams.xlsx",
     wam_weight=0.05,
+    pos_pref_weight=0.05,
+    neg_pref_weight=0.1,
     min_team_size=4,
     max_team_size=5,
     max_solve_time=60,
@@ -25,6 +64,8 @@ def allocate_teams(
         sheet_name (int or str): Sheet index or name.
         output_file (str): Output Excel file with team assignments.
         wam_weight (float): Weight for WAM balancing in the objective.
+        pos_pref_weight (float): Weight for positive preference balancing in the objective.
+        neg_pref_weight (float): Weight for negative preference balancing in the objective.
         min_team_size (int): Minimum number of students per team.
         max_team_size (int): Maximum number of students per team.
         max_solve_time (int): Solver timeout in seconds.
@@ -41,6 +82,8 @@ def allocate_teams(
     global_avg_wam = sum(wams) // len(wams)
     max_teams = num_students // min_team_size
 
+    pos_preferences, neg_preferences = parse_preferences(student_df)
+
     model = cp_model.CpModel()
 
     # Variables
@@ -103,8 +146,34 @@ def allocate_teams(
         model.AddMultiplicationEquality(squared_diff, [diff, diff])
         squared_deviation_terms.append(squared_diff)
 
+    pref_bonus_terms = []  # student indices who prefer each other
+    for i, j in pos_preferences:
+        for team in range(max_teams):
+            together = model.NewBoolVar(f"prefer_{i}_{j}_team_{team}")
+            model.AddBoolAnd([assign[i, team], assign[j, team]]).OnlyEnforceIf(together)
+            model.AddBoolOr(
+                [assign[i, team].Not(), assign[j, team].Not()]
+            ).OnlyEnforceIf(together.Not())
+            pref_bonus_terms.append(together)
+
+    negative_terms = []
+    for i, j in neg_preferences:
+        together_vars = []
+        for team in range(max_teams):
+            both = model.NewBoolVar(f"neg_pref_{i}_{j}_team_{team}")
+            model.AddBoolAnd([assign[i, team], assign[j, team]]).OnlyEnforceIf(both)
+            both_off = [assign[i, team].Not(), assign[j, team].Not()]
+            model.AddBoolOr(both_off).OnlyEnforceIf(both.Not())
+            together_vars.append(both)
+        # Link inside the pair loop so every pair (not only the last) is penalised.
+        negative_terms.append(model.NewBoolVar(f"neg_pref_{i}_{j}_some_team"))
+        model.AddMaxEquality(negative_terms[-1], together_vars)
+
     model.Minimize(
-        sum(team_used) + int(wam_weight * 1000) * sum(squared_deviation_terms)
+        sum(team_used)
+        + int(wam_weight * 1000) * sum(squared_deviation_terms)
+        - pos_pref_weight * sum(pref_bonus_terms)
+        + neg_pref_weight * sum(negative_terms)
     )
 
     # Solve
diff --git a/tests/test_teams.py b/tests/test_teams.py
@@ -1,5 +1,6 @@
 """Unit tests for the team allocation logic using pytest and Faker."""
 
+import random
 from unittest import mock
 
 import pandas as pd
@@ -9,16 +10,42 @@
 from team_former.make_teams import allocate_teams
 
 
+def generate_random_preferences(
+    student_ids, p_pos=0.4, p_neg=0.3, max_pos=3, max_neg=2
+):
+    """Draw random "with" / "not with" peer lists for every student.
+
+    Returns two dicts keyed by student ID; a student who draws nothing maps to [].
+    """
+    wanted = {sid: [] for sid in student_ids}
+    unwanted = {sid: [] for sid in student_ids}
+    draws = ((p_pos, max_pos, wanted), (p_neg, max_neg, unwanted))
+    for sid in student_ids:
+        peers = [peer for peer in student_ids if peer != sid]
+        # RNG call order per draw (chance, count, sample) keeps seeded runs stable.
+        for chance, cap, bucket in draws:
+            if random.random() < chance and peers:
+                count = random.randint(1, min(cap, len(peers)))
+                bucket[sid] = random.sample(peers, k=count)
+    return wanted, unwanted
+
+
 @pytest.fixture
 def fake_student_df_fixture():
-    """Generate a fake student dataframe with Faker."""
+    """Fixture to generate a fake student DataFrame with preferences."""
     fake = Faker()
     Faker.seed(1234)
+    random.seed(1234)
+
+    n = 100
+    student_ids = [f"S{i+1}" for i in range(n)]
+    pos_prefs, neg_prefs = generate_random_preferences(student_ids)
 
     students = []
-    for _ in range(100):
+    for sid in student_ids:
         students.append(
             {
+                "Student_ID": sid,
                 "first_name": fake.first_name(),
                 "last_name": fake.last_name(),
                 "email": fake.email(),
@@ -34,65 +61,143 @@ def fake_student_df_fixture():
                     2,
                 ),
                 "lab": fake.random_int(min=1, max=4),
+                "Prefer_With": ", ".join(pos_prefs[sid]) if pos_prefs[sid] else "",
+                "Prefer_Not_With": ", ".join(neg_prefs[sid]) if neg_prefs[sid] else "",
             }
         )
-
     return pd.DataFrame(students)
 
 
-def test_allocate_teams_returns_df(df_in=fake_student_df_fixture):
+def report_wam_balance(df_out):
+    """Print each team's mean WAM, the overall mean, and the largest gap."""
+    overall_mean = df_out["wam"].mean()
+    team_wams = df_out.groupby("team")["wam"].mean()
+    worst_gap = abs(team_wams - overall_mean).max()
+
+    print("\n📊 WAM balance report per team:")
+    for team_id, mean_wam in team_wams.items():
+        print(f"  Team {team_id}: avg WAM = {mean_wam:.2f}")
+    print(f"\n🎯 Overall mean WAM: {overall_mean:.2f}")
+    print(f"⚖️ Max deviation: {worst_gap:.2f}")
+
+
+def test_allocate_teams_returns_df(request):
     """Check that allocate_teams returns a valid DataFrame with a team column."""
-    with mock.patch("pandas.read_excel", return_value=df_in), mock.patch(
+    fake_df = request.getfixturevalue("fake_student_df_fixture")
+    with mock.patch("pandas.read_excel", return_value=fake_df), mock.patch(
         "pandas.DataFrame.to_excel"
     ):
-
         df_out = allocate_teams(
             input_file="fake.xlsx",
             sheet_name=0,
             output_file="output.xlsx",
             max_solve_time=40,
             wam_weight=0.05,
+            pos_pref_weight=0.8,
+            neg_pref_weight=0.8,
             min_team_size=3,
             max_team_size=5,
         )
-
         assert isinstance(df_out, pd.DataFrame)
         assert "team" in df_out.columns
-        assert len(df_out) == len(df_in)
+        assert len(df_out) == len(fake_df)
         assert df_out["team"].notna().all()
 
 
-def test_teams_have_valid_sizes(df_in=fake_student_df_fixture):
-    """Verify each team is within the size limits."""
-    with mock.patch("pandas.read_excel", return_value=df_in), mock.patch(
+def test_teams_have_valid_sizes(request):
+    """Verify each team is within the size limits and report WAM balance."""
+    fake_df = request.getfixturevalue("fake_student_df_fixture")
+    with mock.patch("pandas.read_excel", return_value=fake_df), mock.patch(
         "pandas.DataFrame.to_excel"
     ):
-
         df_out = allocate_teams(
             input_file="fake.xlsx",
             sheet_name=0,
             output_file="output.xlsx",
             max_solve_time=40,
             wam_weight=0.05,
+            pos_pref_weight=0.8,
+            neg_pref_weight=0.8,
             min_team_size=3,
             max_team_size=5,
         )
-
         team_sizes = df_out.groupby("team").size()
         assert (team_sizes >= 3).all()
         assert (team_sizes <= 5).all()
+        report_wam_balance(df_out)
 
 
-def test_fake_student_df_content(df_in=fake_student_df_fixture):
-    """Verify the fake data fixture is correct."""
-    assert len(df_in) == 100
-    assert set(df_in.columns) == {
+def test_fake_student_df_content(request):
+    """Verify the fake data fixture content and columns."""
+    fake_df = request.getfixturevalue("fake_student_df_fixture")
+    assert len(fake_df) == 100
+    expected_cols = {
+        "Student_ID",
         "first_name",
         "last_name",
         "email",
         "gender",
         "wam",
         "lab",
+        "Prefer_With",
+        "Prefer_Not_With",
     }
-    assert df_in["lab"].between(1, 4).all()
-    assert df_in["wam"].between(50, 90).all()
+    assert set(fake_df.columns) == expected_cols
+    assert fake_df["lab"].between(1, 4).all()
+    assert fake_df["wam"].between(50, 90).all()
+
+
+def test_preferences_satisfaction(request):
+    """Check how many preferences are satisfied in the allocation.
+
+    Runs the allocator on the seeded fixture, then counts the positive pairs
+    that share a team and the negative pairs that were kept apart.
+    """
+    fake_df = request.getfixturevalue("fake_student_df_fixture")
+    with mock.patch("pandas.read_excel", return_value=fake_df), mock.patch(
+        "pandas.DataFrame.to_excel"
+    ):
+        df_out = allocate_teams(
+            input_file="fake.xlsx",
+            sheet_name=0,
+            output_file="output.xlsx",
+            max_solve_time=40,
+            wam_weight=0.05,
+            pos_pref_weight=0.8,
+            neg_pref_weight=0.8,
+            min_team_size=3,
+            max_team_size=5,
+        )
+
+    team_map = df_out.set_index("Student_ID")["team"].to_dict()
+
+    # Re-derive the ID pairs from the raw fixture columns, not from the solver.
+    def pairs(column):
+        """Return (student, target) ID pairs listed in *column*."""
+        found = []
+        for _, row in fake_df.iterrows():
+            cell = row[column]
+            if pd.isna(cell) or not cell.strip():
+                continue
+            student = row["Student_ID"].strip()
+            found.extend(
+                (student, other.strip()) for other in cell.split(",") if other.strip()
+            )
+        return found
+
+    pos_prefs = pairs("Prefer_With")
+    neg_prefs = pairs("Prefer_Not_With")
+    pos_satisfied = sum(
+        a in team_map and b in team_map and team_map[a] == team_map[b]
+        for a, b in pos_prefs
+    )
+    neg_satisfied = sum(
+        a in team_map and b in team_map and team_map[a] != team_map[b]
+        for a, b in neg_prefs
+    )
+
+    # Report before asserting so the counts are visible when a check fails.
+    print(f"Positive preferences satisfied: {pos_satisfied}/{len(pos_prefs)}")
+    print(f"Negative preferences satisfied: {neg_satisfied}/{len(neg_prefs)}")
+    assert pos_satisfied > 0, "No positive preferences satisfied"
+    assert neg_satisfied > 0, "No negative preferences satisfied"