Skip to content

Commit 99707f3

Browse files
committed
Bump to version 6.0.0
2 parents 3a97c3b + aa48e99 commit 99707f3

File tree

98 files changed

+4971
-3892
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

98 files changed

+4971
-3892
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ Triage is designed to:
2828
To install Triage locally, you need:
2929

3030
- Ubuntu/RedHat
31-
- Python 3.9+
32-
- A PostgreSQL 9.6+ database with your source data (events,
31+
- Python 3.10+
32+
- A PostgreSQL 13+ database with your source data (events,
3333
geographical data, etc) loaded.
34-
- **NOTE**: If your database is PostgreSQL 11+ you will get some
34+
- **NOTE**: If your database is PostgreSQL 13+ you will get some
3535
speed improvements. We recommend updating to a recent
3636
version of PostgreSQL.
3737
- Ample space on an available disk (or for example in Amazon Web
@@ -90,7 +90,7 @@ There are a plethora of options available for experiment running, affecting thin
9090

9191
## Development
9292

93-
Triage was initially developed at [University of Chicago's Center For Data Science and Public Policy](http://dsapp.uchicago.edu) and is now being maintained at Carnegie Mellon University.
93+
Triage was initially developed at [University of Chicago's Center For Data Science and Public Policy](http://dsapp.uchicago.edu) and is now being maintained at [Carnegie Mellon University](https://datasciencepublicpolicy.org/).
9494

9595
To build this package (without installation), its dependencies may
9696
alternatively be installed from the terminal using `pip`:

example/colab/colab_triage.ipynb

Lines changed: 50 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,35 @@
5353
"## Getting Set Up\n",
5454
"\n",
5555
"We'll need a few dependencies to run triage in a colab notebook:\n",
56+
"- Python version 3.10\n",
5657
"- A local postgresql server (we'll use version 11)\n",
5758
"- A simplified dataset loaded into this database (we'll use data from DonorsChoose)\n",
5859
"- Triage and its dependencies (we'll use the current version in pypi)"
5960
]
6061
},
62+
{
63+
"cell_type": "markdown",
64+
"metadata": {},
65+
"source": [
66+
"### Python version 3.10 \n",
67+
"\n",
68+
"🛑    Triage requires Python 3.10 in Colab. Colab's default Python version is 3.12+. To change the version, click at the bottom right of Colab where it says **Python 3** and select the option **\"Change runtime type\"**. A window with different options will pop up; choose the option **2025.07** in the \"Runtime version\" section and select **Save**. "
69+
]
70+
},
6171
{
6272
"cell_type": "code",
6373
"execution_count": null,
64-
"metadata": {
65-
"id": "-htIBoS7N4gK"
66-
},
74+
"metadata": {},
75+
"outputs": [],
76+
"source": [
77+
"# Make sure you are running Python version 3.10\n",
78+
"!python --version"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": null,
84+
"metadata": {},
6785
"outputs": [],
6886
"source": [
6987
"# Install and start postgresql-11 server\n",
@@ -144,12 +162,12 @@
144162
},
145163
"outputs": [],
146164
"source": [
147-
"from sqlalchemy.engine.url import URL\n",
165+
"from sqlalchemy.engine import URL\n",
148166
"from triage.util.db import create_engine\n",
149167
"import pandas as pd\n",
150168
"\n",
151-
"db_url = URL(\n",
152-
" 'postgres',\n",
169+
"db_url = URL.create(\n",
170+
" 'postgresql+psycopg2',\n",
153171
" host='localhost',\n",
154172
" username='postgres',\n",
155173
" database='donors_choose',\n",
@@ -874,16 +892,26 @@
874892
"metadata": {
875893
"id": "jYzBKFG3qDhQ"
876894
},
877-
"outputs": [],
895+
"outputs": [
896+
{
897+
"ename": "SyntaxError",
898+
"evalue": "invalid syntax (136456548.py, line 41)",
899+
"output_type": "error",
900+
"traceback": [
901+
"\u001b[0;36m Cell \u001b[0;32mIn[1], line 41\u001b[0;36m\u001b[0m\n\u001b[0;31m cursor.execute(text\"SET ROLE {dbconfig['role']}\")\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
902+
]
903+
}
904+
],
878905
"source": [
879906
"import yaml\n",
880907
"import shutil\n",
881908
"import os\n",
882909
"import logging\n",
883910
"\n",
884-
"from sqlalchemy.engine.url import URL\n",
911+
"from sqlalchemy.engine import URL\n",
885912
"from sqlalchemy.event import listens_for\n",
886913
"from sqlalchemy.pool import Pool\n",
914+
"from sqlalchemy import event\n",
887915
"\n",
888916
"from triage.util.db import create_engine\n",
889917
"from triage.experiments import MultiCoreExperiment\n",
@@ -910,18 +938,15 @@
910938
" # dbconfig = yaml.safe_load(dbf)\n",
911939
"\n",
912940
" dbconfig = yaml.safe_load(database_yaml)\n",
913-
" print(dbconfig['role'])\n",
914941
"\n",
915942
" # assume group role to ensure shared permissions\n",
916-
" @listens_for(Pool, \"connect\")\n",
917-
" def assume_role(dbapi_con, connection_record):\n",
918-
" logging.debug(f\"setting role {dbconfig['role']};\")\n",
919-
" dbapi_con.cursor().execute(f\"set role {dbconfig['role']};\")\n",
920-
" # logging.debug(f\"setting role postres;\")\n",
921-
" # dbapi_con.cursor().execute(f\"set role postgres;\")\n",
922-
"\n",
923-
" db_url = URL(\n",
924-
" 'postgres',\n",
943+
" @event.listens_for(Pool, \"connect\")\n",
944+
" def assume_role(dbapi_conn, connection_record):\n",
945+
" with dbapi_conn.cursor() as cursor:\n",
946+
" cursor.execute(\"SET ROLE %s\", (dbconfig['role'],))\n",
947+
"\n",
948+
" db_url = URL.create(\n",
949+
" 'postgresql+psycopg2',\n",
925950
" host=dbconfig['host'],\n",
926951
" username=dbconfig['user'],\n",
927952
" database=dbconfig['db'],\n",
@@ -1005,15 +1030,15 @@
10051030
"import yaml\n",
10061031
"import pandas as pd\n",
10071032
"\n",
1008-
"from sqlalchemy.engine.url import URL\n",
1033+
"from sqlalchemy.engine import URL\n",
10091034
"from triage.util.db import create_engine\n",
10101035
"from triage.component.postmodeling.experiment_summarizer import ExperimentReport\n",
10111036
"from matplotlib import pyplot as plt\n",
10121037
"%matplotlib inline\n",
10131038
"\n",
10141039
"dbconfig = yaml.safe_load(database_yaml)\n",
1015-
"db_url = URL(\n",
1016-
" 'postgres',\n",
1040+
"db_url = URL.create(\n",
1041+
" 'postgresql+psycopg2',\n",
10171042
" host=dbconfig['host'],\n",
10181043
" username=dbconfig['user'],\n",
10191044
" database=dbconfig['db'],\n",
@@ -1599,8 +1624,8 @@
15991624
"outputs": [],
16001625
"source": [
16011626
"dbconfig = yaml.safe_load(database_yaml)\n",
1602-
"db_url = URL(\n",
1603-
" 'postgres',\n",
1627+
"db_url = URL.create(\n",
1628+
" 'postgresql+psycopg2',\n",
16041629
" host=dbconfig['host'],\n",
16051630
" username=dbconfig['user'],\n",
16061631
" database=dbconfig['db'],\n",
@@ -2010,7 +2035,7 @@
20102035
"provenance": []
20112036
},
20122037
"kernelspec": {
2013-
"display_name": "colab_env",
2038+
"display_name": "triage_6",
20142039
"language": "python",
20152040
"name": "python3"
20162041
},
@@ -2024,7 +2049,7 @@
20242049
"name": "python",
20252050
"nbconvert_exporter": "python",
20262051
"pygments_lexer": "ipython3",
2027-
"version": "3.11.9"
2052+
"version": "3.10.6"
20282053
}
20292054
},
20302055
"nbformat": 4,

requirement/main.txt

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
polars==0.18.2
22
pyarrow>=12.0.1
33
numpy==1.26.0
4-
pandas==1.5.0
5-
alembic==1.7.7
6-
SQLAlchemy==1.3.18 # pyup: ignore
4+
pandas==2.0.3
5+
alembic==1.17.2
6+
SQLAlchemy==2.0
77
PyYAML==6.0.2
88
psycopg2-binary==2.9.3
99
boto3==1.22.4
@@ -19,15 +19,14 @@ sqlparse==0.4.4
1919
pebble==4.6.3
2020
adjustText==0.7.3
2121
graphviz==0.20
22-
requests==2.31.0
22+
requests==2.32.3
2323
coloredlogs==15.0.1
2424
verboselogs==1.7
2525
s3fs==0.4.2 # pyup: ignore
2626
scikit-learn==1.6.1
27-
matplotlib==3.5.1
28-
matplotlib-inline==0.1.3
29-
seaborn==0.11.2
27+
matplotlib==3.8.1
28+
seaborn==0.13.2
3029
ohio==0.5.0
31-
aequitas==0.42.0
30+
aequitas==1.0.0
3231
plotly==5.24.0
3332
jupyter==1.0.0

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[bumpversion]
2-
current_version = 5.5.4
2+
current_version = 5.5.5
33
commit = True
44
tag = True
55

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def stream_requirements(fd):
4444

4545
setup(
4646
name='triage',
47-
version='5.5.5',
47+
version='6.0.0',
4848
description="Risk modeling and prediction",
4949
long_description=README_PATH.read_text(),
5050
long_description_content_type="text/markdown",
@@ -73,10 +73,10 @@ def stream_requirements(fd):
7373
"License :: OSI Approved :: MIT License",
7474
"Natural Language :: English",
7575
"Programming Language :: Python :: 3",
76-
"Programming Language :: Python :: 3.9",
7776
"Programming Language :: Python :: 3.10",
77+
"Programming Language :: Python :: 3.11",
7878
],
79-
python_requires=">=3.8",
79+
python_requires=">=3.10",
8080
test_suite="tests",
8181
tests_require=REQUIREMENTS_TEST,
8282
)

src/tests/architect_tests/test_builders.py

Lines changed: 45 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import testing.postgresql
55

66
from contextlib import contextmanager
7-
7+
from sqlalchemy import text
88
from triage import create_engine
99
from triage.component.catwalk.utils import filename_friendly_hash
1010
from triage.component.architect.feature_group_creator import FeatureGroup
@@ -280,7 +280,9 @@ def test_make_entity_date_table():
280280
experiment_hash=experiment_hash,
281281
engine=engine,
282282
)
283-
engine.execute("CREATE TABLE features.tmp_entity_date (a int, b date);")
283+
with engine.begin() as conn:
284+
conn.execute(text("CREATE TABLE features.tmp_entity_date (a int, b date);"))
285+
284286
# call the function to test the creation of the table
285287
entity_date_table_name = builder.make_entity_date_table(
286288
as_of_times=dates,
@@ -293,12 +295,11 @@ def test_make_entity_date_table():
293295
)
294296

295297
# read in the table
296-
result = pd.read_sql(
297-
"select * from features.{} order by entity_id, as_of_date".format(
298-
entity_date_table_name
299-
),
300-
engine,
301-
)
298+
with engine.connect() as conn:
299+
result = pd.read_sql(
300+
f"select * from features.{entity_date_table_name} order by entity_id, as_of_date",
301+
conn.connection,
302+
)
302303
# compare the table to the test dataframe
303304
test = result == ids_dates
304305
assert test.all().all()
@@ -327,8 +328,11 @@ def test_make_entity_date_table_include_missing_labels():
327328
label_timespan="1 month",
328329
)
329330
# this line adds the new entity-date combo as an expected one
330-
ids_dates = ids_dates.append(
331-
{"entity_id": 0, "as_of_date": datetime.date(2016, 6, 1)}, ignore_index=True
331+
ids_dates = pd.concat([
332+
ids_dates,
333+
pd.DataFrame({"entity_id": [0], "as_of_date": [datetime.date(2016, 6, 1)]})
334+
],
335+
ignore_index=True
332336
)
333337

334338
with testing.postgresql.Postgresql() as postgresql:
@@ -347,7 +351,8 @@ def test_make_entity_date_table_include_missing_labels():
347351
include_missing_labels_in_train_as=False,
348352
engine=engine,
349353
)
350-
engine.execute("CREATE TABLE features.tmp_entity_date (a int, b date);")
354+
with engine.begin() as conn:
355+
conn.execute(text("CREATE TABLE features.tmp_entity_date (a int, b date);"))
351356
# call the function to test the creation of the table
352357
entity_date_table_name = builder.make_entity_date_table(
353358
as_of_times=dates,
@@ -360,12 +365,11 @@ def test_make_entity_date_table_include_missing_labels():
360365
)
361366

362367
# read in the table
363-
result = pd.read_sql(
364-
"select * from features.{} order by entity_id, as_of_date".format(
365-
entity_date_table_name
366-
),
367-
engine,
368-
)
368+
with engine.connect() as conn:
369+
result = pd.read_sql(
370+
f"select * from features.{entity_date_table_name} order by entity_id, as_of_date",
371+
conn.connection,
372+
)
369373

370374
# compare the table to the test dataframe
371375
assert sorted(result.values.tolist()) == sorted(ids_dates.values.tolist())
@@ -484,33 +488,31 @@ def test_stitch_csvs(self):
484488

485489
matrix_store = matrix_storage_engine.get_store("1234")
486490

487-
result = builder.stitch_csvs(
491+
result, labels_ = builder.stitch_csvs(
488492
features_queries=feature_queries,
489493
label_query=label_query,
490494
matrix_store=matrix_store,
491495
matrix_uuid="1234"
492496
)
493497

494-
# chekc if entity_id and as_of_date are as index
498+
# check if entity_id and as_of_date are as index
495499
should_be = ['entity_id', 'as_of_date']
496500
actual_indices = result.index.names
497-
498501
TestCase().assertListEqual(should_be, actual_indices)
499502

500-
# last element in the DF should be the label
503+
# check that last element is not the label
501504
last_col = 'booking'
502505
output = result.columns.values[-1] # label name
506+
TestCase().assertNotEqual(last_col, output)
503507

504-
TestCase().assertEqual(last_col, output)
505-
506-
# number of columns must be the sum of all the columns on each feature table + 1 for the label
507-
TestCase().assertEqual(result.shape[1], 4+1,
508-
"Number of features and label doesn't match")
508+
# number of columns must be the sum of all the columns on each feature table
509+
TestCase().assertEqual(result.shape[1], 4,
510+
"Number of features doesn't match")
509511

510512
# number of rows
511513
assert result.shape[0] == 5
512514
TestCase().assertEqual(result.shape[0], 5,
513-
"Number of rows doesn't match")
515+
"Number of rows doesn't match")
514516

515517
# types of the final df should be float32
516518
types = set(result.apply(lambda x: x.dtype == 'float32').values)
@@ -688,17 +690,22 @@ def test_nullcheck(self):
688690
}
689691

690692
uuid = filename_friendly_hash(good_metadata)
691-
with self.assertRaises(ValueError):
692-
builder.build_matrix(
693-
as_of_times=dates,
694-
label_name="booking",
695-
label_type="binary",
696-
feature_dictionary=feature_dictionary,
697-
matrix_metadata=good_metadata,
698-
matrix_uuid=uuid,
699-
matrix_type="other",
700-
)
701-
693+
# check if an exception was raised (but swallowed in the original code)
694+
# so looking for a None data frame. Verify code in builders.py/build_matrix
695+
# when we catch a ValueError we just do a return, not raising an exception
696+
" # maybe we should consider actually raising an exception? (2026-01-04)\n",
697+
# that will stop the run though!
698+
699+
result = builder.build_matrix(
700+
as_of_times=dates,
701+
label_name="booking",
702+
label_type="binary",
703+
feature_dictionary=feature_dictionary,
704+
matrix_metadata=good_metadata,
705+
matrix_uuid=uuid,
706+
matrix_type="other",
707+
)
708+
self.assertIsNone(result)
702709

703710
def test_replace_false_rerun(self):
704711
with testing.postgresql.Postgresql() as postgresql:

0 commit comments

Comments
 (0)