Skip to content

Commit 4067119

Browse files
authored
[ENH] Conditional join column selection (#1104)
* Allow multiindex columns
* change column name type check to hashable
* add option for column selection/renaming
* use Any type hint for df_columns and right_columns
* changelog
* notebook update
* create generic function for creating multiindex column
1 parent 4bcfd6c commit 4067119

File tree

4 files changed

+477
-72
lines changed

4 files changed

+477
-72
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## [Unreleased]
44

55
- [DOC] Updated developer guide docs.
6+
- [ENH] Allow column selection/renaming within conditional_join. #1102 @samukweku.
67

78
## [v0.23.1] - 2022-05-03
89

examples/notebooks/conditional_join.ipynb

Lines changed: 297 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1146,14 +1146,309 @@
11461146
" )\n",
11471147
")"
11481148
]
1149+
},
1150+
{
1151+
"cell_type": "markdown",
1152+
"metadata": {},
1153+
"source": [
1154+
"Selection of relevant columns within `conditional_join`: "
1155+
]
1156+
},
1157+
{
1158+
"cell_type": "code",
1159+
"execution_count": 11,
1160+
"metadata": {},
1161+
"outputs": [
1162+
{
1163+
"data": {
1164+
"text/html": [
1165+
"<div>\n",
1166+
"<style scoped>\n",
1167+
" .dataframe tbody tr th:only-of-type {\n",
1168+
" vertical-align: middle;\n",
1169+
" }\n",
1170+
"\n",
1171+
" .dataframe tbody tr th {\n",
1172+
" vertical-align: top;\n",
1173+
" }\n",
1174+
"\n",
1175+
" .dataframe thead tr th {\n",
1176+
" text-align: left;\n",
1177+
" }\n",
1178+
"</style>\n",
1179+
"<table border=\"1\" class=\"dataframe\">\n",
1180+
" <thead>\n",
1181+
" <tr>\n",
1182+
" <th></th>\n",
1183+
" <th>left</th>\n",
1184+
" <th>right</th>\n",
1185+
" </tr>\n",
1186+
" <tr>\n",
1187+
" <th></th>\n",
1188+
" <th>id</th>\n",
1189+
" <th>id</th>\n",
1190+
" </tr>\n",
1191+
" </thead>\n",
1192+
" <tbody>\n",
1193+
" <tr>\n",
1194+
" <th>0</th>\n",
1195+
" <td>1</td>\n",
1196+
" <td>2</td>\n",
1197+
" </tr>\n",
1198+
" <tr>\n",
1199+
" <th>1</th>\n",
1200+
" <td>1</td>\n",
1201+
" <td>2</td>\n",
1202+
" </tr>\n",
1203+
" <tr>\n",
1204+
" <th>2</th>\n",
1205+
" <td>1</td>\n",
1206+
" <td>2</td>\n",
1207+
" </tr>\n",
1208+
" <tr>\n",
1209+
" <th>3</th>\n",
1210+
" <td>1</td>\n",
1211+
" <td>3</td>\n",
1212+
" </tr>\n",
1213+
" <tr>\n",
1214+
" <th>4</th>\n",
1215+
" <td>1</td>\n",
1216+
" <td>2</td>\n",
1217+
" </tr>\n",
1218+
" <tr>\n",
1219+
" <th>5</th>\n",
1220+
" <td>1</td>\n",
1221+
" <td>2</td>\n",
1222+
" </tr>\n",
1223+
" <tr>\n",
1224+
" <th>6</th>\n",
1225+
" <td>1</td>\n",
1226+
" <td>2</td>\n",
1227+
" </tr>\n",
1228+
" <tr>\n",
1229+
" <th>7</th>\n",
1230+
" <td>1</td>\n",
1231+
" <td>3</td>\n",
1232+
" </tr>\n",
1233+
" <tr>\n",
1234+
" <th>8</th>\n",
1235+
" <td>1</td>\n",
1236+
" <td>2</td>\n",
1237+
" </tr>\n",
1238+
" <tr>\n",
1239+
" <th>9</th>\n",
1240+
" <td>1</td>\n",
1241+
" <td>2</td>\n",
1242+
" </tr>\n",
1243+
" <tr>\n",
1244+
" <th>10</th>\n",
1245+
" <td>1</td>\n",
1246+
" <td>2</td>\n",
1247+
" </tr>\n",
1248+
" <tr>\n",
1249+
" <th>11</th>\n",
1250+
" <td>1</td>\n",
1251+
" <td>3</td>\n",
1252+
" </tr>\n",
1253+
" <tr>\n",
1254+
" <th>12</th>\n",
1255+
" <td>2</td>\n",
1256+
" <td>3</td>\n",
1257+
" </tr>\n",
1258+
" <tr>\n",
1259+
" <th>13</th>\n",
1260+
" <td>2</td>\n",
1261+
" <td>3</td>\n",
1262+
" </tr>\n",
1263+
" </tbody>\n",
1264+
"</table>\n",
1265+
"</div>"
1266+
],
1267+
"text/plain": [
1268+
" left right\n",
1269+
" id id\n",
1270+
"0 1 2\n",
1271+
"1 1 2\n",
1272+
"2 1 2\n",
1273+
"3 1 3\n",
1274+
"4 1 2\n",
1275+
"5 1 2\n",
1276+
"6 1 2\n",
1277+
"7 1 3\n",
1278+
"8 1 2\n",
1279+
"9 1 2\n",
1280+
"10 1 2\n",
1281+
"11 1 3\n",
1282+
"12 2 3\n",
1283+
"13 2 3"
1284+
]
1285+
},
1286+
"execution_count": 11,
1287+
"metadata": {},
1288+
"output_type": "execute_result"
1289+
}
1290+
],
1291+
"source": [
1292+
"df1.conditional_join(\n",
1293+
" df2,\n",
1294+
" ('id', 'id', \"<\"),\n",
1295+
" df_columns = 'id',\n",
1296+
" right_columns = 'id'\n",
1297+
" )"
1298+
]
1299+
},
1300+
{
1301+
"cell_type": "markdown",
1302+
"metadata": {},
1303+
"source": [
1304+
"Column renaming is also possible:"
1305+
]
1306+
},
1307+
{
1308+
"cell_type": "code",
1309+
"execution_count": 12,
1310+
"metadata": {},
1311+
"outputs": [
1312+
{
1313+
"data": {
1314+
"text/html": [
1315+
"<div>\n",
1316+
"<style scoped>\n",
1317+
" .dataframe tbody tr th:only-of-type {\n",
1318+
" vertical-align: middle;\n",
1319+
" }\n",
1320+
"\n",
1321+
" .dataframe tbody tr th {\n",
1322+
" vertical-align: top;\n",
1323+
" }\n",
1324+
"\n",
1325+
" .dataframe thead th {\n",
1326+
" text-align: right;\n",
1327+
" }\n",
1328+
"</style>\n",
1329+
"<table border=\"1\" class=\"dataframe\">\n",
1330+
" <thead>\n",
1331+
" <tr style=\"text-align: right;\">\n",
1332+
" <th></th>\n",
1333+
" <th>df_id</th>\n",
1334+
" <th>right_id</th>\n",
1335+
" </tr>\n",
1336+
" </thead>\n",
1337+
" <tbody>\n",
1338+
" <tr>\n",
1339+
" <th>0</th>\n",
1340+
" <td>1</td>\n",
1341+
" <td>2</td>\n",
1342+
" </tr>\n",
1343+
" <tr>\n",
1344+
" <th>1</th>\n",
1345+
" <td>1</td>\n",
1346+
" <td>2</td>\n",
1347+
" </tr>\n",
1348+
" <tr>\n",
1349+
" <th>2</th>\n",
1350+
" <td>1</td>\n",
1351+
" <td>2</td>\n",
1352+
" </tr>\n",
1353+
" <tr>\n",
1354+
" <th>3</th>\n",
1355+
" <td>1</td>\n",
1356+
" <td>3</td>\n",
1357+
" </tr>\n",
1358+
" <tr>\n",
1359+
" <th>4</th>\n",
1360+
" <td>1</td>\n",
1361+
" <td>2</td>\n",
1362+
" </tr>\n",
1363+
" <tr>\n",
1364+
" <th>5</th>\n",
1365+
" <td>1</td>\n",
1366+
" <td>2</td>\n",
1367+
" </tr>\n",
1368+
" <tr>\n",
1369+
" <th>6</th>\n",
1370+
" <td>1</td>\n",
1371+
" <td>2</td>\n",
1372+
" </tr>\n",
1373+
" <tr>\n",
1374+
" <th>7</th>\n",
1375+
" <td>1</td>\n",
1376+
" <td>3</td>\n",
1377+
" </tr>\n",
1378+
" <tr>\n",
1379+
" <th>8</th>\n",
1380+
" <td>1</td>\n",
1381+
" <td>2</td>\n",
1382+
" </tr>\n",
1383+
" <tr>\n",
1384+
" <th>9</th>\n",
1385+
" <td>1</td>\n",
1386+
" <td>2</td>\n",
1387+
" </tr>\n",
1388+
" <tr>\n",
1389+
" <th>10</th>\n",
1390+
" <td>1</td>\n",
1391+
" <td>2</td>\n",
1392+
" </tr>\n",
1393+
" <tr>\n",
1394+
" <th>11</th>\n",
1395+
" <td>1</td>\n",
1396+
" <td>3</td>\n",
1397+
" </tr>\n",
1398+
" <tr>\n",
1399+
" <th>12</th>\n",
1400+
" <td>2</td>\n",
1401+
" <td>3</td>\n",
1402+
" </tr>\n",
1403+
" <tr>\n",
1404+
" <th>13</th>\n",
1405+
" <td>2</td>\n",
1406+
" <td>3</td>\n",
1407+
" </tr>\n",
1408+
" </tbody>\n",
1409+
"</table>\n",
1410+
"</div>"
1411+
],
1412+
"text/plain": [
1413+
" df_id right_id\n",
1414+
"0 1 2\n",
1415+
"1 1 2\n",
1416+
"2 1 2\n",
1417+
"3 1 3\n",
1418+
"4 1 2\n",
1419+
"5 1 2\n",
1420+
"6 1 2\n",
1421+
"7 1 3\n",
1422+
"8 1 2\n",
1423+
"9 1 2\n",
1424+
"10 1 2\n",
1425+
"11 1 3\n",
1426+
"12 2 3\n",
1427+
"13 2 3"
1428+
]
1429+
},
1430+
"execution_count": 12,
1431+
"metadata": {},
1432+
"output_type": "execute_result"
1433+
}
1434+
],
1435+
"source": [
1436+
"df1.conditional_join(\n",
1437+
" df2,\n",
1438+
" ('id', 'id', \"<\"),\n",
1439+
" df_columns = {'id':'df_id'},\n",
1440+
" right_columns = {'id':'right_id'}\n",
1441+
" )"
1442+
]
11491443
}
11501444
],
11511445
"metadata": {
11521446
"interpreter": {
1153-
"hash": "98b0a9b7b4eaaa670588a142fd0a9b87eaafe866f1db4228be72b4211d12040f"
1447+
"hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"
11541448
},
11551449
"kernelspec": {
1156-
"display_name": "Python 3.8.10 64-bit ('base': conda)",
1450+
"display_name": "Python 3.9.10 ('base')",
1451+
"language": "python",
11571452
"name": "python3"
11581453
},
11591454
"language_info": {

0 commit comments

Comments
 (0)