|
270 | 270 | " <th>0</th>\n", |
271 | 271 | " <td>1</td>\n", |
272 | 272 | " <td>2001-01-02 00:00:00</td>\n", |
273 | | - " <td>A</td>\n", |
| 273 | + " <td>['A']</td>\n", |
274 | 274 | " <td>1</td>\n", |
275 | 275 | " </tr>\n", |
276 | 276 | " <tr>\n", |
277 | 277 | " <th>1</th>\n", |
278 | 278 | " <td>1</td>\n", |
279 | 279 | " <td>2015-04-25 00:00:00</td>\n", |
280 | | - " <td>B</td>\n", |
| 280 | + " <td>['B']</td>\n", |
281 | 281 | " <td>2</td>\n", |
282 | 282 | " </tr>\n", |
283 | 283 | " <tr>\n", |
284 | 284 | " <th>2</th>\n", |
285 | 285 | " <td>2</td>\n", |
286 | 286 | " <td>2000-04-01 00:00:00</td>\n", |
287 | | - " <td>A</td>\n", |
| 287 | + " <td>['A']</td>\n", |
288 | 288 | " <td>1</td>\n", |
289 | 289 | " </tr>\n", |
290 | 290 | " <tr>\n", |
291 | 291 | " <th>3</th>\n", |
292 | 292 | " <td>3</td>\n", |
293 | 293 | " <td>2014-04-07 00:00:00</td>\n", |
294 | | - " <td>D</td>\n", |
| 294 | + " <td>['D']</td>\n", |
295 | 295 | " <td>1</td>\n", |
296 | 296 | " </tr>\n", |
297 | 297 | " <tr>\n", |
298 | 298 | " <th>4</th>\n", |
299 | 299 | " <td>4</td>\n", |
300 | 300 | " <td>2005-06-16 00:00:00</td>\n", |
301 | | - " <td>A</td>\n", |
| 301 | + " <td>['A']</td>\n", |
302 | 302 | " <td>1</td>\n", |
303 | 303 | " </tr>\n", |
304 | 304 | " <tr>\n", |
305 | 305 | " <th>5</th>\n", |
306 | 306 | " <td>4</td>\n", |
307 | 307 | " <td>2009-01-20 00:00:00</td>\n", |
308 | | - " <td>B, D</td>\n", |
| 308 | + " <td>['B', 'D']</td>\n", |
309 | 309 | " <td>2</td>\n", |
310 | 310 | " </tr>\n", |
311 | 311 | " <tr>\n", |
312 | 312 | " <th>6</th>\n", |
313 | 313 | " <td>4</td>\n", |
314 | 314 | " <td>2012-12-01 00:00:00</td>\n", |
315 | | - " <td>C</td>\n", |
| 315 | + " <td>['C']</td>\n", |
316 | 316 | " <td>3</td>\n", |
317 | 317 | " </tr>\n", |
318 | 318 | " <tr>\n", |
319 | 319 | " <th>7</th>\n", |
320 | 320 | " <td>5</td>\n", |
321 | 321 | " <td>2003-11-09 00:00:00</td>\n", |
322 | | - " <td>B</td>\n", |
| 322 | + " <td>['B']</td>\n", |
323 | 323 | " <td>1</td>\n", |
324 | 324 | " </tr>\n", |
325 | 325 | " <tr>\n", |
326 | 326 | " <th>8</th>\n", |
327 | 327 | " <td>5</td>\n", |
328 | 328 | " <td>2010-10-10 00:00:00</td>\n", |
329 | | - " <td>A</td>\n", |
| 329 | + " <td>['A']</td>\n", |
330 | 330 | " <td>2</td>\n", |
331 | 331 | " </tr>\n", |
332 | 332 | " <tr>\n", |
333 | 333 | " <th>9</th>\n", |
334 | 334 | " <td>6</td>\n", |
335 | 335 | " <td>2004-01-09 00:00:00</td>\n", |
336 | | - " <td>B</td>\n", |
| 336 | + " <td>['B']</td>\n", |
337 | 337 | " <td>1</td>\n", |
338 | 338 | " </tr>\n", |
339 | 339 | " </tbody>\n", |
340 | 340 | "</table>\n", |
341 | 341 | "</div>" |
342 | 342 | ], |
343 | 343 | "text/plain": [ |
344 | | - " ID DATE OP rank\n", |
345 | | - "0 1 2001-01-02 00:00:00 A 1\n", |
346 | | - "1 1 2015-04-25 00:00:00 B 2\n", |
347 | | - "2 2 2000-04-01 00:00:00 A 1\n", |
348 | | - "3 3 2014-04-07 00:00:00 D 1\n", |
349 | | - "4 4 2005-06-16 00:00:00 A 1\n", |
350 | | - "5 4 2009-01-20 00:00:00 B, D 2\n", |
351 | | - "6 4 2012-12-01 00:00:00 C 3\n", |
352 | | - "7 5 2003-11-09 00:00:00 B 1\n", |
353 | | - "8 5 2010-10-10 00:00:00 A 2\n", |
354 | | - "9 6 2004-01-09 00:00:00 B 1" |
| 344 | + " ID DATE OP rank\n", |
| 345 | + "0 1 2001-01-02 00:00:00 ['A'] 1\n", |
| 346 | + "1 1 2015-04-25 00:00:00 ['B'] 2\n", |
| 347 | + "2 2 2000-04-01 00:00:00 ['A'] 1\n", |
| 348 | + "3 3 2014-04-07 00:00:00 ['D'] 1\n", |
| 349 | + "4 4 2005-06-16 00:00:00 ['A'] 1\n", |
| 350 | + "5 4 2009-01-20 00:00:00 ['B', 'D'] 2\n", |
| 351 | + "6 4 2012-12-01 00:00:00 ['C'] 3\n", |
| 352 | + "7 5 2003-11-09 00:00:00 ['B'] 1\n", |
| 353 | + "8 5 2010-10-10 00:00:00 ['A'] 2\n", |
| 354 | + "9 6 2004-01-09 00:00:00 ['B'] 1" |
355 | 355 | ] |
356 | 356 | }, |
357 | 357 | "execution_count": 3, |
|
361 | 361 | ], |
362 | 362 | "source": [ |
363 | 363 | "# define a user aggregation function\n", |
364 | | - "# a function to paste a vector of strings together\n", |
| 364 | + "\n", |
| 365 | + "# # a function to paste a vector of strings together\n", |
| 366 | + "# def sorted_concat(vals):\n", |
| 367 | + "# return ', '.join(sorted([str(vi) for vi in set(vals)]))\n", |
| 368 | + "\n", |
| 369 | + "# just a class that doesn't declare an iterable interface\n", |
| 370 | + "# so Pandas thinks of these values as scalars.\n", |
| 371 | + "class Container:\n", |
| 372 | + " def __init__(self, value):\n", |
| 373 | + " self.value = value\n", |
| 374 | + " \n", |
| 375 | + " def __repr__(self):\n", |
| 376 | + " return self.value.__repr__()\n", |
| 377 | + " \n", |
| 378 | + " def __str__(self):\n", |
| 379 | + " return self.value.__repr__()\n", |
| 380 | + "\n", |
| 381 | + "\n", |
365 | 382 | "def sorted_concat(vals):\n", |
366 | | - " return ', '.join(sorted([str(vi) for vi in set(vals)]))\n", |
| 383 | + " return Container(sorted([str(vi) for vi in set(vals)]))\n", |
367 | 384 | "\n", |
368 | 385 | "# merge the operations to get one row per ID and DATE\n", |
369 | 386 | "# then rank the rows for each ID by DATE\n", |
|
705 | 722 | " <th>0</th>\n", |
706 | 723 | " <td>1</td>\n", |
707 | 724 | " <td>2001-01-02 00:00:00</td>\n", |
708 | | - " <td>A</td>\n", |
| 725 | + " <td>['A']</td>\n", |
709 | 726 | " <td>2015-04-25 00:00:00</td>\n", |
710 | | - " <td>B</td>\n", |
| 727 | + " <td>['B']</td>\n", |
711 | 728 | " <td>NaN</td>\n", |
712 | 729 | " <td>NaN</td>\n", |
713 | 730 | " </tr>\n", |
714 | 731 | " <tr>\n", |
715 | 732 | " <th>1</th>\n", |
716 | 733 | " <td>2</td>\n", |
717 | 734 | " <td>2000-04-01 00:00:00</td>\n", |
718 | | - " <td>A</td>\n", |
| 735 | + " <td>['A']</td>\n", |
719 | 736 | " <td>NaN</td>\n", |
720 | 737 | " <td>NaN</td>\n", |
721 | 738 | " <td>NaN</td>\n", |
|
725 | 742 | " <th>2</th>\n", |
726 | 743 | " <td>3</td>\n", |
727 | 744 | " <td>2014-04-07 00:00:00</td>\n", |
728 | | - " <td>D</td>\n", |
| 745 | + " <td>['D']</td>\n", |
729 | 746 | " <td>NaN</td>\n", |
730 | 747 | " <td>NaN</td>\n", |
731 | 748 | " <td>NaN</td>\n", |
|
735 | 752 | " <th>3</th>\n", |
736 | 753 | " <td>4</td>\n", |
737 | 754 | " <td>2005-06-16 00:00:00</td>\n", |
738 | | - " <td>A</td>\n", |
| 755 | + " <td>['A']</td>\n", |
739 | 756 | " <td>2009-01-20 00:00:00</td>\n", |
740 | | - " <td>B, D</td>\n", |
| 757 | + " <td>['B', 'D']</td>\n", |
741 | 758 | " <td>2012-12-01 00:00:00</td>\n", |
742 | | - " <td>C</td>\n", |
| 759 | + " <td>['C']</td>\n", |
743 | 760 | " </tr>\n", |
744 | 761 | " <tr>\n", |
745 | 762 | " <th>4</th>\n", |
746 | 763 | " <td>5</td>\n", |
747 | 764 | " <td>2003-11-09 00:00:00</td>\n", |
748 | | - " <td>B</td>\n", |
| 765 | + " <td>['B']</td>\n", |
749 | 766 | " <td>2010-10-10 00:00:00</td>\n", |
750 | | - " <td>A</td>\n", |
| 767 | + " <td>['A']</td>\n", |
751 | 768 | " <td>NaN</td>\n", |
752 | 769 | " <td>NaN</td>\n", |
753 | 770 | " </tr>\n", |
754 | 771 | " <tr>\n", |
755 | 772 | " <th>5</th>\n", |
756 | 773 | " <td>6</td>\n", |
757 | 774 | " <td>2004-01-09 00:00:00</td>\n", |
758 | | - " <td>B</td>\n", |
| 775 | + " <td>['B']</td>\n", |
759 | 776 | " <td>NaN</td>\n", |
760 | 777 | " <td>NaN</td>\n", |
761 | 778 | " <td>NaN</td>\n", |
|
766 | 783 | "</div>" |
767 | 784 | ], |
768 | 785 | "text/plain": [ |
769 | | - " ID DATE1 OP1 DATE2 OP2 \\\n", |
770 | | - "0 1 2001-01-02 00:00:00 A 2015-04-25 00:00:00 B \n", |
771 | | - "1 2 2000-04-01 00:00:00 A NaN NaN \n", |
772 | | - "2 3 2014-04-07 00:00:00 D NaN NaN \n", |
773 | | - "3 4 2005-06-16 00:00:00 A 2009-01-20 00:00:00 B, D \n", |
774 | | - "4 5 2003-11-09 00:00:00 B 2010-10-10 00:00:00 A \n", |
775 | | - "5 6 2004-01-09 00:00:00 B NaN NaN \n", |
| 786 | + " ID DATE1 OP1 DATE2 OP2 \\\n", |
| 787 | + "0 1 2001-01-02 00:00:00 ['A'] 2015-04-25 00:00:00 ['B'] \n", |
| 788 | + "1 2 2000-04-01 00:00:00 ['A'] NaN NaN \n", |
| 789 | + "2 3 2014-04-07 00:00:00 ['D'] NaN NaN \n", |
| 790 | + "3 4 2005-06-16 00:00:00 ['A'] 2009-01-20 00:00:00 ['B', 'D'] \n", |
| 791 | + "4 5 2003-11-09 00:00:00 ['B'] 2010-10-10 00:00:00 ['A'] \n", |
| 792 | + "5 6 2004-01-09 00:00:00 ['B'] NaN NaN \n", |
776 | 793 | "\n", |
777 | | - " DATE3 OP3 \n", |
778 | | - "0 NaN NaN \n", |
779 | | - "1 NaN NaN \n", |
780 | | - "2 NaN NaN \n", |
781 | | - "3 2012-12-01 00:00:00 C \n", |
782 | | - "4 NaN NaN \n", |
783 | | - "5 NaN NaN " |
| 794 | + " DATE3 OP3 \n", |
| 795 | + "0 NaN NaN \n", |
| 796 | + "1 NaN NaN \n", |
| 797 | + "2 NaN NaN \n", |
| 798 | + "3 2012-12-01 00:00:00 ['C'] \n", |
| 799 | + "4 NaN NaN \n", |
| 800 | + "5 NaN NaN " |
784 | 801 | ] |
785 | 802 | }, |
786 | 803 | "execution_count": 8, |
|
850 | 867 | " <th>0</th>\n", |
851 | 868 | " <td>1</td>\n", |
852 | 869 | " <td>2001-01-02 00:00:00</td>\n", |
853 | | - " <td>A</td>\n", |
| 870 | + " <td>['A']</td>\n", |
854 | 871 | " <td>2015-04-25 00:00:00</td>\n", |
855 | | - " <td>B</td>\n", |
| 872 | + " <td>['B']</td>\n", |
856 | 873 | " <td>NaN</td>\n", |
857 | 874 | " <td>NaN</td>\n", |
858 | 875 | " </tr>\n", |
859 | 876 | " <tr>\n", |
860 | 877 | " <th>1</th>\n", |
861 | 878 | " <td>2</td>\n", |
862 | 879 | " <td>2000-04-01 00:00:00</td>\n", |
863 | | - " <td>A</td>\n", |
| 880 | + " <td>['A']</td>\n", |
864 | 881 | " <td>NaN</td>\n", |
865 | 882 | " <td>NaN</td>\n", |
866 | 883 | " <td>NaN</td>\n", |
|
870 | 887 | " <th>2</th>\n", |
871 | 888 | " <td>3</td>\n", |
872 | 889 | " <td>2014-04-07 00:00:00</td>\n", |
873 | | - " <td>D</td>\n", |
| 890 | + " <td>['D']</td>\n", |
874 | 891 | " <td>NaN</td>\n", |
875 | 892 | " <td>NaN</td>\n", |
876 | 893 | " <td>NaN</td>\n", |
|
880 | 897 | " <th>3</th>\n", |
881 | 898 | " <td>4</td>\n", |
882 | 899 | " <td>2005-06-16 00:00:00</td>\n", |
883 | | - " <td>A</td>\n", |
| 900 | + " <td>['A']</td>\n", |
884 | 901 | " <td>2009-01-20 00:00:00</td>\n", |
885 | | - " <td>B, D</td>\n", |
| 902 | + " <td>['B', 'D']</td>\n", |
886 | 903 | " <td>2012-12-01 00:00:00</td>\n", |
887 | | - " <td>C</td>\n", |
| 904 | + " <td>['C']</td>\n", |
888 | 905 | " </tr>\n", |
889 | 906 | " <tr>\n", |
890 | 907 | " <th>4</th>\n", |
891 | 908 | " <td>5</td>\n", |
892 | 909 | " <td>2003-11-09 00:00:00</td>\n", |
893 | | - " <td>B</td>\n", |
| 910 | + " <td>['B']</td>\n", |
894 | 911 | " <td>2010-10-10 00:00:00</td>\n", |
895 | | - " <td>A</td>\n", |
| 912 | + " <td>['A']</td>\n", |
896 | 913 | " <td>NaN</td>\n", |
897 | 914 | " <td>NaN</td>\n", |
898 | 915 | " </tr>\n", |
899 | 916 | " <tr>\n", |
900 | 917 | " <th>5</th>\n", |
901 | 918 | " <td>6</td>\n", |
902 | 919 | " <td>2004-01-09 00:00:00</td>\n", |
903 | | - " <td>B</td>\n", |
| 920 | + " <td>['B']</td>\n", |
904 | 921 | " <td>NaN</td>\n", |
905 | 922 | " <td>NaN</td>\n", |
906 | 923 | " <td>NaN</td>\n", |
|
911 | 928 | "</div>" |
912 | 929 | ], |
913 | 930 | "text/plain": [ |
914 | | - " ID DATE1 OP1 DATE2 OP2 \\\n", |
915 | | - "0 1 2001-01-02 00:00:00 A 2015-04-25 00:00:00 B \n", |
916 | | - "1 2 2000-04-01 00:00:00 A NaN NaN \n", |
917 | | - "2 3 2014-04-07 00:00:00 D NaN NaN \n", |
918 | | - "3 4 2005-06-16 00:00:00 A 2009-01-20 00:00:00 B, D \n", |
919 | | - "4 5 2003-11-09 00:00:00 B 2010-10-10 00:00:00 A \n", |
920 | | - "5 6 2004-01-09 00:00:00 B NaN NaN \n", |
| 931 | + " ID DATE1 OP1 DATE2 OP2 \\\n", |
| 932 | + "0 1 2001-01-02 00:00:00 ['A'] 2015-04-25 00:00:00 ['B'] \n", |
| 933 | + "1 2 2000-04-01 00:00:00 ['A'] NaN NaN \n", |
| 934 | + "2 3 2014-04-07 00:00:00 ['D'] NaN NaN \n", |
| 935 | + "3 4 2005-06-16 00:00:00 ['A'] 2009-01-20 00:00:00 ['B', 'D'] \n", |
| 936 | + "4 5 2003-11-09 00:00:00 ['B'] 2010-10-10 00:00:00 ['A'] \n", |
| 937 | + "5 6 2004-01-09 00:00:00 ['B'] NaN NaN \n", |
921 | 938 | "\n", |
922 | | - " DATE3 OP3 \n", |
923 | | - "0 NaN NaN \n", |
924 | | - "1 NaN NaN \n", |
925 | | - "2 NaN NaN \n", |
926 | | - "3 2012-12-01 00:00:00 C \n", |
927 | | - "4 NaN NaN \n", |
928 | | - "5 NaN NaN " |
| 939 | + " DATE3 OP3 \n", |
| 940 | + "0 NaN NaN \n", |
| 941 | + "1 NaN NaN \n", |
| 942 | + "2 NaN NaN \n", |
| 943 | + "3 2012-12-01 00:00:00 ['C'] \n", |
| 944 | + "4 NaN NaN \n", |
| 945 | + "5 NaN NaN " |
929 | 946 | ] |
930 | 947 | }, |
931 | 948 | "execution_count": 9, |
|
0 commit comments