@@ -1461,16 +1461,33 @@ Looking up values by index/column labels
14611461
14621462Sometimes you want to extract a set of values given a sequence of row labels
14631463and column labels; this can be achieved with ``pandas.factorize`` and NumPy indexing.
1464- For instance:
14651464
1466- .. ipython :: python
1465+ For heterogeneous column types, we subset columns to avoid unnecessary NumPy conversions:
1466+
1467+ .. code-block:: python
1468+
def pd_lookup_het(df, row_labels, col_labels):
    """
    Look up one value per (row label, column label) pair.

    Only the rows and columns that are actually requested are kept before
    the values are flattened, so columns that are not requested do not take
    part in the conversion to a common dtype.

    Parameters
    ----------
    df : DataFrame
        Frame to read from.
    row_labels : sequence
        Row labels, one per value to look up.
    col_labels : sequence
        Column labels, paired element-wise with ``row_labels``.

    Returns
    -------
    Series
        The looked-up values, in the order of the label pairs.

    Raises
    ------
    KeyError
        If any row or column label is not present in ``df``.
    """
    rows = df.index.get_indexer(row_labels)
    cols = df.columns.get_indexer(col_labels)
    # get_indexer marks unknown labels with -1; ``take`` and the flat-index
    # arithmetic below would read that as "last element" and silently
    # return a wrong cell, so fail loudly instead.
    if (rows == -1).any():
        raise KeyError("One or more row labels was not found")
    if (cols == -1).any():
        raise KeyError("One or more column labels was not found")
    sub = df.take(np.unique(cols), axis=1)
    sub = sub.take(np.unique(rows), axis=0)
    # Positions of the requested labels inside the reduced frame.
    rows = sub.index.get_indexer(row_labels)
    cols = sub.columns.get_indexer(col_labels)
    # melt() stacks the columns one after another, so the cell at
    # (row, col) ends up at position row + col * len(sub).
    values = sub.melt()["value"]
    flat_index = rows + cols * len(sub)
    return values.iloc[flat_index]
1480+
1481+ For homogeneous column types, it is fastest to convert the requested columns to a NumPy array and index it directly:
1482+
1483+ .. code-block:: python
14671484
# Each row names, in its "col" entry, the column whose value should be
# picked for that row.
df = pd.DataFrame(
    {
        "col": ["A", "A", "B", "B"],
        "A": [80, 23, np.nan, 22],
        "B": [80, 55, 76, 67],
    }
)
df
# factorize() returns integer codes (one per row) and the unique labels;
# reindexing by the unique labels makes the codes valid column positions.
idx, cols = pd.factorize(df["col"])
df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
def pd_lookup_hom(df, row_labels, col_labels):
    """
    Look up one value per (row label, column label) pair via NumPy.

    The requested columns are converted to a single NumPy array, which is
    then indexed with the integer positions of the label pairs.

    Parameters
    ----------
    df : DataFrame
        Frame to read from.
    row_labels : sequence
        Row labels, one per value to look up.
    col_labels : sequence
        Column labels, paired element-wise with ``row_labels``.

    Returns
    -------
    numpy.ndarray
        The looked-up values, in the order of the label pairs.

    Raises
    ------
    KeyError
        If any row or column label is not present in ``df``.
    """
    rows = df.index.get_indexer(row_labels)
    # get_indexer marks unknown labels with -1, which NumPy indexing would
    # silently interpret as "last row"; fail loudly instead.
    if (rows == -1).any():
        raise KeyError("One or more row labels was not found")
    # De-duplicate while preserving order. Sorting is not needed for the
    # lookup and would fail for labels that cannot be compared with each
    # other (e.g. a mix of integers and strings).
    wanted = list(dict.fromkeys(col_labels))
    sub = df.loc[:, wanted]  # raises KeyError for unknown column labels
    cols = sub.columns.get_indexer(col_labels)
    return sub.to_numpy()[rows, cols]
14741491
14751492 Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method,
14761493which was deprecated in version 1.2.0 and removed in version 2.0.0.
0 commit comments