1+ #!/usr/bin/env python3
2+ """
3+ Group-by aggregation example - demonstrates the exact use case from Polars issue #15443
4+
5+ This example shows how to aggregate U256 values by groups, which is essential for:
6+ - Blockchain transaction analysis (total spend per account)
7+ - Token balance aggregation (total supply per token)
8+ - DeFi protocol analytics (TVL per pool)
9+ """
10+
11+ import polars as pl
12+ import polars_u256_plugin as u256
13+
def main():
    """Run the U256 group-by aggregation demo, printing every result.

    Builds a small in-memory table of sample transactions, totals the wei
    amounts per account through the U256 plugin, and then sums three
    hex-encoded values (2**200, 2**180 and 2**190). Everything is written
    to stdout; nothing is returned.
    """
    wei_per_eth = 10**18  # divisor used to turn a wei total into ETH

    print("🔗 Blockchain Transaction Aggregation Example")
    print("=" * 50)

    # Sample ledger: one row per simulated transaction.
    transactions = pl.DataFrame(
        {
            "account_id": [
                "alice", "bob", "alice", "charlie", "bob", "alice", "charlie",
            ],
            "tx_amount_wei": [
                1_000_000_000_000_000_000,   # 1 ETH in wei
                2_500_000_000_000_000_000,   # 2.5 ETH
                500_000_000_000_000_000,     # 0.5 ETH
                10_000_000_000_000_000_000,  # 10 ETH
                1_200_000_000_000_000_000,   # 1.2 ETH
                750_000_000_000_000_000,     # 0.75 ETH
                3_300_000_000_000_000_000,   # 3.3 ETH
            ],
            "gas_used": [21000, 45000, 21000, 85000, 32000, 21000, 52000],
            "block_number": [
                18500000, 18500001, 18500002, 18500003,
                18500004, 18500005, 18500006,
            ],
        }
    )

    print("📊 Raw transaction data:")
    print(transactions)

    # Per-account totals: the group_by().agg(u256.sum()) use case from
    # GitHub issue #15443.
    print("\n 💰 Total spending per account (U256 aggregation):")
    amount_u256 = u256.from_int(pl.col("tx_amount_wei"))
    block_number = pl.col("block_number")
    per_account_aggs = [
        u256.sum(amount_u256).alias("total_wei"),
        pl.col("gas_used").sum().alias("total_gas"),
        pl.len().alias("tx_count"),
        block_number.min().alias("first_block"),
        block_number.max().alias("last_block"),
    ]
    grouped = transactions.group_by("account_id").agg(per_account_aggs)

    # Derive a human-readable ETH amount and a hex rendering of each total.
    total_wei = pl.col("total_wei")
    enriched = grouped.with_columns(
        (u256.to_int(total_wei) / wei_per_eth).alias("total_eth"),
        u256.to_hex(total_wei).alias("total_wei_hex"),
    )
    account_totals = enriched.sort("total_eth", descending=True)

    summary_columns = ["account_id", "total_eth", "tx_count", "total_gas"]
    print(account_totals.select(summary_columns))

    # Print the hex form next to each total so it can be checked by eye.
    print("\n 🔍 Detailed breakdown with hex values:")
    for record in account_totals.iter_rows(named=True):
        headline = (
            f"{record['account_id']:8s} : {record['total_eth']:6.2f} ETH "
            f"({record['tx_count']} txs, {record['total_gas']:,} gas)"
        )
        print(headline)
        print(f" Wei: {record['total_wei_hex']} ")

    # Large-number handling, with the inputs supplied as hex strings.
    print("\n 🚀 Handling very large numbers (from hex):")
    tvl_exponents = (200, 180, 190)  # each TVL is 2**exponent
    large_amounts = pl.DataFrame(
        {
            "protocol": ["uniswap", "compound", "aave"],
            "tvl_hex": [hex(2**exponent) for exponent in tvl_exponents],
        }
    )

    tvl_sum = u256.sum(u256.from_hex(pl.col("tvl_hex"))).alias("total_tvl")
    summed = large_amounts.select(tvl_sum)
    protocol_total = summed.with_columns(
        u256.to_hex(pl.col("total_tvl")).alias("total_tvl_hex")
    )

    print("Total Protocol TVL (impossible with standard int types):")
    print(f"Hex: {protocol_total[0, 'total_tvl_hex']} ")

    print("\n ✅ All operations completed successfully!")
    print("🎯 This demonstrates the exact group_by().agg(u256.sum()) use case")
    print(" mentioned in Polars issue #15443")
83+
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments