Skip to content

Commit 833e110

Browse files
committed
docs: add comprehensive group_by aggregation examples
- Document the group_by().agg(u256.sum()) use case from Polars issue #15443
- Add practical blockchain transaction aggregation examples
- Show mixed aggregations with regular Polars operations
- Include a working example with large-number handling

This addresses the core DeFi/blockchain analytics use cases discussed in the GitHub issue.
1 parent 9565b63 commit 833e110

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,30 @@ u256.to_hex(col) # → hex strings (0x...)
127127
u256.to_int(col) # → Python int (if fits in i64)
128128
```
129129

130+
### Group-By Aggregations
131+
```python
132+
df.group_by("account_id").agg(
133+
u256.sum(u256.from_int(pl.col("tx_amount"))).alias("total_spent")
134+
)
135+
136+
# Multiple aggregations
137+
df.group_by("token_address").agg([
138+
u256.sum(u256.from_int(pl.col("balance"))).alias("total_supply"),
139+
pl.len().alias("holder_count"),
140+
pl.col("last_updated").max()
141+
])
142+
143+
# Mixed with regular Polars aggregations
144+
df.group_by("wallet").agg([
145+
u256.sum(u256.from_int(pl.col("wei_balance"))).alias("total_wei"),
146+
pl.col("gas_used").sum(),
147+
pl.col("block_number").max()
148+
]).with_columns(
149+
# Convert to readable hex for display
150+
u256.to_hex(pl.col("total_wei")).alias("total_wei_hex")
151+
)
152+
```
153+
130154
### Display Utilities
131155
```python
132156
u256.format_u256_dataframe(df, cols) # Format u256 columns as hex

examples/groupby_example.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Group-by aggregation example - demonstrates the exact use case from Polars issue #15443
4+
5+
This example shows how to aggregate U256 values by groups, which is essential for:
6+
- Blockchain transaction analysis (total spend per account)
7+
- Token balance aggregation (total supply per token)
8+
- DeFi protocol analytics (TVL per pool)
9+
"""
10+
11+
import polars as pl
12+
import polars_u256_plugin as u256
13+
14+
def main():
    """Run the group-by aggregation demo and print the results.

    Builds a small in-memory transaction table, sums the wei amounts per
    account with ``u256.sum`` alongside regular Polars aggregations, and
    then sums three hex-encoded values (2**180 .. 2**200) to show totals
    well beyond any native integer dtype.

    Returns:
        None. All output goes to stdout.
    """
    wei_per_eth = 10**18

    def wei_hex_to_eth(wei_hex):
        """Convert a hex wei string to a float amount in ETH."""
        # Python ints are arbitrary precision, so this stays correct for
        # totals above the signed 64-bit range (about 9.22 ETH in wei).
        return int(wei_hex, 16) / wei_per_eth

    print("🔗 Blockchain Transaction Aggregation Example")
    print("=" * 50)

    # Simulate blockchain transaction data
    # NOTE(review): the 10 ETH literal (1e19 wei) is larger than the signed
    # 64-bit maximum; confirm the dtype Polars infers for this column is one
    # that u256.from_int accepts.
    transactions = pl.DataFrame({
        "account_id": ["alice", "bob", "alice", "charlie", "bob", "alice", "charlie"],
        "tx_amount_wei": [
            1_000_000_000_000_000_000,   # 1 ETH in wei
            2_500_000_000_000_000_000,   # 2.5 ETH
            500_000_000_000_000_000,     # 0.5 ETH
            10_000_000_000_000_000_000,  # 10 ETH
            1_200_000_000_000_000_000,   # 1.2 ETH
            750_000_000_000_000_000,     # 0.75 ETH
            3_300_000_000_000_000_000,   # 3.3 ETH
        ],
        "gas_used": [21000, 45000, 21000, 85000, 32000, 21000, 52000],
        "block_number": [
            18500000, 18500001, 18500002, 18500003,
            18500004, 18500005, 18500006,
        ],
    })

    print("📊 Raw transaction data:")
    print(transactions)

    # Aggregate by account - the core use case from GitHub issue #15443
    print("\n💰 Total spending per account (U256 aggregation):")
    account_totals = (
        transactions.group_by("account_id")
        .agg([
            u256.sum(u256.from_int(pl.col("tx_amount_wei"))).alias("total_wei"),
            pl.col("gas_used").sum().alias("total_gas"),
            pl.len().alias("tx_count"),
            pl.col("block_number").min().alias("first_block"),
            pl.col("block_number").max().alias("last_block"),
        ])
        .with_columns(
            u256.to_hex(pl.col("total_wei")).alias("total_wei_hex"),
        )
        .with_columns(
            # Add human-readable ETH amounts. Derived from the hex string
            # rather than u256.to_int, which is documented as valid only
            # when the value fits in i64; charlie's 13.3 ETH total does not.
            pl.col("total_wei_hex")
            .map_elements(wei_hex_to_eth, return_dtype=pl.Float64)
            .alias("total_eth"),
        )
        .sort("total_eth", descending=True)
    )

    print(account_totals.select(["account_id", "total_eth", "tx_count", "total_gas"]))

    # Show hex representation for verification
    print("\n🔍 Detailed breakdown with hex values:")
    for row in account_totals.iter_rows(named=True):
        print(f"{row['account_id']:8s}: {row['total_eth']:6.2f} ETH "
              f"({row['tx_count']} txs, {row['total_gas']:,} gas)")
        print(f" Wei: {row['total_wei_hex']}")

    # Demonstrate large number handling with hex input
    print("\n🚀 Handling very large numbers (from hex):")
    large_amounts = pl.DataFrame({
        "protocol": ["uniswap", "compound", "aave"],
        "tvl_hex": [
            hex(2**200),  # Astronomically large TVL as hex
            hex(2**180),
            hex(2**190),
        ],
    })

    # No group key here: the whole column is reduced to a single U256 total.
    protocol_total = large_amounts.select(
        u256.sum(u256.from_hex(pl.col("tvl_hex"))).alias("total_tvl")
    ).with_columns(
        u256.to_hex(pl.col("total_tvl")).alias("total_tvl_hex")
    )

    print("Total Protocol TVL (impossible with standard int types):")
    print(f"Hex: {protocol_total[0, 'total_tvl_hex']}")

    print("\n✅ All operations completed successfully!")
    print("🎯 This demonstrates the exact group_by().agg(u256.sum()) use case")
    print(" mentioned in Polars issue #15443")
84+
if __name__ == "__main__":
85+
main()

0 commit comments

Comments
 (0)