1+ from typing import Dict , Any
2+ import pandas as pd
3+ import numpy as np
4+ from dataclasses import dataclass
5+
6+
@dataclass
class FeeStatistics:
    """Container for the summary statistics of a single fee series.

    Attributes:
        mean: Average value of the series.
        median: Median figure reported for the series.
        std: Standard deviation of the series.
        min: Smallest observed value.
        max: Largest observed value.
        percentiles: Percentile values keyed by a string label
            (for example ``'25'``, ``'75'``, ``'95'``).
    """

    mean: float
    median: float
    std: float
    min: float
    max: float
    percentiles: Dict[str, float]
16+
17+
def calculate_fee_statistics(df: pd.DataFrame) -> Dict[str, FeeStatistics]:
    """Summarise the relative and absolute fee columns of ``df``.

    Args:
        df: Frame containing the columns
            ``relative_fees_percentage_mean``,
            ``relative_fees_percentage_median``,
            ``absolute_fees_satoshis_mean`` and
            ``absolute_fees_satoshis_median``.

    Returns:
        A dict with the keys ``'relative_percentage'`` and
        ``'absolute_satoshis'``, each mapped to a ``FeeStatistics`` built
        from the corresponding pair of columns.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    # (result key, "<...>_mean" column, "<...>_median" column)
    fee_columns = (
        (
            'relative_percentage',
            'relative_fees_percentage_mean',
            'relative_fees_percentage_median',
        ),
        (
            'absolute_satoshis',
            'absolute_fees_satoshis_mean',
            'absolute_fees_satoshis_median',
        ),
    )
    quantile_levels = (('25', 0.25), ('75', 0.75), ('95', 0.95))

    summaries = {}
    for result_key, mean_column, median_column in fee_columns:
        mean_series = df[mean_column]
        # NOTE(review): the reported "median" is the *average* of the
        # per-row median column, not a median -- confirm this is intended.
        averaged_median = df[median_column].mean()
        summaries[result_key] = FeeStatistics(
            mean=mean_series.mean(),
            median=averaged_median,
            std=mean_series.std(),
            min=mean_series.min(),
            max=mean_series.max(),
            percentiles={
                label: mean_series.quantile(level)
                for label, level in quantile_levels
            },
        )

    return summaries
51+
52+
def calculate_time_based_statistics(df: pd.DataFrame, freq: str = 'D') -> pd.DataFrame:
    """Aggregate fee, liquidity and maker columns per time bucket.

    Rows are grouped on the frame's index with ``pd.Grouper(freq=freq)``,
    so the index has to be datetime-like (pandas raises ``TypeError``
    otherwise).

    Args:
        df: Frame with the columns ``relative_fees_percentage_mean``,
            ``absolute_fees_satoshis_mean``, ``total_liquidity`` and
            ``total_unique_makers``.
        freq: Pandas offset alias giving the bucket width (default ``'D'``).

    Returns:
        A frame with one row per bucket and two-level columns of the form
        ``(source column, aggregation name)``.
    """
    per_column_aggregations = {
        'relative_fees_percentage_mean': ['mean', 'std', 'count'],
        'absolute_fees_satoshis_mean': ['mean', 'std', 'count'],
        'total_liquidity': ['mean', 'std', 'min', 'max'],
        'total_unique_makers': ['mean', 'min', 'max'],
    }
    time_buckets = pd.Grouper(freq=freq)
    bucketed = df.groupby(time_buckets)
    return bucketed.agg(per_column_aggregations)
61+
62+
63+ # aggregations.py
def compute_fee_ratios(df: pd.DataFrame, window_size: int = 1000) -> pd.DataFrame:
    """Compute per-row fee type ratios and their rolling averages.

    Args:
        df: Frame with the columns ``relative_fees_count`` and
            ``absolute_fees_count``.
        window_size: Width of the centred rolling window used for the
            smoothed columns (default 1000).

    Returns:
        A new frame sharing ``df``'s index with the columns
        ``relative_ratio``, ``absolute_ratio``, ``relative_ratio_smooth``
        and ``absolute_ratio_smooth``.

    Notes:
        * A row whose two counts are both zero has no defined ratio; pandas
          yields NaN for it.
        * The rolling mean uses pandas' default ``min_periods`` (the full
          window), so positions without a complete, NaN-free window are NaN.
    """
    relative_count = df['relative_fees_count']
    absolute_count = df['absolute_fees_count']
    # Shared denominator: computed once instead of once per ratio.
    total_count = relative_count + absolute_count

    df_smooth = pd.DataFrame(index=df.index)
    df_smooth['relative_ratio'] = relative_count / total_count
    df_smooth['absolute_ratio'] = absolute_count / total_count

    for col in ('relative_ratio', 'absolute_ratio'):
        # Column name is "<col>_smooth" with no embedded whitespace.
        df_smooth[f'{col}_smooth'] = (
            df_smooth[col].rolling(window=window_size, center=True).mean()
        )

    return df_smooth
85+
86+
def compute_volume_metrics(df: pd.DataFrame, window_size: int = 1000) -> pd.DataFrame:
    """Compute total fee-count volume, per-type shares and rolling averages.

    Args:
        df: Frame with the columns ``relative_fees_count`` and
            ``absolute_fees_count``.
        window_size: Width of the centred rolling window used for the
            smoothed columns (default 1000).

    Returns:
        A new frame sharing ``df``'s index with the columns
        ``total_volume``, ``relative_share``, ``absolute_share`` and a
        ``<name>_smooth`` rolling-mean column for each of the three.

    Notes:
        * A row whose total volume is zero has no defined share; pandas
          yields NaN for it.
        * The rolling mean uses pandas' default ``min_periods`` (the full
          window), so positions without a complete, NaN-free window are NaN.
    """
    relative_count = df['relative_fees_count']
    absolute_count = df['absolute_fees_count']

    df_vol = pd.DataFrame(index=df.index)
    df_vol['total_volume'] = relative_count + absolute_count

    # Each type's share of the combined count.
    df_vol['relative_share'] = relative_count / df_vol['total_volume']
    df_vol['absolute_share'] = absolute_count / df_vol['total_volume']

    for metric in ('total_volume', 'relative_share', 'absolute_share'):
        # Column name is "<metric>_smooth" with no embedded whitespace.
        df_vol[f'{metric}_smooth'] = (
            df_vol[metric].rolling(window=window_size, center=True).mean()
        )

    return df_vol
107+
108+
109+ # metrics.py
def calculate_liquidity_metrics(df: pd.DataFrame) -> Dict[str, float]:
    """Calculate liquidity-related summary metrics.

    Args:
        df: Frame with the columns ``total_liquidity`` and
            ``total_unique_makers``.

    Returns:
        A dict of plain floats with the keys:

        * ``'avg_liquidity'`` -- mean of ``total_liquidity``.
        * ``'liquidity_per_maker'`` -- mean of the row-wise
          ``total_liquidity / total_unique_makers``. Rows with zero makers
          are skipped, so one such row cannot turn the average into
          infinity.
        * ``'liquidity_volatility'`` -- standard deviation of
          ``total_liquidity`` divided by its mean; NaN when the mean is
          zero, where the quotient is undefined.
    """
    liquidity = df['total_liquidity']
    makers = df['total_unique_makers']

    # Mask zero maker counts as NaN; Series.mean() then ignores those rows
    # instead of averaging in an infinite quotient.
    liquidity_per_maker = (liquidity / makers.where(makers != 0)).mean()

    mean_liquidity = liquidity.mean()
    if mean_liquidity == 0:
        liquidity_volatility = float('nan')
    else:
        liquidity_volatility = liquidity.std() / mean_liquidity

    return {
        'avg_liquidity': float(mean_liquidity),
        'liquidity_per_maker': float(liquidity_per_maker),
        'liquidity_volatility': float(liquidity_volatility),
    }
119+
120+
def calculate_market_health_metrics(df: pd.DataFrame, window: int = 1000) -> Dict[str, float]:
    """Calculate market health indicators.

    Args:
        df: Frame with the columns ``total_unique_makers``,
            ``relative_fees_percentage_mean`` and ``total_liquidity``.
        window: Number of rows in the rolling window used by the two
            stability metrics (default 1000). With fewer than ``window``
            rows no window is complete and both stability metrics are NaN.

    Returns:
        A dict of plain floats with the keys:

        * ``'maker_stability'`` -- mean over all complete windows of
          rolling std / rolling mean of ``total_unique_makers``.
        * ``'fee_stability'`` -- the same quantity for
          ``relative_fees_percentage_mean``.
        * ``'market_depth'`` -- mean of the row-wise product
          ``total_liquidity * total_unique_makers``.
    """

    def _mean_rolling_variation(series: pd.Series) -> float:
        # Build the rolling object once and reuse it for std and mean.
        rolling = series.rolling(window=window)
        return float((rolling.std() / rolling.mean()).mean())

    depth = df['total_liquidity'] * df['total_unique_makers']

    return {
        'maker_stability': _mean_rolling_variation(df['total_unique_makers']),
        'fee_stability': _mean_rolling_variation(df['relative_fees_percentage_mean']),
        'market_depth': float(depth.mean()),
    }
0 commit comments