11import pandas as pd
2+ from pandas .errors import ParserError
23import re
34
class BaseDf:
    """Validate and normalise a DataFrame in place.

    The wrapped frame must have between 3 and 6 columns. Its last column is
    parsed as ISO 8601 datetimes and renamed to ``"date"``; its
    second-to-last column is cast to float and renamed to ``"value"``.
    """

    def __init__(self, df: pd.DataFrame) -> None:
        # Kept by reference: every preparation step mutates this same object,
        # so the caller's frame reflects the conversions and renames too.
        self.df = df

    def prepare(self) -> None:
        """Run all checks and conversions.

        Raises:
            BaseDfException: if the column count is out of range, or the
                date/value columns cannot be converted.
        """
        self.verify_column_count()
        self.prepare_date_column()
        self.prepare_value_column()

    def verify_column_count(self) -> None:
        """Raise ``BaseDfException`` unless the frame has 3 to 6 columns."""
        if not (3 <= self.df.shape[1] <= 6):
            raise BaseDfException("number of columns must be between 3 and 6")

    def prepare_date_column(self) -> None:
        """Parse the last column as ISO 8601 datetimes and name it ``"date"``.

        Raises:
            BaseDfException: if the column cannot be parsed as datetimes.
        """
        original_name = self.df.columns[-1]
        try:
            parsed = pd.to_datetime(self.df[original_name], format="ISO8601")
        except (ValueError, TypeError, ParserError) as err:
            # TypeError covers cells of a type pandas cannot interpret at all.
            raise BaseDfException("last column must be a date column") from err
        self.df[original_name] = parsed
        # rename() returns a new frame by default; inplace=True is required
        # for the new label to actually land on self.df.
        self.df.rename(columns={original_name: "date"}, inplace=True)

    def prepare_value_column(self) -> None:
        """Cast the second-to-last column to float and name it ``"value"``.

        Raises:
            BaseDfException: if the column cannot be cast to float.
        """
        original_name = self.df.columns[-2]
        try:
            converted = self.df[original_name].astype("float")
        except (ValueError, TypeError) as err:
            raise BaseDfException(
                "second to last column must be a quantity column"
            ) from err
        self.df[original_name] = converted
        # See prepare_date_column: without inplace=True the rename is a no-op.
        self.df.rename(columns={original_name: "value"}, inplace=True)
34+
class BaseDfException(Exception):
    """Raised when a DataFrame fails a ``BaseDf`` validation rule.

    Attributes are documented inline: ``message`` holds the human-readable
    reason and is also passed to ``Exception`` so ``str(exc)`` returns it.
    """

    def __init__(self, message):
        super().__init__(message)
        # Exposed separately so callers can read the reason via exc.message.
        self.message = message
39+
40+
441def process_bar_chart_race (df ):
542 """
643 Process data for bar chart race visualization.
@@ -9,19 +46,11 @@ def process_bar_chart_race(df):
946 :return: Processed data suitable for bar chart race or error message
1047 """
1148
12- # RULES
13-
14- # 1. Check if the number of columns is between 3 and 6
15- if not (3 <= df .shape [1 ] <= 6 ):
16- return {"failed" : "data failed bar chart race rule - number of columns must be between 3 and 6" }
17-
18- # 2. Check if the last column is a date column
19- if not df .iloc [:, - 1 ].apply (is_datetime_string ).any ():
20- return {"failed" : "data failed bar chart race rule - last column must be a date column" }
21-
22- # 3. Check if the second to last column is a number-string column
23- if not df .iloc [:, - 2 ].apply (lambda x : isinstance (x , str ) and x .replace ('.' , '' , 1 ).isdigit ()).any ():
24- return {"failed" : "data failed bar chart race rule - second to last column must be a quantity column" }
49+ bdf = BaseDf (df )
50+ try :
51+ bdf .prepare ()
52+ except BaseDfException as e :
53+ return {"failed" : e .message }
2554
2655 # 4. Identify columns with word identifiers
2756 identifier_columns = identify_word_identifier_columns (df .iloc [:, :- 2 ])
0 commit comments