@@ -120,6 +120,27 @@ def table_to_pandas(table: Table):
120120 return res
121121
122122
def _validate_dataframe(df: pd.DataFrame) -> None:
    """Check the pseudocolumns of ``df`` and normalize its timestamps in place.

    Every column listed in ``api.PANDAS_PSEUDOCOLUMNS`` that is present in ``df``
    must have an integer dtype. On top of that:

    * the ``api.TIME_PSEUDOCOLUMN`` column must hold non-negative, non-missing
      values; if any of them is odd, a warning is emitted and the whole column
      is doubled,
    * the ``api.DIFF_PSEUDOCOLUMN`` column may hold only ``1`` and ``-1``.

    Note that the doubling is written back to ``df`` itself, so the caller's
    DataFrame is modified.

    Raises:
        ValueError: if any of the conditions above is violated.
    """
    for pseudocolumn in api.PANDAS_PSEUDOCOLUMNS:
        if pseudocolumn in df.columns and not pd.api.types.is_integer_dtype(
            df[pseudocolumn].dtype
        ):
            raise ValueError(f"Column {pseudocolumn} has to contain integers only.")

    if api.TIME_PSEUDOCOLUMN in df.columns:
        times = df[api.TIME_PSEUDOCOLUMN]
        # Nullable integer columns can carry missing values. Reject them
        # explicitly: the vectorized reductions below would skip them, while
        # iterating the comparison result with the builtin ``any`` fails with an
        # unhelpful "boolean value of NA is ambiguous" TypeError.
        if times.isna().any():
            raise ValueError(
                f"Column {api.TIME_PSEUDOCOLUMN} cannot contain missing values."
            )
        if (times < 0).any():
            raise ValueError(
                f"Column {api.TIME_PSEUDOCOLUMN} cannot contain negative times."
            )
        if (times % 2 == 1).any():
            warn("timestamps are required to be even; all timestamps will be doubled")
            # In-place update: the function returns None, so the doubled
            # timestamps reach the caller only through ``df``.
            df[api.TIME_PSEUDOCOLUMN] = 2 * times

    if api.DIFF_PSEUDOCOLUMN in df.columns:
        # ``isin`` yields False for missing values, so they are rejected as well.
        if not df[api.DIFF_PSEUDOCOLUMN].isin([1, -1]).all():
            raise ValueError(
                f"Column {api.DIFF_PSEUDOCOLUMN} can only have 1 and -1 values."
            )
142+
143+
123144@runtime_type_check
124145@trace_user_frame
125146def table_from_pandas (
@@ -128,14 +149,27 @@ def table_from_pandas(
128149 unsafe_trusted_ids : bool = False ,
129150 schema : type [Schema ] | None = None ,
130151) -> Table :
152+ """
153+ A function for creating a table from a pandas DataFrame. If it contains a special
154+ column ``__time__``, rows will be split into batches with timestamps from the column.
155+ A special column ``__diff__`` can be used to set an event type - with ``1`` treated
156+ as inserting the row and ``-1`` as removing it.
157+ """
131158 if id_from is not None and schema is not None :
132159 raise ValueError ("parameters `schema` and `id_from` are mutually exclusive" )
133160
161+ ordinary_columns_names = [
162+ column for column in df .columns if column not in api .PANDAS_PSEUDOCOLUMNS
163+ ]
134164 if schema is None :
135- schema = schema_from_pandas (df , id_from = id_from )
136- elif list (df .columns ) != schema .column_names ():
165+ schema = schema_from_pandas (
166+ df , id_from = id_from , exclude_columns = api .PANDAS_PSEUDOCOLUMNS
167+ )
168+ elif ordinary_columns_names != schema .column_names ():
137169 raise ValueError ("schema does not match given dataframe" )
138170
171+ _validate_dataframe (df )
172+
139173 return table_from_datasource (
140174 PandasDataSource (
141175 schema = schema ,
@@ -168,18 +202,28 @@ def _markdown_to_pandas(table_def):
168202 ).convert_dtypes ()
169203
170204
171- def parse_to_table (
def table_from_markdown(
    table_def,
    id_from=None,
    unsafe_trusted_ids=False,
    schema: type[Schema] | None = None,
) -> Table:
    """
    Create a table from its markdown definition.

    The definition is first converted to a pandas DataFrame and then passed,
    together with the remaining arguments, to ``table_from_pandas``.

    If the definition contains a special column ``__time__``, rows will be split
    into batches with timestamps taken from that column. A special column
    ``__diff__`` can be used to set an event type - with ``1`` treated as
    inserting the row and ``-1`` as removing it.
    """
    forwarded_options = {
        "id_from": id_from,
        "unsafe_trusted_ids": unsafe_trusted_ids,
        "schema": schema,
    }
    parsed_frame = _markdown_to_pandas(table_def)
    return table_from_pandas(parsed_frame, **forwarded_options)
181221
182222
223+ # XXX: clean this up
224+ parse_to_table = table_from_markdown
225+
226+
183227@runtime_type_check
184228def table_from_parquet (
185229 path : str | PathLike ,
@@ -205,10 +249,6 @@ def table_to_parquet(table: Table, filename: str | PathLike):
205249 return df .to_parquet (filename )
206250
207251
208- # XXX: clean this up
209- table_from_markdown = parse_to_table
210-
211-
212252class _EmptyConnectorSubject (ConnectorSubject ):
213253 def run (self ):
214254 pass
@@ -352,7 +392,7 @@ def table_from_pandas(
352392 """
353393 if schema is None :
354394 schema = schema_from_pandas (
355- df , exclude_columns = [ "_time" , "_diff" , "_worker" ]
395+ df , exclude_columns = { "_time" , "_diff" , "_worker" }
356396 )
357397 schema , api_schema = read_schema (schema = schema )
358398 value_fields : list [api .ValueField ] = api_schema ["value_fields" ]
0 commit comments