|
71 | 71 | "outputs": [], |
72 | 72 | "source": [ |
73 | 73 | "import pandas as pd\n", |
| 74 | + "\n", |
74 | 75 | "import pandera.pandas as pa\n", |
75 | 76 | "from pandera.typing import DataFrame, Series\n", |
76 | 77 | "\n", |
|
127 | 128 | "@pa.check_types(lazy=True)\n", |
128 | 129 | "def add_sales_tax(data: DataFrame[Schema]):\n", |
129 | 130 | " # creates a new column in the data frame that calculates prices after sales tax\n", |
130 | | - " data['after_tax'] = data['price'] + (data['price'] * .06)\n", |
| 131 | + " data[\"after_tax\"] = data[\"price\"] + (data[\"price\"] * 0.06)\n", |
131 | 132 | " return data" |
132 | 133 | ] |
133 | 134 | }, |
|
153 | 154 | "metadata": {}, |
154 | 155 | "outputs": [], |
155 | 156 | "source": [ |
156 | | - "valid_data = pd.DataFrame.from_records([\n", |
157 | | - " {\"item\": \"apple\", \"price\": 0.5},\n", |
158 | | - " {\"item\": \"orange\", \"price\": 0.75}\n", |
159 | | - "])\n", |
| 157 | + "valid_data = pd.DataFrame.from_records(\n", |
| 158 | + " [{\"item\": \"apple\", \"price\": 0.5}, {\"item\": \"orange\", \"price\": 0.75}]\n", |
| 159 | + ")\n", |
160 | 160 | "\n", |
161 | 161 | "add_sales_tax(valid_data)" |
162 | 162 | ] |
|
177 | 177 | "metadata": {}, |
178 | 178 | "outputs": [], |
179 | 179 | "source": [ |
180 | | - "invalid_data = pd.DataFrame.from_records([\n", |
181 | | - " {\"item\": \"applee\", \"price\": 0.5},\n", |
182 | | - " {\"item\": \"orange\", \"price\": -1000}\n", |
183 | | - "])\n", |
| 180 | + "invalid_data = pd.DataFrame.from_records(\n", |
| 181 | + " [{\"item\": \"applee\", \"price\": 0.5}, {\"item\": \"orange\", \"price\": -1000}]\n", |
| 182 | + ")\n", |
184 | 183 | "\n", |
185 | 184 | "try:\n", |
186 | 185 | " add_sales_tax(invalid_data)\n", |
|
262 | 261 | " item: Series[str] = pa.Field(isin=[\"apple\", \"orange\"], coerce=True)\n", |
263 | 262 | " price: Series[float] = pa.Field(gt=0, coerce=True)\n", |
264 | 263 | "\n", |
| 264 | + "\n", |
265 | 265 | "class TransformedSchema(Schema):\n", |
266 | 266 | " expiry: Series[pd.Timestamp] = pa.Field(coerce=True)" |
267 | 267 | ] |
|
289 | 289 | "outputs": [], |
290 | 290 | "source": [ |
291 | 291 | "from datetime import datetime\n", |
292 | | - "from typing import List\n", |
293 | 292 | "\n", |
294 | 293 | "\n", |
295 | 294 | "@pa.check_types(lazy=True)\n", |
296 | 295 | "def transform_data(\n", |
297 | 296 | " data: DataFrame[Schema],\n", |
298 | | - " expiry: List[datetime],\n", |
| 297 | + " expiry: list[datetime],\n", |
299 | 298 | ") -> DataFrame[TransformedSchema]:\n", |
300 | 299 | " return data.assign(expiry=expiry)\n", |
301 | 300 | "\n", |
|
381 | 380 | " item: Series[str] = pa.Field(isin=[\"apple\", \"orange\"], coerce=True)\n", |
382 | 381 | " price: Series[float] = pa.Field(gt=0, coerce=True)\n", |
383 | 382 | "\n", |
| 383 | + "\n", |
384 | 384 | "# object-based API\n", |
385 | | - "schema = pa.DataFrameSchema({\n", |
386 | | - " \"item\": pa.Column(str, pa.Check.isin([\"apple\", \"orange\"]), coerce=True),\n", |
387 | | - " \"price\": pa.Column(float, pa.Check.gt(0), coerce=True),\n", |
388 | | - "})" |
| 385 | + "schema = pa.DataFrameSchema(\n", |
| 386 | + " {\n", |
| 387 | + " \"item\": pa.Column(\n", |
| 388 | + " str, pa.Check.isin([\"apple\", \"orange\"]), coerce=True\n", |
| 389 | + " ),\n", |
| 390 | + " \"price\": pa.Column(float, pa.Check.gt(0), coerce=True),\n", |
| 391 | + " }\n", |
| 392 | + ")" |
389 | 393 | ] |
390 | 394 | }, |
391 | 395 | { |
|
407 | 411 | "source": [ |
408 | 412 | "transformed_schema = schema.add_columns({\"expiry\": pa.Column(pd.Timestamp)})\n", |
409 | 413 | "schema.remove_columns([\"item\"]) # remove the \"item\" column\n", |
410 | | - "schema.update_column(\"price\", dtype=int) # update the datatype of the \"price\" column to integer" |
| 414 | + "schema.update_column(\n", |
| 415 | + " \"price\", dtype=int\n", |
| 416 | + ") # update the datatype of the \"price\" column to integer" |
411 | 417 | ] |
412 | 418 | }, |
413 | 419 | { |
|
0 commit comments