@@ -72,9 +72,9 @@ def _to_text(
7272
7373
7474@apply_configs
75- def to_csv ( # pylint: disable=too-many-arguments,too-many-locals,too-many-statements
75+ def to_csv ( # pylint: disable=too-many-arguments,too-many-locals,too-many-statements,too-many-branches
7676 df : pd .DataFrame ,
77- path : str ,
77+ path : Optional [ str ] = None ,
7878 sep : str = "," ,
7979 index : bool = True ,
8080 columns : Optional [List [str ]] = None ,
@@ -137,8 +137,9 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
137137 ----------
138138 df: pandas.DataFrame
139139 Pandas DataFrame https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
140- path : str
141- Amazon S3 path (e.g. s3://bucket/filename.csv).
140+ path : str, optional
141+ Amazon S3 path (e.g. ``s3://bucket/prefix/filename.csv``) or, for a dataset, an S3 prefix (e.g. ``s3://bucket/prefix``).
142+ Required if dataset=False or when creating a new dataset. If omitted, the location of the existing Glue catalog table is used.
142143 sep : str
143144 String of length 1. Field delimiter for the output file.
144145 index : bool
@@ -414,13 +415,27 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
414415 catalog_table_input = catalog ._get_table_input ( # pylint: disable=protected-access
415416 database = database , table = table , boto3_session = session , catalog_id = catalog_id
416417 )
418+ catalog_path = catalog_table_input ["StorageDescriptor" ]["Location" ] if catalog_table_input else None
419+ if path is None :
420+ if catalog_path :
421+ path = catalog_path
422+ else :
423+ raise exceptions .InvalidArgumentValue (
424+ "Glue table does not exist in the catalog. Please pass the `path` argument to create it."
425+ )
426+ elif path and catalog_path :
427+ if path .rstrip ("/" ) != catalog_path .rstrip ("/" ):
428+ raise exceptions .InvalidArgumentValue (
429+ f"The specified path: { path } , does not match the existing Glue catalog table path: { catalog_path } "
430+ )
417431 if pandas_kwargs .get ("compression" ) not in ("gzip" , "bz2" , None ):
418432 raise exceptions .InvalidArgumentCombination (
419433 "If database and table are given, you must use one of these compressions: gzip, bz2 or None."
420434 )
421435
422436 df = _apply_dtype (df = df , dtype = dtype , catalog_table_input = catalog_table_input , mode = mode )
423437
438+ paths : List [str ] = []
424439 if dataset is False :
425440 pandas_kwargs ["sep" ] = sep
426441 pandas_kwargs ["index" ] = index
@@ -434,7 +449,7 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
434449 s3_additional_kwargs = s3_additional_kwargs ,
435450 ** pandas_kwargs ,
436451 )
437- paths = [path ]
452+ paths = [path ] # type: ignore
438453 else :
439454 if database and table :
440455 quoting : Optional [int ] = csv .QUOTE_NONE
@@ -461,7 +476,7 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
461476 func = _to_text ,
462477 concurrent_partitioning = concurrent_partitioning ,
463478 df = df ,
464- path_root = path ,
479+ path_root = path , # type: ignore
465480 index = index ,
466481 sep = sep ,
467482 compression = compression ,
@@ -486,7 +501,7 @@ def to_csv( # pylint: disable=too-many-arguments,too-many-locals,too-many-state
486501 catalog ._create_csv_table ( # pylint: disable=protected-access
487502 database = database ,
488503 table = table ,
489- path = path ,
504+ path = path , # type: ignore
490505 columns_types = columns_types ,
491506 partitions_types = partitions_types ,
492507 bucketing_info = bucketing_info ,
0 commit comments