
Commit 85bfade

Replacing all f-strings in logging commands
1 parent 8fad37c commit 85bfade
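This commit swaps eager f-string interpolation in logging calls for the logging module's lazy %-style arguments: the message is only rendered if a handler actually emits the record, and the change also satisfies pylint's logging-fstring-interpolation check. A minimal sketch of the pattern (illustrative logger and variable names, not taken from the diff):

```python
import logging

_logger: logging.Logger = logging.getLogger(__name__)


def example(columns_types: dict) -> None:
    # Before: the f-string is built even when DEBUG is disabled.
    _logger.debug(f"columns_types: {columns_types}")

    # After: the arguments are passed through and only merged into the
    # message when a handler actually processes the DEBUG record.
    _logger.debug("columns_types: %s", columns_types)
```

To see these messages when using the library, enable debug output for the package logger, e.g. `logging.getLogger("awswrangler").setLevel(logging.DEBUG)`.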

File tree: 7 files changed, +66 -66 lines changed


awswrangler/_data_types.py

Lines changed: 7 additions & 7 deletions
@@ -207,7 +207,7 @@ def pyarrow2sqlalchemy( # pylint: disable=too-many-branches,too-many-return-sta
         return sqlalchemy.types.Date
     if pa.types.is_binary(dtype):
         if db_type == "redshift":
-            raise exceptions.UnsupportedType(f"Binary columns are not supported for Redshift.")  # pragma: no cover
+            raise exceptions.UnsupportedType("Binary columns are not supported for Redshift.")  # pragma: no cover
         return sqlalchemy.types.Binary
     if pa.types.is_decimal(dtype):
         return sqlalchemy.types.Numeric(precision=dtype.precision, scale=dtype.scale)
@@ -257,7 +257,7 @@ def pyarrow_types_from_pandas(
     # Filling schema
     columns_types: Dict[str, pa.DataType]
     columns_types = {n: cols_dtypes[n] for n in sorted_cols}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     return columns_types


@@ -275,7 +275,7 @@ def athena_types_from_pandas(
             athena_columns_types[k] = casts[k]
         else:
             athena_columns_types[k] = pyarrow2athena(dtype=v)
-    _logger.debug(f"athena_columns_types: {athena_columns_types}")
+    _logger.debug("athena_columns_types: %s", athena_columns_types)
     return athena_columns_types


@@ -315,7 +315,7 @@ def pyarrow_schema_from_pandas(
         if (k in df.columns) and (k not in ignore):
             columns_types[k] = athena2pyarrow(v)
     columns_types = {k: v for k, v in columns_types.items() if v is not None}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     return pa.schema(fields=columns_types)


@@ -324,11 +324,11 @@ def athena_types_from_pyarrow_schema(
 ) -> Tuple[Dict[str, str], Optional[Dict[str, str]]]:
     """Extract the related Athena data types from any PyArrow Schema considering possible partitions."""
     columns_types: Dict[str, str] = {str(f.name): pyarrow2athena(dtype=f.type) for f in schema}
-    _logger.debug(f"columns_types: {columns_types}")
+    _logger.debug("columns_types: %s", columns_types)
     partitions_types: Optional[Dict[str, str]] = None
     if partitions is not None:
         partitions_types = {p.name: pyarrow2athena(p.dictionary.type) for p in partitions}
-        _logger.debug(f"partitions_types: {partitions_types}")
+        _logger.debug("partitions_types: %s", partitions_types)
     return columns_types, partitions_types


@@ -382,5 +382,5 @@ def sqlalchemy_types_from_pandas(
             sqlalchemy_columns_types[k] = casts[k]
         else:
             sqlalchemy_columns_types[k] = pyarrow2sqlalchemy(dtype=v, db_type=db_type)
-    _logger.debug(f"sqlalchemy_columns_types: {sqlalchemy_columns_types}")
+    _logger.debug("sqlalchemy_columns_types: %s", sqlalchemy_columns_types)
     return sqlalchemy_columns_types

awswrangler/athena.py

Lines changed: 13 additions & 13 deletions
@@ -176,8 +176,8 @@ def wait_query(query_execution_id: str, boto3_session: Optional[boto3.Session] =
         time.sleep(_QUERY_WAIT_POLLING_DELAY)
         response = client_athena.get_query_execution(QueryExecutionId=query_execution_id)
         state = response["QueryExecution"]["Status"]["State"]
-        _logger.debug(f"state: {state}")
-        _logger.debug(f"StateChangeReason: {response['QueryExecution']['Status'].get('StateChangeReason')}")
+        _logger.debug("state: %s", state)
+        _logger.debug("StateChangeReason: %s", response["QueryExecution"]["Status"].get("StateChangeReason"))
     if state == "FAILED":
         raise exceptions.QueryFailed(response["QueryExecution"]["Status"].get("StateChangeReason"))
     if state == "CANCELLED":
@@ -265,7 +265,7 @@ def _get_query_metadata(
     cols_types: Dict[str, str] = get_query_columns_types(
         query_execution_id=query_execution_id, boto3_session=boto3_session
     )
-    _logger.debug(f"cols_types: {cols_types}")
+    _logger.debug("cols_types: %s", cols_types)
     dtype: Dict[str, str] = {}
     parse_timestamps: List[str] = []
     parse_dates: List[str] = []
@@ -298,11 +298,11 @@ def _get_query_metadata(
             converters[col_name] = lambda x: Decimal(str(x)) if str(x) not in ("", "none", " ", "<NA>") else None
         else:
             dtype[col_name] = pandas_type
-    _logger.debug(f"dtype: {dtype}")
-    _logger.debug(f"parse_timestamps: {parse_timestamps}")
-    _logger.debug(f"parse_dates: {parse_dates}")
-    _logger.debug(f"converters: {converters}")
-    _logger.debug(f"binaries: {binaries}")
+    _logger.debug("dtype: %s", dtype)
+    _logger.debug("parse_timestamps: %s", parse_timestamps)
+    _logger.debug("parse_dates: %s", parse_dates)
+    _logger.debug("converters: %s", converters)
+    _logger.debug("binaries: %s", binaries)
     return dtype, parse_timestamps, parse_dates, converters, binaries


@@ -446,7 +446,7 @@ def read_sql_query( # pylint: disable=too-many-branches,too-many-locals
         f") AS\n"
         f"{sql}"
     )
-    _logger.debug(f"sql: {sql}")
+    _logger.debug("sql: %s", sql)
     query_id: str = start_query_execution(
         sql=sql,
         database=database,
@@ -456,7 +456,7 @@ def read_sql_query( # pylint: disable=too-many-branches,too-many-locals
         kms_key=kms_key,
         boto3_session=session,
     )
-    _logger.debug(f"query_id: {query_id}")
+    _logger.debug("query_id: %s", query_id)
     query_response: Dict[str, Any] = wait_query(query_execution_id=query_id, boto3_session=session)
     if query_response["QueryExecution"]["Status"]["State"] in ["FAILED", "CANCELLED"]:  # pragma: no cover
         reason: str = query_response["QueryExecution"]["Status"]["StateChangeReason"]
@@ -468,7 +468,7 @@ def read_sql_query( # pylint: disable=too-many-branches,too-many-locals
     manifest_path: str = f"{_s3_output}/tables/{query_id}-manifest.csv"
     paths: List[str] = _extract_ctas_manifest_paths(path=manifest_path, boto3_session=session)
     chunked: Union[bool, int] = False if chunksize is None else chunksize
-    _logger.debug(f"chunked: {chunked}")
+    _logger.debug("chunked: %s", chunked)
     if not paths:
         if chunked is False:
             dfs = pd.DataFrame()
@@ -485,9 +485,9 @@ def read_sql_query( # pylint: disable=too-many-branches,too-many-locals
         )
         path = f"{_s3_output}/{query_id}.csv"
         s3.wait_objects_exist(paths=[path], use_threads=False, boto3_session=session)
-        _logger.debug(f"Start CSV reading from {path}")
+        _logger.debug("Start CSV reading from %s", path)
         _chunksize: Optional[int] = chunksize if isinstance(chunksize, int) else None
-        _logger.debug(f"_chunksize: {_chunksize}")
+        _logger.debug("_chunksize: %s", _chunksize)
         ret = s3.read_csv(
             path=[path],
             dtype=dtype,

awswrangler/catalog.py

Lines changed: 5 additions & 5 deletions
@@ -766,7 +766,7 @@ def drop_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
     duplicated_cols = df.columns.duplicated()
     duplicated_cols_names: List[str] = list(df.columns[duplicated_cols])
     if len(duplicated_cols_names) > 0:
-        _logger.warning(f"Dropping repeated columns: {duplicated_cols_names}")
+        _logger.warning("Dropping repeated columns: %s", duplicated_cols_names)
     return df.loc[:, ~duplicated_cols]


@@ -967,11 +967,11 @@ def _create_table(
         if name in columns_comments:
             par["Comment"] = columns_comments[name]
     session: boto3.Session = _utils.ensure_session(session=boto3_session)
-
-    if mode == "overwrite":
+    exist: bool = does_table_exist(database=database, table=table, boto3_session=session)
+    if (mode == "overwrite") or (exist is False):
         delete_table_if_exists(database=database, table=table, boto3_session=session)
-        client_glue: boto3.client = _utils.client(service_name="glue", session=session)
-        client_glue.create_table(DatabaseName=database, TableInput=table_input)
+    client_glue: boto3.client = _utils.client(service_name="glue", session=session)
+    client_glue.create_table(DatabaseName=database, TableInput=table_input)


 def _csv_table_definition(

awswrangler/cloudwatch.py

Lines changed: 4 additions & 4 deletions
@@ -56,11 +56,11 @@ def start_query(
     ... )

     """
-    _logger.debug(f"log_group_names: {log_group_names}")
+    _logger.debug("log_group_names: %s", log_group_names)
     start_timestamp: int = int(1000 * start_time.timestamp())
     end_timestamp: int = int(1000 * end_time.timestamp())
-    _logger.debug(f"start_timestamp: {start_timestamp}")
-    _logger.debug(f"end_timestamp: {end_timestamp}")
+    _logger.debug("start_timestamp: %s", start_timestamp)
+    _logger.debug("end_timestamp: %s", end_timestamp)
     args: Dict[str, Any] = {
         "logGroupNames": log_group_names,
         "startTime": start_timestamp,
@@ -109,7 +109,7 @@ def wait_query(query_id: str, boto3_session: Optional[boto3.Session] = None) ->
         time.sleep(_QUERY_WAIT_POLLING_DELAY)
         response = client_logs.get_query_results(queryId=query_id)
         status = response["status"]
-        _logger.debug(f"status: {status}")
+        _logger.debug("status: %s", status)
     if status == "Failed":  # pragma: no cover
         raise exceptions.QueryFailed(f"query ID: {query_id}")
     if status == "Cancelled":

awswrangler/db.py

Lines changed: 11 additions & 11 deletions
@@ -646,7 +646,7 @@ def copy_files_to_redshift( # pylint: disable=too-many-locals,too-many-argument
     athena_types, _ = s3.read_parquet_metadata(
         path=paths, dataset=False, use_threads=use_threads, boto3_session=session
     )
-    _logger.debug(f"athena_types: {athena_types}")
+    _logger.debug("athena_types: %s", athena_types)
     redshift_types: Dict[str, str] = {}
     for col_name, col_type in athena_types.items():
         length: int = _varchar_lengths[col_name] if col_name in _varchar_lengths else varchar_lengths_default
@@ -680,7 +680,7 @@ def copy_files_to_redshift( # pylint: disable=too-many-locals,too-many-argument
 def _rs_upsert(con: Any, table: str, temp_table: str, schema: str, primary_keys: Optional[List[str]] = None) -> None:
     if not primary_keys:
         primary_keys = _rs_get_primary_keys(con=con, schema=schema, table=table)
-    _logger.debug(f"primary_keys: {primary_keys}")
+    _logger.debug("primary_keys: %s", primary_keys)
     if not primary_keys:  # pragma: no cover
         raise exceptions.InvalidRedshiftPrimaryKeys()
     equals_clause: str = f"{table}.%s = {temp_table}.%s"
@@ -735,7 +735,7 @@ def _rs_create_table(
         f"{distkey_str}"
         f"{sortkey_str}"
     )
-    _logger.debug(f"Create table query:\n{sql}")
+    _logger.debug("Create table query:\n%s", sql)
     con.execute(sql)
     return table, schema

@@ -746,7 +746,7 @@ def _rs_validate_parameters(
     if diststyle not in _RS_DISTSTYLES:
         raise exceptions.InvalidRedshiftDiststyle(f"diststyle must be in {_RS_DISTSTYLES}")
     cols = list(redshift_types.keys())
-    _logger.debug(f"Redshift columns: {cols}")
+    _logger.debug("Redshift columns: %s", cols)
     if (diststyle == "KEY") and (not distkey):
         raise exceptions.InvalidRedshiftDistkey("You must pass a distkey if you intend to use KEY diststyle")
     if distkey and distkey not in cols:
@@ -775,13 +775,13 @@ def _rs_copy(
     sql: str = (
         f"COPY {table_name} FROM '{manifest_path}'\n" f"IAM_ROLE '{iam_role}'\n" "MANIFEST\n" "FORMAT AS PARQUET"
     )
-    _logger.debug(f"copy query:\n{sql}")
+    _logger.debug("copy query:\n%s", sql)
     con.execute(sql)
     sql = "SELECT pg_last_copy_id() AS query_id"
     query_id: int = con.execute(sql).fetchall()[0][0]
     sql = f"SELECT COUNT(DISTINCT filename) as num_files_loaded " f"FROM STL_LOAD_COMMITS WHERE query = {query_id}"
     num_files_loaded: int = con.execute(sql).fetchall()[0][0]
-    _logger.debug(f"{num_files_loaded} files counted. {num_files} expected.")
+    _logger.debug("%s files counted. %s expected.", num_files_loaded, num_files)
     if num_files_loaded != num_files:  # pragma: no cover
         raise exceptions.RedshiftLoadError(
             f"Redshift load rollbacked. {num_files_loaded} files counted. {num_files} expected."
@@ -846,17 +846,17 @@ def write_redshift_copy_manifest(
     payload: str = json.dumps(manifest)
     bucket: str
     bucket, key = _utils.parse_path(manifest_path)
-    _logger.debug(f"payload: {payload}")
+    _logger.debug("payload: %s", payload)
     client_s3: boto3.client = _utils.client(service_name="s3", session=session)
-    _logger.debug(f"bucket: {bucket}")
-    _logger.debug(f"key: {key}")
+    _logger.debug("bucket: %s", bucket)
+    _logger.debug("key: %s", key)
     client_s3.put_object(Body=payload, Bucket=bucket, Key=key)
     return manifest


 def _rs_drop_table(con: Any, schema: str, table: str) -> None:
     sql = f"DROP TABLE IF EXISTS {schema}.{table}"
-    _logger.debug(f"Drop table query:\n{sql}")
+    _logger.debug("Drop table query:\n%s", sql)
     con.execute(sql)

@@ -1104,7 +1104,7 @@ def unload_redshift_to_files(
     query_id: int = _con.execute(sql).fetchall()[0][0]
     sql = f"SELECT path FROM STL_UNLOAD_LOG WHERE query={query_id};"
     paths = [x[0].replace(" ", "") for x in _con.execute(sql).fetchall()]
-    _logger.debug(f"paths: {paths}")
+    _logger.debug("paths: %s", paths)
     return paths

awswrangler/emr.py

Lines changed: 8 additions & 8 deletions
@@ -364,7 +364,7 @@ def _build_cluster_args(**pars): # pylint: disable=too-many-branches,too-many-s
     if pars["tags"] is not None:
         args["Tags"] = [{"Key": k, "Value": v} for k, v in pars["tags"].items()]

-    _logger.info(f"args: \n{json.dumps(args, default=str, indent=4)}")
+    _logger.info("args: \n%s", json.dumps(args, default=str, indent=4))
     return args

@@ -665,7 +665,7 @@ def create_cluster( # pylint: disable=too-many-arguments,too-many-locals,unused
     args: Dict[str, Any] = _build_cluster_args(**locals())
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.run_job_flow(**args)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["JobFlowId"]

@@ -696,7 +696,7 @@ def get_cluster_state(cluster_id: str, boto3_session: Optional[boto3.Session] =
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.describe_cluster(ClusterId=cluster_id)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["Cluster"]["Status"]["State"]

@@ -723,7 +723,7 @@ def terminate_cluster(cluster_id: str, boto3_session: Optional[boto3.Session] =
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.terminate_job_flows(JobFlowIds=[cluster_id])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))


 def submit_steps(
@@ -755,7 +755,7 @@ def submit_steps(
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=steps)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"]

@@ -807,7 +807,7 @@ def submit_step(
     )
     client_emr: boto3.client = _utils.client(service_name="emr", session=session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=[step])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"][0]

@@ -898,7 +898,7 @@ def get_step_state(cluster_id: str, step_id: str, boto3_session: Optional[boto3.
     """
     client_emr: boto3.client = _utils.client(service_name="emr", session=boto3_session)
     response: Dict[str, Any] = client_emr.describe_step(ClusterId=cluster_id, StepId=step_id)
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["Step"]["Status"]["State"]

@@ -942,7 +942,7 @@ def submit_ecr_credentials_refresh(
     )
     client_emr: boto3.client = _utils.client(service_name="emr", session=session)
     response: Dict[str, Any] = client_emr.add_job_flow_steps(JobFlowId=cluster_id, Steps=[step])
-    _logger.debug(f"response: \n{json.dumps(response, default=str, indent=4)}")
+    _logger.debug("response: \n%s", json.dumps(response, default=str, indent=4))
     return response["StepIds"][0]

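One nuance in the emr.py changes: switching to %-style arguments defers only the string formatting, while the `json.dumps(...)` calls passed as arguments still run eagerly at every call site. If that cost ever became a concern, a small wrapper (hypothetical, not part of this commit) could defer the serialization as well:

```python
import json
import logging
from typing import Any

_logger: logging.Logger = logging.getLogger(__name__)


class _LazyJson:
    """Hypothetical helper: defers json.dumps until the record is formatted."""

    def __init__(self, obj: Any) -> None:
        self._obj = obj

    def __str__(self) -> str:
        return json.dumps(self._obj, default=str, indent=4)


response = {"JobFlowId": "j-EXAMPLE"}
# json.dumps only runs if a handler actually emits the DEBUG record.
_logger.debug("response: \n%s", _LazyJson(response))
```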