
Commit b811eeb

Apply changes from #2388 to async tests
1 parent 72cc2a7 commit b811eeb

3 files changed: +134 -52 lines


test/integ/aio/test_arrow_result_async.py

Lines changed: 38 additions & 27 deletions
@@ -834,35 +834,46 @@ async def test_select_vector(conn_cnx, is_public_test):
 
 @pytest.mark.asyncio
 async def test_select_time(conn_cnx):
-    for scale in range(10):
-        await select_time_with_scale(conn_cnx, scale)
-
-
-async def select_time_with_scale(conn_cnx, scale):
+    # Test key scales and meaningful cases in a single table operation
+    # Cover: no fractional seconds, milliseconds, microseconds, nanoseconds
+    scales = [0, 3, 6, 9]  # Key precision levels
     cases = [
-        "00:01:23",
-        "00:01:23.1",
-        "00:01:23.12",
-        "00:01:23.123",
-        "00:01:23.1234",
-        "00:01:23.12345",
-        "00:01:23.123456",
-        "00:01:23.1234567",
-        "00:01:23.12345678",
-        "00:01:23.123456789",
+        "00:01:23",  # Basic time
+        "00:01:23.123456789",  # Max precision
+        "23:59:59.999999999",  # Edge case - max time with max precision
+        "00:00:00.000000001",  # Edge case - min time with min precision
     ]
-    table = "test_arrow_time"
-    column = f"(a time({scale}))"
-    values = (
-        "(-1, NULL), ("
-        + "),(".join([f"{i}, '{c}'" for i, c in enumerate(cases)])
-        + f"), ({len(cases)}, NULL)"
-    )
-    await init(conn_cnx, table, column, values)
-    sql_text = f"select a from {table} order by s"
-    row_count = len(cases) + 2
-    col_count = 1
-    await iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
+    table = "test_arrow_time_scales"
+
+    # Create columns for selected scales only (init function will add 's number' automatically)
+    columns = ", ".join([f"a{i} time({i})" for i in scales])
+    column_def = f"({columns})"
+
+    # Create values for selected scales - each case tests all scales simultaneously
+    value_rows = []
+    for i, case in enumerate(cases):
+        # Each row has the same time value for all scale columns
+        time_values = ", ".join([f"'{case}'" for _ in scales])
+        value_rows.append(f"({i}, {time_values})")
+
+    # Add NULL rows
+    null_values = ", ".join(["NULL" for _ in scales])
+    value_rows.append(f"(-1, {null_values})")
+    value_rows.append(f"({len(cases)}, {null_values})")
+
+    values = ", ".join(value_rows)
+
+    # Single table creation and test
+    await init(conn_cnx, table, column_def, values)
+
+    # Test each scale column
+    for scale in scales:
+        sql_text = f"select a{scale} from {table} order by s"
+        row_count = len(cases) + 2
+        col_count = 1
+        await iterate_over_test_chunk("time", conn_cnx, sql_text, row_count, col_count)
+
     await finish(conn_cnx, table)
 
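The rewritten test leans on the module's init, finish, and iterate_over_test_chunk helpers, which are not shown in this diff. Below is a minimal sketch of what init and finish are assumed to do, inferred only from the call sites above and the comment that init adds the ordering column "s number" itself; the real helpers in test_arrow_result_async.py may differ.

# Hypothetical sketch inferred from the calls above; not the actual helpers.
async def init(conn_cnx, table, column_def, values):
    async with conn_cnx() as conn:
        cur = conn.cursor()
        # Assumed behavior: prepend the ordering column "s number" to the
        # caller's column definition, then load the test rows.
        columns = column_def.replace("(", "(s number, ", 1)
        await cur.execute(f"create or replace table {table} {columns}")
        await cur.execute(f"insert into {table} values {values}")


async def finish(conn_cnx, table):
    async with conn_cnx() as conn:
        cur = conn.cursor()
        await cur.execute(f"drop table if exists {table}")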

test/integ/aio/test_dbapi_async.py

Lines changed: 61 additions & 9 deletions
@@ -714,15 +714,67 @@ async def test_escape(conn_local):
     async with conn_local() as con:
         cur = con.cursor()
         await executeDDL1(cur)
-        for i in teststrings:
-            args = {"dbapi_ddl2": i}
-            await cur.execute("insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, args)
-            await cur.execute("select * from %s" % TABLE1)
-            row = await cur.fetchone()
-            await cur.execute("delete from %s where name=%%s" % TABLE1, i)
-            assert (
-                i == row[0]
-            ), f"newline not properly converted, got {row[0]}, should be {i}"
+
+        # Test 1: Batch INSERT with dictionary parameters (executemany)
+        # This tests the same dictionary parameter binding as the original
+        batch_args = [{"dbapi_ddl2": test_string} for test_string in teststrings]
+        await cur.executemany(
+            "insert into %s values (%%(dbapi_ddl2)s)" % TABLE1, batch_args
+        )
+
+        # Test 2: Batch SELECT with no parameters
+        # This tests the same SELECT functionality as the original
+        await cur.execute("select name from %s" % TABLE1)
+        rows = await cur.fetchall()
+
+        # Verify each test string was properly escaped/handled
+        assert len(rows) == len(
+            teststrings
+        ), f"Expected {len(teststrings)} rows, got {len(rows)}"
+
+        # Extract actual strings from result set
+        actual_strings = {row[0] for row in rows}  # Use set to ignore order
+        expected_strings = set(teststrings)
+
+        # Verify all expected strings are present
+        missing_strings = expected_strings - actual_strings
+        extra_strings = actual_strings - expected_strings
+
+        assert len(missing_strings) == 0, f"Missing strings: {missing_strings}"
+        assert len(extra_strings) == 0, f"Extra strings: {extra_strings}"
+        assert actual_strings == expected_strings, "String sets don't match"
+
+        # Test 3: DELETE with positional parameters (batched for efficiency)
+        # This maintains the same DELETE parameter binding test as the original
+        # We test a representative subset to maintain coverage while being efficient
+        critical_test_strings = [
+            teststrings[0],  # Basic newline: "abc\ndef"
+            teststrings[5],  # Double quote: 'abc"def'
+            teststrings[7],  # Single quote: "abc'def"
+            teststrings[13],  # Tab: "abc\tdef"
+            teststrings[16],  # Backslash-x: "\\x"
+        ]
+
+        # Batch DELETE with positional parameters using executemany
+        # This tests the same positional parameter binding as the original individual DELETEs
+        await cur.executemany(
+            "delete from %s where name=%%s" % TABLE1,
+            [(test_string,) for test_string in critical_test_strings],
+        )
+
+        # Batch verification: check that all critical strings were deleted
+        await cur.execute(
+            "select name from %s where name in (%s)"
+            % (TABLE1, ",".join(["%s"] * len(critical_test_strings))),
+            critical_test_strings,
+        )
+        remaining_critical = await cur.fetchall()
+        assert (
+            len(remaining_critical) == 0
+        ), f"Failed to delete strings: {[row[0] for row in remaining_critical]}"
+
+        # Clean up remaining rows
+        await cur.execute("delete from %s" % TABLE1)
 
 
 @pytest.mark.skipolddriver
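For readers unfamiliar with the two binding styles exercised above: the INSERT uses pyformat-style named placeholders (%(key)s) bound from dictionaries, while the DELETE uses format-style positional placeholders (%s) bound from sequences. A small standalone sketch of both styles using the same async cursor API follows; the table name escape_demo is hypothetical and not part of the test.

# Illustrative only; "escape_demo" is a made-up table name.
async def demo_param_styles(conn_local):
    async with conn_local() as con:
        cur = con.cursor()
        await cur.execute("create temporary table escape_demo (name string)")

        # Named (pyformat) binding from dicts, batched with executemany,
        # mirroring Test 1 above.
        await cur.executemany(
            "insert into escape_demo values (%(val)s)",
            [{"val": "abc\ndef"}, {"val": "abc'def"}],
        )

        # Positional (format) binding from a sequence, mirroring Test 3 above.
        await cur.execute("delete from escape_demo where name = %s", ("abc\ndef",))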

test/integ/aio/test_put_get_async.py

Lines changed: 35 additions & 16 deletions
@@ -232,15 +232,30 @@ async def test_get_multiple_files_with_same_name(tmp_path, aio_connection, caplo
         f"PUT 'file://{filename_in_put}' @{stage_name}/data/2/",
     )
 
+    # Verify files are uploaded before attempting GET
+    import asyncio
+
+    for _ in range(10):  # Wait up to 10 seconds for files to be available
+        file_list = await (await cur.execute(f"LS @{stage_name}")).fetchall()
+        if len(file_list) >= 2:  # Both files should be available
+            break
+        await asyncio.sleep(1)
+    else:
+        pytest.fail(f"Files not available in stage after 10 seconds: {file_list}")
+
     with caplog.at_level(logging.WARNING):
         try:
             await cur.execute(
                 f"GET @{stage_name} file://{tmp_path} PATTERN='.*data.csv.gz'"
             )
         except OperationalError:
-            # This is expected flakiness
+            # This can happen due to cloud storage timing issues
             pass
-    assert "Downloading multiple files with the same name" in caplog.text
+
+    # Check for the expected warning message
+    assert (
+        "Downloading multiple files with the same name" in caplog.text
+    ), f"Expected warning not found in logs: {caplog.text}"
 
 
 async def test_transfer_error_message(tmp_path, aio_connection):
@@ -267,24 +282,26 @@ async def test_transfer_error_message(tmp_path, aio_connection):
 @pytest.mark.skipolddriver
 async def test_put_md5(tmp_path, aio_connection):
     """This test uploads a single and a multi part file and makes sure that md5 is populated."""
-    # Generate random files and folders
-    small_folder = tmp_path / "small"
-    big_folder = tmp_path / "big"
-    small_folder.mkdir()
-    big_folder.mkdir()
-    generate_k_lines_of_n_files(3, 1, tmp_dir=str(small_folder))
-    # This generates a ~342 MB file to trigger a multipart upload
-    generate_k_lines_of_n_files(3_000_000, 1, tmp_dir=str(big_folder))
-
-    small_test_file = small_folder / "file0"
-    big_test_file = big_folder / "file0"
+    # Create files directly without subfolders for efficiency
+    # Small file for single-part upload test
+    small_test_file = tmp_path / "small_file.txt"
+    small_test_file.write_text("test content\n")  # Minimal content
+
+    # Big file for multi-part upload test - 200MB (well over 64MB threshold)
+    big_test_file = tmp_path / "big_file.txt"
+    chunk_size = 1024 * 1024  # 1MB chunks
+    chunk_data = "A" * chunk_size  # 1MB of 'A' characters
+    with open(big_test_file, "w") as f:
+        for _ in range(200):  # Write 200MB total
+            f.write(chunk_data)
 
     stage_name = random_string(5, "test_put_md5_")
     # Use the async connection for PUT/LS operations
     await aio_connection.connect()
     async with aio_connection.cursor() as cur:
         await cur.execute(f"create temporary stage {stage_name}")
 
+        # Upload both files in sequence
         small_filename_in_put = str(small_test_file).replace("\\", "/")
         big_filename_in_put = str(big_test_file).replace("\\", "/")
 
@@ -295,6 +312,8 @@ async def test_put_md5(tmp_path, aio_connection):
             f"PUT 'file://{big_filename_in_put}' @{stage_name}/big AUTO_COMPRESS = FALSE"
         )
 
-        res = await cur.execute(f"LS @{stage_name}")
-
-        assert all(map(lambda e: e[2] is not None, await res.fetchall()))
+        # Verify MD5 is populated for both files
+        file_list = await (await cur.execute(f"LS @{stage_name}")).fetchall()
+        assert all(
+            file_info[2] is not None for file_info in file_list
+        ), "MD5 should be populated for all uploaded files"
