Merge pull request #3237 from darwintree/fix-coverage

Pana · web-flow · commit 9f0f747c9b44 · 2025-06-12T20:03:49.000+08:00
Fix coverage
diff --git a/docs/coverage.md b/docs/coverage.md
@@ -46,13 +46,18 @@ cargo nextest run --no-fail-fast -p cfx-addr --no-default-features
 
 Run integration tests:
 
+> Note that the binary is compiled in debug mode, so performance is poor.
+> You may need to adjust the parallelism parameters if I/O errors or timeout errors occur frequently.
+
 ```bash
 # Run integration tests
+# Change -n to control the number of tests running in parallel.
 pytest integration_tests/tests -vv -n 6 --dist loadscope --conflux-binary $(pwd)/target/debug/conflux
 
 # Set up benchmark binary path before running `python tests/test_all.py`
-export CONFLUX_BENCH=$(pwd)/target/debug/consensus_bench
-# Run additional tests
+export CONFLUX_BENCH=$(pwd)/tools/consensus_bench/target/debug/consensus_bench
+# Run additional tests.
+# Use --max-workers and --max-nodes to control the number of workers and nodes.
 python tests/test_all.py --conflux-binary $(pwd)/target/debug/conflux
 ```
 
diff --git a/integration_tests/test_framework/util/adapter.py b/integration_tests/test_framework/util/adapter.py
@@ -142,7 +142,7 @@ def get_raw_tx_from_transaction(tx: Transaction) -> bytes:
                 for tx in block.txs:
                     raw_tx = get_raw_tx_from_transaction(tx)
                     tx_hash = ew3.eth.send_raw_transaction(raw_tx)
-                    receipt = ew3.eth.wait_for_transaction_receipt(tx_hash, timeout=1, poll_latency=0.5)
+                    receipt = ew3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency=0.5)
                     
     elif tx:
         raw_tx = get_raw_tx_from_transaction(tx)
@@ -155,7 +155,7 @@ def get_raw_tx_from_transaction(tx: Transaction) -> bytes:
                     assert tx.error.name.lower().replace("_", " ") in e.rpc_response["error"]["message"].lower()
         else:
             tx_hash = ew3.eth.send_raw_transaction(raw_tx)
-            receipt = ew3.eth.wait_for_transaction_receipt(tx_hash, timeout=1, poll_latency=0.5)
+            receipt = ew3.eth.wait_for_transaction_receipt(tx_hash, timeout=10, poll_latency=0.5)
             if receipt["status"] == 0:
                 print(f"Transaction failed: {tx_hash.hex()}")
                 print(f"TxErrorMsg: {receipt.get('txErrorMsg', 'No error message')}")
diff --git a/tests/test_all.py b/tests/test_all.py
@@ -67,7 +67,7 @@ def run():
     parser.add_argument(
         "--max-nodes",
         dest="max_nodes",
-        default=24,
+        default=0,
         type=int,
     )
     parser.add_argument(
@@ -96,6 +96,10 @@ def run():
             "../target/release/conflux"),
         type=str)
     options = parser.parse_args()
+    
+    if options.max_nodes == 0:
+        options.max_nodes = os.cpu_count()
+        print(f"Max nodes not specified, using {options.max_nodes} nodes")
 
     all_failed = set()
 
diff --git a/tests/test_utils/test_scheduler.py b/tests/test_utils/test_scheduler.py
@@ -5,12 +5,13 @@
 import os
 import tomllib
 import time
+from typing import Callable
 
 SPECIAL_SCRIPTS = {
     "ghast_consensus_test.py": 1
 }
 
-def get_num_test_nodes(py, test_dir, script):
+def get_num_test_nodes(py: str, test_dir: str, script: str) -> int:
     if script in SPECIAL_SCRIPTS:
         return SPECIAL_SCRIPTS[script]
     toml_output = subprocess.check_output(
@@ -22,7 +23,15 @@ def get_num_test_nodes(py, test_dir, script):
 class TestScheduler:
     """Scheduler for managing test execution and controlling concurrency based on resource requirements"""
 
-    def __init__(self, task_executable, py, test_dir, max_workers, available_nodes, port_min, port_max, conflux_binary):
+    def __init__(self,
+                 task_executable: Callable[[str, str, str, int, int, int, str], None],
+                 py: str,
+                 test_dir: str,
+                 max_workers: int,
+                 available_nodes: int,
+                 port_min: int,
+                 port_max: int,
+                 conflux_binary: str):
         self.task_executable = task_executable
         self.py = py
         self.test_dir = test_dir
@@ -41,7 +50,7 @@ def __init__(self, task_executable, py, test_dir, max_workers, available_nodes,
         self.results = []
         self.failed_tests = set()
 
-    def schedule(self, test_scripts):
+    def schedule(self, test_scripts: list[str]) -> set[str]:
         """Schedules the execution of test scripts"""
         
         # Prepare task queue
@@ -53,11 +62,11 @@ def schedule(self, test_scripts):
             self._collect_results()
         return self.failed_tests
 
-    def _prepare_task_queue(self, test_scripts):
+    def _prepare_task_queue(self, test_scripts: list[str]) -> list[tuple[str, int, int]]:
         """Prepares a task queue with scripts and resource requirements"""
         
-        task_queue = queue.Queue()
-        print("Scanning num_nodes requirement for each task.", end = "")
+        task_queue = []
+        print("Scanning num_nodes requirement for each task")
         with ThreadPoolExecutor() as executor:
             tasks = [(executor.submit(
                       get_num_test_nodes, 
@@ -68,40 +77,43 @@ def _prepare_task_queue(self, test_scripts):
             # Collect completed task results and add them to the queue
             for future, i, script in tasks:
                 result = future.result()
-                task_queue.put((script, result, i))
-        print(" Done")
+                if result > self.available_nodes:
+                    raise RuntimeError(f"Cannot run {script} because it requires {result} nodes, "
+                                       f"but only max to {self.available_nodes} nodes are available"
+                                       f"Please specify --max-nodes to run the test")
+
+                task_queue.append((script, result, i))
+        for script, nodes_needed, index in task_queue:
+            print(f"Task {index}: {script} requires {nodes_needed} nodes")
+        print("Scanning done")
         return task_queue
+    
+    def _pop_next_task(self, task_queue: list[tuple[str, int, int]]) -> tuple[str, int, int]:
+        """Selects the next task to process"""
+        if not task_queue:
+            raise RuntimeError("No tasks to process")
+        while True:
+            for i, (script, nodes_needed, index) in enumerate(task_queue):
+                if self._try_acquire_resources(nodes_needed):
+                    task_queue.pop(i)
+                    return script, nodes_needed, index
+            self.resource_event.wait(timeout=10)
+            self.resource_event.clear()
 
-    def _process_task_queue(self, executor, task_queue):
+    def _process_task_queue(self, executor: ThreadPoolExecutor, task_queue: list[tuple[str, int, int]]):
         """Processes the task queue, scheduling tests based on resource availability"""
         
-        while not task_queue.empty():
-            try:
-                # Retrieve task
-                script, nodes_needed, index = task_queue.get(block=False)
-                
-                # Attempt to allocate resources
-                if self._try_acquire_resources(nodes_needed):
-                    # Enough resources available, execute test
-                    future = executor.submit(
-                        self._run_test_with_cleanup,
-                        script,
-                        index,
-                        nodes_needed
-                    )
-                    self.results.append((script, future))
-                    
-                    # Wait for at least 1 second to avoid launch a lot of tasks
-                    time.sleep(1)
-                else:
-                    # Insufficient resources, re-add to queue and wait
-                    task_queue.put((script, nodes_needed, index))
-                    self.resource_event.wait(timeout=0.2)
-                    self.resource_event.clear()
-            except queue.Empty:
-                break
+        while task_queue:
+            script, nodes_needed, index = self._pop_next_task(task_queue)
+            future = executor.submit(
+                self._run_test_with_cleanup,
+                script,
+                index,
+                nodes_needed
+            )
+            self.results.append((script, future))
 
-    def _try_acquire_resources(self, nodes_needed):
+    def _try_acquire_resources(self, nodes_needed: int) -> bool:
         """Attempts to acquire required resources, returns True if successful"""
         
         with self.resource_lock:
@@ -111,15 +123,15 @@ def _try_acquire_resources(self, nodes_needed):
                 return True
             return False
 
-    def _release_resources(self, nodes_count):
+    def _release_resources(self, nodes_count: int):
         """Releases resources and notifies waiting threads"""
         
         with self.resource_lock:
             self.available_nodes += nodes_count
             self.available_workers += 1
             self.resource_event.set()
 
-    def _run_test_with_cleanup(self, script, index, nodes_count):
+    def _run_test_with_cleanup(self, script: str, index: int, nodes_count: int):
         """Runs a test and ensures resources are released"""
         
         try: