diff --git a/.github/workflows/format-typing-check.yml b/.github/workflows/format-typing-check.yml index a4128a15..c5a97d4b 100644 --- a/.github/workflows/format-typing-check.yml +++ b/.github/workflows/format-typing-check.yml @@ -37,8 +37,8 @@ jobs: - name: Install ruff and mypy run: | pip install ruff mypy typing_extensions \ - types-Deprecated types-beautifulsoup4 types-jsonschema \ - types-networkx types-tabulate types-PyYAML pandas-stubs + types-Deprecated types-beautifulsoup4 types-jsonschema types-requests \ + types-networkx types-tabulate types-PyYAML pandas-stubs - name: Get all changed python files id: changed-python-files uses: tj-actions/changed-files@v44 diff --git a/pyproject.toml b/pyproject.toml index 879895f0..8fd371b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,6 +58,7 @@ dev = [ "mypy", "typing_extensions", # stub packages. Update the `format-typing-check.yml` too if you add more. + "types-requests", "types-beautifulsoup4", "types-jsonschema", "types-networkx", diff --git a/src/nplinker/genomics/antismash/__init__.py b/src/nplinker/genomics/antismash/__init__.py index e126f548..03e50b07 100644 --- a/src/nplinker/genomics/antismash/__init__.py +++ b/src/nplinker/genomics/antismash/__init__.py @@ -1,16 +1,28 @@ -from .antismash_downloader import download_and_extract_antismash_data +from .antismash_api_client import antismash_job_is_done +from .antismash_api_client import submit_antismash_job +from .antismash_downloader import download_and_extract_from_antismash_api +from .antismash_downloader import download_and_extract_from_antismash_db +from .antismash_downloader import extract_antismash_data from .antismash_loader import AntismashBGCLoader from .antismash_loader import parse_bgc_genbank +from .genome_accession_resolver import resolve_genome_accession +from .ncbi_downloader import download_and_extract_ncbi_genome from .podp_antismash_downloader import GenomeStatus from .podp_antismash_downloader import get_best_available_genome_id 
from .podp_antismash_downloader import podp_download_and_extract_antismash_data __all__ = [ - "download_and_extract_antismash_data", + "extract_antismash_data", + "resolve_genome_accession", + "download_and_extract_from_antismash_api", + "download_and_extract_from_antismash_db", "AntismashBGCLoader", "parse_bgc_genbank", "GenomeStatus", "get_best_available_genome_id", "podp_download_and_extract_antismash_data", + "download_and_extract_ncbi_genome", + "submit_antismash_job", + "antismash_job_is_done", ] diff --git a/src/nplinker/genomics/antismash/antismash_api_client.py b/src/nplinker/genomics/antismash/antismash_api_client.py new file mode 100644 index 00000000..124aedcb --- /dev/null +++ b/src/nplinker/genomics/antismash/antismash_api_client.py @@ -0,0 +1,81 @@ +from __future__ import annotations +import logging +from os import PathLike +from pathlib import Path +import requests + + +logger = logging.getLogger(__name__) + + +def submit_antismash_job(genbank_filepath: str | PathLike) -> str: + """Submits an antiSMASH job using the provided GenBank file. + + This function sends a GenBank file to the antiSMASH API + and retrieves the job ID if the submission is successful. + + Args: + genbank_filepath (str | PathLike): The path to the GenBank file to be submitted. + + Returns: + str: The job ID of the submitted antiSMASH job. + + Raises: + requests.exceptions.RequestException: If there is an issue with the HTTP request. + RuntimeError: If the API response does not contain a job ID. 
+ """ + url = "https://antismash.secondarymetabolites.org/api/v1.0/submit" + genbank_filepath = Path(genbank_filepath) + + with open(genbank_filepath, "rb") as file: + files = {"seq": file} + response = requests.post(url, files=files) + response.raise_for_status() # Raise an exception for HTTP errors + + data = response.json() + if "id" not in data: + raise RuntimeError("No antiSMASH job ID returned") + return str(data["id"]) + + +def antismash_job_is_done(job_id: str) -> bool: + """Determines if the antiSMASH job has completed by checking its status. + + This function queries the antiSMASH API to retrieve the current state + of the job and determines whether it has finished successfully, is still + in progress, or has encountered an error. + + Args: + job_id (str): The unique identifier of the antiSMASH job. + + Returns: + bool: True if the job is completed successfully, False if it is still + running or queued. + + Raises: + RuntimeError: If the job has failed or if the API response indicates an error. + ValueError: If the job state is missing or an unexpected state is encountered + in the API response. + requests.exceptions.HTTPError: If an HTTP error occurs during the API request. + """ + url = f"https://antismash.secondarymetabolites.org/api/v1.0/status/{job_id}" + + response = requests.get(url, timeout=10) + response.raise_for_status() # Raise exception for HTTP errors + response_data = response.json() + + if "state" not in response_data: + raise ValueError(f"Job state missing in response for job_id: {job_id}") + + job_state = response_data["state"] + if job_state in ("running", "queued"): + return False + if job_state == "done": + return True + if job_state == "failed": + job_status = response_data.get("status", "No error message provided") + raise RuntimeError(f"AntiSMASH job {job_id} failed with an error: {job_status}") + else: + raise ValueError( + f"Unexpected job state for antismash job ID {job_id}. 
Job state: {job_state}" + ) diff --git a/src/nplinker/genomics/antismash/antismash_downloader.py b/src/nplinker/genomics/antismash/antismash_downloader.py index a02728f4..2c27938e 100644 --- a/src/nplinker/genomics/antismash/antismash_downloader.py +++ b/src/nplinker/genomics/antismash/antismash_downloader.py @@ -4,7 +4,9 @@ import shutil from os import PathLike from pathlib import Path +import requests from nplinker.utils import download_and_extract_archive +from nplinker.utils import extract_archive from nplinker.utils import list_dirs from nplinker.utils import list_files @@ -15,10 +17,75 @@ ANTISMASH_DB_DOWNLOAD_URL = "https://antismash-db.secondarymetabolites.org/output/{}/{}" # The antiSMASH DBV2 is for the availability of the old version, better to keep it. ANTISMASH_DBV2_DOWNLOAD_URL = "https://antismash-dbv2.secondarymetabolites.org/output/{}/{}" +# antismash api to download results from submitted jobs +ANTISMASH_API_DOWNLOAD_URL = "https://antismash.secondarymetabolites.org/upload/{}/{}" def download_and_extract_antismash_data( - antismash_id: str, download_root: str | PathLike, extract_root: str | PathLike + url: str, antismash_id: str, download_root: str | PathLike, extract_root: str | PathLike +) -> None: + """Download and extract antiSMASH BGC archive for a specified genome. + + This function downloads a BGC archive from the specified URL, extracts its contents, + and organizes the extracted files into a structured directory under the given `extract_root`. + + Args: + url (str): The URL to download the BGC archive from. + antismash_id (str): The identifier for the antiSMASH genome, used to name the extraction directory. + download_root: Path to the directory where the downloaded archive will be stored. + extract_root: Path to the directory where the data files will be extracted. + Note that an `antismash` directory will be created in the specified `extract_root` if + it doesn't exist. The files will be extracted to `/antismash/` directory. 
+ + Raises: + ValueError: if `/antismash/` dir is not empty. + Exception: If any error occurs during the download or extraction process, the partially extracted + directory will be cleaned up, and the exception will be re-raised. + + Examples: + >>> download_and_extract_antismash_data( + "https://antismash-db.secondarymetabolites.org/output/GCF_001.1/GCF_001.1.zip", + "GCF_001.1", + "/data/download", + "/data/extracted" + ) + """ + extract_path = Path(extract_root) / "antismash" / antismash_id + + _prepare_extract_path(extract_path) + try: + download_and_extract_archive(url, download_root, extract_path, f"{antismash_id}.zip") + _cleanup_extracted_files(extract_path) + except Exception as e: + shutil.rmtree(extract_path) + raise e + + +def download_and_extract_from_antismash_api( + job_id: str, antismash_id: str, download_root: str | PathLike, extract_root: str | PathLike +) -> None: + """Downloads and extracts results from an antiSMASH API job. + + This function constructs the download URL using the provided job ID then + downloads the results as a ZIP file and extracts its contents to the specified directories. + + Args: + antismash_id (str): The unique identifier for the antiSMASH dataset. + job_id (str): The job ID for the antiSMASH API job. + download_root (str or PathLike): The root directory where the ZIP file will be downloaded. + extract_root (str or PathLike): The root directory where the contents of the ZIP file will be extracted. + + Raises: + requests.exceptions.RequestException: If there is an issue with the HTTP request. + zipfile.BadZipFile: If the downloaded file is not a valid ZIP file. + OSError: If there is an issue with file operations such as writing or extracting. 
+ """ + url = ANTISMASH_API_DOWNLOAD_URL.format(job_id, antismash_id + ".zip") + download_and_extract_antismash_data(url, antismash_id, download_root, extract_root) + + +def download_and_extract_from_antismash_db( + refseq_acc: str, download_root: str | PathLike, extract_root: str | PathLike ) -> None: """Download and extract antiSMASH BGC archive for a specified genome. @@ -27,7 +94,7 @@ def download_and_extract_antismash_data( of a genome as the id of the archive. Args: - antismash_id: The id used to download BGC archive from antiSMASH database. + refseq_acc: The id used to download BGC archive from antiSMASH database. If the id is versioned (e.g., "GCF_004339725.1") please be sure to specify the version as well. download_root: Path to the directory to place downloaded archive in. @@ -36,41 +103,53 @@ def download_and_extract_antismash_data( it doesn't exist. The files will be extracted to `/antismash/` directory. Raises: - ValueError: if `/antismash/` dir is not empty. + ValueError: if `/antismash/` dir is not empty. 
Examples: - >>> download_and_extract_antismash_metadata("GCF_004339725.1", "/data/download", "/data/extracted") + >>> download_and_extract_from_antismash_db("GCF_004339725.1", "/data/download", "/data/extracted") """ - download_root = Path(download_root) - extract_root = Path(extract_root) - extract_path = extract_root / "antismash" / antismash_id + for base_url in [ANTISMASH_DB_DOWNLOAD_URL, ANTISMASH_DBV2_DOWNLOAD_URL]: + url = base_url.format(refseq_acc, f"{refseq_acc}.zip") + if requests.head(url).status_code == 404: # not found + continue + download_and_extract_antismash_data(url, refseq_acc, download_root, extract_root) + return # Exit the loop once a valid URL is processed - try: - if extract_path.exists(): - _check_extract_path(extract_path) - else: - extract_path.mkdir(parents=True, exist_ok=True) + # if both urls give 404 not found + raise RuntimeError(f"No results in antiSMASH DB for {refseq_acc}") - for base_url in [ANTISMASH_DB_DOWNLOAD_URL, ANTISMASH_DBV2_DOWNLOAD_URL]: - url = base_url.format(antismash_id, antismash_id + ".zip") - download_and_extract_archive(url, download_root, extract_path, antismash_id + ".zip") - break - # delete subdirs - for subdir_path in list_dirs(extract_path): - shutil.rmtree(subdir_path) +def extract_antismash_data( + archive: str | PathLike, extract_root: str | PathLike, antismash_id: str +) -> None: + """Extracts antiSMASH results from a given archive into a specified directory. - # delete unnecessary files - files_to_keep = list_files(extract_path, suffix=(".json", ".gbk")) - for file in list_files(extract_path): - if file not in files_to_keep: - os.remove(file) + This function handles the extraction of antiSMASH results by preparing the + extraction path, extracting the archive, and performing cleanup of + unnecessary files. If an error occurs during the process, the partially + extracted files are removed, and the exception is re-raised. 
- logger.info("antiSMASH BGC data of %s is downloaded and extracted.", antismash_id) + Args: + archive (str | PathLike): The path to the archive file containing antiSMASH results. + extract_root (str | PathLike): The root directory where the data should + be extracted. + antismash_id (str): A unique identifier for the antiSMASH data, used to + create a subdirectory for the extracted files. + + Raises: + Exception: If any error occurs during the extraction process, the + exception is re-raised after cleaning up the extraction directory. + """ + extract_path = Path(extract_root) / "antismash" / antismash_id + + _prepare_extract_path(extract_path) + + try: + extract_archive(archive, extract_path, remove_finished=False) + _cleanup_extracted_files(extract_path) except Exception as e: shutil.rmtree(extract_path) - logger.warning(e) raise e @@ -78,3 +157,23 @@ def _check_extract_path(extract_path: Path): # check if extract_path is empty if any(extract_path.iterdir()): raise ValueError(f'Nonempty directory: "{extract_path}"') + + +def _cleanup_extracted_files(extract_path: str | PathLike) -> None: + # delete subdirs + for subdir_path in list_dirs(extract_path): + shutil.rmtree(subdir_path) + + # delete unnecessary files + files_to_keep = list_files(extract_path, suffix=(".json", ".gbk")) + for file in list_files(extract_path): + if file not in files_to_keep: + os.remove(file) + + +def _prepare_extract_path(extract_path: str | PathLike) -> None: + extract_path = Path(extract_path) + if extract_path.exists(): + _check_extract_path(extract_path) + else: + extract_path.mkdir(parents=True, exist_ok=True) diff --git a/src/nplinker/genomics/antismash/genome_accession_resolver.py b/src/nplinker/genomics/antismash/genome_accession_resolver.py new file mode 100644 index 00000000..e30433c3 --- /dev/null +++ b/src/nplinker/genomics/antismash/genome_accession_resolver.py @@ -0,0 +1,230 @@ +import logging +import re +from typing import Any +from typing import Callable +from typing import 
Literal +from typing import Mapping +import httpx +from bs4 import BeautifulSoup + + +JGI_GENOME_LOOKUP_URL = ( + "https://img.jgi.doe.gov/cgi-bin/m/main.cgi?section=TaxonDetail&page=taxonDetail&taxon_oid={}" +) +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0" + +logger = logging.getLogger(__name__) + + +def get_latest_assembly_accession(acc: str) -> str: + """Retrieve the latest NCBI genome assembly accession for a given accession. + + This function retrieves the most recent genome assembly accession + from the revision history of the provided accession. It prioritizes + RefSeq accessions, and if unavailable, falls back to GenBank accessions. + + Args: + acc (str): The accession identifier to resolve. + + Returns: + str: The latest valid genome assembly accession. + + Raises: + ValueError: If no valid RefSeq or GenBank accession is found in + the assembly revision history. + """ + revision_history = _get_revision_history(acc) + + acc_priority = ("refseq_accession", "genbank_accession") + for acc_type in acc_priority: + assembly_revisions = revision_history.get("assembly_revisions", []) + revisions_with_acc = [entry for entry in assembly_revisions if acc_type in entry] + if revisions_with_acc: + latest_revision = max(revisions_with_acc, key=lambda x: x["release_date"]) + return str(latest_revision[acc_type]) + + raise ValueError("No valid genome accession found in assembly revision history") + + +def resolve_genome_accession(genome_id_data: Mapping[Any, Any]) -> str: + """Gets the NCBI genome assembly accession. + + Gets the latest RefSeq genome assembly accession, or if not available, + the latest GenBank genome assembly accession. + + This function gets the latest RefSeq genome assembly accession, or if not + available, the latest GenBank genome assembly accession. It attempts to get + a genome accession by checking the genome id date for specific ID types in + the following order: + 1. RefSeq_accession + 2. 
GenBank_accession + 3. JGI_Genome_ID + + For each ID type, it uses a corresponding resolver function to process + the ID. If a resolver fails, a warning is logged, and the function + proceeds to the next ID type. If no valid genome assembly accession can be + retrieved, a RuntimeError is raised. + + Args: + genome_id_data (dict): A dictionary containing genome ID types as keys + and their corresponding values. + + Returns: + str: The retrieved genome accession, prioritizing RefSeq if available, + otherwise GenBank. + + Raises: + RuntimeError: If no valid assembly accessions can be retrieved. + + Logs: + Warning messages if a resolver fails for a specific ID type. + """ + resolver_priority = ["RefSeq_accession", "GenBank_accession", "JGI_Genome_ID"] + resolvers: dict[str, Callable] = { + "RefSeq_accession": _resolve_refseq, + "GenBank_accession": _resolve_genbank, + "JGI_Genome_ID": _resolve_jgi, + } + + for id_type in resolver_priority: + if id_type not in genome_id_data: + continue + + resolver = resolvers[id_type] + try: + genome_id = genome_id_data[id_type].strip() + return str(resolver(genome_id)) + except Exception as e: + logger.warning(f"Failed to resolve {id_type}: {e}") + + raise RuntimeError("No valid assembly accessions found") + + +def validate_assembly_acc(acc: str, acc_type: Literal["RefSeq", "GenBank"]) -> None: + """Validates a NCBI genome assembly accession string based on its type. + + This function checks if the provided genome assembly accession string + adheres to the expected format for the specified accession type + ('RefSeq' or 'GenBank'). It ensures that the accession starts with the + correct prefix ('GCF_' for RefSeq or 'GCA_' for GenBank) and contains + a version number. + + Args: + acc (str): The genome assembly accession string to validate. + acc_type (Literal["RefSeq", "GenBank"]): The type of accession, either + 'RefSeq' or 'GenBank'. + + Raises: + ValueError: If the accession type is invalid. 
+ ValueError: If the accession does not start with the expected prefix. + ValueError: If the accession is missing a version number. + """ + if acc_type == "RefSeq": + prefix = "GCF_" + elif acc_type == "GenBank": + prefix = "GCA_" + else: + raise ValueError( + f"Invalid genome assembly accession type '{acc_type}'. Expected 'RefSeq' or 'GenBank'." + ) + if not acc.startswith(prefix): + raise ValueError(f"Invalid {acc_type} assembly accession (must start with {prefix}): {acc}") + if "." not in acc: + raise ValueError(f"Invalid assembly accession (missing version number): {acc}") + + +def _get_revision_history(assembly_acc: str) -> dict[str, Any]: + """Fetches the revision history of a genome assembly from the NCBI Datasets API. + + Args: + assembly_acc (str): The accession number of the genome assembly. + + Returns: + dict[str, Any]: A dictionary containing the revision history of the specified assembly. + + Raises: + httpx.HTTPStatusError: If the HTTP request fails or returns a non-success status code. + ValueError: If no revision history data is found for the given accession number. + """ + url = ( + f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{assembly_acc}/revision_history" + ) + resp = httpx.get(url, headers={"User-Agent": USER_AGENT}, timeout=10.0, follow_redirects=True) + resp.raise_for_status() + revision_history = resp.json() + if not revision_history: + raise ValueError(f"No Assembly Revision data found for {assembly_acc}") + if not isinstance(revision_history, dict): + raise ValueError(f"Unexpected response format: {type(revision_history)}") + return revision_history + + +def _resolve_refseq(acc: str) -> str: + """Retrieves the latest NCBI genome assembly accession for a given RefSeq accession. + + This function validates the provided RefSeq accession and retrieves the + most up-to-date RefSeq genome assembly accession, or if not found, + alternatively the most up-to-date RefSeq GenBank genome assembly accession. 
+ + Args: + acc (str): The RefSeq accession to resolve. + + Returns: + str: The latest NCBI assembly accession corresponding to the given RefSeq accession. + + Raises: + ValueError: If the provided accession is invalid or not a RefSeq accession. + """ + validate_assembly_acc(acc, "RefSeq") + return get_latest_assembly_accession(acc) + + +def _resolve_genbank(acc: str) -> str: + """Retrieves the latest NCBI genome assembly accession for a given GenBank accession. + + This function validates the provided GenBank accession and retrieves + the most up-to-date RefSeq genome assembly accession, or if not found, + alternatively the most up-to-date GenBank genome assembly accession. + + Args: + acc (str): The GenBank accession to resolve. + + Returns: + str: The latest NCBI assembly accession corresponding to the given GenBank accession. + + Raises: + ValueError: If the provided accession is invalid or not a GenBank accession. + """ + validate_assembly_acc(acc, "GenBank") + return get_latest_assembly_accession(acc) + + +def _resolve_jgi(jgi_genome_id: str) -> str: + """Resolves a JGI Genome ID to its corresponding NCBI assembly accession. + + This function queries a predefined JGI genome lookup URL using the provided + JGI Genome ID. It parses the HTML response to extract the NCBI assembly + accession link. If no valid link is found, an exception is raised. The + function then retrieves the most up-to-date NCBI assembly accession for the + found assembly accession. + + Args: + jgi_genome_id (str): The JGI Genome ID to resolve. + + Returns: + str: The latest NCBI assembly accession corresponding to the given JGI Genome ID. + + Raises: + ValueError: If no NCBI accessions can be found for the given JGI Genome ID. + httpx.HTTPStatusError: If the HTTP request to the JGI genome lookup URL fails. 
+ """ + url = JGI_GENOME_LOOKUP_URL.format(jgi_genome_id) + resp = httpx.get(url, headers={"User-Agent": USER_AGENT}, timeout=10.0, follow_redirects=True) + resp.raise_for_status() + + soup = BeautifulSoup(resp.content, "html.parser") + link = soup.find("a", href=re.compile("https://www.ncbi.nlm.nih.gov/datasets/genome/.*")) + if not link: + raise ValueError(f"Unable to find NCBI accessions for the JGI Genome ID: {jgi_genome_id}. ") + assembly_acc = link.text + return get_latest_assembly_accession(assembly_acc) diff --git a/src/nplinker/genomics/antismash/ncbi_downloader.py b/src/nplinker/genomics/antismash/ncbi_downloader.py new file mode 100644 index 00000000..fe016fd5 --- /dev/null +++ b/src/nplinker/genomics/antismash/ncbi_downloader.py @@ -0,0 +1,133 @@ +from __future__ import annotations +import logging +import os +import shutil +import time +from os import PathLike +from pathlib import Path +import httpx +import requests +from nplinker.utils import check_md5 +from nplinker.utils import download_url +from nplinker.utils import extract_archive + + +logger = logging.getLogger(__name__) + + +def download_and_extract_ncbi_genome( + genome_assembly_acc: str, + download_root: str | PathLike, + extract_root: str | PathLike, + max_attempts: int = 10, +) -> Path: + """Downloads and extracts an NCBI dataset for a given genome assembly accession. + + This function retrieves a dataset from the NCBI database using the provided + genome assembly accession. It retries the download process up to a specified maximum number + of attempts in case of errors. The function verifies the integrity of the + downloaded files using MD5 checksums, extracts the dataset, and renames the + GenBank file for easier access. Unnecessary files are removed after successful + processing. + + Args: + genome_assembly_acc (str): The NCBI accession of the genome assembly to be downloaded. + download_root (str | PathLike): The directory where the dataset will be downloaded. 
+ extract_root (str | PathLike): The directory where the dataset will be extracted. + max_attempts (int): The maximum number of download attempts. Defaults to 10. + + Returns: + Path: The path to the extracted GenBank file. + + Raises: + RuntimeError: If the maximum number of retries is reached and the dataset + could not be successfully downloaded and extracted. + """ + extract_path = Path(extract_root) / "ncbi_genomes" + extract_path.mkdir(parents=True, exist_ok=True) + + _check_genome_accession_validity(genome_assembly_acc) + archive = _download_genome(genome_assembly_acc, download_root, max_attempts) + extract_archive(archive, extract_path) + _verify_ncbi_dataset_md5_sums(extract_path) + + # Move and rename GenBank file + genbank_path = extract_path / "ncbi_dataset" / "data" / genome_assembly_acc / "genomic.gbff" + new_genbank_path = extract_path / f"{genome_assembly_acc}.gbff" + genbank_path.rename(new_genbank_path) + + # Delete unnecessary files + shutil.rmtree(extract_path / "ncbi_dataset") + os.remove(extract_path / "md5sum.txt") + os.remove(extract_path / "README.md") + + return new_genbank_path + + +def _check_genome_accession_validity(genome_assembly_acc, max_attempts=10): + """Check the validity of genome accession.""" + url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{genome_assembly_acc}/check" + + # Retry multiple times because NCBI currently has issues (500 Internal Server Error) + for attempt in range(1, max_attempts + 1): + try: + response = requests.get(url) + response.raise_for_status() + break + except Exception: + if attempt < max_attempts: + time.sleep(1) + + # Raise if no attempt was successful + response.raise_for_status() + # Raise if genome assembly is not successful + if "valid_assemblies" not in response.json(): + raise ValueError(f"Not a valid genome assembly accession: {genome_assembly_acc}") + + +def _download_genome(genome_assembly_acc, download_root, max_attempts): + url = ( + 
"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/" + f"{genome_assembly_acc}/download?include_annotation_type=GENOME_GB" + ) + download_root = Path(download_root) + filename = f"ncbi_{genome_assembly_acc}.zip" + + # Retry multiple times because NCBI has issues currently + for attempt in range(1, max_attempts + 1): + try: + download_url(url, download_root, filename) + return download_root / filename + except httpx.ReadTimeout as e: + logger.warning(f"Attempt {attempt}/{max_attempts} failed to download {url}. Error: {e}") + if attempt < max_attempts: + time.sleep(1) + else: + raise httpx.ReadTimeout( + f"Failed to download the genome {genome_assembly_acc} from NCBI. " + f"Maximum download retries ({max_attempts}) reached for {url}." + ) + + +def _verify_ncbi_dataset_md5_sums(extract_path: str | PathLike) -> None: + """Verify the integrity of files in a specified directory using MD5 checksums. + + This function reads an "md5sum.txt" file located in the given extraction path, + which contains MD5 checksums and corresponding file names. It then computes + the MD5 checksum for each file and compares it with the expected value. If any + file's checksum does not match, a `ValueError` is raised. + + Args: + extract_path (PathLike): Path to the directory containing the files and + the "md5sum.txt" file. + + Raises: + ValueError: If the MD5 checksum of any file does not match the expected value. 
+ """ + extract_path = Path(extract_path) + with open(extract_path / "md5sum.txt", "r") as f: + for line in f: + md5sum, file_name = line.strip().split() + file_path = extract_path / file_name + if not check_md5(file_path, md5sum): + raise ValueError(f"MD5 checksum mismatch for {file_path}") diff --git a/src/nplinker/genomics/antismash/podp_antismash_downloader.py b/src/nplinker/genomics/antismash/podp_antismash_downloader.py index 84e3cee4..b630b963 100644 --- a/src/nplinker/genomics/antismash/podp_antismash_downloader.py +++ b/src/nplinker/genomics/antismash/podp_antismash_downloader.py @@ -1,17 +1,21 @@ from __future__ import annotations import json import logging -import re +import time import warnings from collections.abc import Mapping from collections.abc import Sequence from os import PathLike from pathlib import Path -import httpx -from bs4 import BeautifulSoup from jsonschema import validate from nplinker.defaults import GENOME_STATUS_FILENAME -from nplinker.genomics.antismash import download_and_extract_antismash_data +from nplinker.genomics.antismash import antismash_job_is_done +from nplinker.genomics.antismash import download_and_extract_from_antismash_api +from nplinker.genomics.antismash import download_and_extract_from_antismash_db +from nplinker.genomics.antismash import download_and_extract_ncbi_genome +from nplinker.genomics.antismash import extract_antismash_data +from nplinker.genomics.antismash import resolve_genome_accession +from nplinker.genomics.antismash import submit_antismash_job from nplinker.schemas import GENOME_STATUS_SCHEMA @@ -33,24 +37,23 @@ class GenomeStatus: def __init__( self, original_id: str, - resolved_refseq_id: str = "", - resolve_attempted: bool = False, + resolved_id: str = "", + failed_previously: bool = False, bgc_path: str = "", ): """Initialize a GenomeStatus object for the given genome. Args: original_id: The original ID of the genome. - resolved_refseq_id: The resolved RefSeq ID of the - genome. Defaults to "". 
- resolve_attempted: A flag indicating whether an - attempt to resolve the RefSeq ID has been made. Defaults to False. + resolved_id: The resolved genome ID of the genome. Defaults to "". + failed_previously: Indicates whether a previous attempt to get BGC data + for the genome has failed. Defaults to False. bgc_path: The path to the downloaded BGC file for the genome. Defaults to "". """ self.original_id = original_id - self.resolved_refseq_id = "" if resolved_refseq_id == "None" else resolved_refseq_id - self.resolve_attempted = resolve_attempted + self.resolved_id = "" if resolved_id == "None" else resolved_id + self.failed_previously = failed_previously self.bgc_path = bgc_path @staticmethod @@ -114,8 +117,8 @@ def _to_dict(self) -> dict: """Convert the GenomeStatus object to a dict.""" return { "original_id": self.original_id, - "resolved_refseq_id": self.resolved_refseq_id, - "resolve_attempted": self.resolve_attempted, + "resolved_id": self.resolved_id, + "failed_previously": self.failed_previously, "bgc_path": self.bgc_path, } @@ -153,59 +156,110 @@ def podp_download_and_extract_antismash_data( gs_dict = GenomeStatus.read_json(gs_file) for i, genome_record in enumerate(genome_records): - # get the best available ID from the dict - genome_id_data = genome_record["genome_ID"] - raw_genome_id = get_best_available_genome_id(genome_id_data) - if raw_genome_id is None or len(raw_genome_id) == 0: - logger.warning(f'Invalid input genome record "{genome_record}"') - continue - - # check if genome ID exist in the genome status file - if raw_genome_id not in gs_dict: - gs_dict[raw_genome_id] = GenomeStatus(raw_genome_id) - - gs_obj = gs_dict[raw_genome_id] - logger.info( - f"Checking for antismash data {i + 1}/{len(genome_records)}, " - f"current genome ID={raw_genome_id}" + f"Getting antiSMASH BGC data for genome record {i + 1} of {len(genome_records)}." 
) - # first, check if BGC data is downloaded - if gs_obj.bgc_path and Path(gs_obj.bgc_path).exists(): - logger.info(f"Genome ID {raw_genome_id} already downloaded to {gs_obj.bgc_path}") + + # get the best available genome ID from the dict + original_genome_id = get_best_available_genome_id(genome_record["genome_ID"]) + if not original_genome_id: + logger.warning(f"Skipping invalid genome record: {genome_record}") continue - # second, check if lookup attempted previously - if gs_obj.resolve_attempted: - logger.info(f"Genome ID {raw_genome_id} skipped due to previous failed attempt") + # Retrieve or initialize the GenomeStatus object for the genome ID + gs = gs_dict.setdefault(original_genome_id, GenomeStatus(original_genome_id)) + + # Check if genomes already have antiSMASH BGC data + if gs.bgc_path and Path(gs.bgc_path).exists(): + logger.info( + f"antiSMASH BGC data for genome ID {gs.original_id} already downloaded to " + f"{gs.bgc_path}" + ) + try: + process_existing_antismash_data(gs, project_extract_root) + continue + except Exception as e: + logger.warning( + "Failed to process existing antiSMASH BGC data for genome ID " + f"{gs.original_id}. 
Error: {e}" + ) + gs.bgc_path = "" # Reset bgc path + + # Check if a previous attempt to get bgc data has failed + if gs.failed_previously: + logger.info(f"Genome ID {gs.original_id} skipped due to previous failed attempt") + GenomeStatus.to_json(gs_dict, gs_file) continue - # if not downloaded or lookup attempted, then try to resolve the ID - # and download - logger.info(f"Start lookup process for genome ID {raw_genome_id}") - gs_obj.resolved_refseq_id = _resolve_refseq_id(genome_id_data) - gs_obj.resolve_attempted = True - - if gs_obj.resolved_refseq_id == "": - # give up on this one - logger.warning(f"Failed lookup for genome ID {raw_genome_id}") + # resolve genome ID + try: + gs.resolved_id = resolve_genome_accession(genome_record["genome_ID"]) + except Exception as e: + logger.warning(f"Failed to resolve genome ID {gs.original_id}. Error: {e}") + gs.failed_previously = True + GenomeStatus.to_json(gs_dict, gs_file) continue - # if resolved id is valid, try to download and extract antismash data + # check if the antiSMASH BGC data is already in the downloads + bgc_path = Path(project_download_root, f"{gs.resolved_id}.zip").absolute() + if bgc_path.exists(): + logger.info( + f"antiSMASH BGC data for genome ID {gs.original_id} already downloaded to " + f"{bgc_path}" + ) + try: + gs.bgc_path = str(bgc_path) + process_existing_antismash_data(gs, project_extract_root) + GenomeStatus.to_json(gs_dict, gs_file) + continue + except Exception as e: + logger.warning( + "Failed to process existing antiSMASH BGC data for genome ID " + f"{gs.original_id}. 
Error: {e}" + ) + gs.bgc_path = "" + + # retrieve antiSMASH BGC data from antiSMASH-DB try: - download_and_extract_antismash_data( - gs_obj.resolved_refseq_id, project_download_root, project_extract_root + retrieve_antismash_db_data(gs, project_download_root, project_extract_root) + logger.info( + f"antiSMASH BGC data for genome ID {gs.original_id} is downloaded and extracted" ) - - gs_obj.bgc_path = str( - Path(project_download_root, gs_obj.resolved_refseq_id + ".zip").absolute() + GenomeStatus.to_json(gs_dict, gs_file) + continue + except Exception as e: + logger.info( + f"Unable to retrieve BGC data from antiSMASH-DB for genome ID {gs.original_id}. " + f"Error: {e}" ) - output_path = Path(project_extract_root, "antismash", gs_obj.resolved_refseq_id) - if output_path.exists(): - Path.touch(output_path / "completed", exist_ok=True) + # retrieve antismash BGC by submitting antismash job via API + try: + logger.info( + "Downloading genome assembly from NCBI and submitting antiSMASH job for " + f"genome ID {gs.original_id}." + ) + genome_path = download_and_extract_ncbi_genome( + gs.resolved_id, project_download_root, project_extract_root + ) + job_id = submit_antismash_job(genome_path) + logger.info(f"Waiting for antiSMASH job {job_id} to complete.") + while antismash_job_is_done(job_id) is False: + time.sleep(15) + retrieve_antismash_job_data(job_id, gs, project_download_root, project_extract_root) + logger.info( + f"antiSMASH BGC data for genome ID {gs.original_id} is downloaded and extracted" + ) + GenomeStatus.to_json(gs_dict, gs_file) + continue + except Exception as e: + logger.info( + f"Unable to retrieve BGC data via antiSMASH API for genome ID {gs.original_id}. 
" + f"Error: {e}" + ) - except Exception: - gs_obj.bgc_path = "" + logger.warning(f"Failed to retrieve BGC data for genome ID {gs.original_id}.") + gs.failed_previously = True + GenomeStatus.to_json(gs_dict, gs_file) # raise and log warning for failed downloads failed_ids = [gs.original_id for gs in gs_dict.values() if not gs.bgc_path] @@ -216,9 +270,6 @@ def podp_download_and_extract_antismash_data( logger.warning(warning_message) warnings.warn(warning_message, UserWarning) - # save updated genome status to json file - GenomeStatus.to_json(gs_dict, gs_file) - if len(failed_ids) == len(genome_records): raise ValueError("No antiSMASH data found for any genome") @@ -247,122 +298,100 @@ def get_best_available_genome_id(genome_id_data: Mapping[str, str]) -> str | Non return best_id -def _resolve_genbank_accession(genbank_id: str) -> str: - """Try to get RefSeq assembly id through given GenBank assembly id. +def process_existing_antismash_data(gs_obj: GenomeStatus, extract_root: str | PathLike) -> None: + """Processes already downloaded antiSMASH BGC data archive. - Note that GenBank assembly accession starts with "GCA_" and RefSeq assembly - accession starts with "GCF_". For more info, see - https://www.ncbi.nlm.nih.gov/datasets/docs/v2/troubleshooting/faq + This function ensures that the antiSMASH data archive associated with a given genomic sequence + object is properly extracted into a specified directory. If the data has already been extracted, + the function skips the extraction process. Args: - genbank_id: ID for GenBank assembly accession. + gs_obj: An object representing a genomic sequence, which contains the path + to the antiSMASH BGC data (accessible via `gs_obj.bgc_path`) and + an original identifier (`gs_obj.original_id`). + extract_root: The root directory where the antiSMASH data should be extracted. Raises: - httpx.ReadTimeout: If the request times out. - - Returns: - RefSeq assembly ID if the search is successful, otherwise an empty string. 
+ Any exceptions raised by the `extract_antismash_data` function if the extraction fails. """ - logger.info( - f"Attempting to resolve Genbank assembly accession {genbank_id} to RefSeq accession" - ) - # NCBI Datasets API https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/ - # Note that there is a API rate limit of 5 requests per second without using an API key - # For more info, see https://www.ncbi.nlm.nih.gov/datasets/docs/v2/troubleshooting/faq/ - - # API for getting revision history of a genome assembly - # For schema, see https://www.ncbi.nlm.nih.gov/datasets/docs/v2/api/rest-api/#get-/genome/accession/-accession-/revision_history - url = f"https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/{genbank_id}/revision_history" - - refseq_id = "" - try: - resp = httpx.get( - url, headers={"User-Agent": USER_AGENT}, timeout=10.0, follow_redirects=True - ) - resp.raise_for_status() - - data = resp.json() - if not data: - raise ValueError("No Assembly Revision data found") - - assembly_entries = [ - entry for entry in data["assembly_revisions"] if "refseq_accession" in entry - ] - if not assembly_entries: - raise ValueError("No RefSeq assembly accession found") - - latest_entry = max(assembly_entries, key=lambda x: x["release_date"]) - refseq_id = latest_entry["refseq_accession"] - - except httpx.RequestError as exc: - logger.warning(f"An error occurred while requesting {exc.request.url!r}: {exc}") - except httpx.HTTPStatusError as exc: - logger.warning( - f"Error response {exc.response.status_code} while requesting {exc.request.url!r}" + antismash_id = Path(gs_obj.bgc_path).stem + extract_path = Path(extract_root, "antismash", antismash_id) + completed_marker = extract_path / "completed" + + # Check if archive is already successfully extracted + if completed_marker.exists(): + logger.info( + f"antiSMASH BGC data for {gs_obj.original_id} already extracted at {extract_path}." 
) - except httpx.ReadTimeout: - logger.warning("Timed out waiting for result of GenBank assembly lookup") - except ValueError as exc: - logger.warning(f"Error while resolving GenBank assembly accession {genbank_id}: {exc}") + return - return refseq_id + extract_antismash_data(gs_obj.bgc_path, extract_root, antismash_id) + completed_marker.touch(exist_ok=True) -def _resolve_jgi_accession(jgi_id: str) -> str: - """Try to get RefSeq id through given JGI id. +def retrieve_antismash_db_data( + genome_status: GenomeStatus, download_root: str | PathLike, extract_root: str | PathLike +) -> None: + """Retrieve antiSMASH database data for a given genome and update its status. + + This function downloads and extracts antiSMASH data for a genome identified + by its resolved genome ID. It updates the `genome_status` object with the + path to the downloaded data or sets it to an empty string if an error occurs. Args: - jgi_id: JGI_Genome_ID for GenBank accession. + genome_status (GenomeStatus): An object representing the genome's status, + including its resolved genome ID and BGC path. + download_root (str | PathLike): The root directory where the antiSMASH + data will be downloaded. + extract_root (str | PathLike): The root directory where the antiSMASH + data will be extracted. - Returns: - RefSeq ID if search is successful, otherwise an empty string. + Raises: + Exception: If an error occurs during the download or extraction process. """ - url = JGI_GENOME_LOOKUP_URL.format(jgi_id) - logger.info(f"Attempting to resolve JGI_Genome_ID {jgi_id} to GenBank accession via {url}") - # no User-Agent header produces a 403 Forbidden error on this site... - try: - resp = httpx.get( - url, headers={"User-Agent": USER_AGENT}, timeout=10.0, follow_redirects=True + if not genome_status.resolved_id.startswith("GCF_"): + raise ValueError( + f"Resolved genome ID '{genome_status.resolved_id}' is not a valid RefSeq assembly and " + "antiSMASH-DB only contains results for RefSeq assemblies." 
) - except httpx.ReadTimeout: - logger.warning("Timed out waiting for result of JGI_Genome_ID lookup") - return "" - - soup = BeautifulSoup(resp.content, "html.parser") - # Find the table entry giving the "NCBI Assembly Accession" ID - link = soup.find("a", href=re.compile("https://www.ncbi.nlm.nih.gov/datasets/genome/.*")) - if link is None: - return "" - - assembly_id = link.text - # check if the assembly ID is already a RefSeq ID - if assembly_id.startswith("GCF_"): - return assembly_id # type: ignore - else: - return _resolve_genbank_accession(assembly_id) + + antismash_id = genome_status.resolved_id + extract_path = Path(extract_root, "antismash", antismash_id) + download_path = Path(download_root, f"{antismash_id}.zip").absolute() + + download_and_extract_from_antismash_db(antismash_id, download_root, extract_root) + Path.touch(extract_path / "completed", exist_ok=True) + genome_status.bgc_path = str(download_path) -def _resolve_refseq_id(genome_id_data: Mapping[str, str]) -> str: - """Get the RefSeq ID to which the genome accession is linked. +def retrieve_antismash_job_data( + job_id: str, + genome_status: GenomeStatus, + download_root: str | PathLike, + extract_root: str | PathLike, +) -> None: + """Retrieve antiSMASH API data for a given genome and update its status. - Check https://pairedomicsdata.bioinformatics.nl/schema.json. + This function downloads and extracts antiSMASH data for a genome identified + by its resolved genome ID. It updates the `genome_status` object with the + path to the downloaded data or sets it to an empty string if an error occurs. Args: - genome_id_data: dictionary containing information - for each genome record present. + job_id (str): The job ID for the antiSMASH API job. + genome_status (GenomeStatus): An object representing the genome's status, + including its resolved genome ID and BGC path. + download_root (str | PathLike): The root directory where the antiSMASH + data will be downloaded. 
+ extract_root (str | PathLike): The root directory where the antiSMASH + data will be extracted. - Returns: - RefSeq ID if present, otherwise an empty string. + Raises: + Exception: If an error occurs during the download or extraction process. """ - if "RefSeq_accession" in genome_id_data: - # best case, can use this directly - return genome_id_data["RefSeq_accession"] - if "GenBank_accession" in genome_id_data: - # resolve via NCBI - return _resolve_genbank_accession(genome_id_data["GenBank_accession"]) - if "JGI_Genome_ID" in genome_id_data: - # resolve via JGI => NCBI - return _resolve_jgi_accession(genome_id_data["JGI_Genome_ID"]) - - logger.warning(f"Unable to resolve genome_ID: {genome_id_data}") - return "" + antismash_id = genome_status.resolved_id + extract_path = Path(extract_root, "antismash", antismash_id) + download_path = Path(download_root, f"{antismash_id}.zip").absolute() + + download_and_extract_from_antismash_api(job_id, antismash_id, download_root, extract_root) + Path.touch(extract_path / "completed", exist_ok=True) + genome_status.bgc_path = str(download_path) diff --git a/src/nplinker/genomics/utils.py b/src/nplinker/genomics/utils.py index ecf6521b..2b723dff 100644 --- a/src/nplinker/genomics/utils.py +++ b/src/nplinker/genomics/utils.py @@ -276,7 +276,7 @@ def extract_mappings_original_genome_id_resolved_genome_id( Generate strain mappings JSON file for PODP pipeline. 
""" gs_mappings_dict = GenomeStatus.read_json(genome_status_json_file) - return {gs.original_id: gs.resolved_refseq_id for gs in gs_mappings_dict.values()} + return {gs.original_id: gs.resolved_id for gs in gs_mappings_dict.values()} def extract_mappings_resolved_genome_id_bgc_id( diff --git a/src/nplinker/schemas/genome_status_schema.json b/src/nplinker/schemas/genome_status_schema.json index 470c74f4..b3dbdef7 100644 --- a/src/nplinker/schemas/genome_status_schema.json +++ b/src/nplinker/schemas/genome_status_schema.json @@ -17,8 +17,8 @@ "type": "object", "required": [ "original_id", - "resolved_refseq_id", - "resolve_attempted", + "resolved_id", + "failed_previously", "bgc_path" ], "properties": { @@ -28,15 +28,15 @@ "description": "The original ID of the genome", "minLength": 1 }, - "resolved_refseq_id": { + "resolved_id": { "type": "string", - "title": "Resolved RefSeq ID", - "description": "The RefSeq ID that was resolved for this genome" + "title": "Resolved genome ID", + "description": "The genome ID that was resolved for this genome" }, - "resolve_attempted": { + "failed_previously": { "type": "boolean", - "title": "Resolve Attempted", - "description": "Whether or not an attempt was made to resolve this genome" + "title": "Failed Attempt", + "description": "Indicates if a previous attempt to retrieve BGC data for this genome was unsuccessful" }, "bgc_path": { "type": "string", diff --git a/tests/unit/genomics/test_antismash_downloader.py b/tests/unit/genomics/test_antismash_downloader.py index 1dfeb4cf..553699ba 100644 --- a/tests/unit/genomics/test_antismash_downloader.py +++ b/tests/unit/genomics/test_antismash_downloader.py @@ -1,10 +1,10 @@ import pytest -from nplinker.genomics.antismash import download_and_extract_antismash_data +from nplinker.genomics.antismash import download_and_extract_from_antismash_db from nplinker.utils import extract_archive from nplinker.utils import list_files -class TestDownloadAndExtractAntismashData: +class 
TestDownloadAndExtractFromAntismashDb: antismash_id = "GCF_004339725.1" def test_default(self, tmp_path): @@ -14,7 +14,7 @@ def test_default(self, tmp_path): extract_root.mkdir() original_extract_root = tmp_path / "original" original_extract_root.mkdir() - download_and_extract_antismash_data(self.antismash_id, download_root, extract_root) + download_and_extract_from_antismash_db(self.antismash_id, download_root, extract_root) archive = download_root / "GCF_004339725.1.zip" extracted_folder = extract_root / "antismash" / "GCF_004339725.1" extracted_files = list_files(extracted_folder, keep_parent=False) @@ -32,7 +32,9 @@ def test_error_nonempty_path(self, tmp_path): nonempty_path = tmp_path / "extracted" / "antismash" / f"{self.antismash_id}" / "subdir" nonempty_path.mkdir(parents=True) with pytest.raises(ValueError, match="Nonempty directory"): - download_and_extract_antismash_data(self.antismash_id, tmp_path, tmp_path / "extracted") + download_and_extract_from_antismash_db( + self.antismash_id, tmp_path, tmp_path / "extracted" + ) # test a non-existent ID, which can be either a fake ID, non-existent in NCBI # or a valid NCBI genome ID but it does not have BGC data in antismash database @@ -44,6 +46,6 @@ def test_nonexisting_id(self, tmp_path): extract_root.mkdir() for test_id in nonexisting_ids: with pytest.raises(RuntimeError): - download_and_extract_antismash_data(test_id, download_root, extract_root) + download_and_extract_from_antismash_db(test_id, download_root, extract_root) extracted_folder = extract_root / "antismash" / test_id assert not extracted_folder.exists() diff --git a/tests/unit/genomics/test_genome_accession_resolver.py b/tests/unit/genomics/test_genome_accession_resolver.py new file mode 100644 index 00000000..5b76eedc --- /dev/null +++ b/tests/unit/genomics/test_genome_accession_resolver.py @@ -0,0 +1,203 @@ +from unittest.mock import patch +import pytest +from nplinker.genomics.antismash.genome_accession_resolver import 
get_latest_assembly_accession +from nplinker.genomics.antismash.genome_accession_resolver import resolve_genome_accession +from nplinker.genomics.antismash.genome_accession_resolver import validate_assembly_acc + + +USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:86.0) Gecko/20100101 Firefox/86.0" + + +@pytest.fixture +def mock_resolvers(): + with ( + patch( + "nplinker.genomics.antismash.genome_accession_resolver._resolve_refseq" + ) as mock_refseq, + patch( + "nplinker.genomics.antismash.genome_accession_resolver._resolve_genbank" + ) as mock_genbank, + patch("nplinker.genomics.antismash.genome_accession_resolver._resolve_jgi") as mock_jgi, + ): + yield mock_refseq, mock_genbank, mock_jgi + + +# test get_latest_assembly_accession + + +def test_get_latest_assembly_accession_refseq_success(): + with patch( + "nplinker.genomics.antismash.genome_accession_resolver._get_revision_history" + ) as mock_get_revision_history: + mock_get_revision_history.return_value = { + "assembly_revisions": [ + {"refseq_accession": "GCF_000001.2", "release_date": "2023-01-01"}, + {"refseq_accession": "GCF_000001.1", "release_date": "2022-01-01"}, + ] + } + + result = get_latest_assembly_accession("GCF_000001.1") + + assert result == "GCF_000001.2" + mock_get_revision_history.assert_called_once_with("GCF_000001.1") + + +def test_get_latest_assembly_accession_genbank_success(): + with patch( + "nplinker.genomics.antismash.genome_accession_resolver._get_revision_history" + ) as mock_get_revision_history: + mock_get_revision_history.return_value = { + "assembly_revisions": [ + {"genbank_accession": "GCA_000002.2", "release_date": "2023-01-01"}, + {"genbank_accession": "GCA_000002.1", "release_date": "2022-01-01"}, + ] + } + + result = get_latest_assembly_accession("GCA_000002.1") + + assert result == "GCA_000002.2" + mock_get_revision_history.assert_called_once_with("GCA_000002.1") + + +def test_get_latest_assembly_accession_refseq_and_genbank(): + with patch( + 
"nplinker.genomics.antismash.genome_accession_resolver._get_revision_history" + ) as mock_get_revision_history: + mock_get_revision_history.return_value = { + "assembly_revisions": [ + {"refseq_accession": "GCF_000001.2", "release_date": "2023-01-01"}, + {"genbank_accession": "GCA_000002.2", "release_date": "2022-01-01"}, + ] + } + + result = get_latest_assembly_accession("GCF_000001.1") + + assert result == "GCF_000001.2" + mock_get_revision_history.assert_called_once_with("GCF_000001.1") + + +def test_get_latest_assembly_accession_no_valid_accession(): + with patch( + "nplinker.genomics.antismash.genome_accession_resolver._get_revision_history" + ) as mock_get_revision_history: + mock_get_revision_history.return_value = {"assembly_revisions": []} + + with pytest.raises(ValueError, match="No valid genome accession found"): + get_latest_assembly_accession("GCF_000001.1") + + mock_get_revision_history.assert_called_once_with("GCF_000001.1") + + +# test resolve_for_genome_accession + + +def test_resolve_genome_accession_refseq_success(mock_resolvers): + mock_refseq, mock_genbank, mock_jgi = mock_resolvers + mock_refseq.return_value = "GCF_000001.2" + + genome_id_data = {"RefSeq_accession": "GCF_000001.1"} + result = resolve_genome_accession(genome_id_data) + + assert result == "GCF_000001.2" + mock_refseq.assert_called_once_with("GCF_000001.1") + mock_genbank.assert_not_called() + mock_jgi.assert_not_called() + + +def test_resolve_genome_accession_genbank_success(mock_resolvers): + mock_refseq, mock_genbank, mock_jgi = mock_resolvers + mock_refseq.side_effect = Exception("RefSeq resolution failed") + mock_genbank.return_value = "GCA_000002.2" + + genome_id_data = { + "RefSeq_accession": "GCF_000001.1", + "GenBank_accession": "GCA_000002.1", + } + result = resolve_genome_accession(genome_id_data) + + assert result == "GCA_000002.2" + mock_refseq.assert_called_once_with("GCF_000001.1") + mock_genbank.assert_called_once_with("GCA_000002.1") + 
mock_jgi.assert_not_called() + + +def test_resolve_genome_accession_jgi_success(mock_resolvers): + mock_refseq, mock_genbank, mock_jgi = mock_resolvers + mock_refseq.side_effect = Exception("RefSeq resolution failed") + mock_genbank.side_effect = Exception("GenBank resolution failed") + mock_jgi.return_value = "GCF_000001.2" + + genome_id_data = { + "RefSeq_accession": "GCF_000001.1", + "GenBank_accession": "GCA_000002.1", + "JGI_Genome_ID": "12345", + } + result = resolve_genome_accession(genome_id_data) + + assert result == "GCF_000001.2" + mock_refseq.assert_called_once_with("GCF_000001.1") + mock_genbank.assert_called_once_with("GCA_000002.1") + mock_jgi.assert_called_once_with("12345") + + +def test_resolve_genome_accession_no_valid_accession(mock_resolvers): + mock_refseq, mock_genbank, mock_jgi = mock_resolvers + mock_refseq.side_effect = Exception("RefSeq resolution failed") + mock_genbank.side_effect = Exception("GenBank resolution failed") + mock_jgi.side_effect = Exception("JGI resolution failed") + + genome_id_data = { + "RefSeq_accession": "GCF_000001.1", + "GenBank_accession": "GCA_000002.1", + "JGI_Genome_ID": "12345", + } + + with pytest.raises(RuntimeError, match="No valid assembly accessions found"): + resolve_genome_accession(genome_id_data) + + mock_refseq.assert_called_once_with("GCF_000001.1") + mock_genbank.assert_called_once_with("GCA_000002.1") + mock_jgi.assert_called_once_with("12345") + + +def test_resolve_genome_accession_missing_keys(mock_resolvers): + mock_refseq, mock_genbank, mock_jgi = mock_resolvers + + genome_id_data = {} + with pytest.raises(RuntimeError, match="No valid assembly accessions found"): + resolve_genome_accession(genome_id_data) + + mock_refseq.assert_not_called() + mock_genbank.assert_not_called() + mock_jgi.assert_not_called() + + +def test_validate_assembly_acc_valid(): + # Test valid GenBank accession + validate_assembly_acc("GCA_000002.1", "GenBank") + # Test valid RefSeq accession + 
validate_assembly_acc("GCF_000001.1", "RefSeq") + + +def test_validate_assembly_acc_invalid_type(): + # Test invalid accession type + with pytest.raises(ValueError, match="Invalid genome assembly accession type 'InvalidType'"): + validate_assembly_acc("GCF_000001.1", "InvalidType") + + +def test_validate_assembly_acc_invalid_prefix(): + # Test invalid prefix for GenBank accession + with pytest.raises(ValueError, match="Invalid GenBank assembly accession"): + validate_assembly_acc("GCF_000002.1", "GenBank") + # Test invalid prefix for RefSeq accession + with pytest.raises(ValueError, match="Invalid RefSeq assembly accession"): + validate_assembly_acc("GCA_000001.1", "RefSeq") + + +def test_validate_assembly_acc_missing_version(): + # Test missing version number for GenBank accession + with pytest.raises(ValueError, match="Invalid assembly accession \\(missing version number\\)"): + validate_assembly_acc("GCA_000002", "GenBank") + # Test missing version number for RefSeq accession + with pytest.raises(ValueError, match="Invalid assembly accession \\(missing version number\\)"): + validate_assembly_acc("GCF_000001", "RefSeq") diff --git a/tests/unit/genomics/test_ncbi_downloader.py b/tests/unit/genomics/test_ncbi_downloader.py new file mode 100644 index 00000000..bc1dda48 --- /dev/null +++ b/tests/unit/genomics/test_ncbi_downloader.py @@ -0,0 +1,45 @@ +from unittest.mock import patch +import httpx +import pytest +from nplinker.genomics.antismash.ncbi_downloader import download_and_extract_ncbi_genome + + +@pytest.fixture +def download_root(tmp_path): + return tmp_path / "download" + + +@pytest.fixture +def extract_root(tmp_path): + return tmp_path / "extracted" + + +def test_download_and_extract_ncbi_genome_success(download_root, extract_root): + assembly_accession = "GCF_000514775.1" + + genome_path = download_and_extract_ncbi_genome(assembly_accession, download_root, extract_root) + + assert genome_path == extract_root / "ncbi_genomes" / f"{assembly_accession}.gbff" 
+ assert not (extract_root / "ncbi_genomes" / "md5sum.txt").exists() + assert not (extract_root / "ncbi_genomes" / "README.md").exists() + assert not (extract_root / "ncbi_genomes" / "ncbi_dataset").exists() + + +def test_download_and_extract_ncbi_genome_max_retries(download_root, extract_root): + assembly_accession = "GCF_000514775.1" + + with patch( + "nplinker.genomics.antismash.ncbi_downloader.download_url", + side_effect=httpx.ReadTimeout("Download failed"), + ): + with pytest.raises(httpx.ReadTimeout, match="Maximum download retries"): + download_and_extract_ncbi_genome( + assembly_accession, download_root, extract_root, max_attempts=1 + ) + + +def test_download_and_extract_ncbi_genome_invalid_accession(download_root, extract_root): + assembly_accession = "invalid_ref_seq_id" + + with pytest.raises(ValueError, match="Not a valid genome assembly accession"): + download_and_extract_ncbi_genome(assembly_accession, download_root, extract_root) diff --git a/tests/unit/genomics/test_podp_antismash_downloader.py b/tests/unit/genomics/test_podp_antismash_downloader.py index 7fb9ec0c..6be23f7f 100644 --- a/tests/unit/genomics/test_podp_antismash_downloader.py +++ b/tests/unit/genomics/test_podp_antismash_downloader.py @@ -36,7 +36,7 @@ def genome_status_file(download_root): ) def test_genome_status_init(params, expected): gs = GenomeStatus(*params) - assert [gs.original_id, gs.resolved_refseq_id, gs.resolve_attempted, gs.bgc_path] == expected + assert [gs.original_id, gs.resolved_id, gs.failed_previously, gs.bgc_path] == expected def test_genome_status_read_json(tmp_path): @@ -44,14 +44,14 @@ def test_genome_status_read_json(tmp_path): "genome_status": [ { "original_id": "genome1", - "resolved_refseq_id": "refseq1", - "resolve_attempted": True, + "resolved_id": "refseq1", + "failed_previously": True, "bgc_path": "/path/to/bgc1", }, { "original_id": "genome2", - "resolved_refseq_id": "", - "resolve_attempted": False, + "resolved_id": "", + "failed_previously": False, 
"bgc_path": "", }, ], @@ -64,12 +64,12 @@ def test_genome_status_read_json(tmp_path): assert len(genome_status_dict) == 2 assert genome_status_dict["genome1"].original_id == "genome1" - assert genome_status_dict["genome1"].resolved_refseq_id == "refseq1" - assert genome_status_dict["genome1"].resolve_attempted is True + assert genome_status_dict["genome1"].resolved_id == "refseq1" + assert genome_status_dict["genome1"].failed_previously is True assert genome_status_dict["genome1"].bgc_path == "/path/to/bgc1" assert genome_status_dict["genome2"].original_id == "genome2" - assert genome_status_dict["genome2"].resolved_refseq_id == "" - assert genome_status_dict["genome2"].resolve_attempted is False + assert genome_status_dict["genome2"].resolved_id == "" + assert genome_status_dict["genome2"].failed_previously is False assert genome_status_dict["genome2"].bgc_path == "" @@ -86,12 +86,12 @@ def test_genome_status_to_json(tmp_path): assert loaded_data["version"] == "1.0" assert len(loaded_data["genome_status"]) == 2 assert loaded_data["genome_status"][0]["original_id"] == "genome1" - assert loaded_data["genome_status"][0]["resolved_refseq_id"] == "refseq1" - assert loaded_data["genome_status"][0]["resolve_attempted"] is True + assert loaded_data["genome_status"][0]["resolved_id"] == "refseq1" + assert loaded_data["genome_status"][0]["failed_previously"] is True assert loaded_data["genome_status"][0]["bgc_path"] == "/path/to/bgc1" assert loaded_data["genome_status"][1]["original_id"] == "genome2" - assert loaded_data["genome_status"][1]["resolved_refseq_id"] == "" - assert loaded_data["genome_status"][1]["resolve_attempted"] is False + assert loaded_data["genome_status"][1]["resolved_id"] == "" + assert loaded_data["genome_status"][1]["failed_previously"] is False assert loaded_data["genome_status"][1]["bgc_path"] == "" @@ -105,10 +105,10 @@ def test_genome_status_to_json_nofile(): assert isinstance(result, str) assert ( result == '{"genome_status": ' - 
'[{"original_id": "genome1", "resolved_refseq_id": "refseq1", ' - '"resolve_attempted": true, "bgc_path": "/path/to/bgc1"}, ' - '{"original_id": "genome2", "resolved_refseq_id": "", ' - '"resolve_attempted": false, "bgc_path": ""}], "version": "1.0"}' + '[{"original_id": "genome1", "resolved_id": "refseq1", ' + '"failed_previously": true, "bgc_path": "/path/to/bgc1"}, ' + '{"original_id": "genome2", "resolved_id": "", ' + '"failed_previously": false, "bgc_path": ""}], "version": "1.0"}' ) @@ -215,10 +215,10 @@ def test_caching(download_root, extract_root, genome_status_file, caplog): genome_status_old = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status_old["GCF_000016425.1"] assert Path(genome_obj.bgc_path).exists() - assert genome_obj.resolve_attempted + assert genome_obj.failed_previously is False podp_download_and_extract_antismash_data(genome_records, download_root, extract_root) assert ( - f"Genome ID {genome_obj.original_id} already downloaded to {genome_obj.bgc_path}" + f"antiSMASH BGC data for genome ID {genome_obj.original_id} already downloaded to {genome_obj.bgc_path}" in caplog.text ) assert ( @@ -271,8 +271,8 @@ def test_refseq_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["GCF_000016425.1"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + ".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() @@ -298,8 +298,8 @@ def test_genbank_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["GCA_000016425.1"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + 
".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() @@ -325,8 +325,8 @@ def test_jgi_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["640427140"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + ".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() @@ -357,8 +357,8 @@ def test_refseq_jgi_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["GCF_000016425.1"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + ".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() @@ -389,8 +389,8 @@ def test_refseq_genbank_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["GCF_000016425.1"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + ".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id 
extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() @@ -421,8 +421,8 @@ def test_genbank_jgi_id(download_root, extract_root, genome_status_file): genome_status = GenomeStatus.read_json(genome_status_file) genome_obj = genome_status["GCA_000016425.1"] - archive = download_root / Path(str(genome_obj.resolved_refseq_id) + ".zip") - extracted_folder = extract_root / "antismash" / genome_obj.resolved_refseq_id + archive = download_root / Path(str(genome_obj.resolved_id) + ".zip") + extracted_folder = extract_root / "antismash" / genome_obj.resolved_id extracted_files = list_files(extracted_folder, keep_parent=False) assert archive.exists() diff --git a/tests/unit/genomics/test_utils.py b/tests/unit/genomics/test_utils.py index 8a17aa69..7d68eb66 100644 --- a/tests/unit/genomics/test_utils.py +++ b/tests/unit/genomics/test_utils.py @@ -192,20 +192,20 @@ def test_extract_mappings_original_genome_id_resolved_genome_id(tmp_path): "genome_status": [ { "original_id": "id1", - "resolved_refseq_id": "refseq1", - "resolve_attempted": True, + "resolved_id": "refseq1", + "failed_previously": True, "bgc_path": "", }, { "original_id": "id2", - "resolved_refseq_id": "refseq2", - "resolve_attempted": True, + "resolved_id": "refseq2", + "failed_previously": True, "bgc_path": "", }, { "original_id": "id3", - "resolved_refseq_id": "refseq3", - "resolve_attempted": True, + "resolved_id": "refseq3", + "failed_previously": True, "bgc_path": "", }, ], diff --git a/tests/unit/schemas/test_genome_status_schema.py b/tests/unit/schemas/test_genome_status_schema.py index de0679a5..ddde0fa9 100644 --- a/tests/unit/schemas/test_genome_status_schema.py +++ b/tests/unit/schemas/test_genome_status_schema.py @@ -10,9 +10,7 @@ data_empty_genome_status = {"genome_status": [], "version": "1.0"} data_no_original_id = { - "genome_status": [ - {"resolved_refseq_id": "id1_refseq", "resolve_attempted": True, "bgc_path": ""} - ], + "genome_status": [{"resolved_id": 
"id1_refseq", "failed_previously": True, "bgc_path": ""}], "version": "1.0", } @@ -20,8 +18,8 @@ "genome_status": [ { "original_id": "", - "resolved_refseq_id": "id1_refseq", - "resolve_attempted": True, + "resolved_id": "id1_refseq", + "failed_previously": True, "bgc_path": "", } ], @@ -32,37 +30,37 @@ "genome_status": [ { "original_id": 1, - "resolved_refseq_id": "id1_refseq", - "resolve_attempted": True, + "resolved_id": "id1_refseq", + "failed_previously": True, "bgc_path": "", } ], "version": "1.0", } -data_no_resolved_refseq_id = { - "genome_status": [{"original_id": "id1", "resolve_attempted": True, "bgc_path": ""}], +data_no_resolved_id = { + "genome_status": [{"original_id": "id1", "failed_previously": True, "bgc_path": ""}], "version": "1.0", } -data_invalid_resolved_refseq_id = { +data_invalid_resolved_id = { "genome_status": [ - {"original_id": "id1", "resolved_refseq_id": 1, "resolve_attempted": True, "bgc_path": ""} + {"original_id": "id1", "resolved_id": 1, "failed_previously": True, "bgc_path": ""} ], "version": "1.0", } -data_no_resolve_attempted = { - "genome_status": [{"original_id": "id1", "resolved_refseq_id": "id1_refseq", "bgc_path": ""}], +data_no_failed_previously = { + "genome_status": [{"original_id": "id1", "resolved_id": "id1_refseq", "bgc_path": ""}], "version": "1.0", } -data_invalid_resolve_attempted = { +data_invalid_failed_previously = { "genome_status": [ { "original_id": "id1", - "resolved_refseq_id": "id1_refseq", - "resolve_attempted": 1, + "resolved_id": "id1_refseq", + "failed_previously": 1, "bgc_path": "", } ], @@ -71,7 +69,7 @@ data_no_bgc_path = { "genome_status": [ - {"original_id": "id1", "resolved_refseq_id": "id1_refseq", "resolve_attempted": True} + {"original_id": "id1", "resolved_id": "id1_refseq", "failed_previously": True} ], "version": "1.0", } @@ -80,8 +78,8 @@ "genome_status": [ { "original_id": "id1", - "resolved_refseq_id": "id1_refseq", - "resolve_attempted": True, + "resolved_id": "id1_refseq", + 
"failed_previously": True, "bgc_path": 1, } ], @@ -94,7 +92,7 @@ data_empty_version = { "genome_status": [{"strain_id": "strain1", "strain_alias": ["alias1", "alias2"]}], - "version": "" "", + "version": "", } data_invalid_version = { @@ -112,10 +110,10 @@ [data_no_original_id, "'original_id' is a required property"], [data_empty_original_id, "'' should be non-empty"], [data_invalid_original_id, "1 is not of type 'string'"], - [data_no_resolved_refseq_id, "'resolved_refseq_id' is a required property"], - [data_invalid_resolved_refseq_id, "1 is not of type 'string'"], - [data_no_resolve_attempted, "'resolve_attempted' is a required property"], - [data_invalid_resolve_attempted, "1 is not of type 'boolean'"], + [data_no_resolved_id, "'resolved_id' is a required property"], + [data_invalid_resolved_id, "1 is not of type 'string'"], + [data_no_failed_previously, "'failed_previously' is a required property"], + [data_invalid_failed_previously, "1 is not of type 'boolean'"], [data_no_bgc_path, "'bgc_path' is a required property"], [data_invalid_bgc_path, "1 is not of type 'string'"], [data_no_version, "'version' is a required property"], @@ -135,14 +133,14 @@ def test_valid_data(): "genome_status": [ { "original_id": "id1", - "resolved_refseq_id": "id1_refseq", - "resolve_attempted": True, + "resolved_id": "id1_refseq", + "failed_previously": True, "bgc_path": "", }, { "original_id": "id2", - "resolved_refseq_id": "id2_refseq", - "resolve_attempted": False, + "resolved_id": "id2_refseq", + "failed_previously": False, "bgc_path": "", }, ],