diff --git a/libs/community/Makefile b/libs/community/Makefile
index ec82d805c..3b5f88bab 100644
--- a/libs/community/Makefile
+++ b/libs/community/Makefile
@@ -15,7 +15,7 @@ test tests:
 	uv run --group test pytest --disable-socket --allow-unix-socket $(TEST_FILE)
 
 integration_test integration_tests:
-	uv run --group test --group test_integration pytest --retries 3 --retry-delay 1 $(TEST_FILE)
+	uv run --group test --group test_integration pytest -n auto --retries 3 --retry-delay 1 $(TEST_FILE)
 
 # Run unit tests and generate a coverage report.
 coverage:
diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml
index d2b533e2a..547b1ee82 100644
--- a/libs/community/pyproject.toml
+++ b/libs/community/pyproject.toml
@@ -53,6 +53,7 @@ featurestore = ["google-cloud-bigquery-storage>=2.6.0,<3.0.0", "pandas>=1.0.0; p
 [dependency-groups]
 test = [
     "pytest>=7.3.0,<8.0.0",
+    "pytest-xdist>=3.8.0,<4.0.0",
     "freezegun>=1.2.2,<2.0.0",
     "pytest-mock>=3.10.0,<4.0.0",
     "syrupy>=4.0.2,<5.0.0",
diff --git a/libs/community/tests/integration_tests/test_check_grounding.py b/libs/community/tests/integration_tests/test_check_grounding.py
index 3bd571eb2..2596d3cc7 100644
--- a/libs/community/tests/integration_tests/test_check_grounding.py
+++ b/libs/community/tests/integration_tests/test_check_grounding.py
@@ -93,14 +93,14 @@ def output_parser(
 
 
 @pytest.mark.extended
-def test_integration_parse(
+async def test_integration_parse(
     output_parser: VertexAICheckGroundingWrapper,
     input_documents: List[Document],
 ) -> None:
     answer_candidate = "Ulm, in the Kingdom of Württemberg in the German Empire"
-    response = output_parser.with_config(
+    response = await output_parser.with_config(
         configurable={"documents": input_documents}
-    ).invoke(answer_candidate)
+    ).ainvoke(answer_candidate)
     assert isinstance(response, VertexAICheckGroundingWrapper.CheckGroundingResponse)
     assert response.support_score >= 0 and response.support_score <= 1
diff --git a/libs/community/tests/integration_tests/test_rank.py b/libs/community/tests/integration_tests/test_rank.py
index 01e2148dc..43a544445 100644
--- a/libs/community/tests/integration_tests/test_rank.py
+++ b/libs/community/tests/integration_tests/test_rank.py
@@ -159,20 +159,20 @@ def ranker(
 
 
 @pytest.mark.extended
-def test_compression_retriever(
+async def test_compression_retriever(
     mock_vector_store_retriever: MockVectorStoreRetriever, ranker: VertexAIRank
 ) -> None:
     compression_retriever = CustomRankingRetriever(
         base_retriever=mock_vector_store_retriever, ranker=ranker
     )
     query = "What was the name of einstein's mother? Was she a scientist too?"
-    compressed_docs = compression_retriever.invoke(query)
+    compressed_docs = await compression_retriever.ainvoke(query)
 
     expected_docs = [
         Document(
             page_content=(
                 "Life and career\nChildhood, youth and education\n"
                 "See also: Einstein family\nEinstein in 1882, age\xa03\n"
                 "Albert Einstein was born in Ulm,[19] in the Kingdom of "
                 "Württemberg in the German Empire, on 14 March 1879.[20][21] "
                 "His parents, secular Ashkenazi Jews, were Hermann Einstein, "
@@ -181,7 +181,7 @@ def test_compression_retriever(
                 "Isarvorstadt, where Einstein's father and his uncle Jakob "
                 "founded Elektrotechnische Fabrik J. Einstein & Cie, a "
                 "company that manufactured electrical equipment based on "
                 "direct current.[19]\nAlbert attended a Catholic elementary "
                 "school in Munich from the age of five. When he was eight, "
                 "he was transferred to the Luitpold Gymnasium, where he "
                 "received advanced primary and then secondary school "
@@ -195,9 +195,9 @@ def test_compression_retriever(
         ),
         Document(
             page_content=(
                 "Marriages, relationships and children\n"
                 "Albert Einstein and Mileva Marić Einstein, 1912\n"
                 "Albert Einstein and Elsa Einstein, 1930\n"
                 "Correspondence between Einstein and Marić, discovered and "
                 "published in 1987, revealed that in early 1902, while "
                 "Marić was visiting her parents in Novi Sad, she gave birth "
@@ -205,7 +205,7 @@ def test_compression_retriever(
                 "it was without the child, whose fate is uncertain. A "
                 "letter of Einstein's that he wrote in September 1903 "
                 "suggests that the girl was either given up for adoption or "
                 "died of scarlet fever in infancy.[45][46]\n"
                 "Einstein and Marić married in January 1903. In May 1904, "
                 "their son Hans Albert was born in Bern, Switzerland. Their "
                 "son Eduard was born in Zürich in July 1910. In letters "
@@ -237,7 +237,7 @@ def test_compression_retriever(
                 "by some to be a Russian spy; her husband, the Russian "
                 "sculptor Sergei Konenkov, created the bronze bust of "
                 "Einstein at the Institute for Advanced Study at "
                 "Princeton.[65][66][failed verification]\n"
                 "Following an episode of acute mental illness at about the "
                 "age of twenty, Einstein's son Eduard was diagnosed with "
                 "schizophrenia.[67] He spent the remainder of his life "
diff --git a/libs/community/uv.lock b/libs/community/uv.lock
index 1770b1183..5337a0239 100644
--- a/libs/community/uv.lock
+++ b/libs/community/uv.lock
@@ -334,6 +334,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" },
 ]
 
+[[package]]
+name = "execnet"
+version = "2.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" },
+]
+
 [[package]]
 name = "freezegun"
 version = "1.5.5"
@@ -1247,6 +1256,7 @@ test = [
     { name = "pytest-retry" },
     { name = "pytest-socket" },
     { name = "pytest-watcher" },
+    { name = "pytest-xdist" },
     { name = "syrupy" },
 ]
 test-integration = [
@@ -1316,6 +1326,7 @@ test = [
    { name = "pytest-retry", specifier = ">=1.7.0,<2.0.0" },
    { name = "pytest-socket", specifier = ">=0.7.0,<1.0.0" },
    { name = "pytest-watcher", specifier = ">=0.3.4,<1.0.0" },
+    { name = 
"pytest-xdist", specifier = ">=3.8.0,<4.0.0" }, { name = "syrupy", specifier = ">=4.0.2,<5.0.0" }, ] test-integration = [{ name = "pillow", specifier = ">=10.1.0,<11.0.0" }] @@ -2259,6 +2270,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/3a/c44a76c6bb5e9e896d9707fb1c704a31a0136950dec9514373ced0684d56/pytest_watcher-0.4.3-py3-none-any.whl", hash = "sha256:d59b1e1396f33a65ea4949b713d6884637755d641646960056a90b267c3460f9", size = 11852, upload-time = "2024-08-28T17:37:45.731Z" }, ] +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" diff --git a/libs/genai/Makefile b/libs/genai/Makefile index d17111492..25c9aa472 100644 --- a/libs/genai/Makefile +++ b/libs/genai/Makefile @@ -15,7 +15,7 @@ test tests: uv run --group test pytest --disable-socket --allow-unix-socket $(TEST_FILE) integration_test integration_tests: - uv run --group test --group test_integration pytest --retries 3 --retry-delay 1 $(TEST_FILE) + uv run --group test --group test_integration pytest -n auto --retries 3 --retry-delay 1 $(TEST_FILE) check_imports: $(shell find langchain_google_genai -name '*.py') uv run --all-groups python ./scripts/check_imports.py $^ diff --git a/libs/genai/pyproject.toml b/libs/genai/pyproject.toml index 3b1dc7f57..a38fe247e 100644 --- a/libs/genai/pyproject.toml +++ b/libs/genai/pyproject.toml @@ -40,6 +40,7 @@ typing = [ ] test = [ "pytest>=8.4.0,<9.0.0", + "pytest-xdist>=3.8.0,<4.0.0", "freezegun>=1.5.0,<2.0.0", "pytest-mock>=3.14.0,<4.0.0", "syrupy>=4.9.0,<5.0.0", diff --git a/libs/genai/tests/integration_tests/test_callbacks.py b/libs/genai/tests/integration_tests/test_callbacks.py index f24a07127..8b2bc22b7 100644 --- a/libs/genai/tests/integration_tests/test_callbacks.py +++ b/libs/genai/tests/integration_tests/test_callbacks.py @@ -27,12 +27,12 @@ def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any: "model_name", MODEL_NAMES, ) -def test_streaming_callback(model_name: str) -> None: +async def test_streaming_callback(model_name: str) -> None: prompt_template = "Tell me details about the Company {name} with 2 bullet point?" 
cb = StreamingLLMCallbackHandler() llm = ChatGoogleGenerativeAI(model=model_name, callbacks=[cb]) llm_chain = PromptTemplate.from_template(prompt_template) | llm - for _t in llm_chain.stream({"name": "Google"}): + async for _t in llm_chain.astream({"name": "Google"}): pass assert len(cb.tokens) > 1 assert len(cb.generations) == 1 diff --git a/libs/genai/tests/integration_tests/test_chat_models.py b/libs/genai/tests/integration_tests/test_chat_models.py index d5d1cfa9a..7233a2485 100644 --- a/libs/genai/tests/integration_tests/test_chat_models.py +++ b/libs/genai/tests/integration_tests/test_chat_models.py @@ -173,7 +173,7 @@ async def test_chat_google_genai_invoke(is_async: bool) -> None: @pytest.mark.flaky(retries=3, delay=1) -def test_chat_google_genai_invoke_with_image() -> None: +async def test_chat_google_genai_invoke_with_image() -> None: """Test generating an image and then text. Using `generation_config` to specify response modalities. @@ -184,7 +184,7 @@ def test_chat_google_genai_invoke_with_image() -> None: for _ in range(3): # We break as soon as we get an image back, as it's not guaranteed - result = llm.invoke( + result = await llm.ainvoke( "Say 'meow!' and then Generate an image of a cat.", config={"tags": ["meow"]}, generation_config={ @@ -210,7 +210,7 @@ def test_chat_google_genai_invoke_with_image() -> None: # Test we can pass back in next_message = {"role": "user", "content": "Thanks!"} - _ = llm.invoke([result, next_message]) + _ = await llm.ainvoke([result, next_message]) # Test content_blocks property content_blocks = result.content_blocks @@ -225,13 +225,13 @@ def test_chat_google_genai_invoke_with_image() -> None: _ = llm.invoke(["What's this?", {"role": "assistant", "content": content_blocks}]) -def test_chat_google_genai_invoke_with_audio() -> None: +async def test_chat_google_genai_invoke_with_audio() -> None: """Test generating audio.""" llm = ChatGoogleGenerativeAI( model=_AUDIO_OUTPUT_MODEL, response_modalities=[Modality.AUDIO] ) - result = llm.invoke( + result = await llm.ainvoke( "Please say The quick brown fox jumps over the lazy dog", ) assert isinstance(result, AIMessage) @@ -259,7 +259,7 @@ def test_chat_google_genai_invoke_with_audio() -> None: (100, "explicit thinking budget"), ], ) -def test_chat_google_genai_invoke_thinking( +async def test_chat_google_genai_invoke_thinking( thinking_budget: int | None, test_description: str ) -> None: """Test invoke a thinking model with different thinking budget configurations.""" @@ -269,7 +269,7 @@ def test_chat_google_genai_invoke_thinking( llm = ChatGoogleGenerativeAI(**llm_kwargs) - result = llm.invoke( + result = await llm.ainvoke( "How many O's are in Google? Please tell me how you double checked the result", ) @@ -728,14 +728,14 @@ def test_chat_google_genai_invoke_media_resolution(message: BaseMessage) -> None ) -def test_chat_google_genai_single_call_with_history() -> None: +async def test_chat_google_genai_single_call_with_history() -> None: model = ChatGoogleGenerativeAI(model=_MODEL) text_question1, text_answer1 = "How much is 2+2?", "4" text_question2 = "How much is 3+3?" 
message1 = HumanMessage(content=text_question1) message2 = AIMessage(content=text_answer1) message3 = HumanMessage(content=text_question2) - response = model.invoke([message1, message2, message3]) + response = await model.ainvoke([message1, message2, message3]) assert isinstance(response, AIMessage) if isinstance(response.content, list): text_content = "".join( @@ -752,7 +752,7 @@ def test_chat_google_genai_single_call_with_history() -> None: "model_name", [_MODEL, _PRO_MODEL], ) -def test_chat_google_genai_system_message( +async def test_chat_google_genai_system_message( model_name: str, ) -> None: """Test system message handling. @@ -769,7 +769,7 @@ def test_chat_google_genai_system_message( message1 = HumanMessage(content=text_question1) message2 = AIMessage(content=text_answer1) message3 = HumanMessage(content=text_question2) - response = model.invoke([system_message, message1, message2, message3]) + response = await model.ainvoke([system_message, message1, message2, message3]) assert isinstance(response, AIMessage) if isinstance(response.content, list): text_content = "".join( @@ -815,7 +815,7 @@ def test_safety_settings_gemini(use_streaming: bool) -> None: assert len(output.content) > 0 -def test_chat_function_calling_with_multiple_parts() -> None: +async def test_chat_function_calling_with_multiple_parts() -> None: @tool def search( question: str, @@ -849,7 +849,7 @@ def search( "sparrow, hawk, crow by using search tool." ) ) - response = llm_with_search_force.invoke([request]) + response = await llm_with_search_force.ainvoke([request]) assert isinstance(response, AIMessage) assert len(response.tool_calls) > 0 @@ -873,7 +873,7 @@ def search( "colors?" ) ) - result = llm_with_search.invoke([request, response, *tool_messages, follow_up]) + result = await llm_with_search.ainvoke([request, response, *tool_messages, follow_up]) assert isinstance(result, AIMessage) diff --git a/libs/genai/tests/integration_tests/test_embeddings.py b/libs/genai/tests/integration_tests/test_embeddings.py index 36fc08bc6..5fa7ce8aa 100644 --- a/libs/genai/tests/integration_tests/test_embeddings.py +++ b/libs/genai/tests/integration_tests/test_embeddings.py @@ -19,11 +19,11 @@ " model against the pickle rick?", ], ) -def test_embed_query_different_lengths(query: str) -> None: +async def test_embed_query_different_lengths(query: str) -> None: """Test embedding queries of different lengths.""" model = GoogleGenerativeAIEmbeddings(model=_MODEL) # Note: embed_query() is a sync method, but initialization needs the loop - result = model.embed_query(query, output_dimensionality=_OUTPUT_DIMENSIONALITY) + result = await model.aembed_query(query, output_dimensionality=_OUTPUT_DIMENSIONALITY) assert len(result) == 768 assert isinstance(result, list) @@ -47,12 +47,12 @@ async def test_aembed_query_different_lengths(query: str) -> None: assert isinstance(result, list) -def test_embed_documents() -> None: +async def test_embed_documents() -> None: """Test embedding a query.""" model = GoogleGenerativeAIEmbeddings( model=_MODEL, ) - result = model.embed_documents( + result = await model.aembed_documents( ["Hello world", "Good day, world"], output_dimensionality=_OUTPUT_DIMENSIONALITY ) assert len(result) == 2 @@ -94,24 +94,24 @@ def test_invalid_api_key_error_handling() -> None: ).embed_query("Hello world", output_dimensionality=_OUTPUT_DIMENSIONALITY) -def test_embed_documents_consistency() -> None: +async def test_embed_documents_consistency() -> None: """Test embedding consistency for the same document.""" model = 
GoogleGenerativeAIEmbeddings(model=_MODEL) doc = "Consistent document for testing" - result1 = model.embed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY) - result2 = model.embed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY) + result1 = await model.aembed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY) + result2 = await model.aembed_documents([doc], output_dimensionality=_OUTPUT_DIMENSIONALITY) assert result1 == result2 -def test_embed_documents_quality() -> None: +async def test_embed_documents_quality() -> None: """Smoke test embedding quality by comparing similar and dissimilar documents.""" model = GoogleGenerativeAIEmbeddings(model=_MODEL) similar_docs = ["Document A", "Similar Document A"] dissimilar_docs = ["Document A", "Completely Different Zebra"] - similar_embeddings = model.embed_documents( + similar_embeddings = await model.aembed_documents( similar_docs, output_dimensionality=_OUTPUT_DIMENSIONALITY ) - dissimilar_embeddings = model.embed_documents( + dissimilar_embeddings = await model.aembed_documents( dissimilar_docs, output_dimensionality=_OUTPUT_DIMENSIONALITY ) similar_distance = np.linalg.norm( @@ -123,22 +123,22 @@ def test_embed_documents_quality() -> None: assert similar_distance < dissimilar_distance -def test_embed_query_task_type() -> None: +async def test_embed_query_task_type() -> None: """Test for task_type.""" embeddings = GoogleGenerativeAIEmbeddings(model=_MODEL, task_type="clustering") - emb = embeddings.embed_query( + emb = await embeddings.aembed_query( "How does alphafold work?", output_dimensionality=_OUTPUT_DIMENSIONALITY ) embeddings2 = GoogleGenerativeAIEmbeddings(model=_MODEL) - emb2 = embeddings2.embed_query( + emb2 = await embeddings2.aembed_query( "How does alphafold work?", task_type="clustering", output_dimensionality=_OUTPUT_DIMENSIONALITY, ) embeddings3 = GoogleGenerativeAIEmbeddings(model=_MODEL) - emb3 = embeddings3.embed_query( + emb3 = await embeddings3.aembed_query( "How does alphafold work?", output_dimensionality=_OUTPUT_DIMENSIONALITY ) diff --git a/libs/genai/tests/integration_tests/test_function_call.py b/libs/genai/tests/integration_tests/test_function_call.py index f966bef84..434b0904d 100644 --- a/libs/genai/tests/integration_tests/test_function_call.py +++ b/libs/genai/tests/integration_tests/test_function_call.py @@ -18,7 +18,7 @@ "model_name", MODEL_NAMES, ) -def test_function_call(model_name: str) -> None: +async def test_function_call(model_name: str) -> None: functions = [ { "name": "get_weather", @@ -37,7 +37,7 @@ def test_function_call(model_name: str) -> None: } ] llm = ChatGoogleGenerativeAI(model=model_name).bind(functions=functions) - res = llm.invoke("what weather is today in san francisco?") + res = await llm.ainvoke("what weather is today in san francisco?") assert res assert res.additional_kwargs assert "function_call" in res.additional_kwargs @@ -52,14 +52,14 @@ def test_function_call(model_name: str) -> None: "model_name", MODEL_NAMES, ) -def test_tool_call(model_name: str) -> None: +async def test_tool_call(model_name: str) -> None: @tool def search_tool(query: str) -> str: """Searches the web for `query` and returns the result.""" raise NotImplementedError llm = ChatGoogleGenerativeAI(model=model_name).bind(functions=[search_tool]) - response = llm.invoke("weather in san francisco") + response = await llm.ainvoke("weather in san francisco") assert isinstance(response, AIMessage) function_call = response.additional_kwargs.get("function_call") assert function_call 
@@ -79,9 +79,9 @@ class MyModel(BaseModel): "model_name", MODEL_NAMES, ) -def test_pydantic_call(model_name: str) -> None: +async def test_pydantic_call(model_name: str) -> None: llm = ChatGoogleGenerativeAI(model=model_name).bind(functions=[MyModel]) - response = llm.invoke("my name is Erick and I am 27 years old") + response = await llm.ainvoke("my name is Erick and I am 27 years old") assert isinstance(response, AIMessage) function_call = response.additional_kwargs.get("function_call") assert function_call diff --git a/libs/genai/tests/integration_tests/test_llms.py b/libs/genai/tests/integration_tests/test_llms.py index 9b72f9493..9ca66e09d 100644 --- a/libs/genai/tests/integration_tests/test_llms.py +++ b/libs/genai/tests/integration_tests/test_llms.py @@ -17,13 +17,13 @@ "model_name", MODEL_NAMES, ) -def test_google_generativeai_call(model_name: str) -> None: +async def test_google_generativeai_call(model_name: str) -> None: """Test valid call to Google GenerativeAI text API.""" if model_name: llm = GoogleGenerativeAI(max_tokens=10, model=model_name) else: llm = GoogleGenerativeAI(max_tokens=10) - output = llm.invoke("Say foo:") + output = await llm.ainvoke("Say foo:") assert isinstance(output, str) assert llm._llm_type == "google_gemini" assert llm.client.model == f"models/{model_name}" @@ -33,9 +33,9 @@ def test_google_generativeai_call(model_name: str) -> None: "model_name", MODEL_NAMES, ) -def test_google_generativeai_generate(model_name: str) -> None: +async def test_google_generativeai_generate(model_name: str) -> None: llm = GoogleGenerativeAI(temperature=0.3, model=model_name) - output = llm.generate(["Say foo:"]) + output = await llm.agenerate(["Say foo:"]) assert isinstance(output, LLMResult) assert len(output.generations) == 1 assert len(output.generations[0]) == 1 @@ -59,9 +59,11 @@ async def test_google_generativeai_agenerate(model_name: str) -> None: "model_name", MODEL_NAMES, ) -def test_generativeai_stream(model_name: str) -> None: +async def test_generativeai_stream(model_name: str) -> None: llm = GoogleGenerativeAI(temperature=0, model=model_name) - outputs = list(llm.stream("Please say foo:")) + outputs = [] + async for chunk in llm.astream("Please say foo:"): + outputs.append(chunk) assert isinstance(outputs[0], str) @@ -79,10 +81,10 @@ def test_generativeai_get_num_tokens_gemini(model_name: str) -> None: "model_name", MODEL_NAMES, ) -def test_safety_settings_gemini(model_name: str) -> None: +async def test_safety_settings_gemini(model_name: str) -> None: # test with blocked prompt llm = GoogleGenerativeAI(temperature=0, model=model_name) - output = llm.generate(prompts=["how to make a bomb?"]) + output = await llm.agenerate(prompts=["how to make a bomb?"]) assert isinstance(output, LLMResult) assert len(output.generations[0]) > 0 @@ -92,14 +94,14 @@ def test_safety_settings_gemini(model_name: str) -> None: } # test with safety filters directly to generate - output = llm.generate(["how to make a bomb?"], safety_settings=safety_settings) + output = await llm.agenerate(["how to make a bomb?"], safety_settings=safety_settings) assert isinstance(output, LLMResult) assert len(output.generations[0]) > 0 # test with safety filters directly to stream - output_stream = llm.stream("how to make a bomb?", safety_settings=safety_settings) - assert isinstance(output_stream, Generator) - streamed_messages = list(output_stream) + streamed_messages = [] + async for chunk in llm.astream("how to make a bomb?", safety_settings=safety_settings): + streamed_messages.append(chunk) 
assert len(streamed_messages) > 0 # test with safety filters on instantiation @@ -108,6 +110,6 @@ def test_safety_settings_gemini(model_name: str) -> None: safety_settings=safety_settings, temperature=0, ) - output = llm.generate(prompts=["how to make a bomb?"]) + output = await llm.agenerate(prompts=["how to make a bomb?"]) assert isinstance(output, LLMResult) assert len(output.generations[0]) > 0 diff --git a/libs/genai/tests/integration_tests/test_tools.py b/libs/genai/tests/integration_tests/test_tools.py index 6af8c8d52..401125582 100644 --- a/libs/genai/tests/integration_tests/test_tools.py +++ b/libs/genai/tests/integration_tests/test_tools.py @@ -23,7 +23,7 @@ def check_tennis_score(player: str) -> str: return f"{player} is currently winning 6-0" -def test_multiple_tools() -> None: +async def test_multiple_tools() -> None: tools = [check_weather, check_live_traffic, check_tennis_score] model = ChatGoogleGenerativeAI( @@ -34,6 +34,6 @@ def test_multiple_tools() -> None: input_ = "What is the latest tennis score for Leonid?" - result = model_with_tools.invoke(input_) + result = await model_with_tools.ainvoke(input_) assert len(result.tool_calls) == 1 assert result.tool_calls[0]["name"] == "check_tennis_score" diff --git a/libs/genai/uv.lock b/libs/genai/uv.lock index ff1408a96..19830e2f8 100644 --- a/libs/genai/uv.lock +++ b/libs/genai/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10.0, <4.0.0" resolution-markers = [ "python_full_version >= '3.14' and platform_python_implementation == 'PyPy'", @@ -246,6 +246,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, ] +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + [[package]] name = "filetype" version = "1.2.0" @@ -526,6 +535,7 @@ test = [ { name = "pytest-retry" }, { name = "pytest-socket" }, { name = "pytest-watcher" }, + { name = "pytest-xdist" }, { name = "syrupy" }, ] test-integration = [ @@ -562,6 +572,7 @@ test = [ { name = "pytest-retry", specifier = ">=1.7.0,<2.0.0" }, { name = "pytest-socket", specifier = ">=0.7.0,<1.0.0" }, { name = "pytest-watcher", specifier = ">=0.4.0,<1.0.0" }, + { name = "pytest-xdist", specifier = ">=3.8.0,<4.0.0" }, { name = "syrupy", specifier = ">=4.9.0,<5.0.0" }, ] test-integration = [{ name = "pytest", specifier = ">=8.4.0,<9.0.0" }] @@ -1543,6 +1554,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/3a/c44a76c6bb5e9e896d9707fb1c704a31a0136950dec9514373ced0684d56/pytest_watcher-0.4.3-py3-none-any.whl", hash = "sha256:d59b1e1396f33a65ea4949b713d6884637755d641646960056a90b267c3460f9", size = 11852, upload-time = "2024-08-28T17:37:45.731Z" }, ] 
+[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" diff --git a/libs/vertexai/Makefile b/libs/vertexai/Makefile index b11bdb520..815c43ebe 100644 --- a/libs/vertexai/Makefile +++ b/libs/vertexai/Makefile @@ -15,7 +15,7 @@ test tests: uv run --group test pytest --disable-socket --allow-unix-socket --release $(TEST_FILE) integration_test integration_tests: - uv run --group test --group test_integration pytest --retries 3 --retry-delay 1 --release $(TEST_FILE) + uv run --group test --group test_integration pytest -n auto --retries 3 --retry-delay 1 --release $(TEST_FILE) test_watch: uv run ptw --snapshot-update --now . -- -vv $(TEST_FILE) diff --git a/libs/vertexai/langchain_google_vertexai/evaluators/evaluation.py b/libs/vertexai/langchain_google_vertexai/evaluators/evaluation.py index 4344c112f..d6afb3c15 100644 --- a/libs/vertexai/langchain_google_vertexai/evaluators/evaluation.py +++ b/libs/vertexai/langchain_google_vertexai/evaluators/evaluation.py @@ -240,6 +240,36 @@ async def _aevaluate_strings( response = await self._async_client.evaluate_instances(request) return _parse_response(response, metric=self._metric)[0] + async def aevaluate( + self, + examples: Sequence[dict[str, str]], + predictions: Sequence[dict[str, str]], + *, + question_key: str = "context", + answer_key: str = "reference", + prediction_key: str = "prediction", + instruction_key: str = "instruction", + **kwargs: Any, + ) -> list[dict]: + instances: list[dict] = [] + for example, prediction in zip(examples, predictions, strict=False): + row = {"prediction": prediction[prediction_key]} + if answer_key in example: + row["reference"] = example[answer_key] + if question_key in example: + row["context"] = example[question_key] + if instruction_key in example: + row["instruction"] = example[instruction_key] + instances.append(row) + + if self._metric in _METRICS_MULTIPLE_INSTANCES: + request = _prepare_request( + instances, metric=self._metric, location=self._location + ) + response = await self._async_client.evaluate_instances(request) + return _parse_response(response, metric=self._metric) + return [await self._aevaluate_strings(**i) for i in instances] + class VertexPairWiseStringEvaluator(_EvaluatorBase, PairwiseStringEvaluator): """Evaluate the perplexity of a predicted string.""" diff --git a/libs/vertexai/tests/integration_tests/test_anthropic_files.py b/libs/vertexai/tests/integration_tests/test_anthropic_files.py index 86e8cde04..c87ba70fb 100644 --- a/libs/vertexai/tests/integration_tests/test_anthropic_files.py +++ b/libs/vertexai/tests/integration_tests/test_anthropic_files.py @@ -10,7 +10,7 @@ @pytest.mark.extended -def test_pdf_gcs_uri() -> None: +async def test_pdf_gcs_uri() -> None: gcs_uri = 
"gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" llm = ChatAnthropicVertex( model="claude-sonnet-4-5@20250929", @@ -19,7 +19,7 @@ def test_pdf_gcs_uri() -> None: project=os.environ["PROJECT_ID"], ) - res = llm.invoke( + res = await llm.ainvoke( [ { "role": "user", @@ -34,7 +34,7 @@ def test_pdf_gcs_uri() -> None: @pytest.mark.extended -def test_pdf_byts() -> None: +async def test_pdf_byts() -> None: gcs_uri = "gs://cloud-samples-data/generative-ai/pdf/2403.05530.pdf" llm = ChatAnthropicVertex( model="claude-sonnet-4-5@20250929", @@ -45,7 +45,7 @@ def test_pdf_byts() -> None: image = load_image_from_gcs(gcs_uri, "kuligin-sandbox1") image_data = image_bytes_to_b64_string(image.data, "ascii", "pdf") - res = llm.invoke( + res = await llm.ainvoke( [ { "role": "user", @@ -60,7 +60,7 @@ def test_pdf_byts() -> None: @pytest.mark.extended -def test_https_image() -> None: +async def test_https_image() -> None: uri = "https://picsum.photos/seed/picsum/200/300.jpg" llm = ChatAnthropicVertex( @@ -70,7 +70,7 @@ def test_https_image() -> None: project=os.environ["PROJECT_ID"], ) - res = llm.invoke( + res = await llm.ainvoke( [ { "role": "user", diff --git a/libs/vertexai/tests/integration_tests/test_callbacks.py b/libs/vertexai/tests/integration_tests/test_callbacks.py index 1e9f640c4..e45c1db01 100644 --- a/libs/vertexai/tests/integration_tests/test_callbacks.py +++ b/libs/vertexai/tests/integration_tests/test_callbacks.py @@ -12,16 +12,16 @@ "model_name", [_DEFAULT_MODEL_NAME], ) -def test_llm_invoke(model_name: str) -> None: +async def test_llm_invoke(model_name: str) -> None: vb = VertexAICallbackHandler() llm = VertexAI(model_name=model_name, temperature=0.0, callbacks=[vb]) - _ = llm.invoke("2+2") + _ = await llm.ainvoke("2+2") assert vb.successful_requests == 1 assert vb.prompt_tokens > 0 assert vb.completion_tokens > 0 prompt_tokens = vb.prompt_tokens completion_tokens = vb.completion_tokens - _ = llm.invoke("2+2") + _ = await llm.ainvoke("2+2") assert vb.successful_requests == 2 assert vb.prompt_tokens > prompt_tokens assert vb.completion_tokens > completion_tokens @@ -32,17 +32,17 @@ def test_llm_invoke(model_name: str) -> None: "model_name", [_DEFAULT_MODEL_NAME], ) -def test_chat_call(model_name: str) -> None: +async def test_chat_call(model_name: str) -> None: vb = VertexAICallbackHandler() llm = ChatVertexAI(model_name=model_name, temperature=0.0, callbacks=[vb]) message = HumanMessage(content="Hello") - _ = llm.invoke([message]) + _ = await llm.ainvoke([message]) assert vb.successful_requests == 1 assert vb.prompt_tokens > 0 assert vb.completion_tokens > 0 prompt_tokens = vb.prompt_tokens completion_tokens = vb.completion_tokens - _ = llm.invoke([message]) + _ = await llm.ainvoke([message]) assert vb.successful_requests == 2 assert vb.prompt_tokens > prompt_tokens assert vb.completion_tokens > completion_tokens @@ -53,26 +53,26 @@ def test_chat_call(model_name: str) -> None: "model_name", [_DEFAULT_MODEL_NAME], ) -def test_invoke_config(model_name: str) -> None: +async def test_invoke_config(model_name: str) -> None: vb = VertexAICallbackHandler() llm = VertexAI(model_name=model_name, temperature=0.0) - llm.invoke("2+2", config={"callbacks": [vb]}) + await llm.ainvoke("2+2", config={"callbacks": [vb]}) assert vb.successful_requests == 1 assert vb.prompt_tokens > 0 assert vb.completion_tokens > 0 prompt_tokens = vb.prompt_tokens completion_tokens = vb.completion_tokens - llm.invoke("2+2", config={"callbacks": [vb]}) + await llm.ainvoke("2+2", config={"callbacks": [vb]}) assert 
vb.successful_requests == 2 assert vb.prompt_tokens > prompt_tokens assert vb.completion_tokens > completion_tokens @pytest.mark.release -def test_llm_stream() -> None: +async def test_llm_stream() -> None: vb = VertexAICallbackHandler() llm = VertexAI(model_name=_DEFAULT_MODEL_NAME, temperature=0.0, callbacks=[vb]) - for _ in llm.stream("2+2"): + async for _ in llm.astream("2+2"): pass assert vb.successful_requests == 1 assert vb.prompt_tokens > 0 diff --git a/libs/vertexai/tests/integration_tests/test_chains.py b/libs/vertexai/tests/integration_tests/test_chains.py index 9f12fc159..d3f7db475 100644 --- a/libs/vertexai/tests/integration_tests/test_chains.py +++ b/libs/vertexai/tests/integration_tests/test_chains.py @@ -26,7 +26,7 @@ class RecordDog(BaseModel): @pytest.mark.release -def test_create_structured_runnable() -> None: +async def test_create_structured_runnable() -> None: llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME) prompt = ChatPromptTemplate.from_template( "You are a world class algorithm for recording entities.\nMake calls to the " @@ -34,12 +34,12 @@ def test_create_structured_runnable() -> None: "Tip: Make sure to answer in the correct format" ) chain = create_structured_runnable([RecordPerson, RecordDog], llm, prompt=prompt) - res = chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"}) + res = await chain.ainvoke({"input": "Harry was a chubby brown beagle who loved chicken"}) assert isinstance(res, RecordDog) @pytest.mark.release -def test_create_structured_runnable_with_prompt() -> None: +async def test_create_structured_runnable_with_prompt() -> None: llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, temperature=0) prompt = ChatPromptTemplate.from_template( "Describe a random {class} and mention their name, {attr} and favorite food" @@ -47,12 +47,12 @@ def test_create_structured_runnable_with_prompt() -> None: chain = create_structured_runnable( [RecordPerson, RecordDog], llm, prompt=prompt, use_extra_step=True ) - res = chain.invoke({"class": "person", "attr": "age"}) + res = await chain.ainvoke({"class": "person", "attr": "age"}) assert isinstance(res, RecordPerson) @pytest.mark.release -def test_reflection() -> None: +async def test_reflection() -> None: class Reflection(BaseModel): reflections: str = Field( description="The critique and reflections on the sufficiency, superfluency," @@ -93,5 +93,5 @@ def normalized_score(self) -> float: ) reflection_llm_chain = prompt | llm.with_structured_output(Reflection) - res = reflection_llm_chain.invoke({"input": "Mars"}) + res = await reflection_llm_chain.ainvoke({"input": "Mars"}) assert isinstance(res, Reflection) diff --git a/libs/vertexai/tests/integration_tests/test_chat_models.py b/libs/vertexai/tests/integration_tests/test_chat_models.py index d4186406e..07ea6b718 100644 --- a/libs/vertexai/tests/integration_tests/test_chat_models.py +++ b/libs/vertexai/tests/integration_tests/test_chat_models.py @@ -123,7 +123,7 @@ def test_init_from_credentials_obj() -> None: @pytest.mark.release @pytest.mark.parametrize("model_name", model_names_to_test) @pytest.mark.parametrize("endpoint_version", endpoint_versions) -def test_vertexai_single_call(model_name: str | None, endpoint_version: str) -> None: +async def test_vertexai_single_call(model_name: str | None, endpoint_version: str) -> None: """Test making a single invoke call.""" model = ChatVertexAI( model_name=model_name, @@ -131,7 +131,7 @@ def test_vertexai_single_call(model_name: str | None, endpoint_version: str) -> 
         endpoint_version=endpoint_version,
     )
     message = HumanMessage(content="Hello")
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
     _check_usage_metadata(response)
@@ -139,7 +139,9 @@ def test_vertexai_single_call(model_name: str | None, endpoint_version: str) ->
-def test_candidates() -> None:
+@pytest.mark.release
+@pytest.mark.xfail(reason="vertex api doesn't respect n/candidate_count")
+async def test_candidates() -> None:
     """Test making a single invoke call with `n>1`.
 
     # TODO: what is chat-bison@001? is it marked for deprecation?
@@ -148,7 +150,7 @@ def test_candidates() -> None:
         model_name="chat-bison@001", temperature=0.3, n=2, rate_limiter=RATE_LIMITER
     )
     message = HumanMessage(content="Hello")
-    response = model.generate(messages=[[message]])
+    response = await model.agenerate(messages=[[message]])
     assert isinstance(response, LLMResult)
     assert len(response.generations) == 1
     assert len(response.generations[0]) == 2
@@ -235,7 +237,7 @@ async def test_vertexai_astream() -> None:
 
 
 @pytest.mark.release
-def test_multimodal() -> None:
+async def test_multimodal() -> None:
     """Test multimodal input with a gcs image URL in chat completions format."""
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     gcs_url = (
@@ -250,13 +252,13 @@ def test_multimodal() -> None:
         "text": "What is shown in this image?",
     }
     message = HumanMessage(content=[text_message, image_message])
-    output = llm.invoke([message])
+    output = await llm.ainvoke([message])
     assert isinstance(output.content, str)
     assert isinstance(output, AIMessage)
     _check_usage_metadata(output)
 
     llm = ChatVertexAI(model_name="gemini-2.5-pro", rate_limiter=RATE_LIMITER)
-    for chunk in llm.stream([message]):
+    async for chunk in llm.astream([message]):
         assert isinstance(chunk, AIMessageChunk)
 
@@ -280,7 +282,9 @@ def test_multimodal() -> None:
 
 @pytest.mark.release
 @pytest.mark.parametrize(("file_uri", "mime_type"), MULTIMODAL_INPUTS)
-def test_multimodal_media_file_uri(file_uri, mime_type) -> None:
+async def test_multimodal_media_file_uri(file_uri, mime_type) -> None:
     """Test multimodal input with gcs file URIs (video, audio, image)."""
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     media_message = {
@@ -293,14 +297,17 @@ def test_multimodal_media_file_uri(file_uri, mime_type) -> None:
         "text": "Describe the attached media in 5 words!",
     }
     message = HumanMessage(content=[text_message, media_message])
-    output = llm.invoke([message])
+    output = await llm.ainvoke([message])
     assert isinstance(output.content, str)
 
 
 @pytest.mark.release
 @pytest.mark.parametrize(("file_uri", "mime_type"), MULTIMODAL_INPUTS)
 @pytest.mark.first
-def test_multimodal_media_inline_base64(file_uri, mime_type) -> None:
+async def test_multimodal_media_inline_base64(file_uri, mime_type) -> None:
     """Test multimodal input with base64 encoded media content (video, audio, image)."""
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     storage_client = storage.Client()
@@ -316,13 +323,15 @@ def test_multimodal_media_inline_base64(file_uri, mime_type) -> None:
         "text": "Describe the attached media in 5 words!",
     }
     message = HumanMessage(content=[text_message, media_message])
-    output = llm.invoke([message])
+    output = await llm.ainvoke([message])
     assert isinstance(output.content, str)
 
 
 @pytest.mark.release
 @pytest.mark.first
-def test_multimodal_media_inline_base64_template() -> None:
+async def test_multimodal_media_inline_base64_template() -> None:
     """Test multimodal input with base64 encoded media content using prompt template."""
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME)
     prompt_template = ChatPromptTemplate(
@@ -351,12 +360,13 @@ def test_multimodal_media_inline_base64_template() -> None:
     blob = storage.Blob.from_string(file_uri, client=storage_client)
     media_base64 = base64.b64encode(blob.download_as_bytes()).decode()
     chain = prompt_template | llm
-    output = chain.invoke({"media_base64": media_base64, "mime_type": mime_type})
+    output = await chain.ainvoke({"media_base64": media_base64, "mime_type": mime_type})
     assert isinstance(output.content, str)
 
 
 @pytest.mark.extended
-def test_multimodal_media_inline_base64_agent() -> None:
+async def test_multimodal_media_inline_base64_agent() -> None:
     """Test multimodal input with base64 encoded media content using a ReAct agent."""
     from langchain import agents
 
@@ -390,7 +400,7 @@ def get_climate_info(query: str) -> str:
         model=llm,
         tools=tools,
     )
-    output = agent.invoke(
+    output = await agent.ainvoke(
         {"messages": [{"role": "user", "content": [text_message, media_message]}]}
     )
     assert "messages" in output
@@ -398,7 +408,8 @@ def get_climate_info(query: str) -> str:
 
 
 @pytest.mark.flaky(retries=3, delay=1)
-def test_audio_timestamp() -> None:
+async def test_audio_timestamp() -> None:
     storage_client = storage.Client()
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
 
@@ -419,7 +430,7 @@ def test_audio_timestamp() -> None:
     text_message = {"type": "text", "text": instruction}
 
     message = HumanMessage(content=[media_message, text_message])
-    output = llm.invoke([message], audio_timestamp=True)
+    output = await llm.ainvoke([message], audio_timestamp=True)
     assert isinstance(output.content, str)
     assert re.search(r"(\d{2}:\d{2}:?|\[\d{2}:\d{2}:\d{2}\])", output.content)
 
@@ -461,7 +472,10 @@ def test_parse_history_gemini_multimodal_FC() -> None:
 @pytest.mark.xfail(reason="investigating")
 @pytest.mark.release
 @pytest.mark.parametrize(("file_uri", "mime_type"), [VIDEO_PARAM])
-def test_multimodal_video_metadata(file_uri, mime_type) -> None:
+async def test_multimodal_video_metadata(file_uri, mime_type) -> None:
     llm = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     media_message = {
         "type": "media",
@@ -478,32 +492,35 @@ def test_multimodal_video_metadata(file_uri, mime_type) -> None:
     }
 
     message = HumanMessage(content=[text_message, media_message])
-    output = llm.invoke([message])
+    output = await llm.ainvoke([message])
     assert isinstance(output.content, str)
 
 
 @pytest.mark.release
 @pytest.mark.parametrize("model_name", model_names_to_test)
-def test_vertexai_single_call_with_history(model_name: str | None) -> None:
+async def test_vertexai_single_call_with_history(model_name: str | None) -> None:
     model = ChatVertexAI(model_name=model_name, rate_limiter=RATE_LIMITER)
     text_question1, text_answer1 = "How much is 2+2?", "4"
     text_question2 = "How much is 3+3?"
     message1 = HumanMessage(content=text_question1)
     message2 = AIMessage(content=text_answer1)
     message3 = HumanMessage(content=text_question2)
-    response = model.invoke([message1, message2, message3])
+    response = await model.ainvoke([message1, message2, message3])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
 
 
 @pytest.mark.release
-def test_vertexai_system_message() -> None:
+async def test_vertexai_system_message() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     system_instruction = """CymbalBank is a bank located in London"""
     text_question1 = "Where is Cymbal located? Provide only the name of the city."
     sys_message = SystemMessage(content=system_instruction)
     message1 = HumanMessage(content=text_question1)
-    response = model.invoke([sys_message, message1])
+    response = await model.ainvoke([sys_message, message1])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
 
@@ -511,20 +528,22 @@ def test_vertexai_system_message() -> None:
 
 
 @pytest.mark.release
-def test_vertexai_single_call_with_no_system_messages() -> None:
+async def test_vertexai_single_call_with_no_system_messages() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
     text_question1, text_answer1 = "How much is 2+2?", "4"
     text_question2 = "How much is 3+3?"
     message1 = HumanMessage(content=text_question1)
     message2 = AIMessage(content=text_answer1)
     message3 = HumanMessage(content=text_question2)
-    response = model.invoke([message1, message2, message3])
+    response = await model.ainvoke([message1, message2, message3])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
 
 
 @pytest.mark.release
-def test_vertexai_single_call_previous_blocked_response() -> None:
+async def test_vertexai_single_call_previous_blocked_response() -> None:
     """If a previous call was blocked, the AIMessage will have empty content.
 
     Empty content should be ignored.
@@ -551,7 +570,7 @@ def test_vertexai_single_call_previous_blocked_response() -> None:
         },
     )
     message2 = HumanMessage(content=text_question2)
-    response = model.invoke([message1, message2])
+    response = await model.ainvoke([message1, message2])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
 
@@ -570,7 +589,9 @@ def test_get_num_tokens_from_messages(model_name: str) -> None:
 
 @pytest.mark.extended
 @pytest.mark.parametrize("endpoint_version", endpoint_versions)
-def test_chat_vertexai_gemini_function_calling(endpoint_version: str) -> None:
+async def test_chat_vertexai_gemini_function_calling(endpoint_version: str) -> None:
     class MyModel(BaseModel):
         name: str
         age: int
@@ -586,7 +607,7 @@ class MyModel(BaseModel):
         rate_limiter=RATE_LIMITER,
         endpoint_version=endpoint_version,
     ).bind_tools([MyModel])
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     _check_tool_calls(response, "MyModel")
 
     # Test .bind_tools with function
@@ -598,7 +619,7 @@ def my_model(name: str, age: int) -> None:
         safety_settings=safety,
         rate_limiter=RATE_LIMITER,
     ).bind_tools([my_model])
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     _check_tool_calls(response, "my_model")
 
     # Test .bind_tools with tool
@@ -609,13 +630,13 @@ def my_tool(name: str, age: int) -> None:
     model = ChatVertexAI(
         model_name=_DEFAULT_MODEL_NAME, safety_settings=safety
     ).bind_tools([my_tool])
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     _check_tool_calls(response, "my_tool")
 
     # Test streaming
-    stream = model.stream([message])
+    stream = model.astream([message])
     first = True
-    for chunk in stream:
+    async for chunk in stream:
         if first:
             gathered = chunk
             first = False
@@ -630,7 +651,8 @@ def my_tool(name: str, age: int) -> None:
 
 
 @pytest.mark.release
-def test_chat_vertexai_gemini_function_calling_tool_config_any() -> None:
+async def test_chat_vertexai_gemini_function_calling_tool_config_any() -> None:
     class MyModel(BaseModel):
         name: str
         age: int
@@ -652,7 +674,7 @@ class MyModel(BaseModel):
         },
     )
     message = HumanMessage(content="My name is Erick and I am 27 years old")
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
     assert response.content == ""
@@ -669,7 +691,8 @@ class MyModel(BaseModel):
 
 
 @pytest.mark.release
-def test_chat_vertexai_gemini_function_calling_tool_config_none() -> None:
+async def test_chat_vertexai_gemini_function_calling_tool_config_none() -> None:
     class MyModel(BaseModel):
         name: str
         age: int
@@ -686,7 +709,7 @@ class MyModel(BaseModel):
         },
     )
     message = HumanMessage(content="My name is Erick and I am 27 years old")
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
     assert response.content != ""
@@ -695,9 +718,10 @@ class MyModel(BaseModel):
 
 
 @pytest.mark.release
-def test_chat_model_multiple_system_message() -> None:
+async def test_chat_model_multiple_system_message() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME)
-    response = model.invoke(
+    response = await model.ainvoke(
         [
             SystemMessage("Be helpful"),
            AIMessage("Hi, I'm LeoAI. How can I help?"),
@@ -709,7 +733,9 @@ def test_chat_model_multiple_system_message() -> None:
 
 @pytest.mark.release
 @pytest.mark.parametrize("method", [None, "json_mode"])
-def test_chat_vertexai_gemini_with_structured_output(
+async def test_chat_vertexai_gemini_with_structured_output(
     method: Literal["json_mode"] | None,
 ) -> None:
     class MyModel(BaseModel):
@@ -727,7 +753,7 @@ class MyModel(BaseModel):
     model = llm.with_structured_output(MyModel, method=method)
     message = HumanMessage(content="My name is Erick and I am 27 years old")
 
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, MyModel)
     assert response == MyModel(name="Erick", age=27)
 
@@ -740,7 +766,7 @@ class MyModel(BaseModel):
         },
         method=method,
     )
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert response == {
         "name": "Erick",
         "age": 27,
@@ -759,7 +785,7 @@ class MyModel(BaseModel):
         },
         method=method,
     )
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert response == {
         "name": "Erick",
         "age": 27,
     }
 
@@ -767,7 +793,8 @@ class MyModel(BaseModel):
 
 
 @pytest.mark.release
-def test_chat_vertexai_gemini_with_structured_output_nested_model() -> None:
+async def test_chat_vertexai_gemini_with_structured_output_nested_model() -> None:
     class Argument(BaseModel):
         description: str
 
@@ -783,14 +810,16 @@ class Response(BaseModel):
         Response, method="json_mode"
     )
 
-    response = model.invoke("Why is Real Madrid better than Barcelona?")
+    response = await model.ainvoke("Why is Real Madrid better than Barcelona?")
 
     assert isinstance(response, Response)
 
 
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
-def test_chat_vertexai_gemini_function_calling_with_multiple_parts() -> None:
+async def test_chat_vertexai_gemini_function_calling_with_multiple_parts() -> None:
     @tool
     def search(
         question: str,
@@ -825,7 +854,7 @@ def search(
     request = HumanMessage(
         content="Please tell the primary color of following birds: sparrow, hawk, crow",
     )
-    response = llm_with_search_force.invoke([request])
+    response = await llm_with_search_force.ainvoke([request])
 
     assert isinstance(response, AIMessage)
     tool_calls = response.tool_calls
@@ -843,7 +872,7 @@ def search(
         )
         tool_messages.append(tool_message)
 
-    result = llm_with_search.invoke([request, response, *tool_messages])
+    result = await llm_with_search.ainvoke([request, response, *tool_messages])
 
     assert isinstance(result, AIMessage)
     assert "brown" in result.content
@@ -853,12 +882,14 @@ def search(
 
 # Image Generation is knwown to be flaky.
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
-def test_chat_vertexai_gemini_image_output() -> None:
+async def test_chat_vertexai_gemini_image_output() -> None:
     model = ChatVertexAI(
         model_name=_DEFAULT_IMAGE_GENERATION_MODEL_NAME,
         response_modalities=[Modality.TEXT, Modality.IMAGE],
     )
-    result = model.invoke("Generate an image of a cat. Then, say meow!")
+    result = await model.ainvoke("Generate an image of a cat. Then, say meow!")
 
     assert isinstance(result, AIMessage)
     assert isinstance(result.content, list)
@@ -881,9 +912,11 @@ def test_chat_vertexai_gemini_image_output() -> None:
 
 # Image Generation is knwown to be flaky.
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
-def test_chat_vertexai_gemini_image_output_with_generation_config() -> None:
+async def test_chat_vertexai_gemini_image_output_with_generation_config() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_IMAGE_GENERATION_MODEL_NAME)
-    result = model.invoke(
+    result = await model.ainvoke(
         "Generate an image of a cat. Then, say meow!",
         response_modalities=[Modality.TEXT, Modality.IMAGE],
     )
@@ -915,9 +948,11 @@ def test_chat_vertexai_gemini_image_output_with_generation_config() -> None:
 
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
-def test_chat_vertexai_gemini_thinking_auto() -> None:
+async def test_chat_vertexai_gemini_thinking_auto() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_THINKING_MODEL_NAME)
-    response = model.invoke("How many O's are in Google? Think before you answer.")
+    response = await model.ainvoke("How many O's are in Google? Think before you answer.")
     assert isinstance(response, AIMessage)
     assert response.usage_metadata is not None
     assert response.usage_metadata["output_token_details"]["reasoning"] > 0
@@ -930,9 +965,11 @@ def test_chat_vertexai_gemini_thinking_auto() -> None:
 
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
-def test_chat_vertexai_gemini_thinking_configured() -> None:
+async def test_chat_vertexai_gemini_thinking_configured() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_THINKING_MODEL_NAME, thinking_budget=100)
-    response = model.invoke("How many O's are in Google? Think before you answer.")
+    response = await model.ainvoke("How many O's are in Google? Think before you answer.")
     assert isinstance(response, AIMessage)
     assert response.usage_metadata is not None
     assert response.usage_metadata["output_token_details"]["reasoning"] > 0
@@ -970,7 +1007,10 @@ def _check_thinking_output(content: list, output_version: str) -> None:
 @pytest.mark.flaky(retries=3, delay=1)
 @pytest.mark.release
 @pytest.mark.parametrize("output_version", ["v0", "v1"])
-def test_chat_vertexai_gemini_thinking_auto_include_thoughts(
+async def test_chat_vertexai_gemini_thinking_auto_include_thoughts(
     output_version: str,
 ) -> None:
     model = ChatVertexAI(
@@ -985,7 +1025,7 @@ def test_chat_vertexai_gemini_thinking_auto_include_thoughts(
     }
 
     full: AIMessageChunk | None = None
-    for chunk in model.stream([input_message]):
+    async for chunk in model.astream([input_message]):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
@@ -1001,11 +1041,12 @@ def test_chat_vertexai_gemini_thinking_auto_include_thoughts(
 
     # Test we can pass back in
     next_message = {"role": "user", "content": "Thanks!"}
-    _ = model.invoke([input_message, full, next_message])
+    _ = await model.ainvoke([input_message, full, next_message])
 
 
 @pytest.mark.release
-def test_thought_signatures() -> None:
+async def test_thought_signatures() -> None:
     """Test Gemini thought signatures.
     Verifies that thought signature byte blobs flow correctly through the entire Gemini
@@ -1029,19 +1070,19 @@ def get_weather(location: str) -> str:
     }
     full: BaseMessageChunk | None = None
-    for chunk in llm_with_tools.stream([input_message]):
+    async for chunk in llm_with_tools.astream([input_message]):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
     next_message = {"role": "user", "content": "Thanks!"}
-    _ = llm_with_tools.invoke([input_message, full, next_message])
+    _ = await llm_with_tools.ainvoke([input_message, full, next_message])
 @pytest.mark.release
-def test_chat_vertexai_gemini_thinking_disabled() -> None:
+async def test_chat_vertexai_gemini_thinking_disabled() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_THINKING_MODEL_NAME, thinking_budget=0)
-    response = model.invoke("How many O's are in Google?")
+    response = await model.ainvoke("How many O's are in Google?")
     assert isinstance(response, AIMessage)
     assert (
         response.usage_metadata["total_tokens"]  # type: ignore
@@ -1052,12 +1093,12 @@ def test_chat_vertexai_gemini_thinking_disabled() -> None:
 @pytest.mark.release
-def test_chat_vertexai_gemini_thinking_configurable() -> None:
+async def test_chat_vertexai_gemini_thinking_configurable() -> None:
     model = ChatVertexAI(model_name=_DEFAULT_THINKING_MODEL_NAME)
     configurable_model = model.configurable_fields(
         thinking_budget=ConfigurableField(id="thinking_budget")
     )
-    response = configurable_model.invoke(
+    response = await configurable_model.ainvoke(
         "How many O's are in Google?", {"configurable": {"thinking_budget": 0}}
     )
     assert isinstance(response, AIMessage)
@@ -1089,7 +1130,7 @@ def test_prediction_client_transport() -> None:
 @pytest.mark.extended
-def test_structured_output_schema_json() -> None:
+async def test_structured_output_schema_json() -> None:
     model = ChatVertexAI(
         rate_limiter=RATE_LIMITER,
         model_name=_DEFAULT_MODEL_NAME,
@@ -1108,7 +1149,7 @@ def test_structured_output_schema_json() -> None:
         },
     )
-    response = model.invoke("List a few popular cookie recipes")
+    response = await model.ainvoke("List a few popular cookie recipes")
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
@@ -1134,11 +1175,11 @@ def test_structured_output_schema_json() -> None:
         rate_limiter=RATE_LIMITER,
     )
     with pytest.raises(ValueError, match="response_mime_type"):
-        response = model.invoke("List a few popular cookie recipes")
+        response = await model.ainvoke("List a few popular cookie recipes")
 @pytest.mark.release
-def test_json_mode_typeddict() -> None:
+async def test_json_mode_typeddict() -> None:
     class MyModel(TypedDict):
         name: str
         age: int
@@ -1150,19 +1191,19 @@ class MyModel(TypedDict):
     model = llm.with_structured_output(MyModel, method="json_mode")
     message = HumanMessage(content="My name is Erick and I am 28 years old")
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, dict)
     assert response == {"name": "Erick", "age": 28}
     # Test stream
-    for chunk in model.stream([message]):
+    async for chunk in model.astream([message]):
         assert isinstance(chunk, dict)
         assert all(key in ["name", "age"] for key in chunk)
         assert chunk == {"name": "Erick", "age": 28}
 @pytest.mark.extended
-def test_structured_output_schema_enum() -> None:
+async def test_structured_output_schema_enum() -> None:
     model = ChatVertexAI(
         model_name=_DEFAULT_MODEL_NAME,
         response_schema={"type": "STRING", "enum": ["drama", "comedy", "documentary"]},
@@ -1170,7 +1211,7 @@ def test_structured_output_schema_enum() -> None:
         rate_limiter=RATE_LIMITER,
     )
-    response = model.invoke(
+    response = await model.ainvoke(
         """
         The film aims to educate and inform viewers about real-life subjects, events, or
         people. It offers a factual record of a particular topic by combining interviews
@@ -1187,7 +1228,7 @@ def test_structured_output_schema_enum() -> None:
 @pytest.mark.extended
 @pytest.mark.first
-def test_context_catching() -> None:
+async def test_context_catching() -> None:
     system_instruction = """
     You are an expert researcher. You always stick to the facts in the sources provided,
@@ -1232,14 +1273,14 @@ def test_context_catching() -> None:
         rate_limiter=RATE_LIMITER,
     )
-    response = chat.invoke("What is the secret number?")
+    response = await chat.ainvoke("What is the secret number?")
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
     # Using cached content in request
     chat = ChatVertexAI(model_name=_DEFAULT_MODEL_NAME, rate_limiter=RATE_LIMITER)
-    response = chat.invoke("What is the secret number?", cached_content=cached_content)
+    response = await chat.ainvoke(
+        "What is the secret number?", cached_content=cached_content
+    )
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
@@ -1247,7 +1288,7 @@ def test_context_catching() -> None:
 @pytest.mark.extended
 @pytest.mark.first
-def test_context_catching_tools() -> None:
+async def test_context_catching_tools() -> None:
     from langchain import agents
     @tool
@@ -1301,7 +1342,7 @@ def get_secret_number() -> int:
         model=chat,
         tools=tools,
     )
-    response = agent.invoke(
+    response = await agent.ainvoke(
         {"messages": [{"role": "user", "content": "what is the secret number?"}]}
     )
     assert "messages" in response
@@ -1321,7 +1362,7 @@ def test_json_serializable() -> None:
 @pytest.mark.release
-def test_langgraph_example() -> None:
+async def test_langgraph_example() -> None:
     llm = ChatVertexAI(
         model_name=_DEFAULT_MODEL_NAME,
         max_output_tokens=8192,
@@ -1364,11 +1405,11 @@ def test_langgraph_example() -> None:
         HumanMessage(content="Multiply 2 and 3"),
         HumanMessage(content="No, actually multiply 3 and 3!"),
     ]
-    step1 = llm.invoke(
+    step1 = await llm.ainvoke(
         messages,
         tools=[{"function_declarations": [add_declaration, multiply_declaration]}],
     )
-    step2 = llm.invoke(
+    step2 = await llm.ainvoke(
         [
             *messages,
             step1,
@@ -1438,7 +1479,7 @@ async def test_astream_events_langgraph_example() -> None:
 @pytest.mark.xfail(reason="can't add labels to the gemini content")
 @pytest.mark.release
-def test_label_metadata() -> None:
+async def test_label_metadata() -> None:
     llm = ChatVertexAI(
         model=_DEFAULT_MODEL_NAME,
         labels={
@@ -1446,14 +1487,14 @@ def test_label_metadata() -> None:
             "environment": "testing",
         },
     )
-    llm.invoke("hey! how are you")
+    await llm.ainvoke("hey! how are you")
 @pytest.mark.xfail(reason="can't add labels to the gemini content using invoke method")
 @pytest.mark.release
-def test_label_metadata_invoke_method() -> None:
+async def test_label_metadata_invoke_method() -> None:
     llm = ChatVertexAI(model=_DEFAULT_MODEL_NAME)
-    llm.invoke(
+    await llm.ainvoke(
         "hello! invoke method",
         labels={
             "task": "labels_using_invoke",
@@ -1486,25 +1527,25 @@ def multimodal_pdf_chain() -> RunnableSerializable:
 @pytest.mark.release
-def test_multimodal_pdf_input_gcs(multimodal_pdf_chain: RunnableSerializable) -> None:
+async def test_multimodal_pdf_input_gcs(
+    multimodal_pdf_chain: RunnableSerializable,
+) -> None:
     # TODO: parallelize with url and b64 tests
     gcs_uri = "gs://cloud-samples-data/generative-ai/pdf/2312.11805v3.pdf"  # GCS URI
-    response = multimodal_pdf_chain.invoke({"image": gcs_uri})
+    response = await multimodal_pdf_chain.ainvoke({"image": gcs_uri})
     assert isinstance(response, AIMessage)
 @pytest.mark.release
-def test_multimodal_pdf_input_url(multimodal_pdf_chain: RunnableSerializable) -> None:
+async def test_multimodal_pdf_input_url(
+    multimodal_pdf_chain: RunnableSerializable,
+) -> None:
     # TODO: parallelize with gcs and b64 tests
     url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"  # URL
-    response = multimodal_pdf_chain.invoke({"image": url})
+    response = await multimodal_pdf_chain.ainvoke({"image": url})
     assert isinstance(response, AIMessage)
 @pytest.mark.release
-def test_multimodal_pdf_input_b64(multimodal_pdf_chain: RunnableSerializable) -> None:
+async def test_multimodal_pdf_input_b64(
+    multimodal_pdf_chain: RunnableSerializable,
+) -> None:
     # TODO: parallelize with gcs and url tests
     url = "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
     request_response = requests.get(url, allow_redirects=True)
@@ -1513,14 +1554,14 @@ def test_multimodal_pdf_input_b64(multimodal_pdf_chain: RunnableSerializable) ->
         stream.write(request_response.content)
         image_data = base64.b64encode(stream.getbuffer()).decode("utf-8")
     image = f"data:application/pdf;base64,{image_data}"
-    response = multimodal_pdf_chain.invoke({"image": image})
+    response = await multimodal_pdf_chain.ainvoke({"image": image})
     assert isinstance(response, AIMessage)
 @pytest.mark.release
-def test_response_metadata_avg_logprobs() -> None:
+async def test_response_metadata_avg_logprobs() -> None:
     llm = ChatVertexAI(model="gemini-2.0-flash-001")
-    response = llm.invoke("Hello!")
+    response = await llm.ainvoke("Hello!")
     probs = response.response_metadata.get("avg_logprobs")
     if probs is not None:
         assert isinstance(probs, float)
@@ -1528,9 +1569,11 @@ def test_response_metadata_avg_logprobs() -> None:
 @pytest.mark.xfail(reason="logprobs are subject to daily quotas")
 @pytest.mark.release
-def test_logprobs() -> None:
+async def test_logprobs() -> None:
     llm = ChatVertexAI(model=_DEFAULT_MODEL_NAME, logprobs=2)
-    msg = llm.invoke("hey")
+    msg = await llm.ainvoke("hey")
     tokenprobs = msg.response_metadata.get("logprobs_result")
     assert tokenprobs is None or isinstance(tokenprobs, list)
     if tokenprobs:
@@ -1547,11 +1590,11 @@ def test_logprobs() -> None:
             stack.extend(token.get("top_logprobs", []))
     llm2 = ChatVertexAI(model=_DEFAULT_MODEL_NAME, logprobs=True)
-    msg2 = llm2.invoke("how are you")
+    msg2 = await llm2.ainvoke("how are you")
     assert msg2.response_metadata["logprobs_result"]
     llm3 = ChatVertexAI(model=_DEFAULT_MODEL_NAME, logprobs=False)
-    msg3 = llm3.invoke("howdy")
+    msg3 = await llm3.ainvoke("howdy")
     assert msg3.response_metadata.get("logprobs_result") is None
@@ -1579,7 +1622,7 @@ def test_location_init() -> None:
 @pytest.mark.release
 @pytest.mark.parametrize("model_name", model_names_to_test)
 @pytest.mark.parametrize("endpoint_version", endpoint_versions)
-def test_vertexai_global_location_single_call(
+async def test_vertexai_global_location_single_call(
     model_name: str | None, endpoint_version: str
 ) -> None:
     """Test ChatVertexAI single call with global location."""
@@ -1591,13 +1634,13 @@ def test_vertexai_global_location_single_call(
     )
     assert model.location == "global"
     message = HumanMessage(content="Hello")
-    response = model.invoke([message])
+    response = await model.ainvoke([message])
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
     _check_usage_metadata(response)
-def test_nested_bind_tools() -> None:
+async def test_nested_bind_tools() -> None:
     llm = ChatVertexAI(model=_DEFAULT_MODEL_NAME)
     class Person(BaseModel):
@@ -1610,7 +1653,7 @@ class People(BaseModel):
     llm = ChatVertexAI(model=_DEFAULT_MODEL_NAME)
     llm_with_tools = llm.bind_tools([People], tool_choice="People")
-    response = llm_with_tools.invoke("Chester, no hair color provided.")
+    response = await llm_with_tools.ainvoke("Chester, no hair color provided.")
     assert isinstance(response, AIMessage)
     assert response.tool_calls[0]["name"] == "People"
@@ -1633,7 +1676,7 @@ def _check_web_search_output(message: AIMessage, output_version: str) -> None:
 @pytest.mark.parametrize("output_version", ["v0", "v1"])
-def test_search_builtin(output_version: str) -> None:
+async def test_search_builtin(output_version: str) -> None:
     """Test the built-in search tool."""
     llm = ChatVertexAI(
         model=_DEFAULT_MODEL_NAME, output_version=output_version
@@ -1645,7 +1688,7 @@ def test_search_builtin(output_version: str) -> None:
     # Test streaming
     full: AIMessageChunk | None = None
-    for chunk in llm.stream([input_message]):
+    async for chunk in llm.astream([input_message]):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
@@ -1656,7 +1699,7 @@ def test_search_builtin(output_version: str) -> None:
         "role": "user",
         "content": "Tell me more about that last story.",
     }
-    response = llm.invoke([input_message, full, next_message])
+    response = await llm.ainvoke([input_message, full, next_message])
     _check_web_search_output(response, output_version)
@@ -1677,7 +1720,7 @@ def _check_code_execution_output(message: AIMessage, output_version: str) -> Non
 @pytest.mark.parametrize("output_version", ["v0", "v1"])
-def test_code_execution_builtin(output_version: str) -> None:
+async def test_code_execution_builtin(output_version: str) -> None:
     llm = ChatVertexAI(
         model=_DEFAULT_MODEL_NAME, output_version=output_version
     ).bind_tools([{"code_execution": {}}])
@@ -1687,7 +1730,7 @@ def test_code_execution_builtin(output_version: str) -> None:
     }
     full: AIMessageChunk | None = None
-    for chunk in llm.stream([input_message]):
+    async for chunk in llm.astream([input_message]):
         assert isinstance(chunk, AIMessageChunk)
         full = chunk if full is None else full + chunk
     assert isinstance(full, AIMessageChunk)
@@ -1699,5 +1742,5 @@ def test_code_execution_builtin(output_version: str) -> None:
         "role": "user",
         "content": "Can you show me the calculation again with comments?",
     }
-    response = llm.invoke([input_message, full, next_message])
+    response = await llm.ainvoke([input_message, full, next_message])
     _check_code_execution_output(response, output_version)
diff --git a/libs/vertexai/tests/integration_tests/test_embeddings.py b/libs/vertexai/tests/integration_tests/test_embeddings.py
index 302d8e4a7..562946383 100644
--- a/libs/vertexai/tests/integration_tests/test_embeddings.py
+++ b/libs/vertexai/tests/integration_tests/test_embeddings.py
@@ -37,12 +37,21 @@ def test_initialization() -> None:
     ("model_name", "embeddings_dim"),
     _EMBEDDING_MODELS,
 )
-def test_langchain_google_vertexai_embedding_documents(
+async def test_langchain_google_vertexai_embedding_documents(
     number_of_docs: int, model_name: str, embeddings_dim: int
 ) -> None:
     documents = ["foo bar"] * number_of_docs
     model = VertexAIEmbeddings(model=model_name)
-    output = model.embed_documents(documents)
+    output = await model.aembed_documents(documents)
     assert len(output) == number_of_docs
     for embedding in output:
         assert len(embedding) == embeddings_dim
@@ -53,13 +62,18 @@ def test_langchain_google_vertexai_embedding_documents(
     ("model_name", "embeddings_dim"),
     _EMBEDDING_MODELS,
 )
-def test_langchain_google_vertexai_embedding_documents_with_task_type(
+async def test_langchain_google_vertexai_embedding_documents_with_task_type(
     model_name: str,
     embeddings_dim: int,
 ) -> None:
     documents = ["foo bar"] * 8
     model = VertexAIEmbeddings(model=model_name)
-    output = model.embed_documents(documents)
+    output = await model.aembed_documents(documents)
     assert len(output) == 8
     for embedding in output:
         assert len(embedding) == embeddings_dim
@@ -70,10 +84,15 @@ def test_langchain_google_vertexai_embedding_documents_with_task_type(
     ("model_name", "embeddings_dim"),
     _EMBEDDING_MODELS,
 )
-def test_langchain_google_vertexai_embedding_query(model_name, embeddings_dim) -> None:
+async def test_langchain_google_vertexai_embedding_query(
+    model_name, embeddings_dim
+) -> None:
     document = "foo bar"
     model = VertexAIEmbeddings(model=model_name)
-    output = model.embed_query(document)
+    output = await model.aembed_query(document)
     assert len(output) == embeddings_dim
@@ -82,13 +101,18 @@ def test_langchain_google_vertexai_embedding_query(model_name, embeddings_dim) -
     ("model_name", "embeddings_dim"),
     _EMBEDDING_MODELS,
 )
-def test_langchain_google_vertexai_embedding_query_with_task_type(
+async def test_langchain_google_vertexai_embedding_query_with_task_type(
     model_name: str,
     embeddings_dim: int,
 ) -> None:
     document = "foo bar"
     model = VertexAIEmbeddings(model=model_name)
-    output = model.embed_query(document)
+    output = await model.aembed_query(document)
     assert len(output) == embeddings_dim
diff --git a/libs/vertexai/tests/integration_tests/test_evaluation.py b/libs/vertexai/tests/integration_tests/test_evaluation.py
index 918f8df97..3ef8babb6 100644
--- a/libs/vertexai/tests/integration_tests/test_evaluation.py
+++ b/libs/vertexai/tests/integration_tests/test_evaluation.py
@@ -9,11 +9,12 @@
 @pytest.mark.extended
-def test_evaluate() -> None:
+async def test_evaluate() -> None:
     evaluator = VertexStringEvaluator(
         metric="bleu", project_id=os.environ["PROJECT_ID"]
     )
-    result = evaluator.evaluate(
+    result = await evaluator.aevaluate(
         examples=[
             {"reference": "This is a test."},
             {"reference": "This is another test."},
@@ -30,11 +31,13 @@ def test_evaluate() -> None:
 @pytest.mark.extended
 @pytest.mark.flaky(retries=3)
-def test_evaluate_strings() -> None:
+async def test_evaluate_strings() -> None:
     evaluator = VertexStringEvaluator(
         metric="safety", project_id=os.environ["PROJECT_ID"]
     )
-    result = evaluator._evaluate_strings(prediction="This is a test")
+    result = await evaluator._aevaluate_strings(prediction="This is a test")
     assert isinstance(result, dict)
     assert "score" in result
     assert "explanation" in result
@@ -63,7 +68,7 @@ async def test_evaluate_pairwise() -> None:
         metric="pairwise_question_answering_quality",
         project_id=os.environ["PROJECT_ID"],
     )
-    result = evaluator.evaluate_string_pairs(
+    result = await evaluator.aevaluate_string_pairs(
         prediction="London",
         prediction_b="Berlin",
         input="What is the capital of Great Britain?",
diff --git a/libs/vertexai/tests/integration_tests/test_llms.py b/libs/vertexai/tests/integration_tests/test_llms.py
index f8dd666b3..3533e13bb 100644
--- a/libs/vertexai/tests/integration_tests/test_llms.py
+++ b/libs/vertexai/tests/integration_tests/test_llms.py
@@ -24,16 +24,16 @@ def test_vertex_initialization() -> None:
 @pytest.mark.release
-def test_vertex_invoke() -> None:
+async def test_vertex_invoke() -> None:
     llm = VertexAI(model_name=_DEFAULT_MODEL_NAME, temperature=0)
-    output = llm.invoke("Say foo:")
+    output = await llm.ainvoke("Say foo:")
     assert isinstance(output, str)
 @pytest.mark.release
-def test_vertex_generate() -> None:
+async def test_vertex_generate() -> None:
     llm = VertexAI(model_name=_DEFAULT_MODEL_NAME, temperature=0)
-    output = llm.generate(["Say foo:"])
+    output = await llm.agenerate(["Say foo:"])
     assert isinstance(output, LLMResult)
     assert len(output.generations) == 1
     usage_metadata = output.generations[0][0].generation_info["usage_metadata"]  # type: ignore
@@ -43,9 +43,10 @@ def test_vertex_generate() -> None:
 @pytest.mark.release
 @pytest.mark.xfail(reason="VertexAI doesn't always respect number of candidates")
-def test_vertex_generate_multiple_candidates() -> None:
+async def test_vertex_generate_multiple_candidates() -> None:
     llm = VertexAI(temperature=0.3, n=2, model_name="text-bison@001")
-    output = llm.generate(["Say foo:"])
+    output = await llm.agenerate(["Say foo:"])
     assert isinstance(output, LLMResult)
     assert len(output.generations) == 1
     assert len(output.generations[0]) == 2
@@ -62,9 +63,9 @@ async def test_vertex_agenerate() -> None:
 @pytest.mark.release
-def test_stream() -> None:
+async def test_stream() -> None:
     llm = VertexAI(temperature=0, model_name=_DEFAULT_MODEL_NAME)
-    for token in llm.stream("I'm Pickle Rick"):
+    async for token in llm.astream("I'm Pickle Rick"):
         assert isinstance(token, str)
@@ -93,7 +94,7 @@ def test_vertex_call_count_tokens() -> None:
 @pytest.mark.extended
-def test_structured_output_schema_json() -> None:
+async def test_structured_output_schema_json() -> None:
     model = VertexAI(
         rate_limiter=rate_limiter,
         model_name="gemini-2.0-flash-001",
@@ -112,7 +113,7 @@ def test_structured_output_schema_json() -> None:
         },
     )
-    response = model.invoke("List a few popular cookie recipes")
+    response = await model.ainvoke("List a few popular cookie recipes")
     assert isinstance(response, str)
     parsed_response = json.loads(response)
diff --git a/libs/vertexai/tests/integration_tests/test_llms_safety.py b/libs/vertexai/tests/integration_tests/test_llms_safety.py
index b1974820e..e02fe0840 100644
--- a/libs/vertexai/tests/integration_tests/test_llms_safety.py
+++ b/libs/vertexai/tests/integration_tests/test_llms_safety.py
@@ -43,9 +43,10 @@
 @pytest.mark.extended
-def test_gemini_safety_settings_generate() -> None:
+async def test_gemini_safety_settings_generate() -> None:
     llm = VertexAI(model_name=_DEFAULT_MODEL_NAME, safety_settings=SAFETY_SETTINGS)
-    output = llm.generate(["What do you think about child abuse:"])
+    output = await llm.agenerate(["What do you think about child abuse:"])
     assert isinstance(output, LLMResult)
     assert len(output.generations) == 1
     generation_info = output.generations[0][0].generation_info
@@ -53,7 +54,7 @@ def test_gemini_safety_settings_generate() -> None:
     assert len(generation_info) > 0
     assert not generation_info.get("is_blocked")
-    blocked_output = llm.generate([BLOCKED_PROMPT])
+    blocked_output = await llm.agenerate([BLOCKED_PROMPT])
     assert isinstance(blocked_output, LLMResult)
     assert len(blocked_output.generations) == 1
     generation_info = output.generations[0][0].generation_info
@@ -63,7 +64,7 @@ def test_gemini_safety_settings_generate() -> None:
     # test safety_settings passed directly to generate
     llm = VertexAI(model_name=_DEFAULT_MODEL_NAME)
-    output = llm.generate(
+    output = await llm.agenerate(
         ["What do you think about child abuse:"], safety_settings=SAFETY_SETTINGS
     )
     assert isinstance(output, LLMResult)
diff --git a/libs/vertexai/tests/integration_tests/test_maas.py b/libs/vertexai/tests/integration_tests/test_maas.py
index ab482f15c..14061b518 100644
--- a/libs/vertexai/tests/integration_tests/test_maas.py
+++ b/libs/vertexai/tests/integration_tests/test_maas.py
@@ -33,11 +33,11 @@
 @pytest.mark.extended
 @pytest.mark.parametrize("model_name", model_names)
-def test_generate(model_name: str) -> None:
+async def test_generate(model_name: str) -> None:
     llm = get_vertex_maas_model(
         model_name=model_name, location=model_locations.get(model_name, "us-central1")
     )
-    output = llm.invoke("What is the meaning of life?")
+    output = await llm.ainvoke("What is the meaning of life?")
     assert isinstance(output, AIMessage)
@@ -53,15 +53,15 @@ async def test_agenerate(model_name: str) -> None:
 @pytest.mark.extended
 @pytest.mark.parametrize("model_name", model_names)
-def test_stream(model_name: str) -> None:
+async def test_stream(model_name: str) -> None:
     # streaming currently fails with mistral-nemo@2407
     if "stral" in model_name:
         return
     llm = get_vertex_maas_model(
         model_name=model_name, location=model_locations.get(model_name, "us-central1")
     )
-    output = llm.stream("What is the meaning of life?")
-    for chunk in output:
+    output = llm.astream("What is the meaning of life?")
+    async for chunk in output:
         assert isinstance(chunk, AIMessageChunk)
@@ -108,13 +108,13 @@ def search(
     request = HumanMessage(
         content="Please tell the primary color of sparrow?",
     )
-    response = llm_with_search_force.invoke([request])
+    response = await llm_with_search_force.ainvoke([request])
     assert isinstance(response, AIMessage)
     tool_calls = response.tool_calls
     assert len(tool_calls) > 0
-    tool_response = search.invoke("sparrow")
+    tool_response = await search.ainvoke("sparrow")
     tool_messages: list[BaseMessage] = []
     for tool_call in tool_calls:
@@ -126,7 +126,7 @@ def search(
         )
         tool_messages.append(tool_message)
-    result = llm_with_search.invoke([request, response, *tool_messages])
+    result = await llm_with_search.ainvoke([request, response, *tool_messages])
     assert isinstance(result, AIMessage)
     if model_name in _MISTRAL_MODELS:
diff --git a/libs/vertexai/tests/integration_tests/test_model_garden.py b/libs/vertexai/tests/integration_tests/test_model_garden.py
index e7752ad24..81b08ef17 100644
--- a/libs/vertexai/tests/integration_tests/test_model_garden.py
+++ b/libs/vertexai/tests/integration_tests/test_model_garden.py
@@ -28,7 +28,7 @@
     ("endpoint_os_variable_name", "result_arg"),
     [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)],
 )
-def test_model_garden(endpoint_os_variable_name: str, result_arg: str | None) -> None:
+async def test_model_garden(
+    endpoint_os_variable_name: str, result_arg: str | None
+) -> None:
     """In order to run this test, you should provide endpoint names.
     Example:
@@ -45,7 +45,7 @@ def test_model_garden(endpoint_os_variable_name: str, result_arg: str | None) ->
         result_arg=result_arg,
         location=location,
     )
-    output = llm.invoke("What is the meaning of life?")
+    output = await llm.ainvoke("What is the meaning of life?")
     assert isinstance(output, str)
     assert llm._llm_type == "vertexai_model_garden"
@@ -55,7 +55,7 @@ def test_model_garden(endpoint_os_variable_name: str, result_arg: str | None) ->
     ("endpoint_os_variable_name", "result_arg"),
     [("FALCON_ENDPOINT_ID", "generated_text"), ("LLAMA_ENDPOINT_ID", None)],
 )
-def test_model_garden_generate(
+async def test_model_garden_generate(
     endpoint_os_variable_name: str, result_arg: str | None
 ) -> None:
     """In order to run this test, you should provide endpoint names.
@@ -74,7 +74,7 @@ def test_model_garden_generate(
         result_arg=result_arg,
         location=location,
     )
-    output = llm.generate(["What is the meaning of life?", "How much is 2+2"])
+    output = await llm.agenerate(["What is the meaning of life?", "How much is 2+2"])
     assert isinstance(output, LLMResult)
     assert len(output.generations) == 2
@@ -104,7 +104,7 @@ async def test_model_garden_agenerate(
 @pytest.mark.extended
-def test_anthropic() -> None:
+async def test_anthropic() -> None:
     project = os.environ["PROJECT_ID"]
     location = _ANTHROPIC_LOCATION
     model = ChatAnthropicVertex(
@@ -120,13 +120,13 @@ def test_anthropic() -> None:
     )
     context = SystemMessage(content=raw_context)
     message = HumanMessage(content=question)
-    response = model.invoke([context, message], model_name=_ANTHROPIC_CLAUDE_MODEL_NAME)
+    response = await model.ainvoke(
+        [context, message], model_name=_ANTHROPIC_CLAUDE_MODEL_NAME
+    )
     assert isinstance(response, AIMessage)
     assert isinstance(response.content, str)
 @pytest.mark.extended
-def test_anthropic_stream() -> None:
+async def test_anthropic_stream() -> None:
     project = os.environ["PROJECT_ID"]
     location = _ANTHROPIC_LOCATION
     model = ChatAnthropicVertex(
@@ -137,13 +137,13 @@ def test_anthropic_stream() -> None:
         "Hello, could you recommend a good movie for me to watch this evening, please?"
     )
     message = HumanMessage(content=question)
-    sync_response = model.stream([message], model=_ANTHROPIC_CLAUDE_MODEL_NAME)
-    for chunk in sync_response:
+    sync_response = model.astream([message], model=_ANTHROPIC_CLAUDE_MODEL_NAME)
+    async for chunk in sync_response:
         assert isinstance(chunk, AIMessageChunk)
 @pytest.mark.extended
-def test_anthropic_thinking_stream() -> None:
+async def test_anthropic_thinking_stream() -> None:
     project = os.environ["PROJECT_ID"]
     location = "us-east5"
     model = ChatAnthropicVertex(
@@ -161,8 +161,8 @@ def test_anthropic_thinking_stream() -> None:
         "Hello, could you recommend a good movie for me to watch this evening, please?"
     )
     message = HumanMessage(content=question)
-    sync_response = model.stream([message], model="claude-sonnet-4-5@20250929")
-    for chunk in sync_response:
+    sync_response = model.astream([message], model="claude-sonnet-4-5@20250929")
+    async for chunk in sync_response:
         assert isinstance(chunk, AIMessageChunk)
@@ -203,7 +203,7 @@ def _check_tool_calls(response: BaseMessage, expected_name: str) -> None:
 @pytest.mark.extended
 @pytest.mark.flaky(retries=3)
-def test_anthropic_tool_calling() -> None:
+async def test_anthropic_tool_calling() -> None:
     project = os.environ["PROJECT_ID"]
     location = _ANTHROPIC_LOCATION
     model = ChatAnthropicVertex(
@@ -220,7 +220,7 @@ class MyModel(BaseModel):
     model_with_tools = model.bind_tools(
         [MyModel], model_name=_ANTHROPIC_CLAUDE_MODEL_NAME
     )
-    response = model_with_tools.invoke([message])
+    response = await model_with_tools.ainvoke([message])
     _check_tool_calls(response, "MyModel")
     # Test .bind_tools with function
@@ -230,7 +230,7 @@ def my_model(name: str, age: int) -> None:
     model_with_tools = model.bind_tools(
         [my_model], model_name=_ANTHROPIC_CLAUDE_MODEL_NAME
     )
-    response = model_with_tools.invoke([message])
+    response = await model_with_tools.ainvoke([message])
     _check_tool_calls(response, "my_model")
     # Test .bind_tools with tool
@@ -241,13 +241,13 @@ def my_tool(name: str, age: int) -> None:
     model_with_tools = model.bind_tools(
         [my_tool], model_name=_ANTHROPIC_CLAUDE_MODEL_NAME
     )
-    response = model_with_tools.invoke([message])
+    response = await model_with_tools.ainvoke([message])
     _check_tool_calls(response, "my_tool")
     # Test streaming
-    stream = model_with_tools.stream([message])
+    stream = model_with_tools.astream([message])
     first = True
-    for chunk in stream:
+    async for chunk in stream:
         if first:
             gathered = chunk
             first = False
@@ -263,7 +263,7 @@ def my_tool(name: str, age: int) -> None:
 @pytest.mark.extended
-def test_anthropic_with_structured_output() -> None:
+async def test_anthropic_with_structured_output() -> None:
     project = os.environ["PROJECT_ID"]
     location = _ANTHROPIC_LOCATION
     model = ChatAnthropicVertex(
@@ -278,7 +278,7 @@ class MyModel(BaseModel):
     message = HumanMessage(content="My name is Erick and I am 27 years old")
     model_with_structured_output = model.with_structured_output(MyModel)
-    response = model_with_structured_output.invoke([message])
+    response = await model_with_structured_output.ainvoke([message])
     assert isinstance(response, MyModel)
     assert response.name == "Erick"
@@ -287,7 +287,7 @@ class MyModel(BaseModel):
 @pytest.mark.extended
 @pytest.mark.flaky(retries=3)
-def test_anthropic_multiturn_tool_calling() -> None:
+async def test_anthropic_multiturn_tool_calling() -> None:
     """Test multi-turn conversation with tool calls and responses.
     This test ensures that ToolMessages with streaming metadata are properly
@@ -311,7 +311,7 @@ def get_weather(city: str) -> str:
     # First turn - user asks question, model calls tool
     user_message = HumanMessage("What's the weather in Paris?")
-    response1 = model_with_tools.invoke([user_message])
+    response1 = await model_with_tools.ainvoke([user_message])
     # Verify model made a tool call
     assert isinstance(response1, AIMessage)
@@ -327,7 +327,7 @@ def get_weather(city: str) -> str:
     )
     # This should NOT raise "Extra inputs are not permitted" error
-    response2 = model.invoke([user_message, response1, tool_result])
+    response2 = await model.ainvoke([user_message, response1, tool_result])
     # Verify model responded with final answer
     assert isinstance(response2, AIMessage)
@@ -337,7 +337,7 @@ def get_weather(city: str) -> str:
 @pytest.mark.extended
 @pytest.mark.flaky(retries=3)
-def test_anthropic_tool_error_handling() -> None:
+async def test_anthropic_tool_error_handling() -> None:
     """Test that tool errors are properly communicated with is_error flag."""
     project = os.environ["PROJECT_ID"]
     location = _ANTHROPIC_LOCATION
@@ -355,7 +355,7 @@ def failing_tool(x: int) -> str:
     model_with_tools = model.bind_tools([failing_tool])
     # First turn - model calls tool
-    response1 = model_with_tools.invoke([HumanMessage("Use failing_tool with x=5")])
+    response1 = await model_with_tools.ainvoke(
+        [HumanMessage("Use failing_tool with x=5")]
+    )
     # Verify tool call
     assert isinstance(response1, AIMessage)
@@ -370,7 +370,7 @@ def failing_tool(x: int) -> str:
     )
     # Should handle error gracefully (is_error flag should be sent)
-    response2 = model.invoke(
+    response2 = await model.ainvoke(
         [HumanMessage("Use failing_tool with x=5"), response1, error_result]
     )
diff --git a/libs/vertexai/tests/integration_tests/test_vectorstores.py b/libs/vertexai/tests/integration_tests/test_vectorstores.py
index a224be413..d8f3d26b4 100644
--- a/libs/vertexai/tests/integration_tests/test_vectorstores.py
+++ b/libs/vertexai/tests/integration_tests/test_vectorstores.py
@@ -123,7 +123,7 @@ def datastore_vector_store(
 @pytest.mark.extended
-def test_vector_search_sdk_manager(sdk_manager: VectorSearchSDKManager) -> None:
+async def test_vector_search_sdk_manager(sdk_manager: VectorSearchSDKManager) -> None:
     gcs_client = sdk_manager.get_gcs_client()
     assert isinstance(gcs_client, storage.Client)
@@ -176,7 +176,7 @@ def test_gcs_document_storage_valid_user_input(
         "datastore_document_storage",
     ],
 )
-def test_document_storage(
+async def test_document_storage(
     storage_class: str,
     request: pytest.FixtureRequest,
 ) -> None:
@@ -207,8 +207,8 @@ def test_document_storage(
     ids = [str(uuid4()) for i in range(N * len(weirdly_encoded_texts))]
     # Test batch storage and retrieval
-    document_storage.mset(list(zip(ids, documents, strict=False)))
-    retrieved_documents = document_storage.mget(ids)
+    await document_storage.amset(list(zip(ids, documents, strict=False)))
+    retrieved_documents = await document_storage.amget(ids)
     for og_document, retrieved_document in zip(
         documents, retrieved_documents, strict=False
@@ -216,16 +216,16 @@ def test_document_storage(
         assert og_document == retrieved_document
     # Test key yielding
-    keys = list(document_storage.yield_keys())
+    keys = [k async for k in document_storage.ayield_keys()]
     assert all(id in keys for id in ids)
     # Test deletion
-    document_storage.mdelete(ids)
-    assert all(item is None for item in document_storage.mget(ids))
+    await document_storage.amdelete(ids)
+    assert all(item is None for item in await document_storage.amget(ids))
 @pytest.mark.extended
-def test_public_endpoint_vector_searcher(
+async def test_public_endpoint_vector_searcher(
     embeddings: VertexAIEmbeddings, sdk_manager: VectorSearchSDKManager
 ) -> None:
     vertexai.init(api_transport="grpc")
@@ -236,7 +236,7 @@ def test_public_endpoint_vector_searcher(
     texts = ["What's your favourite animal", "What's your favourite city"]
-    embeddings_vector = embeddings.embed_documents(texts=texts)
+    embeddings_vector = await embeddings.aembed_documents(texts=texts)
     matching_neighbors_list = searcher.find_neighbors(embeddings=embeddings_vector, k=4)
@@ -247,18 +247,18 @@ def test_public_endpoint_vector_searcher(
 @pytest.mark.parametrize(
     "vector_store_class", ["vector_store", "datastore_vector_store"]
 )
-def test_vector_store(vector_store_class: str, request: pytest.FixtureRequest) -> None:
+async def test_vector_store(
+    vector_store_class: str, request: pytest.FixtureRequest
+) -> None:
     vertexai.init(api_transport="grpc")
     vector_store: VectorSearchVectorStore = request.getfixturevalue(vector_store_class)
     query = "What are your favourite animals?"
-    docs_with_scores = vector_store.similarity_search_with_score(query, k=1)
+    docs_with_scores = await vector_store.asimilarity_search_with_score(query, k=1)
     assert len(docs_with_scores) == 1
     for doc, score in docs_with_scores:
         assert isinstance(doc, Document)
         assert isinstance(score, float)
-    docs = vector_store.similarity_search(query, k=2)
+    docs = await vector_store.asimilarity_search(query, k=2)
     assert len(docs) == 2
     for doc in docs:
         assert isinstance(doc, Document)
@@ -268,7 +268,7 @@ def test_vector_store(vector_store_class: str, request: pytest.FixtureRequest) -
 @pytest.mark.parametrize(
     "vector_store_class", ["vector_store", "datastore_vector_store"]
 )
-def test_vector_store_hybrid_search(
+async def test_vector_store_hybrid_search(
     vector_store_class: str,
     request: pytest.FixtureRequest,
     embeddings: VertexAIEmbeddings,
@@ -277,13 +277,13 @@ def test_vector_store_hybrid_search(
     vector_store: VectorSearchVectorStore = request.getfixturevalue(vector_store_class)
     query = "What are your favourite animals?"
-    embedding = embeddings.embed_query(query)
+    embedding = await embeddings.aembed_query(query)
     sparse_embedding: dict[str, list[int] | list[float]] = {
         "values": [0.5, 0.7],
         "dimensions": [2, 4],
     }
-    docs_with_scores = vector_store.similarity_search_by_vector_with_score(
+    docs_with_scores = await vector_store.asimilarity_search_by_vector_with_score(
         embedding=embedding, sparse_embedding=sparse_embedding, k=1
     )
     assert len(docs_with_scores) == 1
@@ -298,7 +298,7 @@ def test_vector_store_hybrid_search(
 @pytest.mark.extended
 @pytest.mark.parametrize("vector_store_class", ["datastore_vector_store"])
-def test_add_texts_with_embeddings(
+async def test_add_texts_with_embeddings(
     vector_store_class: str,
     request: pytest.FixtureRequest,
     embeddings: VertexAIEmbeddings,
@@ -307,8 +307,8 @@ def test_add_texts_with_embeddings(
     texts = ["my favourite animal is the elephant", "my favourite animal is the lion"]
     ids = ["idx1", "idx2"]
-    embs = embeddings.embed_documents(texts)
-    ids1 = vector_store.add_texts_with_embeddings(
+    embs = await embeddings.aembed_documents(texts)
+    ids1 = await vector_store.aadd_texts_with_embeddings(
         texts=texts, embeddings=embs, ids=ids, is_complete_overwrite=True
     )
     assert len(ids1) == 2
@@ -316,7 +316,7 @@ def test_add_texts_with_embeddings(
     sparse_embeddings: list[dict[str, list[int] | list[float]]] = [
         {"values": [0.5, 0.7], "dimensions": [2, 4]}
     ] * 2
-    ids2 = vector_store.add_texts_with_embeddings(
+    ids2 = await vector_store.aadd_texts_with_embeddings(
         texts=texts,
         embeddings=embs,
         sparse_embeddings=sparse_embeddings,
@@ -335,11 +335,11 @@ def test_add_texts_with_embeddings(
         # "datastore_vector_store" Waiting for the bug to be fixed as its stream
     ],
 )
-def test_vector_store_filtering(
+async def test_vector_store_filtering(
     vector_store_class: str, request: pytest.FixtureRequest
 ) -> None:
     vector_store: VectorSearchVectorStore = request.getfixturevalue(vector_store_class)
-    documents = vector_store.similarity_search(
+    documents = await vector_store.asimilarity_search(
         "I want some pants",
         filter=[Namespace(name="color", allow_tokens=["blue"])],
         numeric_filter=[NumericNamespace(name="price", value_float=20.0, op="LESS")],
@@ -351,18 +351,18 @@ def test_vector_store_filtering(
 @pytest.mark.long
-def test_vector_store_update_index(
+async def test_vector_store_update_index(
     vector_store: VectorSearchVectorStore, sample_documents: list[Document]
 ) -> None:
-    vector_store.add_documents(documents=sample_documents, is_complete_overwrite=True)
+    await vector_store.aadd_documents(
+        documents=sample_documents, is_complete_overwrite=True
+    )
 @pytest.mark.extended
-def test_vector_store_stream_update_index(
+async def test_vector_store_stream_update_index(
     datastore_vector_store: VectorSearchVectorStoreDatastore,
     sample_documents: list[Document],
 ) -> None:
-    datastore_vector_store.add_documents(
+    await datastore_vector_store.aadd_documents(
         documents=sample_documents, is_complete_overwrite=True
     )
diff --git a/libs/vertexai/tests/integration_tests/test_vision_models.py b/libs/vertexai/tests/integration_tests/test_vision_models.py
index b33996152..c53bce00b 100644
--- a/libs/vertexai/tests/integration_tests/test_vision_models.py
+++ b/libs/vertexai/tests/integration_tests/test_vision_models.py
@@ -136,11 +136,11 @@ def test_vertex_ai_visual_qna_chat(base64_image: str) -> None:
 @pytest.mark.release
 @pytest.mark.flaky(retries=3)
-def test_vertex_ai_image_generation_and_edition() -> None:
+async def test_vertex_ai_image_generation_and_edition() -> None:
     generator = VertexAIImageGeneratorChat()
     messages = [HumanMessage(content=["Generate a dog reading the newspaper"])]
-    response = generator.invoke(messages)
+    response = await generator.ainvoke(messages)
     assert isinstance(response, AIMessage)
     generated_image = response.content[0]
@@ -154,12 +154,12 @@ def test_vertex_ai_image_generation_and_edition() -> None:
     chain = prompt | model
-    response = chain.invoke({"img_object": "cat", "img_context": "beach"})
+    response = await chain.ainvoke({"img_object": "cat", "img_context": "beach"})
     assert isinstance(response, AIMessage)
     editor = VertexAIImageEditorChat()
     messages = [HumanMessage(content=[generated_image, "Change the dog for a cat"])]
-    response = editor.invoke(messages)
+    response = await editor.ainvoke(messages)
     assert isinstance(response, AIMessage)