
Commit 2070474

feat: async llm support
feat: introduce async contract to BaseLLM
feat: add async call support for: Azure provider, Anthropic provider, OpenAI provider, Gemini provider, Bedrock provider, LiteLLM provider
chore: expand scrubbed header fields (conftest, anthropic, bedrock)
chore: update docs to cover async functionality
chore: update and harden tests to support acall; re-add uri for cassette compatibility
chore: generate missing cassette
fix: ensure acall is non-abstract and set supports_tools = true for supported Anthropic models
chore: improve Bedrock async docstring and general test robustness
1 parent 59180e9 commit 2070474
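
Per the commit message, `acall` is part of the BaseLLM contract and is deliberately non-abstract. A minimal sketch of what such a contract could look like, assuming providers without a native async client fall back to running the synchronous `call` in a worker thread (the fallback strategy and signatures are assumptions, not confirmed by this diff):

```python
from __future__ import annotations

import asyncio
from abc import ABC, abstractmethod
from typing import Any


class BaseLLM(ABC):
    """Sketch of a sync/async LLM contract."""

    @abstractmethod
    def call(self, messages: str | list[dict[str, Any]], **kwargs: Any) -> str:
        """Synchronous completion; every provider implements this."""

    async def acall(self, messages: str | list[dict[str, Any]], **kwargs: Any) -> str:
        # Non-abstract on purpose (per the commit's fix note): providers with
        # native async clients override this; the rest inherit a
        # thread-offload fallback so awaiting never blocks the event loop.
        return await asyncio.to_thread(self.call, messages, **kwargs)
```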

File tree

70 files changed: +8589 additions, −154 deletions


conftest.py

Lines changed: 26 additions & 0 deletions
```diff
@@ -96,6 +96,30 @@ def setup_test_environment() -> Generator[None, Any, None]:
         "x-ratelimit-reset-requests": "X-RATELIMIT-RESET-REQUESTS-XXX",
         "x-ratelimit-reset-tokens": "X-RATELIMIT-RESET-TOKENS-XXX",
         "x-goog-api-key": "X-GOOG-API-KEY-XXX",
+        "api-key": "X-API-KEY-XXX",
+        "User-Agent": "X-USER-AGENT-XXX",
+        "apim-request-id:": "X-API-CLIENT-REQUEST-ID-XXX",
+        "azureml-model-session": "AZUREML-MODEL-SESSION-XXX",
+        "x-ms-client-request-id": "X-MS-CLIENT-REQUEST-ID-XXX",
+        "x-ms-region": "X-MS-REGION-XXX",
+        "apim-request-id": "APIM-REQUEST-ID-XXX",
+        "x-api-key": "X-API-KEY-XXX",
+        "anthropic-organization-id": "ANTHROPIC-ORGANIZATION-ID-XXX",
+        "request-id": "REQUEST-ID-XXX",
+        "anthropic-ratelimit-input-tokens-limit": "ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX",
+        "anthropic-ratelimit-input-tokens-remaining": "ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX",
+        "anthropic-ratelimit-input-tokens-reset": "ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX",
+        "anthropic-ratelimit-output-tokens-limit": "ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX",
+        "anthropic-ratelimit-output-tokens-remaining": "ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX",
+        "anthropic-ratelimit-output-tokens-reset": "ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX",
+        "anthropic-ratelimit-tokens-limit": "ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX",
+        "anthropic-ratelimit-tokens-remaining": "ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX",
+        "anthropic-ratelimit-tokens-reset": "ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX",
+        "x-amz-date": "X-AMZ-DATE-XXX",
+        "amz-sdk-invocation-id": "AMZ-SDK-INVOCATION-ID-XXX",
+        "accept-encoding": "ACCEPT-ENCODING-XXX",
+        "x-amzn-requestid": "X-AMZN-REQUESTID-XXX",
+        "x-amzn-RequestId": "X-AMZN-REQUESTID-XXX",
     }
@@ -105,6 +129,8 @@ def _filter_request_headers(request: Request) -> Request:  # type: ignore[no-any
         for variant in [header_name, header_name.upper(), header_name.title()]:
             if variant in request.headers:
                 request.headers[variant] = [replacement]
+
+    request.method = request.method.upper()
     return request
```
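
For context on how these mappings get used: a request filter like `_filter_request_headers` is typically registered via VCR's `before_record_request` hook so headers are scrubbed before a cassette is written. A minimal sketch of that wiring (the `SCRUBBED_HEADERS` name and its two sample entries are illustrative, not the repository's actual constants):

```python
import vcr

# Illustrative subset of the replacement map shown in the diff above.
SCRUBBED_HEADERS = {
    "x-api-key": "X-API-KEY-XXX",
    "x-amz-date": "X-AMZ-DATE-XXX",
}


def _filter_request_headers(request):
    # Replace secret-bearing headers, checking common case variants.
    for header_name, replacement in SCRUBBED_HEADERS.items():
        for variant in (header_name, header_name.upper(), header_name.title()):
            if variant in request.headers:
                request.headers[variant] = [replacement]
    # Normalize method casing before recording/matching.
    request.method = request.method.upper()
    return request


# Every outgoing request passes through the filter before being recorded.
recorder = vcr.VCR(before_record_request=_filter_request_headers)
```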

docs/en/concepts/llms.mdx

Lines changed: 44 additions & 0 deletions
````diff
@@ -1089,6 +1089,50 @@ CrewAI supports streaming responses from LLMs, allowing your application to rece
   </Tab>
 </Tabs>
 
+## Async LLM Calls
+
+CrewAI supports asynchronous LLM calls for improved performance and concurrency in your AI workflows. Async calls allow you to run multiple LLM requests concurrently without blocking, making them ideal for high-throughput applications and parallel agent operations.
+
+<Tabs>
+  <Tab title="Basic Usage">
+    Use the `acall` method for asynchronous LLM requests:
+
+    ```python
+    import asyncio
+    from crewai import LLM
+
+    async def main():
+        llm = LLM(model="openai/gpt-4o")
+
+        # Single async call
+        response = await llm.acall("What is the capital of France?")
+        print(response)
+
+    asyncio.run(main())
+    ```
+
+    The `acall` method supports all the same parameters as the synchronous `call` method, including messages, tools, and callbacks.
+  </Tab>
+
+  <Tab title="With Streaming">
+    Combine async calls with streaming for real-time concurrent responses:
+
+    ```python
+    import asyncio
+    from crewai import LLM
+
+    async def stream_async():
+        llm = LLM(model="openai/gpt-4o", stream=True)
+
+        response = await llm.acall("Write a short story about AI")
+
+        print(response)
+
+    asyncio.run(stream_async())
+    ```
+  </Tab>
+</Tabs>
+
 ## Structured LLM Calls
 
 CrewAI supports structured responses from LLM calls by allowing you to define a `response_format` using a Pydantic model. This enables the framework to automatically parse and validate the output, making it easier to integrate the response into your application without manual post-processing.
````
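
The new docs stress running requests concurrently but stop short of a fan-out example. A sketch of that pattern with `asyncio.gather`, using the same `LLM` API the diff documents (the questions are placeholders):

```python
import asyncio
from crewai import LLM

async def fan_out():
    llm = LLM(model="openai/gpt-4o")
    questions = [
        "What is the capital of France?",
        "What is the capital of Japan?",
        "What is the capital of Brazil?",
    ]
    # Launch all requests concurrently; gather preserves input order.
    answers = await asyncio.gather(*(llm.acall(q) for q in questions))
    for question, answer in zip(questions, answers):
        print(f"{question} -> {answer}")

asyncio.run(fan_out())
```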

lib/crewai/pyproject.toml

Lines changed: 1 addition & 0 deletions
```diff
@@ -68,6 +68,7 @@ qdrant = [
 ]
 aws = [
     "boto3~=1.40.38",
+    "aiobotocore~=2.25.2",
 ]
 watson = [
     "ibm-watsonx-ai~=1.3.39",
```
