Commit ec8d923

Merge branch 'master' into valekjo-webhook-globals

2 parents: 68fe85a + 982f4b6

8 files changed, +244 −20 lines changed

nginx.conf

Lines changed: 4 additions & 0 deletions

@@ -305,6 +305,10 @@ server {
     # Removed pages
     # GPT plugins were discontinued April 9th, 2024 - https://help.openai.com/en/articles/8988022-winding-down-the-chatgpt-plugins-beta
     rewrite ^/platform/integrations/chatgpt-plugin$ https://blog.apify.com/add-custom-actions-to-your-gpts/ redirect;
+
+    # Python docs
+    rewrite ^/api/client/python/docs$ /api/client/python/docs/overview/introduction permanent;
 }

 # Temporarily used to route crawlee.dev to the Crawlee GitHub pages.
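The new `rewrite` rule above matches exactly the bare `/api/client/python/docs` path and issues a permanent (301) redirect to the introduction page. The regex part of the rule can be sanity-checked locally with Python's `re` module; this is only a rough sketch of the matching behavior, not of nginx's full location/rewrite semantics:

```python
import re

# The new rule: redirect the bare Python client docs URL to its introduction page.
PATTERN = re.compile(r"^/api/client/python/docs$")
TARGET = "/api/client/python/docs/overview/introduction"

def rewrite(path: str) -> str:
    """Return the redirect target if the rule matches, else the path unchanged."""
    return TARGET if PATTERN.match(path) else path

print(rewrite("/api/client/python/docs"))          # redirected
print(rewrite("/api/client/python/docs/overview"))  # unchanged (the $ anchor prevents a match)
```

Note the `$` anchor: only the exact docs root is redirected, so deeper paths such as `/api/client/python/docs/overview/introduction` are untouched and no redirect loop can occur.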

sources/platform/actors/development/builds_and_runs/state_persistence.md

Lines changed: 51 additions & 4 deletions

@@ -51,7 +51,7 @@ By default, an Actor keeps its state in the server's memory. During a server swi

 The [Apify SDKs](/sdk) handle state persistence automatically.

-This is done using the `Actor.on()` method and the `migrating` event.
+This is done using the `Actor.on()` method and the `migrating` event.

 - The `migrating` event is triggered just before a migration occurs, allowing you to save your state.
 - To retrieve previously saved state, you can use the [`Actor.getValue`](/sdk/js/reference/class/Actor#getValue)/[`Actor.get_value`](/sdk/python/reference/class/Actor#get_value) methods.

@@ -81,15 +81,15 @@ await Actor.exit();
 <TabItem value="Python" label="Python">

 ```python
-from apify import Actor
+from apify import Actor, Event

-async def actor_migrate():
+async def actor_migrate(_event_data):
     await Actor.set_value('my-crawling-state', {'foo': 'bar'})

 async def main():
     async with Actor:
         # ...
-        Actor.on('migrating', actor_migrate)
+        Actor.on(Event.MIGRATING, actor_migrate)
         # ...
 ```

@@ -128,3 +128,50 @@ async def main():
 </Tabs>

 For improved Actor performance consider [caching repeated page data](/academy/expert-scraping-with-apify/saving-useful-stats).
+
+## Speeding up migrations
+
+Once your Actor receives the `migrating` event, the Apify platform will shut it down and restart it on a new server within one minute.
+To speed this process up, once you have persisted the Actor state,
+you can manually reboot the Actor in the `migrating` event handler using the `Actor.reboot()` method
+available in the [Apify SDK for JavaScript](/sdk/js/reference/class/Actor#reboot) or [Apify SDK for Python](/sdk/python/reference/class/Actor#reboot).
+
+<Tabs groupId="main">
+<TabItem value="JavaScript" label="JavaScript">
+
+```js
+import { Actor } from 'apify';
+
+await Actor.init();
+// ...
+Actor.on('migrating', async () => {
+    // ...
+    // save state
+    // ...
+    await Actor.reboot();
+});
+// ...
+await Actor.exit();
+```
+
+</TabItem>
+<TabItem value="Python" label="Python">
+
+```python
+from apify import Actor, Event
+
+async def actor_migrate(_event_data):
+    # ...
+    # save state
+    # ...
+    await Actor.reboot()
+
+async def main():
+    async with Actor:
+        # ...
+        Actor.on(Event.MIGRATING, actor_migrate)
+        # ...
+```
+
+</TabItem>
+</Tabs>
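The persist-and-restore cycle this doc describes (save on `migrating`, read back on startup) can be illustrated with a self-contained toy. `InMemoryStore` and `run_actor` here are hypothetical stand-ins invented for the sketch; in a real Actor you would call `Actor.get_value()` / `Actor.set_value()` against the key-value store instead:

```python
class InMemoryStore:
    """Toy stand-in for the Apify key-value store."""

    def __init__(self):
        self._data = {}

    def set_value(self, key, value):
        self._data[key] = value

    def get_value(self, key, default=None):
        return self._data.get(key, default)


def run_actor(store):
    """One simulated run: restore state, do work, persist before 'migration'."""
    # On startup, restore previously persisted state (or start fresh).
    state = dict(store.get_value('my-crawling-state', {'processed': 0}))
    # ... do some work ...
    state['processed'] += 10
    # In the `migrating` handler, persist the state before shutdown.
    store.set_value('my-crawling-state', state)
    return state['processed']


store = InMemoryStore()
first = run_actor(store)   # fresh start: 10 items processed
second = run_actor(store)  # after a "migration": resumes at 10, ends at 20
print(first, second)       # 10 20
```

The second run picks up exactly where the first left off, which is the whole point of persisting state before a migration.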

sources/platform/integrations/ai/haystack.md

Lines changed: 1 addition & 0 deletions

@@ -185,4 +185,5 @@ To run it, you can use the following command: `python apify_integration.py`
 - [Apify-haystack integration documentation](https://haystack.deepset.ai/integrations/apify)
 - [Apify-haystack integration source code](https://github.com/apify/apify-haystack)
 - [Example: RAG - Extract and use website content for question answering](https://haystack.deepset.ai/cookbook/apify_haystack_rag)
+- [Example: RAG: Web Search and Analysis with Apify and Haystack](https://haystack.deepset.ai/cookbook/apify_haystack_rag_web_browser)
 - [Example: Analyze Your Instagram Comments’ Vibe](https://haystack.deepset.ai/cookbook/apify_haystack_instagram_comments_analysis)

sources/platform/integrations/ai/langchain.md

Lines changed: 25 additions & 13 deletions

@@ -20,7 +20,7 @@ If you prefer to use JavaScript, you can follow the [JavaScript LangChain docum

 Before we start with the integration, we need to install all dependencies:

-`pip install apify-client langchain langchain_community langchain_openai openai tiktoken`
+`pip install langchain langchain-openai langchain-apify`

 After successful installation of all dependencies, we can start writing code.

@@ -30,9 +30,10 @@ First, import all required packages:
 import os

 from langchain.indexes import VectorstoreIndexCreator
-from langchain_community.utilities import ApifyWrapper
-from langchain_core.document_loaders.base import Document
-from langchain_openai import OpenAI
+from langchain_apify import ApifyWrapper
+from langchain_core.documents import Document
+from langchain_core.vectorstores import InMemoryVectorStore
+from langchain_openai import ChatOpenAI
 from langchain_openai.embeddings import OpenAIEmbeddings
 ```

@@ -49,6 +50,7 @@ Note that if you already have some results in an Apify dataset, you can load the

 ```python
 apify = ApifyWrapper()
+llm = ChatOpenAI(model="gpt-4o-mini")

 loader = apify.call_actor(
     actor_id="apify/website-content-crawler",

@@ -68,14 +70,17 @@ The Actor call may take some time as it crawls the LangChain documentation websi
 Initialize the vector index from the crawled documents:

 ```python
-index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader])
+index = VectorstoreIndexCreator(
+    vectorstore_cls=InMemoryVectorStore,
+    embedding=OpenAIEmbeddings()
+).from_loaders([loader])
 ```

 And finally, query the vector index:

 ```python
 query = "What is LangChain?"
-result = index.query_with_sources(query, llm=OpenAI())
+result = index.query_with_sources(query, llm=llm)

 print("answer:", result["answer"])
 print("source:", result["sources"])

@@ -87,15 +92,17 @@ If you want to test the whole example, you can simply create a new file, `langch
 import os

 from langchain.indexes import VectorstoreIndexCreator
-from langchain_community.utilities import ApifyWrapper
-from langchain_core.document_loaders.base import Document
-from langchain_openai import OpenAI
+from langchain_apify import ApifyWrapper
+from langchain_core.documents import Document
+from langchain_core.vectorstores import InMemoryVectorStore
+from langchain_openai import ChatOpenAI
 from langchain_openai.embeddings import OpenAIEmbeddings

 os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"
 os.environ["APIFY_API_TOKEN"] = "Your Apify API token"

 apify = ApifyWrapper()
+llm = ChatOpenAI(model="gpt-4o-mini")

 print("Call website content crawler ...")
 loader = apify.call_actor(

@@ -104,9 +111,12 @@ loader = apify.call_actor(
     dataset_mapping_function=lambda item: Document(page_content=item["text"] or "", metadata={"source": item["url"]}),
 )
 print("Compute embeddings...")
-index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader])
+index = VectorstoreIndexCreator(
+    vectorstore_cls=InMemoryVectorStore,
+    embedding=OpenAIEmbeddings()
+).from_loaders([loader])
 query = "What is LangChain?"
-result = index.query_with_sources(query, llm=OpenAI())
+result = index.query_with_sources(query, llm=llm)

 print("answer:", result["answer"])
 print("source:", result["sources"])

@@ -117,9 +127,11 @@ To run it, you can use the following command: `python langchain_integration.py`
 After running the code, you should see the following output:

 ```text
-answer: LangChain is a framework for developing applications powered by language models. It provides standard, extendable interfaces, external integrations, and end-to-end implementations for off-the-shelf use. It also integrates with other LLMs, systems, and products to create a vibrant and thriving ecosystem.
+answer: LangChain is a framework designed for developing applications powered by large language models (LLMs). It simplifies the entire application lifecycle, from development to productionization and deployment. LangChain provides open-source components and integrates with various third-party tools, making it easier to build and optimize applications using language models.

-source: https://python.langchain.com
+source: https://python.langchain.com/docs/get_started/introduction
 ```

 LangChain is a standard interface through which you can interact with a variety of large language models (LLMs).
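The `dataset_mapping_function` in the diff above turns each Apify dataset item into a LangChain `Document`. The mapping itself is plain Python and can be checked against stub data without calling any Actor; the `Document` class below is a minimal stand-in for `langchain_core.documents.Document`, not the real import:

```python
from dataclasses import dataclass, field

@dataclass
class Document:
    """Minimal stand-in for langchain_core.documents.Document."""
    page_content: str
    metadata: dict = field(default_factory=dict)

# Same mapping as passed to apify.call_actor(...) in the example above.
def mapping(item):
    return Document(page_content=item["text"] or "", metadata={"source": item["url"]})

doc = mapping({"text": "LangChain is a framework ...", "url": "https://python.langchain.com"})
print(doc.page_content)        # the crawled page text
print(doc.metadata["source"])  # the page URL

# The `or ""` guard keeps pages with missing text from producing None content.
empty = mapping({"text": None, "url": "https://example.com"})
print(repr(empty.page_content))
```

The `or ""` fallback matters because Website Content Crawler can emit items whose `text` field is empty, and downstream embedding code generally expects a string, not `None`.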
sources/platform/integrations/ai/langgraph.md

Lines changed: 160 additions & 0 deletions

New file (all lines added):

---
title: 🦜🔘➡️ LangGraph integration
sidebar_label: LangGraph
description: Learn how to build AI Agents with Apify and LangGraph 🦜🔘➡️.
sidebar_position: 1
slug: /integrations/langgraph
---

**Learn how to build AI Agents with Apify and LangGraph.**

---

## What is LangGraph

[LangGraph](https://www.langchain.com/langgraph) is a framework designed for constructing stateful, multi-agent applications with Large Language Models (LLMs), allowing developers to build complex AI agent workflows that can leverage tools, APIs, and databases.

:::note Explore LangGraph

For more in-depth details on LangGraph, check out its [official documentation](https://langchain-ai.github.io/langgraph/).

:::

## How to use Apify with LangGraph

This guide will demonstrate how to use Apify Actors with LangGraph by building a ReAct agent that uses the [RAG Web Browser](https://apify.com/apify/rag-web-browser) Actor to search Google for TikTok profiles and the [TikTok Data Extractor](https://apify.com/clockworks/free-tiktok-scraper) Actor to extract data from those profiles and analyze them.

### Prerequisites

- **Apify API token**: To use Apify Actors in LangGraph, you need an Apify API token. If you don't have one, you can learn how to obtain it in the [Apify documentation](https://docs.apify.com/platform/integrations/api).

- **OpenAI API key**: In order to work with agents in LangGraph, you need an OpenAI API key. If you don't have one, you can get it from the [OpenAI platform](https://platform.openai.com/account/api-keys).

- **Python packages**: You need to install the following Python packages:

  ```bash
  pip install langgraph langchain-apify langchain-openai
  ```

### Building the TikTok profile search and analysis agent

First, import all required packages:

```python
import os

from langchain_apify import ApifyActorsTool
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent
```

Next, set the environment variables for the Apify API token and OpenAI API key:

```python
os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"
os.environ["APIFY_API_TOKEN"] = "Your Apify API token"
```

Instantiate the LLM and the Apify Actors tools:

```python
llm = ChatOpenAI(model="gpt-4o-mini")

browser = ApifyActorsTool("apify/rag-web-browser")
tiktok = ApifyActorsTool("clockworks/free-tiktok-scraper")
```

Create the ReAct agent with the LLM and Apify Actors tools:

```python
tools = [browser, tiktok]
agent_executor = create_react_agent(llm, tools)
```

Finally, run the agent and stream the messages:

```python
for state in agent_executor.stream(
    stream_mode="values",
    input={
        "messages": [
            HumanMessage(content="Search the web for OpenAI TikTok profile and analyze their profile.")
        ]
    }):
    state["messages"][-1].pretty_print()
```

:::note Search and analysis may take some time

The agent tool call may take some time as it searches the web for OpenAI TikTok profiles and analyzes them.

:::

You will see the agent's messages in the console, which will show each step of the agent's workflow.

```text
================================ Human Message =================================

Search the web for OpenAI TikTok profile and analyze their profile.
================================== AI Message ==================================
Tool Calls:
  apify_actor_apify_rag-web-browser (call_y2rbmQ6gYJYC2lHzWJAoKDaq)
 Call ID: call_y2rbmQ6gYJYC2lHzWJAoKDaq
  Args:
    run_input: {"query":"OpenAI TikTok profile","maxResults":1}

...

================================== AI Message ==================================

The OpenAI TikTok profile is titled "OpenAI (@openai) Official." Here are some key details about the profile:

- **Followers**: 592.3K
- **Likes**: 3.3M
- **Description**: The profile features "low key research previews" and includes videos that showcase their various projects and research developments.

### Profile Overview:
- **Profile URL**: [OpenAI TikTok Profile](https://www.tiktok.com/@openai?lang=en)
- **Content Focus**: The posts primarily involve previews of OpenAI's research and various AI-related innovations.

...
```

If you want to test the whole example, you can simply create a new file, `langgraph_integration.py`, and copy the whole code into it.

```python
import os

from langchain_apify import ApifyActorsTool
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.prebuilt import create_react_agent

os.environ["OPENAI_API_KEY"] = "Your OpenAI API key"
os.environ["APIFY_API_TOKEN"] = "Your Apify API token"

llm = ChatOpenAI(model="gpt-4o-mini")

browser = ApifyActorsTool("apify/rag-web-browser")
tiktok = ApifyActorsTool("clockworks/free-tiktok-scraper")

tools = [browser, tiktok]
agent_executor = create_react_agent(llm, tools)

for state in agent_executor.stream(
    stream_mode="values",
    input={
        "messages": [
            HumanMessage(content="Search the web for OpenAI TikTok profile and analyze their profile.")
        ]
    }):
    state["messages"][-1].pretty_print()
```

## Resources

- [Apify Actors](https://docs.apify.com/platform/actors)
- [LangGraph - How to Create a ReAct Agent](https://langchain-ai.github.io/langgraph/how-tos/create-react-agent/)
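The ReAct loop that `create_react_agent` wires up alternates between model calls and tool calls until the model emits a final answer. The control flow can be sketched with stubs so it runs without any API keys; `fake_llm`, `fake_browser`, and `react_agent` below are hypothetical stand-ins for the real chat model, the RAG Web Browser tool, and LangGraph's prebuilt agent:

```python
def fake_llm(messages):
    """Stand-in for the chat model: first requests a tool, then answers."""
    if not any(role == "tool" for role, _ in messages):
        return ("tool_call", "rag-web-browser", {"query": "OpenAI TikTok profile"})
    return ("final", "The profile is https://www.tiktok.com/@openai")

def fake_browser(args):
    """Stand-in for the RAG Web Browser Actor tool."""
    return f"search results for {args['query']!r}"

def react_agent(question):
    """Minimal ReAct loop: call the model, run requested tools, repeat."""
    messages = [("human", question)]
    while True:
        kind, *payload = fake_llm(messages)
        if kind == "final":
            messages.append(("ai", payload[0]))
            return messages
        _tool_name, args = payload
        # Tool output is appended and fed back to the model on the next turn.
        messages.append(("tool", fake_browser(args)))

history = react_agent("Search the web for OpenAI TikTok profile.")
for role, content in history:
    print(role, ":", content)
```

In the real integration the loop lives inside the compiled LangGraph graph, and each `ApifyActorsTool` call is what shows up as a `Tool Calls` block in the streamed output above.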

sources/platform/storage/dataset.md

Lines changed: 1 addition & 1 deletion

@@ -147,7 +147,7 @@ You can then use that variable to [access the dataset's items and manage it](/ap

 > When using the [`.list_items()`](/api/client/python/reference/class/DatasetClient#list_items) method, if you fill both `omit` and `field` parameters with the same value, then `omit` parameter will take precedence and the field is excluded from the results.

-Check out the [Python API client documentation](/api/client/python/reference/class/DatasetClient) for [help with setup](/api/client/python/docs) and more details.
+Check out the [Python API client documentation](/api/client/python/reference/class/DatasetClient) for [help with setup](/api/client/python/docs/overview/introduction) and more details.

 ### Apify SDKs
sources/platform/storage/key_value_store.md

Lines changed: 1 addition & 1 deletion

@@ -124,7 +124,7 @@ my_key_val_store_client = apify_client.key_value_store('jane-doe/my-key-val-stor

 You can then use that variable to [access the key-value store's items and manage it](/api/client/python/reference/class/KeyValueStoreClient).

-Check out the [Python API client documentation](/api/client/python/reference/class/KeyValueStoreClient) for [help with setup](/api/client/python/docs) and more details.
+Check out the [Python API client documentation](/api/client/python/reference/class/KeyValueStoreClient) for [help with setup](/api/client/python/docs/overview/introduction) and more details.

 ### Apify SDKs

sources/platform/storage/request_queue.md

Lines changed: 1 addition & 1 deletion

@@ -135,7 +135,7 @@ my_queue_client = apify_client.request_queue('jane-doe/my-request-queue')

 You can then use that variable to [access the request queue's items and manage it](/api/client/python/reference/class/RequestQueueClient).

-Check out the [Python API client documentation](/api/client/python/reference/class/RequestQueueClient) for [help with setup](/api/client/python/docs) and more details.
+Check out the [Python API client documentation](/api/client/python/reference/class/RequestQueueClient) for [help with setup](/api/client/python/docs/overview/introduction) and more details.

 ### Apify SDKs
