Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 22 additions & 72 deletions notebooks/AgenticRAG_with_LangGraph,TogetherAI.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
"- Preprocess and transform raw files into structured formats.\n",
"- Push the processed data directly into the database/vector store of your choice.\n",
"\n",
"In this notebook, we'll programmatically work with Unstructured by tapping into the **Workflow Endpoint** — a powerful interface that lets you define entire pipelines through code.\n",
"In this notebook, we'll programmatically work with Unstructured by tapping into the **Unstructured API** — a powerful interface that lets you define entire pipelines through code.\n",
"\n",
"With Unstructured API, you will set up a workflow by:\n",
"\n",
Expand All @@ -79,7 +79,7 @@
"\n",
"all without leaving this colab notebook.\n",
"\n",
"Before we dive in, make sure to **[sign up for Unstructured for Developers](https://unstructured.io/developers)** to access the platform and get your API credentials ready.\n",
"Before we dive in, make sure to **[sign up for Unstructured](https://unstructured.io/?modal=try-for-free)** to access the platform and get your API credentials ready.\n",
"\n",
"\n",
"\n"
Expand All @@ -96,50 +96,7 @@
"id": "JNUr_B5LfPd4",
"outputId": "826bc427-92dd-43d3-f5da-febaa50f3d59"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting unstructured-client\n",
" Downloading unstructured_client-0.34.0-py3-none-any.whl.metadata (21 kB)\n",
"Collecting aiofiles>=24.1.0 (from unstructured-client)\n",
" Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)\n",
"Requirement already satisfied: cryptography>=3.1 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (43.0.3)\n",
"Collecting eval-type-backport>=0.2.0 (from unstructured-client)\n",
" Downloading eval_type_backport-0.2.2-py3-none-any.whl.metadata (2.2 kB)\n",
"Requirement already satisfied: httpx>=0.27.0 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (0.28.1)\n",
"Requirement already satisfied: nest-asyncio>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (1.6.0)\n",
"Requirement already satisfied: pydantic>=2.11.2 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (2.11.3)\n",
"Collecting pypdf>=4.0 (from unstructured-client)\n",
" Downloading pypdf-5.4.0-py3-none-any.whl.metadata (7.3 kB)\n",
"Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (1.0.0)\n",
"Requirement already satisfied: typing-inspection>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from unstructured-client) (0.4.0)\n",
"Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.11/dist-packages (from cryptography>=3.1->unstructured-client) (1.17.1)\n",
"Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx>=0.27.0->unstructured-client) (4.9.0)\n",
"Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx>=0.27.0->unstructured-client) (2025.1.31)\n",
"Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx>=0.27.0->unstructured-client) (1.0.9)\n",
"Requirement already satisfied: idna in /usr/local/lib/python3.11/dist-packages (from httpx>=0.27.0->unstructured-client) (3.10)\n",
"Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx>=0.27.0->unstructured-client) (0.16.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.11.2->unstructured-client) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.33.1 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.11.2->unstructured-client) (2.33.1)\n",
"Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2.11.2->unstructured-client) (4.13.2)\n",
"Requirement already satisfied: requests<3.0.0,>=2.0.1 in /usr/local/lib/python3.11/dist-packages (from requests-toolbelt>=1.0.0->unstructured-client) (2.32.3)\n",
"Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.12->cryptography>=3.1->unstructured-client) (2.22)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=1.0.0->unstructured-client) (3.4.1)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3.0.0,>=2.0.1->requests-toolbelt>=1.0.0->unstructured-client) (2.4.0)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx>=0.27.0->unstructured-client) (1.3.1)\n",
"Downloading unstructured_client-0.34.0-py3-none-any.whl (189 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m189.4/189.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading aiofiles-24.1.0-py3-none-any.whl (15 kB)\n",
"Downloading eval_type_backport-0.2.2-py3-none-any.whl (5.8 kB)\n",
"Downloading pypdf-5.4.0-py3-none-any.whl (302 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m302.3/302.3 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: pypdf, eval-type-backport, aiofiles, unstructured-client\n",
"Successfully installed aiofiles-24.1.0 eval-type-backport-0.2.2 pypdf-5.4.0 unstructured-client-0.34.0\n"
]
}
],
"outputs": [],
"source": [
"!pip install -U \"unstructured-client\""
]
Expand Down Expand Up @@ -218,23 +175,19 @@
"outputs": [],
"source": [
"from unstructured_client.models.operations import CreateSourceRequest\n",
"from unstructured_client.models.shared import (\n",
" CreateSourceConnector,\n",
" SourceConnectorType,\n",
" S3SourceConnectorConfigInput\n",
")\n",
"from unstructured_client.models.shared import CreateSourceConnector\n",
"\n",
"source_response = client.sources.create_source(\n",
" request=CreateSourceRequest(\n",
" create_source_connector=CreateSourceConnector(\n",
" name=f\"law_rag_s3_source_{time.time()}\",\n",
" type=SourceConnectorType.S3,\n",
" config=S3SourceConnectorConfigInput(\n",
" key=os.environ.get('S3_AWS_KEY'),\n",
" secret=os.environ.get('S3_AWS_SECRET'),\n",
" remote_url=os.environ.get('S3_REMOTE_URL'),\n",
" recursive=True\n",
" )\n",
" type=\"s3\",\n",
" config={\n",
" \"key\": os.environ.get('S3_AWS_KEY'),\n",
" \"secret\": os.environ.get('S3_AWS_SECRET'),\n",
" \"remote_url\": os.environ.get('S3_REMOTE_URL'),\n",
" \"recursive\": True\n",
" }\n",
" )\n",
" )\n",
")"
Expand Down Expand Up @@ -319,24 +272,21 @@
"outputs": [],
"source": [
"from unstructured_client.models.operations import CreateDestinationRequest\n",
"from unstructured_client.models.shared import (\n",
" CreateDestinationConnector,\n",
" DestinationConnectorType,\n",
" AstraDBConnectorConfigInput\n",
")\n",
"from unstructured_client.models.shared import CreateDestinationConnector\n",
"\n",
"destination_response = client.destinations.create_destination(\n",
" request=CreateDestinationRequest(\n",
" create_destination_connector=CreateDestinationConnector(\n",
" name=f\"astra_destination_{time.time()}\",\n",
" type=DestinationConnectorType.ASTRADB,\n",
" config=AstraDBConnectorConfigInput(\n",
" token=os.environ.get('ASTRA_DB_APPLICATION_TOKEN'),\n",
" api_endpoint=os.environ.get('ASTRA_DB_API_ENDPOINT'),\n",
" collection_name=os.environ.get('ASTRA_DB_COLLECTION_NAME'),\n",
" keyspace=os.environ.get('ASTRA_DB_KEYSPACE'),\n",
" batch_size=20,\n",
" flatten_metadata=False\n",
" )\n",
" type=\"astradb\",\n",
" config={\n",
" \"token\": os.environ.get('ASTRA_DB_APPLICATION_TOKEN'),\n",
" \"api_endpoint\": os.environ.get('ASTRA_DB_API_ENDPOINT'),\n",
" \"collection_name\": os.environ.get('ASTRA_DB_COLLECTION_NAME'),\n",
" \"keyspace\": os.environ.get('ASTRA_DB_KEYSPACE'),\n",
" \"batch_size\": 20,\n",
" \"flatten_metadata\": False\n",
" }\n",
" )\n",
" )\n",
")"
Expand Down