diff --git a/examples/ingest-from-rest-api-example/VDK_Ingest_from_REST_API_Example.ipynb b/examples/ingest-from-rest-api-example/VDK_Ingest_from_REST_API_Example.ipynb new file mode 100644 index 0000000000..2d0cc0fac5 --- /dev/null +++ b/examples/ingest-from-rest-api-example/VDK_Ingest_from_REST_API_Example.ipynb @@ -0,0 +1,301 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "**Install VDK and the SQLite plugin**" + ], + "metadata": { + "id": "LUK5BrjelQcX" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1mAnQLHEk4uz", + "outputId": "8b841e80-8931-4849-d73a-6af5f01e5c12" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting vdk-core\n", + " Downloading vdk_core-0.3.1466514302.tar.gz (119 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.1/119.1 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Collecting vdk-sqlite\n", + " Downloading vdk_sqlite-0.1.1431637373.tar.gz (7.4 kB)\n", + " Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (2.32.4)\n", + "Requirement already satisfied: click==8.* in /usr/local/lib/python3.12/dist-packages (from vdk-core) (8.3.1)\n", + "Requirement already satisfied: pluggy in /usr/local/lib/python3.12/dist-packages (from vdk-core) (1.6.0)\n", + "Collecting click_log (from vdk-core)\n", + " Downloading click_log-0.4.0-py2.py3-none-any.whl.metadata (1.2 kB)\n", + "Requirement already satisfied: click-plugins in /usr/local/lib/python3.12/dist-packages (from vdk-core) (1.1.1.2)\n", + "Requirement already satisfied: tenacity in /usr/local/lib/python3.12/dist-packages (from vdk-core) (9.1.2)\n", + "Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from vdk-core) (75.2.0)\n", + "Requirement already satisfied: tabulate in /usr/local/lib/python3.12/dist-packages (from vdk-sqlite) (0.9.0)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests) (3.4.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests) (2025.11.12)\n", + "Downloading click_log-0.4.0-py2.py3-none-any.whl (4.3 kB)\n", + "Building wheels for collected packages: vdk-core, vdk-sqlite\n", + " Building wheel for vdk-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for vdk-core: filename=vdk_core-0.3.1466514302-py2.py3-none-any.whl size=163460 sha256=4c11c764993c7f81827f3ccc2de2f74f50cd4e6ab68b291f881a99b2060eedba\n", + " Stored in directory: /root/.cache/pip/wheels/ee/3f/c8/010983f3969c74948165dbfb7ab7621cdf4ff619cc0aca83c2\n", + " Building wheel for vdk-sqlite (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", + " Created wheel for vdk-sqlite: filename=vdk_sqlite-0.1.1431637373-py3-none-any.whl size=7065 sha256=c33784b1cbd3b9ba9e8dc4df163d952762180c7a24c8e9fe6934514da0257c90\n", + " Stored in directory: /root/.cache/pip/wheels/1e/fa/b3/11d78000303a2c009ad8ca4679a5b8b210a9074ae78956f045\n", + "Successfully built vdk-core vdk-sqlite\n", + "Installing collected packages: click_log, vdk-core, vdk-sqlite\n", + "Successfully installed click_log-0.4.0 vdk-core-0.3.1466514302 vdk-sqlite-0.1.1431637373\n" + ] + } + ], + "source": [ + "!pip install vdk-core vdk-sqlite requests" + ] + }, + { + "cell_type": "markdown", + "source": [ + "**Set Environment Variables for SQLite Ingestion**" + ], + "metadata": { + "id": "sjbcK6rol8ia" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "os.environ[\"VDK_DB_DEFAULT_TYPE\"] = \"SQLITE\"\n", + "os.environ[\"VDK_INGEST_METHOD_DEFAULT\"] = \"SQLITE\"" + ], + "metadata": { + "id": "Lh7kxenylKy2" + }, + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "%%writefile 10_delete_table.sql\n", + "DROP TABLE IF EXISTS rest_target_table;" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "olUlK83QlK2i", + "outputId": "79849252-933e-4c5b-f174-7fd1d2553a67" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Writing 10_delete_table.sql\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%writefile 20_create_table.sql\n", + "CREATE TABLE rest_target_table (userId, id, title, completed);" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CZ7Q07FumJb9", + "outputId": "0f85ce65-94f5-454b-aeea-31779cf1d8b6" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Writing 20_create_table.sql\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "%%writefile 
30_rest_ingest.py\n", + "import requests\n", + "\n", + "def run(job_input):\n", + "    \"\"\"Fetch one TODO item from the JSONPlaceholder REST API and send it for ingestion into rest_target_table.\"\"\"\n", + "    response = requests.get(\"https://jsonplaceholder.typicode.com/todos/1\", timeout=30)  # bounded wait: requests has no default timeout\n", + "    response.raise_for_status()  # fail the step on an HTTP error status instead of ingesting an error body\n", + "    payload = response.json()\n", + "\n", + "    # Hand the decoded JSON object to VDK for ingestion into the target table\n", + "    job_input.send_object_for_ingestion(\n", + "        payload=payload,\n", + "        destination_table=\"rest_target_table\"\n", + "    )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ToVbdX5DmMqT", + "outputId": "2177fd96-9ce2-442a-8796-cf9a5509baf1" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Writing 30_rest_ingest.py\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!vdk run ." + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xIaKrG5rmQ50", + "outputId": "1b0a6882-7eff-49e5-bab9-b431676e1b6c" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Versatile Data Kit (VDK)\n", + "Version: 0.3.1466514302\n", + "Build details: RELEASE_VERSION=0.3.1466514302, BUILD_DATE=Tue Sep 24 08:50:56 UTC 2024, BUILD_MACHINE_INFO=Linux runner--azerasq-project-28359933-concurrent-0 5.15.154+ #1 SMP Sat May 4 12:14:42 UTC 2024 x86_64 GNU/Linux, GITLAB_CI_JOB_ID=7902585266, GIT_COMMIT_SHA=880df089916daa16d3fe5fbe789b81dd6099911f, GIT_BRANCH=main\n", + "Python version: 3.12.12 64bit (/usr/bin/python3)\n", + "\n", + "Installed plugins:\n", + "vdk-sqlite (from package vdk-sqlite, version 0.1.1431637373)\n", + "--------------------------------------------------------------------------------\n", + "Run job with directory /content\n", + "Missing config.ini file.\n", + "2026-01-10 01:50:42,039 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection 
against local file database located at: /tmp/vdk-sqlite.db\n", + "2026-01-10 01:50:42,039 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n", + "2026-01-10 01:50:42,040 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n", + "-- job_name: content\n", + "-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n", + "DROP TABLE IF EXISTS rest_target_table;\n", + "\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. Query duration 00h:00m:00s\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_connection_b:135 execute_query [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching query result...\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:239 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query ...\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:242 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query SUCCEEDED.\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:249 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor ...\n", + "2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:251 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor SUCCEEDED.\n", + "2026-01-10 01:50:42,042 [VDK] content [INFO ] 
vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n", + "-- job_name: content\n", + "-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n", + " select 1 -- Testing if connection is alive.\n", + "2026-01-10 01:50:42,042 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. Query duration 00h:00m:00s\n", + "2026-01-10 01:50:42,042 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n", + "-- job_name: content\n", + "-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n", + "CREATE TABLE rest_target_table (userId, id, title, completed);\n", + "\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. 
Query duration 00h:00m:00s\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_connection_b:135 execute_query [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching query result...\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:239 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query ...\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:242 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query SUCCEEDED.\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:249 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor ...\n", + "2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:251 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor SUCCEEDED.\n", + "2026-01-10 01:50:42,262 [VDK] content [INFO ] vdk.internal.builtin_plugins.r file_based_step.py:119 run_python_step [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Entering 30_rest_ingest.py#run(...) ...\n", + "2026-01-10 01:50:42,377 [VDK] content [INFO ] vdk.internal.builtin_plugins.i ingester_router.py:64 send_object_for_[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Sending object for ingestion with method: sqlite and target: None\n", + "2026-01-10 01:50:42,867 [VDK] content [INFO ] numexpr.utils utils.py:164 _init_num_thread[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- NumExpr defaulting to 2 threads.\n", + "2026-01-10 01:50:43,154 [VDK] content [INFO ] vdk.internal.builtin_plugins.r file_based_step.py:125 run_python_step [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Exiting 30_rest_ingest.py#run(...) 
SUCCESS\n", + "2026-01-10 01:50:45,155 [VDK] content [INFO ] vdk.plugin.sqlite.ingest_to_sq ingest_to_sqlite.py:76 ingest_payload [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Ingesting payloads for target: /tmp/vdk-sqlite.db; collection_id: content|317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n", + "2026-01-10 01:50:45,156 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n", + "2026-01-10 01:50:45,165 [VDK] content [INFO ] vdk.internal.builtin_plugins.i ingester_base.py:584 close_now [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Ingester statistics: \n", + "\t\tSuccessful uploads: 1\n", + "\t\tFailed uploads: 0\n", + "\t\tIngesting plugin errors: None\n", + "\t\t\n", + "2026-01-10 01:50:45,166 [VDK] content [INFO ] vdk.internal.builtin_plugins.r cli_run.py:170 __log_short_exec[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Job execution result: SUCCESS\n", + "Step results:\n", + "10_delete_table.sql - SUCCESS\n", + "20_create_table.sql - SUCCESS\n", + "30_rest_ingest.py - SUCCESS\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!vdk sqlite-query -q 'SELECT * FROM rest_target_table'" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OdonSoTtmTco", + "outputId": "978f69eb-c39e-4a4c-da8d-c22543c6f5ad" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n", + " userId id title completed\n", + "-------- ---- ------------------ -----------\n", + " 1 1 delectus aut autem 0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "9NUHJHASmZKl" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No 
newline at end of file