Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"**Install VDK (`vdk-core`), the SQLite plugin (`vdk-sqlite`), and `requests` for the REST call**"
],
"metadata": {
"id": "LUK5BrjelQcX"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1mAnQLHEk4uz",
"outputId": "8b841e80-8931-4849-d73a-6af5f01e5c12"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting vdk-core\n",
" Downloading vdk_core-0.3.1466514302.tar.gz (119 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.1/119.1 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Collecting vdk-sqlite\n",
" Downloading vdk_sqlite-0.1.1431637373.tar.gz (7.4 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (2.32.4)\n",
"Requirement already satisfied: click==8.* in /usr/local/lib/python3.12/dist-packages (from vdk-core) (8.3.1)\n",
"Requirement already satisfied: pluggy in /usr/local/lib/python3.12/dist-packages (from vdk-core) (1.6.0)\n",
"Collecting click_log (from vdk-core)\n",
" Downloading click_log-0.4.0-py2.py3-none-any.whl.metadata (1.2 kB)\n",
"Requirement already satisfied: click-plugins in /usr/local/lib/python3.12/dist-packages (from vdk-core) (1.1.1.2)\n",
"Requirement already satisfied: tenacity in /usr/local/lib/python3.12/dist-packages (from vdk-core) (9.1.2)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.12/dist-packages (from vdk-core) (75.2.0)\n",
"Requirement already satisfied: tabulate in /usr/local/lib/python3.12/dist-packages (from vdk-sqlite) (0.9.0)\n",
"Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests) (3.4.4)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests) (3.11)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests) (2.5.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests) (2025.11.12)\n",
"Downloading click_log-0.4.0-py2.py3-none-any.whl (4.3 kB)\n",
"Building wheels for collected packages: vdk-core, vdk-sqlite\n",
" Building wheel for vdk-core (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for vdk-core: filename=vdk_core-0.3.1466514302-py2.py3-none-any.whl size=163460 sha256=4c11c764993c7f81827f3ccc2de2f74f50cd4e6ab68b291f881a99b2060eedba\n",
" Stored in directory: /root/.cache/pip/wheels/ee/3f/c8/010983f3969c74948165dbfb7ab7621cdf4ff619cc0aca83c2\n",
" Building wheel for vdk-sqlite (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for vdk-sqlite: filename=vdk_sqlite-0.1.1431637373-py3-none-any.whl size=7065 sha256=c33784b1cbd3b9ba9e8dc4df163d952762180c7a24c8e9fe6934514da0257c90\n",
" Stored in directory: /root/.cache/pip/wheels/1e/fa/b3/11d78000303a2c009ad8ca4679a5b8b210a9074ae78956f045\n",
"Successfully built vdk-core vdk-sqlite\n",
"Installing collected packages: click_log, vdk-core, vdk-sqlite\n",
"Successfully installed click_log-0.4.0 vdk-core-0.3.1466514302 vdk-sqlite-0.1.1431637373\n"
]
}
],
"source": [
"# Use %pip (not !pip) so packages install into the running kernel's environment.\n",
"# The VDK packages are pinned to the versions this notebook was last executed with.\n",
"%pip install vdk-core==0.3.1466514302 vdk-sqlite==0.1.1431637373 requests"
]
},
{
"cell_type": "markdown",
"source": [
"**Set environment variables so VDK uses SQLite as the default database and the default ingestion method**"
],
"metadata": {
"id": "sjbcK6rol8ia"
}
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"# Both settings must be in the environment before `vdk run` is launched from this notebook.\n",
"os.environ.update(\n",
"    {\n",
"        \"VDK_DB_DEFAULT_TYPE\": \"SQLITE\",\n",
"        \"VDK_INGEST_METHOD_DEFAULT\": \"SQLITE\",\n",
"    }\n",
")"
],
"metadata": {
"id": "Lh7kxenylKy2"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"source": [
"%%writefile 10_delete_table.sql\n",
"DROP TABLE IF EXISTS rest_target_table;"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "olUlK83QlK2i",
"outputId": "79849252-933e-4c5b-f174-7fd1d2553a67"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Writing 10_delete_table.sql\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%writefile 20_create_table.sql\n",
"CREATE TABLE rest_target_table (userId, id, title, completed);"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CZ7Q07FumJb9",
"outputId": "0f85ce65-94f5-454b-aeea-31779cf1d8b6"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Writing 20_create_table.sql\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%writefile 30_rest_ingest.py\n",
"import requests\n",
"\n",
"TODO_URL = \"https://jsonplaceholder.typicode.com/todos/1\"\n",
"# requests waits indefinitely unless a timeout is given, which would hang the job on a stalled connection.\n",
"REQUEST_TIMEOUT_SECONDS = 30\n",
"\n",
"\n",
"def run(job_input):\n",
"    \"\"\"VDK step: fetch one to-do record from the REST API and queue it for ingestion.\n",
"\n",
"    Args:\n",
"        job_input: object VDK passes to the step; only its\n",
"            send_object_for_ingestion method is used here.\n",
"\n",
"    Raises:\n",
"        requests.exceptions.RequestException: if the request times out,\n",
"            fails, or returns an HTTP error status.\n",
"    \"\"\"\n",
"    # Fetching from the API\n",
"    response = requests.get(TODO_URL, timeout=REQUEST_TIMEOUT_SECONDS)\n",
"    response.raise_for_status()\n",
"    payload = response.json()\n",
"\n",
"    # Sending data to the target database\n",
"    job_input.send_object_for_ingestion(\n",
"        payload=payload,\n",
"        destination_table=\"rest_target_table\"\n",
"    )"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ToVbdX5DmMqT",
"outputId": "2177fd96-9ce2-442a-8796-cf9a5509baf1"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Writing 30_rest_ingest.py\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!vdk run ."
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xIaKrG5rmQ50",
"outputId": "1b0a6882-7eff-49e5-bab9-b431676e1b6c"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"Versatile Data Kit (VDK)\n",
"Version: 0.3.1466514302\n",
"Build details: RELEASE_VERSION=0.3.1466514302, BUILD_DATE=Tue Sep 24 08:50:56 UTC 2024, BUILD_MACHINE_INFO=Linux runner--azerasq-project-28359933-concurrent-0 5.15.154+ #1 SMP Sat May 4 12:14:42 UTC 2024 x86_64 GNU/Linux, GITLAB_CI_JOB_ID=7902585266, GIT_COMMIT_SHA=880df089916daa16d3fe5fbe789b81dd6099911f, GIT_BRANCH=main\n",
"Python version: 3.12.12 64bit (/usr/bin/python3)\n",
"\n",
"Installed plugins:\n",
"vdk-sqlite (from package vdk-sqlite, version 0.1.1431637373)\n",
"--------------------------------------------------------------------------------\n",
"Run job with directory /content\n",
"Missing config.ini file.\n",
"2026-01-10 01:50:42,039 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n",
"2026-01-10 01:50:42,039 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n",
"2026-01-10 01:50:42,040 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n",
"-- job_name: content\n",
"-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n",
"DROP TABLE IF EXISTS rest_target_table;\n",
"\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. Query duration 00h:00m:00s\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_connection_b:135 execute_query [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching query result...\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:239 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query ...\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:242 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query SUCCEEDED.\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:249 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor ...\n",
"2026-01-10 01:50:42,041 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:251 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor SUCCEEDED.\n",
"2026-01-10 01:50:42,042 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n",
"-- job_name: content\n",
"-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n",
" select 1 -- Testing if connection is alive.\n",
"2026-01-10 01:50:42,042 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. Query duration 00h:00m:00s\n",
"2026-01-10 01:50:42,042 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:201 _execute_operati[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query:\n",
"-- job_name: content\n",
"-- op_id: 317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n",
"CREATE TABLE rest_target_table (userId, id, title, completed);\n",
"\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:103 execute [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Executing query SUCCEEDED. Query duration 00h:00m:00s\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_connection_b:135 execute_query [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching query result...\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:239 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query ...\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:242 fetchall [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Fetching all results from query SUCCEEDED.\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:249 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor ...\n",
"2026-01-10 01:50:42,055 [VDK] content [INFO ] vdk.internal.builtin_plugins.c managed_cursor.py:251 close [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Closing DB cursor SUCCEEDED.\n",
"2026-01-10 01:50:42,262 [VDK] content [INFO ] vdk.internal.builtin_plugins.r file_based_step.py:119 run_python_step [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Entering 30_rest_ingest.py#run(...) ...\n",
"2026-01-10 01:50:42,377 [VDK] content [INFO ] vdk.internal.builtin_plugins.i ingester_router.py:64 send_object_for_[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Sending object for ingestion with method: sqlite and target: None\n",
"2026-01-10 01:50:42,867 [VDK] content [INFO ] numexpr.utils utils.py:164 _init_num_thread[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- NumExpr defaulting to 2 threads.\n",
"2026-01-10 01:50:43,154 [VDK] content [INFO ] vdk.internal.builtin_plugins.r file_based_step.py:125 run_python_step [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Exiting 30_rest_ingest.py#run(...) SUCCESS\n",
"2026-01-10 01:50:45,155 [VDK] content [INFO ] vdk.plugin.sqlite.ingest_to_sq ingest_to_sqlite.py:76 ingest_payload [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Ingesting payloads for target: /tmp/vdk-sqlite.db; collection_id: content|317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841\n",
"2026-01-10 01:50:45,156 [VDK] content [INFO ] vdk.plugin.sqlite.sqlite_conne sqlite_connection.py:29 new_connection [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n",
"2026-01-10 01:50:45,165 [VDK] content [INFO ] vdk.internal.builtin_plugins.i ingester_base.py:584 close_now [id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Ingester statistics: \n",
"\t\tSuccessful uploads: 1\n",
"\t\tFailed uploads: 0\n",
"\t\tIngesting plugin errors: None\n",
"\t\t\n",
"2026-01-10 01:50:45,166 [VDK] content [INFO ] vdk.internal.builtin_plugins.r cli_run.py:170 __log_short_exec[id:317624ed-7ba7-4632-af5c-9d04c87281d1-1768009841-51ceb]- Job execution result: SUCCESS\n",
"Step results:\n",
"10_delete_table.sql - SUCCESS\n",
"20_create_table.sql - SUCCESS\n",
"30_rest_ingest.py - SUCCESS\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!vdk sqlite-query -q 'SELECT * FROM rest_target_table'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OdonSoTtmTco",
"outputId": "978f69eb-c39e-4a4c-da8d-c22543c6f5ad"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
"Creating new connection against local file database located at: /tmp/vdk-sqlite.db\n",
" userId id title completed\n",
"-------- ---- ------------------ -----------\n",
" 1 1 delectus aut autem 0\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "9NUHJHASmZKl"
},
"execution_count": null,
"outputs": []
}
]
}
Loading