Merged (changes from 1 commit)
@@ -13,15 +13,15 @@
"snowflake" : {
"catalog-name": "MyCatalog",
"external-volume": "MyNewVolume",
"url": "jdbc:snowflake://${SNOWFLAKE_ID}.snowflakecomputing.com/?user=${SNOWFLAKE_USER}&password=${SNOWFLAKE_PASSWORD}&warehouse=COMPUTE_WH&db=MYSNOWFLAKEDB&schema=public&disableSslHostnameVerification=true"
"url": "${SNOWFLAKE_JDBC_URL}"
}
},
"connectors" : {
"iceberg" : {
"warehouse":"s3://my-iceberg-table-test",
"catalog-impl":"org.apache.iceberg.aws.glue.GlueCatalog",
"io-impl":"org.apache.iceberg.aws.s3.S3FileIO",
"catalog-name": "mydatabase",
"catalog-name": "mycatalog",
"catalog-database": "mydatabase"
}
},
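Since the hard-coded JDBC URL above is replaced by the `${SNOWFLAKE_JDBC_URL}` placeholder, the full URL now has to be assembled outside the config. A minimal sketch of building such a URL; the account and credential values are placeholders, and `snowflake_jdbc_url` is a hypothetical helper, not part of DataSQRL:

```python
from urllib.parse import urlencode

def snowflake_jdbc_url(account: str, **params: str) -> str:
    """Assemble a Snowflake JDBC URL of the shape used in the original config."""
    return f"jdbc:snowflake://{account}.snowflakecomputing.com/?{urlencode(params)}"

# Hypothetical values; export the result as SNOWFLAKE_JDBC_URL for the container.
url = snowflake_jdbc_url(
    "myaccount",
    user="myuser",
    warehouse="COMPUTE_WH",
    db="MYSNOWFLAKEDB",
    schema="public",
)
```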
6 changes: 3 additions & 3 deletions getting-started-examples/01_kafka_to_console/README.md
@@ -1,16 +1,16 @@
# Kafka-to-Kafka with Avro using DataSQRL
# Kafka to Console with Avro using DataSQRL

This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from a kafka topic and prints output to console
- Reads data from a Kafka topic and prints output to console
- Kafka is part of the DataSQRL package.

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.1 run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:latest run -c package.json
```

> **Collaborator comment:** I would say to lock the version; that way we know it needs to be bumped, and CI runs to validate the change.

## Generate Data
6 changes: 3 additions & 3 deletions getting-started-examples/02_kafka_to_kafka/README.md
@@ -1,16 +1,16 @@
# Kafka-to-Kafka with Avro using DataSQRL
# Kafka to Kafka with Avro using DataSQRL

This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from a kafka topic and writes to another kafka topic
- Reads data from a Kafka topic and writes to another Kafka topic
- Kafka is part of the DataSQRL package.

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.1 run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:latest run -c package.json
```

## Generate Data
@@ -1,8 +1,8 @@
# Kafka-to-Kafka with Avro using DataSQRL
# Kafka Join using DataSQRL

This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from two kafka topics and combines the data from two streams using temporal join
- Reads data from two Kafka topics and combines the data from two streams using temporal join
- Writes output to another Kafka topic
- Kafka is part of the DataSQRL package.
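A temporal join pairs each event from one stream with the version of the other stream that was current at the event's timestamp. The semantics can be sketched in plain Python; this is an illustration only, not how Flink or DataSQRL implements it:

```python
import bisect

def temporal_join(events, versions):
    """Join (ts, key, payload) events against versioned state.

    `versions` maps key -> list of (ts, value) sorted by ts; each event
    picks up the latest value whose timestamp is <= the event's timestamp.
    """
    joined = []
    for ts, key, payload in events:
        history = versions.get(key, [])
        # Rightmost version whose timestamp is <= the event timestamp.
        i = bisect.bisect_right(history, (ts, float("inf"))) - 1
        value = history[i][1] if i >= 0 else None
        joined.append((ts, key, payload, value))
    return joined
```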

@@ -13,7 +13,7 @@ This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a
Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.1 run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:latest run -c package.json
```

## Generate Data
@@ -9,13 +9,13 @@ This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a
Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build -v $PWD/data:/data datasqrl/cmd:0.7.1 run -c package.json
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build datasqrl/cmd:latest run -c package.json
```

> [!NOTE]
> Iceberg files will be stored in the `warehouse` directory set by `package.json`

## Output

* There should be iceberg files and folders generated in `$PWD/data/iceberg` directory
* There should be Iceberg files and folders generated in the `$PWD/warehouse` directory
* Data for the output table will reside in `ProcessedData` (as defined in the SQRL script)
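With the Hadoop catalog, the warehouse is just a local directory tree in which each table directory contains a `metadata` subdirectory. A quick way to confirm that output landed; the layout assumed here is the usual Hadoop-catalog convention, not verified against this example:

```python
import os

def find_iceberg_tables(warehouse: str) -> list:
    """Return directories that look like Iceberg tables (they contain `metadata/`)."""
    tables = []
    for root, dirs, _files in os.walk(warehouse):
        if "metadata" in dirs:
            tables.append(root)
    return sorted(tables)
```

After the pipeline has processed data, calling `find_iceberg_tables("warehouse")` should list the `ProcessedData` table directory.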
@@ -14,7 +14,7 @@
},
"connectors": {
"iceberg": {
"warehouse": "/data/iceberg",
"warehouse": "warehouse",
"catalog-type": "hadoop",
"catalog-name": "mycatalog"
}
7 files deleted.
27 changes: 27 additions & 0 deletions getting-started-examples/05_kafka_to_iceberg_local_test/README.md
@@ -0,0 +1,27 @@
# Kafka to Local Iceberg Warehouse Using DataSQRL

This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from a Kafka topic
- Writes data to an Iceberg table locally

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:
```bash
docker run -it --rm -p 8888:8888 -p 8081:8081 -p 9092:9092 -v $PWD:/build datasqrl/cmd:latest run -c package.json
```

## Generate Data

* Go to `data-generator` folder
* `python3 load_data.py <jsonl_file> <topic_name>`
* To send Contact data
```bash
python3 load_data.py contacts.jsonl contacts
```
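The `load_data.py` script itself isn't shown in this diff. The JSONL-reading half of such a loader might look like the sketch below; the Kafka-producing half is omitted, and `read_jsonl` is a hypothetical name, not the script's actual contents:

```python
import json
from pathlib import Path

def read_jsonl(path):
    """Parse one JSON record per non-empty line, as a JSONL loader typically does."""
    records = []
    for line in Path(path).read_text().splitlines():
        if line.strip():
            records.append(json.loads(line))
    return records
```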

## Output

* There should be Iceberg files and folders generated in the `$PWD/warehouse` directory
* Data for the output table will reside in `MyContacts` (as defined in the SQRL script)
Expand Up @@ -6,9 +6,9 @@ CREATE TABLE Contact (
WATERMARK FOR last_updated AS last_updated - INTERVAL '1' SECOND
) WITH (
'connector' = 'kafka',
'topic' = 'contact',
'properties.bootstrap.servers' = 'host.docker.internal:9092',
'properties.group.id' = 'group1_contacts',
'topic' = 'contacts',
'properties.bootstrap.servers' = '${KAFKA_BOOTSTRAP_SERVERS}',
'properties.group.id' = '${KAFKA_GROUP_ID}',
'scan.startup.mode' = 'earliest-offset',
'format' = 'flexible-json'
);
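The `${KAFKA_BOOTSTRAP_SERVERS}` and `${KAFKA_GROUP_ID}` placeholders are resolved from the environment at deploy time. Conceptually this is plain template substitution, sketched here with Python's stdlib as a simplification, not DataSQRL's actual mechanism:

```python
import os
from string import Template

ddl_fragment = "'properties.bootstrap.servers' = '${KAFKA_BOOTSTRAP_SERVERS}'"

# Assumed value for illustration; in these examples Kafka runs inside the container.
os.environ.setdefault("KAFKA_BOOTSTRAP_SERVERS", "localhost:9092")
resolved = Template(ddl_fragment).substitute(os.environ)
```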
@@ -0,0 +1,3 @@
IMPORT kafka-source.Contact AS _Contacts;

MyContacts := SELECT id, firstname, lastname, last_updated FROM _Contacts;
@@ -1,6 +1,6 @@
{
"version": "1",
"enabled-engines": ["flink", "iceberg"],
"enabled-engines": ["flink", "kafka", "iceberg"],
"script": {
"main": "kafka-to-iceberg.sqrl"
},
@@ -14,9 +14,12 @@
},
"connectors": {
"iceberg": {
"warehouse": "/data/iceberg",
"warehouse": "warehouse",
"catalog-type": "hadoop",
"catalog-name": "mycatalog"
}
},
"test-runner": {
"create-topics": ["contacts"]
}
}
28 changes: 0 additions & 28 deletions getting-started-examples/06_external_kafka_iceberg_test/README.md

3 files deleted.