Merged
This file was deleted.

83 changes: 0 additions & 83 deletions getting-started-examples/00_getting_started/README.md

This file was deleted.

10 changes: 4 additions & 6 deletions getting-started-examples/01_kafka_to_console/README.md
Original file line number Diff line number Diff line change
@@ -3,21 +3,19 @@
This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from a kafka topic and prints output to console
- Kafka is part of datasqrl package.


- Kafka is part of the DataSQRL package.

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.0 run -c package.json
```

## Generate Data

* Go to `data-generator` folder
```bash
python3 send_kafka_avro_records.py ../kafka-source/contact.avsc data.jsonl contact localhost:9092
```
python3 send_kafka_avro_records.py ../kafka-source/contact.avsc data.jsonl contact localhost:9092
```
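The producer script itself is not part of this diff, but its input format can be sketched: each line of `data.jsonl` is one JSON record, which the script Avro-encodes against `contact.avsc` and sends to the `contact` topic. A minimal stdlib-only sketch of the JSONL-parsing step (the sample field names below are assumptions for illustration, not the repo's actual schema):

```python
import json

def read_jsonl(text: str) -> list[dict]:
    """Parse JSONL input: one JSON object per non-empty line."""
    records = []
    for line in text.splitlines():
        line = line.strip()
        if line:  # skip blank lines
            records.append(json.loads(line))
    return records

# Hypothetical sample rows mirroring the contact fields used in these examples
sample = (
    '{"firstname": "Ada", "lastname": "Lovelace", "last_updated": 1700000000}\n'
    '{"firstname": "Alan", "lastname": "Turing", "last_updated": 1700000005}\n'
)
records = read_jsonl(sample)
print(len(records), records[0]["firstname"])
```

The real script would additionally load the Avro schema and use a Kafka client to produce each parsed record to the broker passed on the command line.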
@@ -1,4 +1,4 @@
IMPORT kafka-source.Contact;

-- Export the results to console (print)
EXPORT Contact TO print.Contact;
EXPORT Contact TO print.Contact;
5 changes: 0 additions & 5 deletions getting-started-examples/01_kafka_to_console/package.json
@@ -13,10 +13,5 @@
},
"test-runner": {
"create-topics": ["contact", "contact"]
},
"dependencies": {
"metrics": {
"folder": "kafka-source"
}
}
}
8 changes: 3 additions & 5 deletions getting-started-examples/02_kafka_to_kafka/README.md
@@ -3,16 +3,14 @@
This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- Reads data from a kafka topic and writes to another kafka topic
- Kafka is part of datasqrl package.


- Kafka is part of the DataSQRL package.

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.0 run -c package.json
```

## Generate Data
@@ -25,4 +23,4 @@ docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:d

## Output

* Updated records should be generated in contactupdated topic.
* Updated records should be generated in the `contactupdated` topic.
2 changes: 1 addition & 1 deletion getting-started-examples/02_kafka_to_kafka/kafka-test.sqrl
@@ -2,4 +2,4 @@ IMPORT kafka-source.Contact as Contacts;

ContactsUpdated := SELECT firstname, lastname, last_updated FROM Contacts;

EXPORT ContactsUpdated TO kafkasink.ContactUpdated;
EXPORT ContactsUpdated TO kafka-sink.ContactUpdated;
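The SQRL projection above simply narrows each record to three fields before re-exporting it. In plain Python terms (a sketch of the per-record transform only, not DataSQRL's actual execution model), each Kafka message is reshaped like this:

```python
def project_contact(record: dict) -> dict:
    """Equivalent of: SELECT firstname, lastname, last_updated FROM Contacts."""
    return {k: record[k] for k in ("firstname", "lastname", "last_updated")}

# Hypothetical input message; extra fields like "id" are dropped by the projection
msg = {"id": 7, "firstname": "Grace", "lastname": "Hopper", "last_updated": 1700000000}
out = project_contact(msg)
print(out)
```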
8 changes: 0 additions & 8 deletions getting-started-examples/02_kafka_to_kafka/package.json
@@ -13,13 +13,5 @@
},
"test-runner": {
"create-topics": ["contact", "contactupdated"]
},
"dependencies": {
"kafkasource": {
"folder": "kafka-source"
},
"kafkasink": {
"folder": "kafka-sink"
}
}
}
@@ -13,7 +13,7 @@ This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a
Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:0.7.0 run -c package.json
```

## Generate Data
@@ -32,4 +32,4 @@ docker run -it --rm -p 8888:8888 -p 9092:9092 -v $PWD:/build datasqrl/cmd:d

## Output

* Updated records should be generated in enrichedcontact topic.
* Updated records should be generated in the `enrichedcontact` topic.
@@ -7,4 +7,4 @@ EnrichedContacts := SELECT c.id, c.firstname, c.lastname, o.orgname, c.last_upda
ON c.id = o.userid
AND c.last_updated BETWEEN o.last_updated - INTERVAL '30' SECOND AND o.last_updated + INTERVAL '30' SECOND;

EXPORT EnrichedContacts TO kafkasink.EnrichedContact;
EXPORT EnrichedContacts TO kafka-sink.EnrichedContact;
@@ -13,13 +13,5 @@
},
"test-runner": {
"create-topics": ["contact", "organization", "enrichedcontact"]
},
"dependencies": {
"kafkasource": {
"folder": "kafka-source"
},
"kafkasink": {
"folder": "kafka-sink"
}
}
}
@@ -5,23 +5,23 @@ This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a
- This example uses kafka that is running outside of docker on host machine
- Reads data from two kafka topics and combines the data from two streams using temporal join
- Writes output to another kafka topic running on host machine
- We are not using kafka running inside of datasqrl
- We are not using the Kafka instance that runs inside DataSQRL

## A few things to note

1. `'properties.bootstrap.servers' = 'host.docker.internal:9092'`, -> this tells docker to connect to your host machine's kafka
2. You don't need to create output topic `enrichedcontact`
3. `create-topics` array from package.json was removed since we are using external kafka where we expect source topics to be present
4. We removed kafka engine from `enabled-engines` array in package.json

* `'properties.bootstrap.servers' = 'host.docker.internal:9092'` -> this tells the pipeline running inside Docker to connect to Kafka on your host machine
* You don't need to create the output topic `enrichedcontact`
* The `create-topics` array was removed from `package.json` since we are using external Kafka, where we expect the source topics to already exist
* We removed the `kafka` engine from the `enabled-engines` array in `package.json`

## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:
Note: We removed `-p 9092:9092` as we are using our own kafka running locally on host machine now
```bash
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build -v $PWD/data:/data datasqrl/cmd:0.7.0 run -c package.json
```
> [!NOTE]
> We removed `-p 9092:9092` since we are now using our own Kafka running locally on the host machine

## Generate Data

@@ -36,9 +36,6 @@ docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build datasqrl/cmd:dev ru
python3 load_data.py organization.jsonl localhost:9092 organization
```


## Output

* Updated records should be generated in enrichedcontact topic.


* Updated records should be generated in the `enrichedcontact` topic.
@@ -1,16 +1,15 @@
IMPORT kafkasource.Contact AS Contacts;
IMPORT kafkasource.Organization AS Organizations;
IMPORT kafka-source.Contact AS Contacts;
IMPORT kafka-source.Organization AS Organizations;

/*
Joins Contacts and Organizations
Matches rows where the contact's id equals the organization's userid
But only if their last_updated timestamps are within ±30 seconds of each other
*/

EnrichedContacts := SELECT c.id, c.firstname, c.lastname, o.orgname, c.last_updated
FROM Contacts c
JOIN Organizations o
ON c.id = o.userid
AND c.last_updated BETWEEN o.last_updated - INTERVAL '30' SECOND AND o.last_updated + INTERVAL '30' SECOND;

EXPORT EnrichedContacts TO kafkasink.EnrichedContact;
EXPORT EnrichedContacts TO kafka-sink.EnrichedContact;
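The interval join above can be simulated in plain Python to see which pairs match (a sketch of the matching condition only; real Flink execution handles watermarks and state incrementally, and the sample rows below are made up for illustration):

```python
from datetime import datetime, timedelta

contacts = [
    {"id": 1, "firstname": "Ada", "lastname": "Lovelace",
     "last_updated": datetime(2024, 1, 1, 12, 0, 0)},
    {"id": 2, "firstname": "Alan", "lastname": "Turing",
     "last_updated": datetime(2024, 1, 1, 12, 0, 0)},
]
organizations = [
    # 10 s from contact 1 -> inside the ±30 s window
    {"userid": 1, "orgname": "Analytical Engines",
     "last_updated": datetime(2024, 1, 1, 12, 0, 10)},
    # 5 min from contact 2 -> outside the window, no match
    {"userid": 2, "orgname": "Bletchley",
     "last_updated": datetime(2024, 1, 1, 12, 5, 0)},
]

WINDOW = timedelta(seconds=30)

def interval_join(contacts, organizations):
    """c.id = o.userid AND c.last_updated BETWEEN o.last_updated - 30s AND o.last_updated + 30s"""
    out = []
    for c in contacts:
        for o in organizations:
            if c["id"] == o["userid"] and abs(c["last_updated"] - o["last_updated"]) <= WINDOW:
                out.append({"id": c["id"], "firstname": c["firstname"],
                            "lastname": c["lastname"], "orgname": o["orgname"],
                            "last_updated": c["last_updated"]})
    return out

enriched = interval_join(contacts, organizations)
print([r["orgname"] for r in enriched])
```

Note that `BETWEEN o.last_updated - 30s AND o.last_updated + 30s` is equivalent to requiring the absolute timestamp difference to be at most 30 seconds, which is what the sketch checks.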
@@ -13,13 +13,5 @@
"table.exec.source.idle-timeout": "60 s"
}
}
},
"dependencies": {
"kafkasource": {
"folder": "kafka-source"
},
"kafkasink": {
"folder": "kafka-sink"
}
}
}
17 changes: 5 additions & 12 deletions getting-started-examples/05_file_iceberg_test/README.md
@@ -9,20 +9,13 @@ This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a
Run the following command from the project root where your `package.json` and SQRL scripts reside:

```bash
docker run -it --rm \
-p 8888:8888 -p 8081:8081 \
-v $PWD:/build \
-e LOCAL_WAREHOUSE_DIR=warehouse \
datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build -v $PWD/data:/data datasqrl/cmd:0.7.0 run -c package.json
```
Note: It will store iceberg files in `warehouse` directory



> [!NOTE]
> Iceberg files will be stored in the `warehouse` directory configured in `package.json`

## Output

* There should be iceberg files and folders generated in warehouse directory
* Data for the output table will reside in ProcessedData (as defined in the sqrl script)


* There should be Iceberg files and folders generated in the `$PWD/data/iceberg` directory
* Data for the output table will reside in `ProcessedData` (as defined in the SQRL script)
@@ -1,4 +1,3 @@
IMPORT file-data.MyFile AS InputFile;
IMPORT testdata.MyFile AS InputFile;

ProcessedData := SELECT id, name, amount, event_time FROM InputFile;

5 changes: 0 additions & 5 deletions getting-started-examples/05_file_iceberg_test/package.json
@@ -18,10 +18,5 @@
"catalog-type": "hadoop",
"catalog-name": "mycatalog"
}
},
"dependencies": {
"file-data": {
"folder": "file-data"
}
}
}
18 changes: 5 additions & 13 deletions getting-started-examples/06_external_kafka_iceberg_test/README.md
@@ -2,21 +2,17 @@

This project demonstrates how to use [DataSQRL](https://datasqrl.com) to build a streaming pipeline that:

- This example uses kafka that is running outside of datasqrl docker on host machine
- This example uses Kafka running outside the DataSQRL Docker container, on the host machine
- Reads data from kafka topic and writes to an iceberg table locally


## 🐳 Running DataSQRL

Run the following command from the project root where your `package.json` and SQRL scripts reside:
Note: We removed `-p 9092:9092` as we are using our own kafka running locally on host machine now
```bash
docker run -it --rm \
-p 8888:8888 -p 8081:8081 \
-v $PWD:/build \
-e LOCAL_WAREHOUSE_DIR=warehouse \
datasqrl/cmd:dev run -c package.json
docker run -it --rm -p 8888:8888 -p 8081:8081 -v $PWD:/build -v $PWD/data:/data datasqrl/cmd:0.7.0 run -c package.json
```
> [!NOTE]
> We removed `-p 9092:9092` since we are now using our own Kafka running locally on the host machine

## Generate Data

@@ -27,10 +23,6 @@
python3 load_data.py contacts.jsonl contact
```



## Output

* Updated records should be generated in enrichedcontact topic.


* Updated records should be written to the `EnrichedContact` Iceberg table.
@@ -4,6 +4,6 @@ CREATE TABLE EnrichedContact (
'connector' = 'iceberg',
'catalog-name' = 'mycatalog',
'catalog-type' = 'hadoop',
'warehouse' = '${LOCAL_WAREHOUSE_DIR}',
'warehouse' = '/data/iceberg',
'format-version' = '2'
);
@@ -6,4 +6,3 @@ EnrichedContacts := SELECT
lastname,
last_updated
FROM Contacts;
