|
| 1 | +{ |
| 2 | + "cells": [ |
| 3 | + { |
| 4 | + "cell_type": "code", |
| 5 | + "execution_count": 2, |
| 6 | + "metadata": {}, |
| 7 | + "outputs": [], |
| 8 | + "source": [ |
| 9 | + "from pyspark.sql import SparkSession\n", |
| 10 | + "\n", |
| 11 | + "ICEBERG_VERSION = \"1.8.1\"\n", |
| 12 | + "NESSIE_VERSION = \"0.103.0\"\n", |
| 13 | + "SPARK_VERSION = \"3.5\"\n", |
| 14 | + "\n", |
| 15 | + "spark = (\n", |
| 16 | + " SparkSession.builder.appName(\"IcebergNessieExample\")\n", |
| 17 | +    "    # Pull the Iceberg runtime and Nessie SQL extensions that match Spark 3.5 / Scala 2.12\n",
| 18 | +    "    .config(\"spark.jars.packages\", f\"org.apache.iceberg:iceberg-spark-runtime-{SPARK_VERSION}_2.12:{ICEBERG_VERSION},\"\n",
| 19 | +    "                                   f\"org.projectnessie.nessie-integrations:nessie-spark-extensions-{SPARK_VERSION}_2.12:{NESSIE_VERSION}\")\n",
| 20 | +    "    .config(\"spark.sql.extensions\", \"org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,\"\n",
| 21 | +    "                                    \"org.projectnessie.spark.extensions.NessieSparkSessionExtensions\")\n",
| 22 | +    "    # Register an Iceberg catalog named 'iceberg' (the name the SQL cells below use), backed by Nessie\n",
| 23 | +    "    .config(\"spark.sql.catalog.iceberg\", \"org.apache.iceberg.spark.SparkCatalog\")\n",
| 24 | +    "    .config(\"spark.sql.catalog.iceberg.catalog-impl\", \"org.apache.iceberg.nessie.NessieCatalog\")\n",
| 25 | +    "    .config(\"spark.sql.catalog.iceberg.uri\", \"http://nessie:19120/api/v1\")\n",
| 26 | +    "    .config(\"spark.sql.catalog.iceberg.ref\", \"main\")\n",
| 27 | +    "    .config(\"spark.sql.catalog.iceberg.warehouse\", \"s3a://warehouse/\")\n",
| 28 | +    "    # Point the S3A filesystem at the local MinIO container\n",
| 29 | +    "    .config(\"spark.hadoop.fs.s3a.endpoint\", \"http://minio:9000\")\n",
| 30 | +    "    .config(\"spark.hadoop.fs.s3a.access.key\", \"minioadmin\")\n",
| 31 | +    "    .config(\"spark.hadoop.fs.s3a.secret.key\", \"minioadmin\")\n",
| 32 | +    "    .config(\"spark.hadoop.fs.s3a.path.style.access\", \"true\")\n",
| 33 | +    "    .config(\"spark.hadoop.fs.s3a.impl\", \"org.apache.hadoop.fs.s3a.S3AFileSystem\")\n",
| 34 | +    "    .getOrCreate()  # if a session already exists, getOrCreate() returns it and these configs are ignored\n",
| 35 | + ")" |
| 36 | + ] |
| 37 | + }, |
| 38 | + { |
| 39 | + "cell_type": "code", |
| 40 | + "execution_count": 3, |
| 41 | + "metadata": {}, |
| 42 | + "outputs": [ |
| 43 | + { |
| 44 | + "data": { |
| 45 | + "text/plain": [ |
| 46 | + "[('spark.hadoop.fs.s3a.path.style.access', 'true'),\n", |
| 47 | + " ('spark.driver.port', '35325'),\n", |
| 48 | + " ('spark.sql.warehouse.dir',\n", |
| 49 | + " 'file:/home/iceberg/notebooks/notebooks/spark-warehouse'),\n", |
| 50 | + " ('spark.app.submitTime', '1742217572814'),\n", |
| 51 | + " ('spark.sql.catalog.iceberg.s3.path-style-access', 'true'),\n", |
| 52 | + " ('spark.sql.catalog.iceberg.s3.endpoint', 'http://minio:9000'),\n", |
| 53 | + " ('spark.sql.catalog.iceberg.type', 'nessie'),\n", |
| 54 | + " ('spark.app.id', 'local-1742217573458'),\n", |
| 55 | + " ('spark.hadoop.fs.s3a.access.key', 'minioadmin'),\n", |
| 56 | + " ('spark.serializer.objectStreamReset', '100'),\n", |
| 57 | + " ('spark.master', 'local[*]'),\n", |
| 58 | + " ('spark.driver.host', 'ff5452dce47b'),\n", |
| 59 | + " ('spark.submit.deployMode', 'client'),\n", |
| 60 | + " ('spark.sql.extensions',\n", |
| 61 | + " 'org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,org.projectnessie.spark.extensions.NessieSparkSessionExtensions'),\n", |
| 62 | + " ('spark.hadoop.fs.s3a.secret.key', 'minioadmin'),\n", |
| 63 | + " ('spark.driver.extraJavaOptions',\n", |
| 64 | + " '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'),\n", |
| 65 | + " ('spark.sql.catalog.iceberg.io-impl', 'org.apache.iceberg.aws.s3.S3FileIO'),\n", |
| 66 | + " ('spark.executor.id', 'driver'),\n", |
| 67 | + " ('spark.sql.catalog.iceberg.ref', 'main'),\n", |
| 68 | + " ('spark.app.name', 'PySparkShell'),\n", |
| 69 | + " ('spark.hadoop.fs.s3a.impl', 'org.apache.hadoop.fs.s3a.S3AFileSystem'),\n", |
| 70 | + " ('spark.sql.catalog.iceberg', 'org.apache.iceberg.spark.SparkCatalog'),\n", |
| 71 | + " ('spark.sql.catalogImplementation', 'hive'),\n", |
| 72 | + " ('spark.rdd.compress', 'True'),\n", |
| 73 | + " ('spark.executor.extraJavaOptions',\n", |
| 74 | + " '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'),\n", |
| 75 | + " ('spark.app.startTime', '1742217573143'),\n", |
| 76 | + " ('spark.submit.pyFiles', ''),\n", |
| 77 | + " ('spark.hadoop.fs.s3a.endpoint', 'http://minio:9000'),\n", |
| 78 | + " ('spark.sql.catalog.iceberg.warehouse', 's3a://warehouse'),\n", |
| 79 | + " ('spark.sql.catalog.iceberg.uri', 'http://nessie:19120/api/v1'),\n", |
| 80 | + " ('spark.ui.showConsoleProgress', 'true')]" |
| 81 | + ] |
| 82 | + }, |
| 83 | + "execution_count": 3, |
| 84 | + "metadata": {}, |
| 85 | + "output_type": "execute_result" |
| 86 | + } |
| 87 | + ], |
| 88 | + "source": [ |
| 89 | + "spark.sparkContext.getConf().getAll()" |
| 90 | + ] |
| 91 | + }, |
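| 92 | +  {
| 93 | +   "cell_type": "markdown",
| 94 | +   "metadata": {},
| 95 | +   "source": [
| 96 | +    "With the Nessie SQL extensions on the session (see `spark.sql.extensions` in the config dump above), we can sanity-check the catalog's connection to the Nessie server by listing its references. A minimal sketch, assuming the extensions loaded and the server at `http://nessie:19120` is reachable; a fresh Nessie instance should report a single `main` branch."
| 97 | +   ]
| 98 | +  },
| 99 | +  {
| 100 | +   "cell_type": "code",
| 101 | +   "execution_count": null,
| 102 | +   "metadata": {},
| 103 | +   "outputs": [],
| 104 | +   "source": [
| 105 | +    "# List Nessie references (branches/tags) visible through the 'iceberg' catalog\n",
| 106 | +    "spark.sql(\"LIST REFERENCES IN iceberg\").show(truncate=False)"
| 107 | +   ]
| 108 | +  },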
| 92 | + { |
| 93 | + "cell_type": "code", |
| 94 | + "execution_count": 4, |
| 95 | + "metadata": {}, |
| 96 | + "outputs": [ |
| 97 | + { |
| 98 | + "name": "stderr", |
| 99 | + "output_type": "stream", |
| 100 | + "text": [ |
| 101 | + "25/03/17 13:19:42 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout does not exist\n", |
| 102 | + "25/03/17 13:19:42 WARN HiveConf: HiveConf of name hive.stats.retries.wait does not exist\n", |
| 103 | + "25/03/17 13:19:43 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 2.3.0\n", |
| 104 | + "25/03/17 13:19:43 WARN ObjectStore: setMetaStoreSchemaVersion called but recording version is disabled: version = 2.3.0, comment = Set by MetaStore [email protected]\n", |
| 105 | + "25/03/17 13:19:43 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException\n" |
| 106 | + ] |
| 107 | + }, |
| 108 | + { |
| 109 | + "data": { |
| 110 | + "text/plain": [ |
| 111 | + "DataFrame[]" |
| 112 | + ] |
| 113 | + }, |
| 114 | + "execution_count": 4, |
| 115 | + "metadata": {}, |
| 116 | + "output_type": "execute_result" |
| 117 | + } |
| 118 | + ], |
| 119 | + "source": [ |
| 120 | +    "spark.sql('USE iceberg')"
| 121 | + ] |
| 122 | + }, |
| 123 | + { |
| 124 | + "cell_type": "code", |
| 125 | + "execution_count": 5, |
| 126 | + "metadata": {}, |
| 127 | + "outputs": [ |
| 128 | + { |
| 129 | + "data": { |
| 130 | + "text/plain": [ |
| 131 | + "DataFrame[]" |
| 132 | + ] |
| 133 | + }, |
| 134 | + "execution_count": 5, |
| 135 | + "metadata": {}, |
| 136 | + "output_type": "execute_result" |
| 137 | + } |
| 138 | + ], |
| 139 | + "source": [ |
| 140 | +    "spark.sql('CREATE NAMESPACE IF NOT EXISTS default')"
| 141 | + ] |
| 142 | + }, |
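| 143 | +  {
| 144 | +   "cell_type": "markdown",
| 145 | +   "metadata": {},
| 146 | +   "source": [
| 147 | +    "As a quick check that the namespace landed in Nessie, list the namespaces in the catalog (a small sketch; it assumes `iceberg` is still the session's current catalog):"
| 148 | +   ]
| 149 | +  },
| 150 | +  {
| 151 | +   "cell_type": "code",
| 152 | +   "execution_count": null,
| 153 | +   "metadata": {},
| 154 | +   "outputs": [],
| 155 | +   "source": [
| 156 | +    "# The newly created 'default' namespace should appear here\n",
| 157 | +    "spark.sql(\"SHOW NAMESPACES IN iceberg\").show()"
| 158 | +   ]
| 159 | +  },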
| 143 | + { |
| 144 | + "cell_type": "code", |
| 145 | + "execution_count": 6, |
| 146 | + "metadata": {}, |
| 147 | + "outputs": [ |
| 148 | + { |
| 149 | + "name": "stdout", |
| 150 | + "output_type": "stream", |
| 151 | + "text": [ |
| 152 | + "+---+-----+-------+\n", |
| 153 | + "| id| name| salary|\n", |
| 154 | + "+---+-----+-------+\n", |
| 155 | + "| 1|Alice|75000.0|\n", |
| 156 | + "| 2| Bob|80000.0|\n", |
| 157 | + "+---+-----+-------+\n", |
| 158 | + "\n" |
| 159 | + ] |
| 160 | + } |
| 161 | + ], |
| 162 | + "source": [ |
| 163 | + "spark.sql(\"CREATE TABLE IF NOT EXISTS default.employees (id INT, name STRING, salary DOUBLE) USING iceberg\")\n", |
| 164 | + "\n", |
| 165 | + "spark.sql(\"INSERT INTO default.employees VALUES (1, 'Alice', 75000), (2, 'Bob', 80000)\")\n", |
| 166 | + "\n", |
| 167 | + "df = spark.sql(\"SELECT * FROM default.employees\")\n", |
| 168 | + "df.show()" |
| 169 | + ] |
| 170 | + }, |
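| 171 | +  {
| 172 | +   "cell_type": "markdown",
| 173 | +   "metadata": {},
| 174 | +   "source": [
| 175 | +    "Every write to an Iceberg table commits a new snapshot, which Iceberg exposes through metadata tables. A minimal sketch using the standard `snapshots` metadata table; the `INSERT` above should show up as an `append` operation."
| 176 | +   ]
| 177 | +  },
| 178 | +  {
| 179 | +   "cell_type": "code",
| 180 | +   "execution_count": null,
| 181 | +   "metadata": {},
| 182 | +   "outputs": [],
| 183 | +   "source": [
| 184 | +    "# Inspect the table's snapshot history via the Iceberg metadata table\n",
| 185 | +    "spark.sql(\"SELECT committed_at, snapshot_id, operation FROM default.employees.snapshots\").show(truncate=False)"
| 186 | +   ]
| 187 | +  },
| 188 | +  {
| 189 | +   "cell_type": "markdown",
| 190 | +   "metadata": {},
| 191 | +   "source": [
| 192 | +    "Nessie's distinguishing feature is git-like branching of the whole catalog. The following is a hedged sketch of that workflow using the Nessie SQL extensions (the branch name `dev` is arbitrary): create a branch, switch to it, write, then merge back. Writes on `dev` stay invisible to readers of `main` until the merge."
| 193 | +   ]
| 194 | +  },
| 195 | +  {
| 196 | +   "cell_type": "code",
| 197 | +   "execution_count": null,
| 198 | +   "metadata": {},
| 199 | +   "outputs": [],
| 200 | +   "source": [
| 201 | +    "# Create an experimental branch off the current HEAD and switch to it\n",
| 202 | +    "spark.sql(\"CREATE BRANCH IF NOT EXISTS dev IN iceberg\")\n",
| 203 | +    "spark.sql(\"USE REFERENCE dev IN iceberg\")\n",
| 204 | +    "\n",
| 205 | +    "# This row exists only on 'dev' until the branch is merged\n",
| 206 | +    "spark.sql(\"INSERT INTO default.employees VALUES (3, 'Carol', 90000)\")\n",
| 207 | +    "\n",
| 208 | +    "# Merge 'dev' back into 'main' and return to 'main'\n",
| 209 | +    "spark.sql(\"MERGE BRANCH dev INTO main IN iceberg\")\n",
| 210 | +    "spark.sql(\"USE REFERENCE main IN iceberg\")"
| 211 | +   ]
| 212 | +  },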
| 171 | + { |
| 172 | + "cell_type": "code", |
| 173 | + "execution_count": null, |
| 174 | + "metadata": {}, |
| 175 | + "outputs": [], |
| 176 | + "source": [] |
| 177 | + } |
| 178 | + ], |
| 179 | + "metadata": { |
| 180 | + "kernelspec": { |
| 181 | + "display_name": "Python 3 (ipykernel)", |
| 182 | + "language": "python", |
| 183 | + "name": "python3" |
| 184 | + }, |
| 185 | + "language_info": { |
| 186 | + "codemirror_mode": { |
| 187 | + "name": "ipython", |
| 188 | + "version": 3 |
| 189 | + }, |
| 190 | + "file_extension": ".py", |
| 191 | + "mimetype": "text/x-python", |
| 192 | + "name": "python", |
| 193 | + "nbconvert_exporter": "python", |
| 194 | + "pygments_lexer": "ipython3", |
| 195 | + "version": "3.10.16" |
| 196 | + } |
| 197 | + }, |
| 198 | + "nbformat": 4, |
| 199 | + "nbformat_minor": 4 |
| 200 | +} |