Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
b3d829e
added generateFileMetaData
EricBenschneider Nov 28, 2024
30edf69
added tests for meta data generation
EricBenschneider Nov 29, 2024
b9f5913
updated read kernel and readMetaData for meta data generation
EricBenschneider Jan 11, 2025
312b30c
used matrix/frame flag for meta data generation
EricBenschneider Feb 4, 2025
030aa48
ran clang-format
EricBenschneider Feb 5, 2025
37f7d68
fixed runtime error when trying to save generated file
EricBenschneider Feb 5, 2025
698e3ca
1
EricBenschneider Feb 6, 2025
8abddf6
added positional map utility functions
EricBenschneider Feb 6, 2025
4bdbcf1
using positional map for frame reading
EricBenschneider Feb 6, 2025
7f29271
posMap working but indexes screwed
EricBenschneider Feb 6, 2025
26ef589
new tests
EricBenschneider Feb 6, 2025
7765485
update tests to not use newline
EricBenschneider Feb 6, 2025
e8530f5
wsl stuff
EricBenschneider Feb 6, 2025
61f6673
refactor old readcsvfile for frames
EricBenschneider Feb 7, 2025
8d71bcc
added daphne file util to csv
EricBenschneider Feb 9, 2025
45ee7c6
conv to unix file endings
EricBenschneider Feb 9, 2025
f066c11
added config for read optimizations
EricBenschneider Feb 9, 2025
63175fe
fixed flag usage
EricBenschneider Feb 9, 2025
51b7842
added config for read optimization
EricBenschneider Feb 9, 2025
744cf21
metadata test fix
EricBenschneider Feb 9, 2025
ace898a
added generateFileMetaData
EricBenschneider Nov 28, 2024
a11191d
added tests for meta data generation
EricBenschneider Nov 29, 2024
db777dc
updated read kernel and readMetaData for meta data generation
EricBenschneider Jan 11, 2025
bd94011
updated DaphneDSL to use label flag
EricBenschneider Feb 11, 2025
033ee14
improved generateMetaDataTest
EricBenschneider Feb 12, 2025
a607add
added systest for reading frame without meta data
EricBenschneider Feb 12, 2025
d633884
Revert "updated DaphneDSL to use label flag"
EricBenschneider Feb 12, 2025
5fdca5a
removed label flag
EricBenschneider Feb 12, 2025
04d5f8e
improved generateMetaDataTest
EricBenschneider Feb 12, 2025
fed8e8b
added sample rows for meta data generation
EricBenschneider Feb 13, 2025
6df7b5d
refactor generateMetaData
EricBenschneider Feb 13, 2025
7d7a1d7
fixed usage of singlevaluetype
EricBenschneider Feb 13, 2025
d0842c9
updated generateMetadata test
EricBenschneider Feb 13, 2025
2bee195
moved isMatrix flag
EricBenschneider Feb 13, 2025
a9e2b5e
fixed single value type in test
EricBenschneider Feb 13, 2025
a517fb1
added multi line support
EricBenschneider Feb 13, 2025
e2f508e
finished bin files and added tests
EricBenschneider Feb 9, 2025
69d9099
added support for dense matrix
EricBenschneider Feb 9, 2025
e7400f0
added support for csr matrix
EricBenschneider Feb 9, 2025
353d330
changes to matrix optimization
EricBenschneider Feb 11, 2025
32b54e6
added readopt commandline flag
EricBenschneider Feb 15, 2025
2f07403
used dbdf file ending
EricBenschneider Feb 15, 2025
3ff3cfe
finished frames opt
EricBenschneider Feb 15, 2025
c3ef683
added evaluation artifacts
EricBenschneider Feb 15, 2025
861a35e
positional map overhaul
EricBenschneider Feb 15, 2025
abb83c7
Revert "positional map overhaul"
EricBenschneider Feb 15, 2025
3379270
positional map update
EricBenschneider Feb 15, 2025
d2b12fd
posmap final
EricBenschneider Feb 15, 2025
4f86996
removed binary optimization and posmap for matrix
EricBenschneider Feb 16, 2025
644e699
removed binary optimization
EricBenschneider Feb 16, 2025
b9335ef
removed posmap matrix tests
EricBenschneider Feb 16, 2025
0801805
removed prints
EricBenschneider Feb 16, 2025
2f12c70
added evaluation artifacts
EricBenschneider Feb 16, 2025
f86b48a
used time measuring correctly
EricBenschneider Feb 17, 2025
c8d8282
fixed tests and rebase errors
EricBenschneider Feb 17, 2025
afb6a65
updated tests
EricBenschneider Feb 18, 2025
4d262d5
strings without multiline
EricBenschneider Feb 21, 2025
00708d0
added double quote encoding
EricBenschneider Feb 21, 2025
bbbc7ff
added fixedstr matrix optimization
EricBenschneider Feb 21, 2025
edecf1b
used one read for posmap reading
EricBenschneider Feb 21, 2025
d32dd75
optimized positional map
EricBenschneider Feb 21, 2025
6ae0e68
added positional map for string matrix
EricBenschneider Feb 22, 2025
434874d
added positional map for general matrix
EricBenschneider Feb 22, 2025
f03b0ac
last fixes
EricBenschneider Feb 22, 2025
029515d
test update
EricBenschneider Feb 22, 2025
70ba3a6
read matrix string opt
EricBenschneider Feb 23, 2025
6ea67b8
added experiment script
EricBenschneider Feb 23, 2025
ff8f53c
precomputed nextPos
EricBenschneider Feb 23, 2025
39d6911
ran first experiments and created charts
EricBenschneider Feb 23, 2025
d65c9fb
changed usage to single flag
EricBenschneider Feb 23, 2025
377a781
added documentation
EricBenschneider Feb 24, 2025
a16d3c4
changed flag default to false
EricBenschneider Feb 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions UserConfig.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"use_positional_map": false,
"matmul_vec_size_bits": 0,
"matmul_tile": false,
"matmul_use_fixed_tile_sizes": true,
Expand Down
41 changes: 37 additions & 4 deletions containers/entrypoint-interactive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,29 @@
# limitations under the License.

/usr/sbin/sshd -f /etc/ssh/sshd_config


# Allow root login and password authentication
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/KbdInteractiveAuthentication no/KbdInteractiveAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/ChallengeResponseAuthentication no/ChallengeResponseAuthentication yes/' /etc/ssh/sshd_config

# Allow port forwarding
sed -i 's/#AllowTcpForwarding yes/AllowTcpForwarding yes/' /etc/ssh/sshd_config
sed -i 's/#GatewayPorts no/GatewayPorts yes/' /etc/ssh/sshd_config

#enable logging
sed -i 's/#SyslogFacility AUTH/SyslogFacility AUTH/' /etc/ssh/sshd_config
sed -i 's/#LogLevel INFO/LogLevel INFO/' /etc/ssh/sshd_config

# Uncomment the Port 22 line
sed -i 's/#Port 22/Port 22/' /etc/ssh/sshd_config

echo "root:x" | chpasswd

/usr/sbin/sshd -D &

/usr/sbin/groupadd -g "$GID" dockerusers
/usr/sbin/useradd -c 'Docker Container User' -u $UID -g "$GID" -G sudo -m -s /bin/bash -d /home/"$USER" "$USER"
printf "${USER} ALL=(ALL:ALL) NOPASSWD:ALL" | sudo EDITOR="tee -a" visudo #>> /dev/null
Expand All @@ -23,8 +46,8 @@ chmod 700 /home/"$USER"/.ssh
touch /home/"$USER"/.sudo_as_admin_successful
# set a default password
SALT=$(date +%M%S)
PASS=Docker!"$SALT"
echo "${USER}":"$PASS" | chpasswd
PASS=x # Docker!"1234"
#echo "${USER}":"$PASS" | chpasswd
echo
echo For longer running containers consider running \'unminimize\' to update packages
echo and make the container more suitable for interactive use.
Expand All @@ -33,5 +56,15 @@ echo "Use "$USER" with password "$PASS" for SSH login"
echo "Docker Container IP address(es):"
awk '/32 host/ { print f } {f=$2}' <<< "$(</proc/net/fib_trie)" | grep -vE "127.0." | sort -u
# shellcheck disable=SC2068
#exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
#/usr/sbin/sshd -D &
#exec "$@"

# Restart SSH service
service ssh restart

# Add rsync to PATH
export PATH=$PATH:/usr/bin
export PATH=$PATH:/usr/bin/rsync

exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
19 changes: 11 additions & 8 deletions containers/run-docker-example.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/usr/bin/env bash
#run using:
# ./containers/run-docker-example.sh

# Copyright 2023 The DAPHNE Consortium
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
Expand All @@ -26,8 +28,8 @@ if [[ "$(arch)" == arm* ]] || [ $(arch) == 'aarch64' ]; then
fi

#on some installations docker can only be run with sudo
USE_SUDO=
#USE_SUDO=sudo
#USE_SUDO=
USE_SUDO=sudo

# run this script from the base path of your DAPHNE source tree
DAPHNE_ROOT=$PWD
Expand Down Expand Up @@ -56,19 +58,20 @@ DEBUG_FLAGS=""
# set bash as the default command if none is provided
command=$*
if [ "$#" -eq 0 ]; then
command=bash
command="bash"
fi

# non-interactive: launch with PWD mounted
#docker run $DEVICE_FLAGS --user=$UID:$GID --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
#$USE_SUDO docker run $DEVICE_FLAGS --user=$UID:$GID -d --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
# -e TERM=screen-256color -e PATH="$PATH" -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" -e USER=$USERNAME -e UID=$UID \
# "$DOCKER_IMAGE:$DOCKER_TAG" $@
# "$DOCKER_IMAGE:$DOCKER_TAG" $@

# for interactive use:
$USE_SUDO docker run $DEBUG_FLAGS $DEVICE_FLAGS -it --rm --hostname daphne-container -w $DAPHNE_ROOT_CONTAINER \
-v "$DAPHNE_ROOT:$DAPHNE_ROOT_CONTAINER" -e GID=$GID -e TERM=screen-256color -e PATH -e LD_LIBRARY_PATH \
-e USER=$USERNAME -e UID=$UID \
"$DOCKER_IMAGE:$DOCKER_TAG" $command
-e USER=$USERNAME -e UID=$UID -p 22222:22 \
--entrypoint /daphne/containers/entrypoint-interactive.sh \
"$DOCKER_IMAGE:$DOCKER_TAG" $command # "$DOCKER_IMAGE:$DOCKER_TAG" $command

# move this up to above the DOCKER_IMAGE line to override the entrypoint:
# --entrypoint /daphne/containers/entrypoint-interactive.sh
13 changes: 5 additions & 8 deletions daphne-opt/daphne-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,14 @@ int main(int argc, char **argv) {
mlir::daphne::registerDaphnePasses();

mlir::DialectRegistry registry;
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect,
mlir::func::FuncDialect, mlir::scf::SCFDialect,
mlir::LLVM::LLVMDialect, mlir::AffineDialect,
mlir::memref::MemRefDialect, mlir::linalg::LinalgDialect,
mlir::math::MathDialect>();
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect, mlir::func::FuncDialect,
mlir::scf::SCFDialect, mlir::LLVM::LLVMDialect, mlir::AffineDialect, mlir::memref::MemRefDialect,
mlir::linalg::LinalgDialect, mlir::math::MathDialect>();
// Add the following to include *all* MLIR Core dialects, or selectively
// include what you need like above. You only need to register dialects that
// will be *parsed* by the tool, not the one generated
// registerAllDialects(registry);

return mlir::asMainReturnCode(mlir::MlirOptMain(
argc, argv, "Standalone DAPHNE optimizing compiler driver\n",
registry));
return mlir::asMainReturnCode(
mlir::MlirOptMain(argc, argv, "Standalone DAPHNE optimizing compiler driver\n", registry));
}
1 change: 1 addition & 0 deletions doc/SchedulingOptions.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ DAPHNE Options:
--libdir=<string> - The directory containing kernel libraries
--no-obj-ref-mgnt - Switch off garbage collection by not managing data objects' reference counters
--select-matrix-repr - Automatically choose physical matrix representations (e.g., dense/sparse)
--use-positional-map - Enable multiple read optimization for csv files using positional map
Generic Options:
--help - Display available options (--help-hidden for more)
--help-list - Display list of available options (--help-list-hidden for more)
Expand Down
2 changes: 1 addition & 1 deletion doc/docs-build-requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
mkdocs-material
mkdocs-material
152 changes: 152 additions & 0 deletions evaluation/build-charts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import glob
import re
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Folder where logs are stored.
results_dir = './results'

def extract_dims(filename):
    """Extract (rows, cols) from a dataset filename.

    e.g. "frame_100000r_20c_MIXED.csv" -> (100000, 20).
    Returns (None, None) when the name carries no '<rows>r_<cols>c' token.
    """
    match = re.search(r'(\d+)r_(\d+)c', filename)
    if match is None:
        return None, None
    return int(match.group(1)), int(match.group(2))

def extract_data_type(filename):
    """Classify a log filename as "<main>_<subtype>".

    The main type is "matrix" when the basename starts with "matrix_",
    otherwise "frame". The subtype is taken from a keyword embedded in the
    name (mixed, str, float, ...); "fixedstr" and "strdiff" are folded into
    "str" so string variants compare against each other.
    """
    name = os.path.basename(filename)
    kind = "matrix" if name.startswith("matrix_") else "frame"
    hit = re.search(r'(mixed|str|float|rep|strdiff|fixedstr|number)', name, re.IGNORECASE)
    if hit is None:
        detail = "unknown"
    else:
        detail = hit.group(1).lower()
        if detail in ("fixedstr", "strdiff"):
            detail = "str"
    return f"{kind}_{detail}"

# Load CSV logs for each experiment.
def load_log(experiment, pattern):
    """Load every result CSV of one experiment into a single DataFrame.

    Args:
        experiment: log-file suffix ("normal", "create", or "opt").
        pattern: glob pattern passed by callers; kept for interface
            compatibility but unused — the glob is rebuilt from `experiment`.

    Returns:
        Concatenation of all matching logs with Rows/Cols/Size/DataType
        columns appended, or an empty DataFrame when nothing matches.
    """
    # Files are named like evaluation_results_*_{experiment}.csv in the results folder.
    files = glob.glob(os.path.join(results_dir, f"evaluation_results_*_{experiment}.csv"))
    dfs = []
    for f in files:
        # The CSV already has a header:
        # CSVFile,Experiment,Trial,ReadTime,WriteTime,PosmapReadTime,StartupSeconds,ParsingSeconds,CompilationSeconds,ExecutionSeconds,TotalSeconds
        df = pd.read_csv(f)
        # Extract dimensions and add them as columns.
        dims = df['CSVFile'].apply(extract_dims)
        # BUG FIX: extract_dims returns the (truthy) tuple (None, None) on a
        # miss, so the old `x[0] if x else np.nan` never fell back to NaN and
        # left None values that break the Size arithmetic below.
        df['Rows'] = dims.apply(lambda d: d[0] if d[0] is not None else np.nan)
        df['Cols'] = dims.apply(lambda d: d[1] if d[1] is not None else np.nan)
        # Size measure: total number of cells.
        df['Size'] = df['Rows'] * df['Cols']
        # Combined data type (main type and subtype).
        df['DataType'] = df['CSVFile'].apply(extract_data_type)
        dfs.append(df)
    if dfs:
        return pd.concat(dfs, ignore_index=True)
    return pd.DataFrame()

# Load the three experiment logs (normal read, posmap creation, optimized read).
df_normal, df_create, df_opt = (
    load_log(exp, f"evaluation_results_*_{exp}.csv")
    for exp in ("normal", "create", "opt")
)

# Compute average timings per dataset (grouped by CSVFile, Size, Rows, Cols, and DataType)
def aggregate_log(df):
    """Average every timing column per dataset.

    Groups by (CSVFile, Size, Rows, Cols, DataType) — DataType is part of the
    key so it survives the aggregation — and returns the per-group mean of
    each timing column as a flat DataFrame.

    Unlike the original, the caller's frame is NOT mutated: numeric coercion
    happens on a copy. An empty input (no log files found) returns an empty
    frame instead of raising KeyError on the missing columns.
    """
    cols_to_numeric = ['ReadTime', 'WriteTime', 'PosmapReadTime',
                       'StartupSeconds', 'ParsingSeconds', 'CompilationSeconds',
                       'ExecutionSeconds', 'TotalSeconds']
    if df.empty:
        # load_log returns an empty DataFrame when no files match; keep the
        # expected columns so downstream sort_values("Size") still works.
        return pd.DataFrame(columns=['CSVFile', 'Size', 'Rows', 'Cols', 'DataType'] + cols_to_numeric)
    df = df.copy()  # avoid mutating the caller's frame as a side effect
    for col in cols_to_numeric:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df.groupby(['CSVFile', 'Size', 'Rows', 'Cols', 'DataType'])[cols_to_numeric].mean().reset_index()

# Mean timings per dataset for each of the three experiments.
agg_normal = aggregate_log(df_normal)
agg_create = aggregate_log(df_create)
agg_opt = aggregate_log(df_opt)

# Plot 1: Overall read time comparison for Normal, First (Create) and Second (Opt) reads.
plt.figure(figsize=(10,6))
# Sort by dataset size so the line plots connect points left-to-right.
agg_normal = agg_normal.sort_values("Size")
agg_create = agg_create.sort_values("Size")
agg_opt = agg_opt.sort_values("Size")

plt.plot(agg_normal["Size"], agg_normal["ReadTime"], marker="o", label="Normal Read")
plt.plot(agg_create["Size"], agg_create["ReadTime"], marker="s", label="First Read (Overall)")
plt.plot(agg_opt["Size"], agg_opt["ReadTime"], marker="^", label="Second Read (Overall)")
plt.xlabel("Dataset Size (Rows x Cols)")
plt.ylabel("Overall Read Time (seconds)")
plt.title("Overall Read Time vs Dataset Size")
plt.xscale("log")  # Added: logarithmic scale on x-axis.
plt.yscale("log")  # Added: logarithmic scale on y-axis.
plt.legend()
plt.grid(True, which="both", ls="--")
plt.tight_layout()
# NOTE(review): absolute path "/fig" — presumably a container mount; confirm
# the directory exists or savefig will raise FileNotFoundError.
plt.savefig("/fig/overall_read_time.png")
plt.close()

# Plot 2: Three read comparison per dataset size for each data type.
# One chart per DataType value (e.g. frame_str, matrix_float, ...).
unique_types = agg_normal["DataType"].unique()
for dt in unique_types:
    # Restrict each experiment's aggregate to the current data type.
    sub_normal = agg_normal[agg_normal["DataType"] == dt].sort_values("Size")
    sub_create = agg_create[agg_create["DataType"] == dt].sort_values("Size")
    sub_opt = agg_opt[agg_opt["DataType"] == dt].sort_values("Size")

    plt.figure(figsize=(10,6))
    plt.plot(sub_normal["Size"], sub_normal["ReadTime"], marker="o", label="Normal Read")
    plt.plot(sub_create["Size"], sub_create["ReadTime"], marker="s", label="First Read (Overall)")
    plt.plot(sub_opt["Size"], sub_opt["ReadTime"], marker="^", label="Second Read (Overall)")
    plt.xlabel("Dataset Size (Rows x Cols)")
    plt.ylabel("Overall Read Time (seconds)")
    plt.title(f"Overall Read Time vs Dataset Size for {dt}")
    plt.xscale("log")  # Added: logarithmic scale on x-axis.
    plt.yscale("log")  # Added: logarithmic scale on y-axis.
    plt.legend()
    plt.grid(True, which="both", ls="--")
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig(f"/fig/overall_read_time_{dt}.png")
    plt.close()

# Plot 3: Breakdown for First Read (Create) – Stacked bar: Overall Read Time and Posmap Write Time.
# Shows how much of the first read is spent writing the positional map.
if not agg_create.empty:
    ind = np.arange(len(agg_create))
    width = 0.6
    fig, ax = plt.subplots(figsize=(10,6))
    # WriteTime is stacked on top of ReadTime (bottom=ReadTime).
    p1 = ax.bar(ind, agg_create["ReadTime"], width, label="Overall Read Time")
    p2 = ax.bar(ind, agg_create["WriteTime"], width, bottom=agg_create["ReadTime"], label="Posmap Write Time")
    ax.set_xticks(ind)
    ax.set_xticklabels(agg_create["CSVFile"], rotation=45, ha="right")
    ax.set_ylabel("Time (seconds)")
    ax.set_title("First Read Breakdown (Create): Read vs. Write Posmap")
    ax.legend()
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig("/fig/create_read_breakdown.png")
    plt.close()

# Plot 4: Breakdown for Second Read (Opt) – Stacked bar: Posmap Read Time and Overall Read Time.
# Shows how much of the optimized read is spent loading the positional map.
if not agg_opt.empty:
    ind = np.arange(len(agg_opt))
    width = 0.6
    fig, ax = plt.subplots(figsize=(10,6))
    # ReadTime is stacked on top of PosmapReadTime (bottom=PosmapReadTime).
    p1 = ax.bar(ind, agg_opt["PosmapReadTime"], width, label="Posmap Read Time")
    p2 = ax.bar(ind, agg_opt["ReadTime"], width, bottom=agg_opt["PosmapReadTime"], label="Overall Read Time")
    ax.set_xticks(ind)
    ax.set_xticklabels(agg_opt["CSVFile"], rotation=45, ha="right")
    ax.set_ylabel("Time (seconds)")
    ax.set_title("Second Read Breakdown (Opt): Posmap vs. Overall Read")
    ax.legend()
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig("/fig/opt_read_breakdown.png")
    plt.close()

print("Charts generated and saved as PNG files.")
Loading