61 commits (all by EricBenschneider)

b3d829e  added generateFileMetaData (Nov 28, 2024)
30edf69  added tests for meta data generation (Nov 29, 2024)
b9f5913  updated read kernel and readMetaData for meta data generation (Jan 11, 2025)
312b30c  used matrix/frame flag for meta data generation (Feb 4, 2025)
030aa48  ran clang-format (Feb 5, 2025)
37f7d68  fixed runtime error when trying to save generated file (Feb 5, 2025)
b7be227  using positional map for frame reading (Feb 6, 2025)
dcd653e  added positional map utility functions (Feb 6, 2025)
2f82483  posMap working but indexes screwed (Feb 6, 2025)
697e105  new tests (Feb 6, 2025)
52a7d2b  update tests to not use newline (Feb 6, 2025)
b0f011c  wsl stuff (Feb 6, 2025)
8e01228  refactor old readcsvfile for frames (Feb 7, 2025)
e43ea36  added daphne file util to csv (Feb 9, 2025)
4febfe8  conv to unix file endings (Feb 9, 2025)
9468475  added config for read optimizations (Feb 9, 2025)
e258539  fixed flag usage (Feb 9, 2025)
68abc97  added config for read optimization (Feb 9, 2025)
e4979a2  metadata test fix (Feb 9, 2025)
4e72247  added generateFileMetaData (Nov 28, 2024)
e7c0751  added tests for meta data generation (Nov 29, 2024)
9bdbaf3  updated read kernel and readMetaData for meta data generation (Jan 11, 2025)
6e84c9e  updated DaphneDSL to use label flag (Feb 11, 2025)
ad8650f  improved generateMetaDataTest (Feb 12, 2025)
6b724e8  added systest for reading frame without meta data (Feb 12, 2025)
40c3ffe  Revert "updated DaphneDSL to use label flag" (Feb 12, 2025)
d3cbc9e  removed label flag (Feb 12, 2025)
29973e1  improved generateMetaDataTest (Feb 12, 2025)
0b22256  added sample rows for meta data generation (Feb 13, 2025)
9172035  refactor generateMetaData (Feb 13, 2025)
feabae1  fixed usage of singlevaluetype (Feb 13, 2025)
272e405  updated generateMetadata test (Feb 13, 2025)
ac29475  moved isMatrix flag (Feb 13, 2025)
cfb6703  fixed single value type in test (Feb 13, 2025)
9a05c05  added multi line support (Feb 13, 2025)
e918b3a  finished bin files and added tests (Feb 9, 2025)
56dfd15  added support for dense matrix (Feb 9, 2025)
ca7f2e4  added support for csr matrix (Feb 9, 2025)
a70701d  changes to matrix optimization (Feb 11, 2025)
549dbf3  added readopt commandline flag (Feb 15, 2025)
51455f6  used dbdf file ending (Feb 15, 2025)
29e7058  finished frames opt (Feb 15, 2025)
7f4785a  added evaluation artifacts (Feb 15, 2025)
b131054  positional map overhaul (Feb 15, 2025)
8b8cce8  Revert "positional map overhaul" (Feb 15, 2025)
fd0f031  positional map update (Feb 15, 2025)
ae8f4ac  removed positional map (Feb 16, 2025)
8c7d91d  removed line prints (Feb 16, 2025)
7961621  updated eval (Feb 16, 2025)
65b33cb  removed posmap (Feb 16, 2025)
fbcc087  eval code (Feb 16, 2025)
03590e9  automated evaluation result saving (Feb 16, 2025)
3abd2ba  added systests for reads using optimization (Feb 17, 2025)
3999735  fixed rebase errors (Feb 17, 2025)
2c0b679  commented prints (Feb 17, 2025)
bfdbfde  fixed tests and rebase errors (Feb 17, 2025)
ffa39bc  added experiment script (Feb 24, 2025)
7f6e37e  ran first experiments and created charts (Feb 23, 2025)
1beb863  used single flag for optimizations (Feb 24, 2025)
4821bcf  added prints for evaluation (Feb 24, 2025)
953a5e4  finished evaluation (Feb 24, 2025)
1 change: 1 addition & 0 deletions UserConfig.json
@@ -1,4 +1,5 @@
{
"save_csv_as_bin": false,
"matmul_vec_size_bits": 0,
"matmul_tile": false,
"matmul_use_fixed_tile_sizes": true,
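For orientation, here is a minimal usage sketch for the new flag. The flag name comes from this diff, but the .dbdf side effect and the script file are assumptions about this PR's intended behavior, not documented usage:

    # Hypothetical: enable the optimization, then read the same CSV twice.
    sed -i 's/"save_csv_as_bin": false/"save_csv_as_bin": true/' UserConfig.json
    bin/daphne script.daphne   # first read parses the CSV and (assumed) also writes a binary .dbdf copy
    bin/daphne script.daphne   # second read (assumed) is served from the .dbdf copy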
41 changes: 37 additions & 4 deletions containers/entrypoint-interactive.sh
@@ -15,6 +15,29 @@
# limitations under the License.

/usr/sbin/sshd -f /etc/ssh/sshd_config


# Allow root login and password authentication
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/KbdInteractiveAuthentication no/KbdInteractiveAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/ChallengeResponseAuthentication no/ChallengeResponseAuthentication yes/' /etc/ssh/sshd_config

# Allow port forwarding
sed -i 's/#AllowTcpForwarding yes/AllowTcpForwarding yes/' /etc/ssh/sshd_config
sed -i 's/#GatewayPorts no/GatewayPorts yes/' /etc/ssh/sshd_config

# Enable logging
sed -i 's/#SyslogFacility AUTH/SyslogFacility AUTH/' /etc/ssh/sshd_config
sed -i 's/#LogLevel INFO/LogLevel INFO/' /etc/ssh/sshd_config

# Uncomment the Port 22 line
sed -i 's/#Port 22/Port 22/' /etc/ssh/sshd_config

echo "root:x" | chpasswd

/usr/sbin/sshd -D &

/usr/sbin/groupadd -g "$GID" dockerusers
/usr/sbin/useradd -c 'Docker Container User' -u $UID -g "$GID" -G sudo -m -s /bin/bash -d /home/"$USER" "$USER"
printf "${USER} ALL=(ALL:ALL) NOPASSWD:ALL" | sudo EDITOR="tee -a" visudo #>> /dev/null
@@ -23,8 +46,8 @@ chmod 700 /home/"$USER"/.ssh
touch /home/"$USER"/.sudo_as_admin_successful
# set a default password
SALT=$(date +%M%S)
PASS=Docker!"$SALT"
echo "${USER}":"$PASS" | chpasswd
PASS=x # Docker!"1234"
#echo "${USER}":"$PASS" | chpasswd
echo
echo For longer running containers consider running \'unminimize\' to update packages
echo and make the container more suitable for interactive use.
@@ -33,5 +56,15 @@ echo "Use "$USER" with password "$PASS" for SSH login"
echo "Docker Container IP address(es):"
awk '/32 host/ { print f } {f=$2}' <<< "$(</proc/net/fib_trie)" | grep -vE "127.0." | sort -u
# shellcheck disable=SC2068
#exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
#/usr/sbin/sshd -D &
#exec "$@"

# Restart SSH service
service ssh restart

# Add rsync to PATH
export PATH=$PATH:/usr/bin
export PATH=$PATH:/usr/bin/rsync

exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
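Together with the port mapping added to run-docker-example.sh below, these sshd changes should make the container reachable over SSH from the host. A rough sketch (host port 22222 comes from the -p flag below; both passwords are set to "x" by the chpasswd lines above; "myuser" is a placeholder for the container user):

    ssh -p 22222 root@localhost      # root login, password "x" (set above)
    ssh -p 22222 myuser@localhost    # container user ($USER in the script), same password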
19 changes: 11 additions & 8 deletions containers/run-docker-example.sh
@@ -1,9 +1,11 @@
#!/usr/bin/env bash
# Run using:
#   ./containers/run-docker-example.sh

# Copyright 2023 The DAPHNE Consortium
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
@@ -26,8 +28,8 @@ if [[ "$(arch)" == arm* ]] || [ $(arch) == 'aarch64' ]; then
fi

#on some installations docker can only be run with sudo
USE_SUDO=
#USE_SUDO=sudo
#USE_SUDO=
USE_SUDO=sudo

# run this script from the base path of your DAPHNE source tree
DAPHNE_ROOT=$PWD
@@ -56,19 +58,20 @@ DEBUG_FLAGS=""
# set bash as the default command if none is provided
command=$*
if [ "$#" -eq 0 ]; then
command=bash
command="bash"
fi

# non-interactive: launch with PWD mounted
#docker run $DEVICE_FLAGS --user=$UID:$GID --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
#$USE_SUDO docker run $DEVICE_FLAGS --user=$UID:$GID -d --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
# -e TERM=screen-256color -e PATH="$PATH" -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" -e USER=$USERNAME -e UID=$UID \
# "$DOCKER_IMAGE:$DOCKER_TAG" $@
# "$DOCKER_IMAGE:$DOCKER_TAG" $@

# for interactive use:
$USE_SUDO docker run $DEBUG_FLAGS $DEVICE_FLAGS -it --rm --hostname daphne-container -w $DAPHNE_ROOT_CONTAINER \
-v "$DAPHNE_ROOT:$DAPHNE_ROOT_CONTAINER" -e GID=$GID -e TERM=screen-256color -e PATH -e LD_LIBRARY_PATH \
-e USER=$USERNAME -e UID=$UID \
"$DOCKER_IMAGE:$DOCKER_TAG" $command
-e USER=$USERNAME -e UID=$UID -p 22222:22 \
--entrypoint /daphne/containers/entrypoint-interactive.sh \
"$DOCKER_IMAGE:$DOCKER_TAG" $command # "$DOCKER_IMAGE:$DOCKER_TAG" $command

# move this up to above the DOCKER_IMAGE line to override the entrypoint:
# --entrypoint /daphne/containers/entrypoint-interactive.sh
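A short sketch of how the two scripts are presumably used together; the data/ directory and user name are placeholders:

    ./containers/run-docker-example.sh                                 # start the interactive container
    rsync -e 'ssh -p 22222' -av data/ myuser@localhost:/daphne/data/   # copy files in over the forwarded SSH port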
13 changes: 5 additions & 8 deletions daphne-opt/daphne-opt.cpp
@@ -36,17 +36,14 @@ int main(int argc, char **argv) {
mlir::daphne::registerDaphnePasses();

mlir::DialectRegistry registry;
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect,
mlir::func::FuncDialect, mlir::scf::SCFDialect,
mlir::LLVM::LLVMDialect, mlir::AffineDialect,
mlir::memref::MemRefDialect, mlir::linalg::LinalgDialect,
mlir::math::MathDialect>();
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect, mlir::func::FuncDialect,
mlir::scf::SCFDialect, mlir::LLVM::LLVMDialect, mlir::AffineDialect, mlir::memref::MemRefDialect,
mlir::linalg::LinalgDialect, mlir::math::MathDialect>();
// Add the following to include *all* MLIR Core dialects, or selectively
// include what you need like above. You only need to register dialects that
// will be *parsed* by the tool, not the one generated
// registerAllDialects(registry);

return mlir::asMainReturnCode(mlir::MlirOptMain(
argc, argv, "Standalone DAPHNE optimizing compiler driver\n",
registry));
return mlir::asMainReturnCode(
mlir::MlirOptMain(argc, argv, "Standalone DAPHNE optimizing compiler driver\n", registry));
}
2 changes: 1 addition & 1 deletion doc/docs-build-requirements.txt
@@ -1 +1 @@
mkdocs-material
mkdocs-material
137 changes: 137 additions & 0 deletions evaluation/build-charts.py
@@ -0,0 +1,137 @@
import glob
import re
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Folder where logs are stored.
results_dir = './results'

# Folder where the generated charts are written; create it if it does not exist yet.
os.makedirs('fig', exist_ok=True)

# This function extracts dimensions (number of rows and columns) from the filename,
# e.g. "frame_100000r_20c_MIXED.csv" -> (100000, 20).
def extract_dims(filename):
    m = re.search(r'(\d+)r_(\d+)c', filename)
    if m:
        rows = int(m.group(1))
        cols = int(m.group(2))
        return rows, cols
    else:
        return None, None

# This function extracts the overall data type from the filename.
# It considers the main type (matrix if the filename starts with "matrix_",
# otherwise frame) combined with a subtype (mixed, str, float, etc.).
def extract_data_type(filename):
    base = os.path.basename(filename)
    main_type = "matrix" if base.startswith("matrix_") else "frame"
    m = re.search(r'(mixed|str|float|rep|strdiff|fixedstr|number)', base, re.IGNORECASE)
    subtype = m.group(1).lower() if m else "unknown"
    # Map fixedstr and strdiff to "str" for comparison purposes.
    if subtype in ["fixedstr", "strdiff"]:
        subtype = "str"
    return f"{main_type}_{subtype}"

# Load CSV logs for each experiment.
def load_log(experiment):
    # We assume files are named like evaluation_results_*_{experiment}.csv in the results folder.
    files = glob.glob(os.path.join(results_dir, f"evaluation_results_*_{experiment}.csv"))
    dfs = []
    for f in files:
        # The CSV already has a header:
        # CSVFile,Experiment,Trial,ReadTime,WriteTime,dbdfReadTime,StartupSeconds,ParsingSeconds,CompilationSeconds,ExecutionSeconds,TotalSeconds
        df = pd.read_csv(f)
        # Extract dimensions and add them as columns.
        dims = df['CSVFile'].apply(extract_dims)
        df['Rows'] = dims.apply(lambda x: x[0] if x[0] is not None else np.nan)
        df['Cols'] = dims.apply(lambda x: x[1] if x[1] is not None else np.nan)
        # Compute a size measure (for example, total cells).
        df['Size'] = df['Rows'] * df['Cols']
        # Extract a combined data type (main type and subtype).
        df['DataType'] = df['CSVFile'].apply(extract_data_type)
        dfs.append(df)
    if dfs:
        return pd.concat(dfs, ignore_index=True)
    else:
        return pd.DataFrame()

# Load the three experiment logs.
df_normal = load_log("normal")
df_create = load_log("create")
df_opt = load_log("opt")

# Compute average timings per dataset (grouped by CSVFile, Size, Rows, Cols, and DataType).
def aggregate_log(df):
    # Convert timing fields to numeric type.
    cols_to_numeric = ['ReadTime', 'WriteTime',
                       'StartupSeconds', 'ParsingSeconds', 'CompilationSeconds',
                       'ExecutionSeconds', 'TotalSeconds']
    for col in cols_to_numeric:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    # Group including DataType so that it is preserved in the aggregation.
    return df.groupby(['CSVFile', 'Size', 'Rows', 'Cols', 'DataType'])[cols_to_numeric].mean().reset_index()

agg_normal = aggregate_log(df_normal)
agg_create = aggregate_log(df_create)
agg_opt = aggregate_log(df_opt)

# Plot 1: Overall read time comparison for Normal, First (Create) and Second (Opt) reads.
plt.figure(figsize=(10, 6))
agg_normal = agg_normal.sort_values("Size")
agg_create = agg_create.sort_values("Size")
agg_opt = agg_opt.sort_values("Size")

plt.plot(agg_normal["Size"], agg_normal["ReadTime"], marker="o", label="Normal Read")
plt.plot(agg_create["Size"], agg_create["ReadTime"], marker="s", label="First Read (Overall)")
plt.plot(agg_opt["Size"], agg_opt["ReadTime"], marker="^", label="Second Read (Overall)")
plt.xlabel("Dataset Size (Rows x Cols)")
plt.ylabel("Overall Read Time (seconds)")
plt.title("Overall Read Time vs Dataset Size")
plt.xscale("log")  # Added: logarithmic scale on x-axis.
plt.yscale("log")  # Added: logarithmic scale on y-axis.
plt.legend()
plt.grid(True, which="both", ls="--")
plt.tight_layout()
plt.savefig("fig/overall_read_time.png")
plt.close()

# Plot 2: Three-read comparison per dataset size for each data type.
unique_types = agg_normal["DataType"].unique()
for dt in unique_types:
    sub_normal = agg_normal[agg_normal["DataType"] == dt].sort_values("Size")
    sub_create = agg_create[agg_create["DataType"] == dt].sort_values("Size")
    sub_opt = agg_opt[agg_opt["DataType"] == dt].sort_values("Size")

    plt.figure(figsize=(10, 6))
    plt.plot(sub_normal["Size"], sub_normal["ReadTime"], marker="o", label="Normal Read")
    plt.plot(sub_create["Size"], sub_create["ReadTime"], marker="s", label="First Read (Overall)")
    plt.plot(sub_opt["Size"], sub_opt["ReadTime"], marker="^", label="Second Read (Overall)")
    plt.xlabel("Dataset Size (Rows x Cols)")
    plt.ylabel("Overall Read Time (seconds)")
    plt.title(f"Overall Read Time vs Dataset Size for {dt}")
    plt.xscale("log")  # Added: logarithmic scale on x-axis.
    plt.yscale("log")  # Added: logarithmic scale on y-axis.
    plt.legend()
    plt.grid(True, which="both", ls="--")
    plt.tight_layout()
    plt.savefig(f"fig/overall_read_time_{dt}.png")
    plt.close()

# Plot 3: Breakdown for First Read (Create) – stacked bar: Overall Read Time and dbdf Write Time.
if not agg_create.empty:
    ind = np.arange(len(agg_create))
    width = 0.6
    fig, ax = plt.subplots(figsize=(10, 6))
    p1 = ax.bar(ind, agg_create["ReadTime"], width, label="Overall Read Time")
    p2 = ax.bar(ind, agg_create["WriteTime"], width, bottom=agg_create["ReadTime"], label="dbdf Write Time")
    ax.set_xticks(ind)
    ax.set_xticklabels(agg_create["CSVFile"], rotation=45, ha="right")
    ax.set_ylabel("Time (seconds)")
    ax.set_title("First Read Breakdown (Create): Read vs. Write dbdf")
    ax.legend()
    plt.tight_layout()
    plt.savefig("fig/create_read_breakdown.png")
    plt.close()

print("Charts generated and saved as PNG files.")