Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
72 commits
Select commit Hold shift + click to select a range
b3d829e
added generateFileMetaData
EricBenschneider Nov 28, 2024
30edf69
added tests for meta data generation
EricBenschneider Nov 29, 2024
b9f5913
updated read kernel and readMetaData for meta data generation
EricBenschneider Jan 11, 2025
312b30c
used matrix/frame flag for meta data generation
EricBenschneider Feb 4, 2025
030aa48
ran clang-format
EricBenschneider Feb 5, 2025
37f7d68
fixed runtime error when trying to save generated file
EricBenschneider Feb 5, 2025
698e3ca
1
EricBenschneider Feb 6, 2025
8abddf6
added positional map utility functions
EricBenschneider Feb 6, 2025
4bdbcf1
using positional map for frame reading
EricBenschneider Feb 6, 2025
7f29271
posMap working but indexes screwed
EricBenschneider Feb 6, 2025
26ef589
new tests
EricBenschneider Feb 6, 2025
7765485
update tests to not use newline
EricBenschneider Feb 6, 2025
e8530f5
wsl stuff
EricBenschneider Feb 6, 2025
61f6673
refactor old readcsvfile for frames
EricBenschneider Feb 7, 2025
8d71bcc
added daphne file util to csv
EricBenschneider Feb 9, 2025
45ee7c6
conv to unix file endings
EricBenschneider Feb 9, 2025
f066c11
added config for read optimizations
EricBenschneider Feb 9, 2025
63175fe
fixed flag usage
EricBenschneider Feb 9, 2025
51b7842
added config for read optimization
EricBenschneider Feb 9, 2025
744cf21
metadata test fix
EricBenschneider Feb 9, 2025
ace898a
added generateFileMetaData
EricBenschneider Nov 28, 2024
a11191d
added tests for meta data generation
EricBenschneider Nov 29, 2024
db777dc
updated read kernel and readMetaData for meta data generation
EricBenschneider Jan 11, 2025
bd94011
updated DaphneDSL to use label flag
EricBenschneider Feb 11, 2025
033ee14
improved generateMetaDataTest
EricBenschneider Feb 12, 2025
a607add
added systest for reading frame without meta data
EricBenschneider Feb 12, 2025
d633884
Revert "updated DaphneDSL to use label flag"
EricBenschneider Feb 12, 2025
5fdca5a
removed label flag
EricBenschneider Feb 12, 2025
04d5f8e
improved generateMetaDataTest
EricBenschneider Feb 12, 2025
fed8e8b
added sample rows for meta data generation
EricBenschneider Feb 13, 2025
6df7b5d
refactor generateMetaData
EricBenschneider Feb 13, 2025
7d7a1d7
fixed usage of singlevaluetype
EricBenschneider Feb 13, 2025
d0842c9
updated generateMetadata test
EricBenschneider Feb 13, 2025
2bee195
moved isMatrix flag
EricBenschneider Feb 13, 2025
a9e2b5e
fixed single value type in test
EricBenschneider Feb 13, 2025
a517fb1
added multi line support
EricBenschneider Feb 13, 2025
e2f508e
finished bin files and added tests
EricBenschneider Feb 9, 2025
69d9099
added support for dense matrix
EricBenschneider Feb 9, 2025
e7400f0
added support for csr matrix
EricBenschneider Feb 9, 2025
353d330
changes to matrix optimization
EricBenschneider Feb 11, 2025
32b54e6
added readopt commandline flag
EricBenschneider Feb 15, 2025
2f07403
used dbdf file ending
EricBenschneider Feb 15, 2025
3ff3cfe
finished frames opt
EricBenschneider Feb 15, 2025
c3ef683
added evaluation artifacts
EricBenschneider Feb 15, 2025
861a35e
positional map overhaul
EricBenschneider Feb 15, 2025
abb83c7
Revert "positional map overhaul"
EricBenschneider Feb 15, 2025
3379270
positional map update
EricBenschneider Feb 15, 2025
d2b12fd
posmap final
EricBenschneider Feb 15, 2025
4f86996
removed binary optimization and posmap for matrix
EricBenschneider Feb 16, 2025
644e699
removed binary optimization
EricBenschneider Feb 16, 2025
b9335ef
removed posmap matrix tests
EricBenschneider Feb 16, 2025
0801805
removed prints
EricBenschneider Feb 16, 2025
2f12c70
added evaluation artifacts
EricBenschneider Feb 16, 2025
f86b48a
used time measuring correctly
EricBenschneider Feb 17, 2025
c8d8282
fixed tests and rebase errors
EricBenschneider Feb 17, 2025
afb6a65
updated tests
EricBenschneider Feb 18, 2025
4d262d5
strings without multiline
EricBenschneider Feb 21, 2025
00708d0
added double quote encoding
EricBenschneider Feb 21, 2025
bbbc7ff
added fixedstr matrix optimization
EricBenschneider Feb 21, 2025
edecf1b
used one read for posmap reading
EricBenschneider Feb 21, 2025
d32dd75
optimized positional map
EricBenschneider Feb 21, 2025
6ae0e68
added positional map for string matrix
EricBenschneider Feb 22, 2025
434874d
added positional map for general matrix
EricBenschneider Feb 22, 2025
f03b0ac
last fixes
EricBenschneider Feb 22, 2025
029515d
test update
EricBenschneider Feb 22, 2025
70ba3a6
read matrix string opt
EricBenschneider Feb 23, 2025
6ea67b8
added experiment script
EricBenschneider Feb 23, 2025
ff8f53c
precomputed nextPos
EricBenschneider Feb 23, 2025
39d6911
ran first experiments and created charts
EricBenschneider Feb 23, 2025
d65c9fb
changed usage to single flag
EricBenschneider Feb 23, 2025
377a781
added documentation
EricBenschneider Feb 24, 2025
a16d3c4
changed flag default to false
EricBenschneider Feb 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions UserConfig.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"use_positional_map": false,
"matmul_vec_size_bits": 0,
"matmul_tile": false,
"matmul_use_fixed_tile_sizes": true,
Expand Down
41 changes: 37 additions & 4 deletions containers/entrypoint-interactive.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,29 @@
# limitations under the License.

/usr/sbin/sshd -f /etc/ssh/sshd_config


# Allow root login and password authentication
sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config
sed -i 's/#PasswordAuthentication yes/PasswordAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/KbdInteractiveAuthentication no/KbdInteractiveAuthentication yes/' /etc/ssh/sshd_config
sed -i 's/ChallengeResponseAuthentication no/ChallengeResponseAuthentication yes/' /etc/ssh/sshd_config

# Allow port forwarding
sed -i 's/#AllowTcpForwarding yes/AllowTcpForwarding yes/' /etc/ssh/sshd_config
sed -i 's/#GatewayPorts no/GatewayPorts yes/' /etc/ssh/sshd_config

#enable logging
sed -i 's/#SyslogFacility AUTH/SyslogFacility AUTH/' /etc/ssh/sshd_config
sed -i 's/#LogLevel INFO/LogLevel INFO/' /etc/ssh/sshd_config

# Uncomment the Port 22 line
sed -i 's/#Port 22/Port 22/' /etc/ssh/sshd_config

echo "root:x" | chpasswd

/usr/sbin/sshd -D &

/usr/sbin/groupadd -g "$GID" dockerusers
/usr/sbin/useradd -c 'Docker Container User' -u $UID -g "$GID" -G sudo -m -s /bin/bash -d /home/"$USER" "$USER"
printf "${USER} ALL=(ALL:ALL) NOPASSWD:ALL" | sudo EDITOR="tee -a" visudo #>> /dev/null
Expand All @@ -23,8 +46,8 @@ chmod 700 /home/"$USER"/.ssh
touch /home/"$USER"/.sudo_as_admin_successful
# set a default password
SALT=$(date +%M%S)
PASS=Docker!"$SALT"
echo "${USER}":"$PASS" | chpasswd
PASS=x # Docker!"1234"
#echo "${USER}":"$PASS" | chpasswd
echo
echo For longer running containers consider running \'unminimize\' to update packages
echo and make the container more suitable for interactive use.
Expand All @@ -33,5 +56,15 @@ echo "Use "$USER" with password "$PASS" for SSH login"
echo "Docker Container IP address(es):"
awk '/32 host/ { print f } {f=$2}' <<< "$(</proc/net/fib_trie)" | grep -vE "127.0." | sort -u
# shellcheck disable=SC2068
#exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
#/usr/sbin/sshd -D &
#exec "$@"

# Restart SSH service
service ssh restart

# Add rsync to PATH
export PATH=$PATH:/usr/bin
export PATH=$PATH:/usr/bin/rsync

exec su "$USER" -c $@
sudo --preserve-env=PATH,LD_LIBRARY_PATH,TERM -u $USER $@
19 changes: 11 additions & 8 deletions containers/run-docker-example.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#!/usr/bin/env bash
#run using:
# ./containers/run-docker-example.sh

# Copyright 2023 The DAPHNE Consortium
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
Expand All @@ -26,8 +28,8 @@ if [[ "$(arch)" == arm* ]] || [ $(arch) == 'aarch64' ]; then
fi

#on some installations docker can only be run with sudo
USE_SUDO=
#USE_SUDO=sudo
#USE_SUDO=
USE_SUDO=sudo

# run this script from the base path of your DAPHNE source tree
DAPHNE_ROOT=$PWD
Expand Down Expand Up @@ -56,19 +58,20 @@ DEBUG_FLAGS=""
# set bash as the default command if none is provided
command=$*
if [ "$#" -eq 0 ]; then
command=bash
command="bash"
fi

# non-interactive: launch with PWD mounted
#docker run $DEVICE_FLAGS --user=$UID:$GID --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
#$USE_SUDO docker run $DEVICE_FLAGS --user=$UID:$GID -d --rm -w "$DAPHNE_ROOT" -v "$DAPHNE_ROOT:$DAPHNE_ROOT" \
# -e TERM=screen-256color -e PATH="$PATH" -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" -e USER=$USERNAME -e UID=$UID \
# "$DOCKER_IMAGE:$DOCKER_TAG" $@
# "$DOCKER_IMAGE:$DOCKER_TAG" $@

# for interactive use:
$USE_SUDO docker run $DEBUG_FLAGS $DEVICE_FLAGS -it --rm --hostname daphne-container -w $DAPHNE_ROOT_CONTAINER \
-v "$DAPHNE_ROOT:$DAPHNE_ROOT_CONTAINER" -e GID=$GID -e TERM=screen-256color -e PATH -e LD_LIBRARY_PATH \
-e USER=$USERNAME -e UID=$UID \
"$DOCKER_IMAGE:$DOCKER_TAG" $command
-e USER=$USERNAME -e UID=$UID -p 22222:22 \
--entrypoint /daphne/containers/entrypoint-interactive.sh \
"$DOCKER_IMAGE:$DOCKER_TAG" $command # "$DOCKER_IMAGE:$DOCKER_TAG" $command

# move this up to above the DOCKER_IMAGE line to override the entrypoint:
# --entrypoint /daphne/containers/entrypoint-interactive.sh
13 changes: 5 additions & 8 deletions daphne-opt/daphne-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,14 @@ int main(int argc, char **argv) {
mlir::daphne::registerDaphnePasses();

mlir::DialectRegistry registry;
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect,
mlir::func::FuncDialect, mlir::scf::SCFDialect,
mlir::LLVM::LLVMDialect, mlir::AffineDialect,
mlir::memref::MemRefDialect, mlir::linalg::LinalgDialect,
mlir::math::MathDialect>();
registry.insert<mlir::daphne::DaphneDialect, mlir::arith::ArithDialect, mlir::func::FuncDialect,
mlir::scf::SCFDialect, mlir::LLVM::LLVMDialect, mlir::AffineDialect, mlir::memref::MemRefDialect,
mlir::linalg::LinalgDialect, mlir::math::MathDialect>();
// Add the following to include *all* MLIR Core dialects, or selectively
// include what you need like above. You only need to register dialects that
// will be *parsed* by the tool, not the one generated
// registerAllDialects(registry);

return mlir::asMainReturnCode(mlir::MlirOptMain(
argc, argv, "Standalone DAPHNE optimizing compiler driver\n",
registry));
return mlir::asMainReturnCode(
mlir::MlirOptMain(argc, argv, "Standalone DAPHNE optimizing compiler driver\n", registry));
}
1 change: 1 addition & 0 deletions doc/SchedulingOptions.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ DAPHNE Options:
--libdir=<string> - The directory containing kernel libraries
--no-obj-ref-mgnt - Switch off garbage collection by not managing data objects' reference counters
--select-matrix-repr - Automatically choose physical matrix representations (e.g., dense/sparse)
--use-positional-map - Enable multiple read optimization for csv files using positional map
Generic Options:
--help - Display available options (--help-hidden for more)
--help-list - Display list of available options (--help-list-hidden for more)
Expand Down
2 changes: 1 addition & 1 deletion doc/docs-build-requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
mkdocs-material
mkdocs-material
152 changes: 152 additions & 0 deletions evaluation/build-charts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import glob
import re
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Folder where logs are stored.
results_dir = './results'

def extract_dims(filename):
    """Extract (rows, cols) from a dataset filename.

    e.g. "frame_100000r_20c_MIXED.csv" -> (100000, 20).
    Returns (None, None) when the name carries no '<rows>r_<cols>c' token.
    """
    match = re.search(r'(\d+)r_(\d+)c', filename)
    if match is None:
        return None, None
    return int(match.group(1)), int(match.group(2))

def extract_data_type(filename):
    """Classify a log filename as "<main>_<subtype>".

    The main type is "matrix" when the basename starts with "matrix_",
    otherwise "frame". The subtype is taken from a keyword embedded in the
    name (mixed, str, float, ...); "fixedstr" and "strdiff" are folded into
    "str" so string variants compare against each other.
    """
    name = os.path.basename(filename)
    kind = "matrix" if name.startswith("matrix_") else "frame"
    hit = re.search(r'(mixed|str|float|rep|strdiff|fixedstr|number)', name, re.IGNORECASE)
    if hit is None:
        detail = "unknown"
    else:
        detail = hit.group(1).lower()
        if detail in ("fixedstr", "strdiff"):
            detail = "str"
    return f"{kind}_{detail}"

# Load CSV logs for each experiment.
def load_log(experiment, pattern):
    """Load every result CSV of one experiment into a single DataFrame.

    Args:
        experiment: log-file suffix ("normal", "create", or "opt").
        pattern: glob pattern passed by callers; kept for interface
            compatibility but unused — the glob is rebuilt from `experiment`.

    Returns:
        Concatenation of all matching logs with Rows/Cols/Size/DataType
        columns appended, or an empty DataFrame when nothing matches.
    """
    # Files are named like evaluation_results_*_{experiment}.csv in the results folder.
    files = glob.glob(os.path.join(results_dir, f"evaluation_results_*_{experiment}.csv"))
    dfs = []
    for f in files:
        # The CSV already has a header:
        # CSVFile,Experiment,Trial,ReadTime,WriteTime,PosmapReadTime,StartupSeconds,ParsingSeconds,CompilationSeconds,ExecutionSeconds,TotalSeconds
        df = pd.read_csv(f)
        # Extract dimensions and add them as columns.
        dims = df['CSVFile'].apply(extract_dims)
        # BUG FIX: extract_dims returns the (truthy) tuple (None, None) on a
        # miss, so the old `x[0] if x else np.nan` never fell back to NaN and
        # left None values that break the Size arithmetic below.
        df['Rows'] = dims.apply(lambda d: d[0] if d[0] is not None else np.nan)
        df['Cols'] = dims.apply(lambda d: d[1] if d[1] is not None else np.nan)
        # Size measure: total number of cells.
        df['Size'] = df['Rows'] * df['Cols']
        # Combined data type (main type and subtype).
        df['DataType'] = df['CSVFile'].apply(extract_data_type)
        dfs.append(df)
    if dfs:
        return pd.concat(dfs, ignore_index=True)
    return pd.DataFrame()

# Load the three experiment logs (normal read, posmap creation, optimized read).
df_normal, df_create, df_opt = (
    load_log(exp, f"evaluation_results_*_{exp}.csv")
    for exp in ("normal", "create", "opt")
)

# Compute average timings per dataset (grouped by CSVFile, Size, Rows, Cols, and DataType)
def aggregate_log(df):
    """Average every timing column per dataset.

    Groups by (CSVFile, Size, Rows, Cols, DataType) — DataType is part of the
    key so it survives the aggregation — and returns the per-group mean of
    each timing column as a flat DataFrame.

    Unlike the original, the caller's frame is NOT mutated: numeric coercion
    happens on a copy. An empty input (no log files found) returns an empty
    frame instead of raising KeyError on the missing columns.
    """
    cols_to_numeric = ['ReadTime', 'WriteTime', 'PosmapReadTime',
                       'StartupSeconds', 'ParsingSeconds', 'CompilationSeconds',
                       'ExecutionSeconds', 'TotalSeconds']
    if df.empty:
        # load_log returns an empty DataFrame when no files match; keep the
        # expected columns so downstream sort_values("Size") still works.
        return pd.DataFrame(columns=['CSVFile', 'Size', 'Rows', 'Cols', 'DataType'] + cols_to_numeric)
    df = df.copy()  # avoid mutating the caller's frame as a side effect
    for col in cols_to_numeric:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    return df.groupby(['CSVFile', 'Size', 'Rows', 'Cols', 'DataType'])[cols_to_numeric].mean().reset_index()

# Mean timings per dataset for each of the three experiments.
agg_normal = aggregate_log(df_normal)
agg_create = aggregate_log(df_create)
agg_opt = aggregate_log(df_opt)

# Plot 1: Overall read time comparison for Normal, First (Create) and Second (Opt) reads.
plt.figure(figsize=(10,6))
# Sort by dataset size so the line plots connect points left-to-right.
agg_normal = agg_normal.sort_values("Size")
agg_create = agg_create.sort_values("Size")
agg_opt = agg_opt.sort_values("Size")

plt.plot(agg_normal["Size"], agg_normal["ReadTime"], marker="o", label="Normal Read")
plt.plot(agg_create["Size"], agg_create["ReadTime"], marker="s", label="First Read (Overall)")
plt.plot(agg_opt["Size"], agg_opt["ReadTime"], marker="^", label="Second Read (Overall)")
plt.xlabel("Dataset Size (Rows x Cols)")
plt.ylabel("Overall Read Time (seconds)")
plt.title("Overall Read Time vs Dataset Size")
plt.xscale("log")  # Added: logarithmic scale on x-axis.
plt.yscale("log")  # Added: logarithmic scale on y-axis.
plt.legend()
plt.grid(True, which="both", ls="--")
plt.tight_layout()
# NOTE(review): absolute path "/fig" — presumably a container mount; confirm
# the directory exists or savefig will raise FileNotFoundError.
plt.savefig("/fig/overall_read_time.png")
plt.close()

# Plot 2: Three read comparison per dataset size for each data type.
# One chart per DataType value (e.g. frame_str, matrix_float, ...).
unique_types = agg_normal["DataType"].unique()
for dt in unique_types:
    # Restrict each experiment's aggregate to the current data type.
    sub_normal = agg_normal[agg_normal["DataType"] == dt].sort_values("Size")
    sub_create = agg_create[agg_create["DataType"] == dt].sort_values("Size")
    sub_opt = agg_opt[agg_opt["DataType"] == dt].sort_values("Size")

    plt.figure(figsize=(10,6))
    plt.plot(sub_normal["Size"], sub_normal["ReadTime"], marker="o", label="Normal Read")
    plt.plot(sub_create["Size"], sub_create["ReadTime"], marker="s", label="First Read (Overall)")
    plt.plot(sub_opt["Size"], sub_opt["ReadTime"], marker="^", label="Second Read (Overall)")
    plt.xlabel("Dataset Size (Rows x Cols)")
    plt.ylabel("Overall Read Time (seconds)")
    plt.title(f"Overall Read Time vs Dataset Size for {dt}")
    plt.xscale("log")  # Added: logarithmic scale on x-axis.
    plt.yscale("log")  # Added: logarithmic scale on y-axis.
    plt.legend()
    plt.grid(True, which="both", ls="--")
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig(f"/fig/overall_read_time_{dt}.png")
    plt.close()

# Plot 3: Breakdown for First Read (Create) – Stacked bar: Overall Read Time and Posmap Write Time.
# Shows how much of the first read is spent writing the positional map.
if not agg_create.empty:
    ind = np.arange(len(agg_create))
    width = 0.6
    fig, ax = plt.subplots(figsize=(10,6))
    # WriteTime is stacked on top of ReadTime (bottom=ReadTime).
    p1 = ax.bar(ind, agg_create["ReadTime"], width, label="Overall Read Time")
    p2 = ax.bar(ind, agg_create["WriteTime"], width, bottom=agg_create["ReadTime"], label="Posmap Write Time")
    ax.set_xticks(ind)
    ax.set_xticklabels(agg_create["CSVFile"], rotation=45, ha="right")
    ax.set_ylabel("Time (seconds)")
    ax.set_title("First Read Breakdown (Create): Read vs. Write Posmap")
    ax.legend()
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig("/fig/create_read_breakdown.png")
    plt.close()

# Plot 4: Breakdown for Second Read (Opt) – Stacked bar: Posmap Read Time and Overall Read Time.
# Shows how much of the optimized read is spent loading the positional map.
if not agg_opt.empty:
    ind = np.arange(len(agg_opt))
    width = 0.6
    fig, ax = plt.subplots(figsize=(10,6))
    # ReadTime is stacked on top of PosmapReadTime (bottom=PosmapReadTime).
    p1 = ax.bar(ind, agg_opt["PosmapReadTime"], width, label="Posmap Read Time")
    p2 = ax.bar(ind, agg_opt["ReadTime"], width, bottom=agg_opt["PosmapReadTime"], label="Overall Read Time")
    ax.set_xticks(ind)
    ax.set_xticklabels(agg_opt["CSVFile"], rotation=45, ha="right")
    ax.set_ylabel("Time (seconds)")
    ax.set_title("Second Read Breakdown (Opt): Posmap vs. Overall Read")
    ax.legend()
    plt.tight_layout()
    # NOTE(review): writes to the absolute "/fig" directory — confirm it exists.
    plt.savefig("/fig/opt_read_breakdown.png")
    plt.close()

print("Charts generated and saved as PNG files.")
Loading