Skip to content
11 changes: 10 additions & 1 deletion src/api/daphnelib/DaphneLibResult.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,19 @@

struct DaphneLibResult {
// For matrices.
void *address;
int64_t rows;
int64_t cols;
int64_t vtc;
bool isSparse;

// For DenseMatrix
void *address;

// For Sparse Matrix
void *data;
void *row_related;
void *col_related;

// For frames.
int64_t *vtcs;
char **labels;
Expand Down
4 changes: 2 additions & 2 deletions src/api/daphnelib/daphnelib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ extern "C" DaphneLibResult getResult() { return daphneLibRes; }
* dir.
*/
extern "C" int daphne(const char *libDirPath, const char *scriptPath) {
const char *argv[] = {"daphne", "--libdir", libDirPath, scriptPath};
int argc = 4;
const char *argv[] = {"daphne", "--libdir", libDirPath, "--select-matrix-repr", scriptPath};
int argc = 5;

return mainInternal(argc, argv, &daphneLibRes);
}
297 changes: 251 additions & 46 deletions src/api/python/daphne/context/daphne_context.py

Large diffs are not rendered by default.

40 changes: 35 additions & 5 deletions src/api/python/daphne/operator/operation_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@

import numpy as np
import pandas as pd
try:
import scipy.sparse as sp
except ImportError as e:
sp = e
try:
import torch as torch
except ImportError as e:
Expand Down Expand Up @@ -188,11 +192,37 @@ def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyT
self.clear_tmp()
elif self._output_type == OutputType.MATRIX and type=="shared memory":
daphneLibResult = DaphneLib.getResult()
result = np.ctypeslib.as_array(
ctypes.cast(daphneLibResult.address, ctypes.POINTER(self.getType(daphneLibResult.vtc))),
shape=[daphneLibResult.rows, daphneLibResult.cols]
)
self.clear_tmp()
if not daphneLibResult.isSparse:
# Dense Matrix
daphneLibResult = DaphneLib.getResult()
result = np.ctypeslib.as_array(
ctypes.cast(daphneLibResult.address, ctypes.POINTER(self.getType(daphneLibResult.vtc))),
shape=[daphneLibResult.rows, daphneLibResult.cols]
)
else:
# CSR Matrix
VT = self.getType(daphneLibResult.vtc)

# wrap each pointer into a numpy array
indptr = np.ctypeslib.as_array(
ctypes.cast(daphneLibResult.row_related, ctypes.POINTER(ctypes.c_size_t)),
shape=(daphneLibResult.rows + 1,)
)
nnz = int(indptr[-1] - indptr[0])


data = np.ctypeslib.as_array(
ctypes.cast(daphneLibResult.data, ctypes.POINTER(VT)),
shape=(nnz,)
)

indices = np.ctypeslib.as_array(
ctypes.cast(daphneLibResult.col_related, ctypes.POINTER(ctypes.c_size_t)),
shape=(nnz,)
)
# build scipy CSR
result = sp.csr_matrix((data, indices, indptr), shape=(daphneLibResult.rows, daphneLibResult.cols))
self.clear_tmp()
elif self._output_type == OutputType.MATRIX and type=="files":
# Ensure string data is handled correctly
arr = np.genfromtxt(result, delimiter=',', dtype=None, encoding='utf-8')
Expand Down
6 changes: 6 additions & 0 deletions src/api/python/daphne/utils/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,9 @@
F32 = 6
F64 = 7
STR = 8

# Sparse Matrix Representations codes.
CSR = 0
COO = 1
CSC = 2

16 changes: 13 additions & 3 deletions src/api/python/daphne/utils/daphnelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,28 @@
# Python representation of the struct DaphneLibResult.
class DaphneLibResult(ctypes.Structure):
_fields_ = [
# For matrices.
("address", ctypes.c_void_p),
# For matrices
("rows", ctypes.c_int64),
("cols", ctypes.c_int64),
("vtc", ctypes.c_int64),
("isSparse", ctypes.c_bool),

# For dense matrices
("address", ctypes.c_void_p),

# For sparse matrices
("data", ctypes.c_void_p),
("row_related",ctypes.c_void_p),
("col_related",ctypes.c_void_p),

# For frames.
("vtcs", ctypes.POINTER(ctypes.c_int64)),
("labels", ctypes.POINTER(ctypes.c_char_p)),
("columns", ctypes.POINTER(ctypes.c_void_p)),

# To pass error messages to Python code.
("error_message", ctypes.c_char_p)
]

DaphneLib = ctypes.CDLL(os.path.join(PROTOTYPE_PATH, DAPHNELIB_FILENAME))
DaphneLib.getResult.restype = DaphneLibResult
DaphneLib.getResult.restype = DaphneLibResult
6 changes: 5 additions & 1 deletion src/compiler/inference/SelectMatrixRepresentationsPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ class SelectMatrixRepresentationsPass
const DaphneUserConfig &cfg;

std::function<WalkResult(Operation *)> walkOp = [&](Operation *op) {
if (returnsKnownProperties(op)) {
if (llvm::isa<daphne::ReceiveFromScipyOp>(op)) {
mlir::Value res = op->getResults()[0];
res.setType(
res.getType().dyn_cast<daphne::MatrixType>().withRepresentation(daphne::MatrixRepresentation::Sparse));
} else if (returnsKnownProperties(op)) {
const bool isScfOp = op->getDialect() == op->getContext()->getOrLoadDialect<scf::SCFDialect>();
// ----------------------------------------------------------------
// Handle all non-SCF operations
Expand Down
5 changes: 5 additions & 0 deletions src/ir/daphneir/DaphneOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -1732,6 +1732,11 @@ def Daphne_ReceiveFromNumpyOp: Daphne_Op<"receiveFromNumpy">{
let results = (outs MatrixOrU:$res);
}

def Daphne_ReceiveFromScipyOp: Daphne_Op<"receiveFromScipy">{
let arguments = (ins UI64: $valuesAddr, UI64: $rowRelatedIdx, UI64: $colRelatedIdx, SI64:$numRows, SI64:$numCols, SI64:$nnz, UI64:$format);
let results = (outs MatrixOrU:$res);
}

def Daphne_SaveDaphneLibResultOp : Daphne_Op<"saveDaphneLibResult"> {
let arguments = (ins MatrixOrFrame:$arg);
let results = (outs); // no results
Expand Down
40 changes: 40 additions & 0 deletions src/parser/daphnedsl/DaphneDSLBuiltins.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,46 @@ antlrcpp::Any DaphneDSLBuiltins::build(mlir::Location loc, const std::string &fu
return static_cast<mlir::Value>(
builder.create<ReceiveFromNumpyOp>(loc, utils.matrixOf(vt), address, rows, cols));
}
if (func == "receiveFromScipy") {
checkNumArgsExact(loc, func, numArgs, 8);

mlir::Value data_addr = utils.castUI64If(args[0]);
mlir::Value row_related_addr = utils.castUI64If(args[1]);
mlir::Value col_related_addr = utils.castUI64If(args[2]);
mlir::Value rows = args[3];
mlir::Value cols = args[4];
mlir::Value nnz = args[5];
mlir::Value format = utils.castUI64If(args[6]);
mlir::Value valueType = args[7];

int64_t valueTypeCode = CompilerUtils::constantOrThrow<int64_t>(
valueType, "the value type code in ReceiveFromScipyOp must be a constant");

// TODO Is there a utility for this mapping from value type code to MLIR
// type?
mlir::Type vt;
if (valueTypeCode == (int64_t)ValueTypeCode::F32)
vt = builder.getF32Type();
else if (valueTypeCode == (int64_t)ValueTypeCode::F64)
vt = builder.getF64Type();
else if (valueTypeCode == (int64_t)ValueTypeCode::SI8)
vt = builder.getIntegerType(8, true);
else if (valueTypeCode == (int64_t)ValueTypeCode::SI32)
vt = builder.getIntegerType(32, true);
else if (valueTypeCode == (int64_t)ValueTypeCode::SI64)
vt = builder.getIntegerType(64, true);
else if (valueTypeCode == (int64_t)ValueTypeCode::UI8)
vt = builder.getIntegerType(8, false);
else if (valueTypeCode == (int64_t)ValueTypeCode::UI32)
vt = builder.getIntegerType(32, false);
else if (valueTypeCode == (int64_t)ValueTypeCode::UI64)
vt = builder.getIntegerType(64, false);
else
throw ErrorHandler::compilerError(loc, "DSLBuiltins", "invalid value type code");

return static_cast<mlir::Value>(builder.create<ReceiveFromScipyOp>(
loc, utils.matrixOf(vt), data_addr, row_related_addr, col_related_addr, rows, cols, nnz, format));
}
if (func == "saveDaphneLibResult") {
checkNumArgsExact(loc, func, numArgs, 1);
mlir::Value arg = args[0];
Expand Down
1 change: 1 addition & 0 deletions src/runtime/local/datastructures/CSRMatrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include <runtime/local/datastructures/AllocationDescriptorHost.h>
#include <runtime/local/io/DaphneSerializer.h>

#include "CSRMatrix.h"
Expand Down
23 changes: 19 additions & 4 deletions src/runtime/local/datastructures/CSRMatrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ template <typename ValueType> class CSRMatrix : public Matrix<ValueType> {
rowOffsets = std::shared_ptr<size_t[]>(src->rowOffsets, src->rowOffsets.get() + rowLowerIncl);
}

CSRMatrix(size_t numRows, size_t numCols, size_t numNonZeros, std::shared_ptr<ValueType[]> &values,
std::shared_ptr<size_t[]> &colIdxs, std::shared_ptr<size_t[]> &rowOffsets)
: Matrix<ValueType>(numRows, numCols), numRowsAllocated(numRows), isRowAllocatedBefore(false),
maxNumNonZeros(numNonZeros), values(values), colIdxs(colIdxs), rowOffsets(rowOffsets), lastAppendedRowIdx(0) {
}

virtual ~CSRMatrix() {
// nothing to do
}
Expand Down Expand Up @@ -163,11 +169,20 @@ template <typename ValueType> class CSRMatrix : public Matrix<ValueType> {
}

void shrinkNumNonZeros(size_t numNonZeros) {
if (numNonZeros > getNumNonZeros())
size_t actualNumNonZeros = getNumNonZeros();
if (numNonZeros > actualNumNonZeros)
throw std::runtime_error("CSRMatrix (shrinkNumNonZeros): "
"numNonZeros can only be shrunk");
// TODO Here we could reduce the allocated size of the values and
// colIdxs arrays.
"cannot shrink below actual non-zero count");
// allocate new buffers
auto newValues = std::shared_ptr<ValueType[]>(new ValueType[numNonZeros], std::default_delete<ValueType[]>());
auto newColIdxs = std::shared_ptr<size_t[]>(new size_t[numNonZeros], std::default_delete<size_t[]>());
// copy first numNonZeros entries
std::memcpy(newValues.get(), values.get(), numNonZeros * sizeof(ValueType));
std::memcpy(newColIdxs.get(), colIdxs.get(), numNonZeros * sizeof(size_t));

values = std::move(newValues);
colIdxs = std::move(newColIdxs);
maxNumNonZeros = numNonZeros;
}

ValueType *getValues() { return values.get(); }
Expand Down
139 changes: 139 additions & 0 deletions src/runtime/local/kernels/ReceiveFromScipy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright 2022 The DAPHNE Consortium
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMSCIPY_H
#define SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMSCIPY_H

#include <runtime/local/context/DaphneContext.h>
#include <runtime/local/datastructures/CSRMatrix.h>
#include <runtime/local/datastructures/DataObjectFactory.h>

#include <memory>

// ****************************************************************************
// Struct for partial template specialization
// ****************************************************************************
template <class DTRes> struct ReceiveFromScipy {
static void apply(DTRes *&res, uint64_t valuesAddr, uint64_t rowRelatedIdx, uint64_t colRelatedIdx, int64_t numRows,
int64_t numCols, int64_t nnz, uint64_t format, DCTX(ctx)) = delete;
};

// ****************************************************************************
// Convenience function
// ****************************************************************************

template <class DTRes>
void receiveFromScipy(DTRes *&res, uint64_t valuesAddr, uint64_t rowRelatedIdx, uint64_t colRelatedIdx, int64_t numRows,
int64_t numCols, int64_t nnz, uint64_t format, DCTX(ctx)) {
ReceiveFromScipy<DTRes>::apply(res, valuesAddr, rowRelatedIdx, colRelatedIdx, numRows, numCols, nnz, format, ctx);
}

// ****************************************************************************
// (Partial) template specializations for different data/value types
// ****************************************************************************

// TODO Should we make this a central utility?
template <typename VT> struct NoOpDeleter {
void operator()(VT *p) {
// Don't delete p because the memory comes from numpy.
}
};

template <typename VT> struct ReceiveFromScipy<CSRMatrix<VT>> {
static void apply(CSRMatrix<VT> *&res, uint64_t valuesAddr, uint64_t rowRelatedIdx, uint64_t colRelatedIdx,
int64_t numRows, int64_t numCols, int64_t nnz, uint64_t format, DCTX(ctx)) {
std::shared_ptr<VT[]> values((VT *)valuesAddr, NoOpDeleter<VT>());
std::shared_ptr<size_t[]> colIdxs;
std::shared_ptr<size_t[]> rowOffsets;

if (format == 0) { // CSR
auto rowOffsetsRaw = (size_t *)rowRelatedIdx;
auto colIdxsRaw = (size_t *)colRelatedIdx;

rowOffsets = std::shared_ptr<size_t[]>(rowOffsetsRaw, NoOpDeleter<size_t>());
colIdxs = std::shared_ptr<size_t[]>(colIdxsRaw, NoOpDeleter<size_t>());
} else if (format == 1) { // COO
auto rowIdxsRaw = (size_t *)rowRelatedIdx;
auto colIdxsRaw = (size_t *)colRelatedIdx;

colIdxs = std::shared_ptr<size_t[]>(colIdxsRaw, NoOpDeleter<size_t>());

size_t *rowOffsetsRaw = new size_t[numRows + 1]();

for (int64_t i = 0; i < nnz; i++) {
size_t r = rowIdxsRaw[i];
if (r >= (size_t)numRows)
throw std::runtime_error("ReceiveFromScipy: failed to convert COO input to CSR.");
rowOffsetsRaw[r + 1]++;
}

for (int64_t i = 0; i < numRows; i++) {
rowOffsetsRaw[i + 1] += rowOffsetsRaw[i];
}

colIdxs = std::shared_ptr<size_t[]>(colIdxsRaw, NoOpDeleter<size_t>());
rowOffsets = std::shared_ptr<size_t[]>(rowOffsetsRaw, std::default_delete<size_t[]>());

} else if (format == 2) { // CSC
auto rowIdxsRaw = (size_t *)rowRelatedIdx;
auto colOffsetsRaw = (size_t *)colRelatedIdx;

// Build CSR row-offsets
size_t *rowOffsetsRaw = new size_t[numRows + 1]();
for (int64_t col = 0; col < numCols; col++) {
size_t start = colOffsetsRaw[col], end = colOffsetsRaw[col + 1];
for (size_t i = start; i < end; i++) {
size_t r = rowIdxsRaw[i];
if (r >= (size_t)numRows)
throw std::runtime_error("ReceiveFromScipy: failed to convert CSC input to CSR.");
rowOffsetsRaw[r + 1]++;
}
}
for (int64_t i = 0; i < numRows; i++)
rowOffsetsRaw[i + 1] += rowOffsetsRaw[i];

// Allocate CSR storage column-indices and values.
size_t *colIdxsRaw = new size_t[nnz];
VT *dataCsrRaw = new VT[nnz];

// Scatter into CSR, tracking per-row insertion
std::vector<size_t> nextInsert(numRows);
for (int64_t i = 0; i < numRows; i++)
nextInsert[i] = rowOffsetsRaw[i];

for (int64_t col = 0; col < numCols; col++) {
size_t start = colOffsetsRaw[col], end = colOffsetsRaw[col + 1];
for (size_t i = start; i < end; i++) {
size_t r = rowIdxsRaw[i];
size_t pos = nextInsert[r]++;
colIdxsRaw[pos] = col;
dataCsrRaw[pos] = values.get()[i];
}
}

// Wrap into shared_ptrs
colIdxs = std::shared_ptr<size_t[]>(colIdxsRaw, std::default_delete<size_t[]>());
rowOffsets = std::shared_ptr<size_t[]>(rowOffsetsRaw, std::default_delete<size_t[]>());
values = std::shared_ptr<VT[]>(dataCsrRaw, std::default_delete<VT[]>());
} else {
throw std::runtime_error("ReceiveFromScipy: Unknown sparse matrix representation.");
}

res = DataObjectFactory::create<CSRMatrix<VT>>(numRows, numCols, nnz, values, colIdxs, rowOffsets);
}
};

#endif // SRC_RUNTIME_LOCAL_KERNELS_RECEIVEFROMSCIPY_H
Loading
Loading