Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ option(KUN_AVX512 "Enable AVX512 instruction set" OFF)
option(KUN_AVX512DQ "Enable AVX512DQ instruction set" OFF)
option(KUN_AVX512VL "Enable AVX512VL instruction set" OFF)
option(KUN_NO_AVX2 "Disable AVX2 and FMA instruction set" OFF)
option(KUN_SANITIZER "Enable sanitizer" OFF)


if (CMAKE_CXX_COMPILER_ID MATCHES "(Clang|GNU|AppleClang)")
Expand All @@ -36,6 +37,9 @@ if (CMAKE_CXX_COMPILER_ID MATCHES "(Clang|GNU|AppleClang)")
if(KUN_AVX512VL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
endif()
if(KUN_SANITIZER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -static-libasan")
endif()
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4251 /wd4200 /wd4305")
if (NOT KUN_NO_AVX2)
Expand Down
7 changes: 4 additions & 3 deletions KunQuant/passes/CodegenCpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,13 @@ def codegen_cpp(prefix: str, f: Function, input_name_to_idx: Dict[str, int], inp
return "", f'''static auto stage_{prefix}__{f.name} = {f.ops[1].__class__.__name__}Stocks<Mapper{f.ops[0].attrs["layout"]}<{elem_type}, {simd_lanes}>, Mapper{f.ops[2].attrs["layout"]}<{elem_type}, {simd_lanes}>>;'''

is_cross_sectional = _is_cross_sectional(f)
time_or_stock, ctx_or_stage = ("__time_idx", "RuntimeStage *stage") if is_cross_sectional else ("__stock_idx", "Context* __ctx")
time_or_stock = "__time_idx" if is_cross_sectional else "__stock_idx"
func_name = _generate_cross_sectional_func_name(is_cross_sectional, inputs, outputs) if is_cross_sectional else f.name
header = f'''{"static " if static else ""}void stage_{prefix}__{func_name}({ctx_or_stage}, size_t {time_or_stock}, size_t __total_time, size_t __start, size_t __length) '''
header = f'''{"static " if static else ""}void stage_{prefix}__{func_name}(RuntimeStage *stage, size_t {time_or_stock}, size_t __total_time, size_t __start, size_t __length) '''
if static:
decl = ""
else:
decl = f'''extern void stage_{prefix}__{func_name}({ctx_or_stage}, size_t {time_or_stock}, size_t __total_time, size_t __start, size_t __length);'''
decl = f'''extern void stage_{prefix}__{func_name}(RuntimeStage *stage, size_t {time_or_stock}, size_t __total_time, size_t __start, size_t __length);'''
if is_cross_sectional:
decl = f"{decl}\nstatic auto stage_{prefix}__{f.name} = stage_{prefix}__{func_name};"
if func_name in generated_cross_sectional_func:
Expand Down Expand Up @@ -138,6 +138,7 @@ def codegen_cpp(prefix: str, f: Function, input_name_to_idx: Dict[str, int], inp
}}''', decl

toplevel = _CppScope(None)
toplevel.scope.append(_CppSingleLine(toplevel, f"auto __ctx = stage->ctx;"))
buffer_type: Dict[OpBase, str] = dict()
ptrname = "" if elem_type == "float" else "D"
for inp, buf_kind in inputs:
Expand Down
28 changes: 8 additions & 20 deletions Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ lib = cfake.compileit([("alpha101", f, KunCompilerConfig(input_layout="TS", outp
modu = lib.getModule("alpha101")
```

We will explain the function `cfake.compileit` in [Customize.md](./Customize.md). Let's continue to see how to use the compiled `lib`.
We will explain the function `cfake.compileit` in [Customize.md](./doc/Customize.md). Let's continue to see how to use the compiled `lib`.

Load your stock data. In this example, load from local pandas files. We assume the open, close, high, low, volumn and amount data for different stocks are stored in different files.

Expand Down Expand Up @@ -231,7 +231,7 @@ Note that the executors are reusable. A multithread executor is actually a threa

## Customized factors

KunQuant is a tool for general expressions. You can further read [Customize.md](./Customize.md) for how you can compile your own customized factors. This document also provides infomation on
KunQuant is a tool for general expressions. You can further read [Customize.md](./doc/Customize.md) for how you can compile your own customized factors. This document also provides infomation on
* building and keeping the compilation result for later use
* Loading existing compiled factor library
* enabling AVX512
Expand Down Expand Up @@ -290,34 +290,22 @@ On x86-64 CPUs, AVX2-FMA is used by default in the built KunQuant core library.

## Streaming mode

KunQuant can be configured to generate factor libraries for streaming, when the data arrive one at a time. See [Stream.md](./Stream.md)
KunQuant can be configured to generate factor libraries for streaming, when the data arrive one at a time. See [Stream.md](./doc/Stream.md)

## Row-to-row correlation (for IC/IR calculation)
## Utility functions

```python
from KunQuant.runner import KunRunner as kr
data1 = ... # np.ndarray of shape [time*stocks]. For example, a factor's results
data2 = ... # np.ndarray of shape [time*stocks]. For example, a factor's results
valid_in = {"alpha1": data1, "alpha2": data2}
returns = ... # np.ndarray of shape [time*stocks]. For example, the rank of returns
valid_corr = {"alpha1": np.empty((time,), dtype="float32"), "alpha2": np.empty((time,), dtype="float32")}
kr.corrWith(executor, valid_in, returns, valid_corr, layout = "TS", rank_inputs = True)
# outputs in valid_corr
alpha1_ic = valid_corr["alpha1"].mean()
```

The parameter `rank_inputs=True` will first compute rank in the first input array (e.g. `valid_in` above) and compute the correlation with the second input (e.g. `returns` above). It will not compute the rank of the second input.
To compute row-to-row correlation (for IC/IR calculation) and aggregrating functions (like `pd.groupby(...)`), please see [Utility.md](./doc/Utility.md).

## Using C-style APIs

KunQuant provides C-style APIs to call the generated factor code in shared libraries. See [CAPI.md](./CAPI.md)
KunQuant provides C-style APIs to call the generated factor code in shared libraries. See [CAPI.md](./doc/CAPI.md)


## Operator definitions

See [Operators.md](./Operators.md)
See [Operators.md](./doc/Operators.md)

To add new operators, see [NewOperators.md](./NewOperators.md)
To add new operators, see [NewOperators.md](./doc/NewOperators.md)

## Testing and validation

Expand Down
122 changes: 122 additions & 0 deletions cpp/Kun/Aggregration.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@

#include <Kun/Context.hpp>
#include <Kun/LayoutMappers.hpp>
#include <Kun/Module.hpp>
#include <Kun/Ops.hpp>
#include <Kun/RunGraph.hpp>

namespace kun {
namespace ops {

template <typename T, int simdlen>
static void aggregration(RuntimeStage *stage, size_t stock_idx,
size_t __total_time, size_t __start, size_t __length) {
auto num_stock = stage->ctx->stock_count;
auto &buffers = stage->ctx->buffers;
auto &module_in_buffers = stage->stage->in_buffers;
auto &module_out_buffers = stage->stage->out_buffers;
auto &lablebuf = buffers[module_in_buffers[0]->id];
auto &inbuf_orig = buffers[module_in_buffers[1]->id];

auto &sumbuf_orig = buffers[module_out_buffers[AGGREGRATION_SUM]->id];
auto &minbuf_orig = buffers[module_out_buffers[AGGREGRATION_MIN]->id];
auto &maxbuf_orig = buffers[module_out_buffers[AGGREGRATION_MAX]->id];
auto &firstbuf_orig = buffers[module_out_buffers[AGGREGRATION_FIRST]->id];
auto &lastbuf_orig = buffers[module_out_buffers[AGGREGRATION_LAST]->id];
auto &countbuf_orig = buffers[module_out_buffers[AGGREGRATION_COUNT]->id];
auto &meanbuf_orig = buffers[module_out_buffers[AGGREGRATION_MEAN]->id];

T *labels = lablebuf.getPtr<T>() + __start;
InputTS<T, simdlen> inbuf{inbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, __start};

OutputTS<T, simdlen> sumbuf{sumbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, 0};
OutputTS<T, simdlen> minbuf{minbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, 0};
OutputTS<T, simdlen> maxbuf{maxbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, 0};
OutputTS<T, simdlen> firstbuf{firstbuf_orig.getPtr<T>(), stock_idx,
num_stock, __total_time, 0};
OutputTS<T, simdlen> lastbuf{lastbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, 0};
OutputTS<T, simdlen> countbuf{countbuf_orig.getPtr<T>(), stock_idx,
num_stock, __total_time, 0};
OutputTS<T, simdlen> meanbuf{meanbuf_orig.getPtr<T>(), stock_idx, num_stock,
__total_time, 0};

using SimdT = kun_simd::vec<T, simdlen>;
ReduceMin<T, simdlen> reduce_min;
ReduceMax<T, simdlen> reduce_max;
ReduceAdd<T, simdlen> reduce_add;
SimdT first = inbuf.step(0);
SimdT last;
SimdT count{0};

auto todo_count = num_stock - stock_idx * simdlen;
todo_count = todo_count > simdlen ? simdlen : todo_count;
auto mask = SimdT::make_mask(todo_count);
T last_label = labels[0];
size_t store_idx = 0;
for (size_t i = 0; i < __length; i++) {
auto label = labels[i];
auto cur = inbuf.step(i);
if (label != last_label) {
if (sumbuf.buf)
sumbuf.store(store_idx, reduce_add, mask);
if (minbuf.buf)
minbuf.store(store_idx, reduce_min, mask);
if (maxbuf.buf)
maxbuf.store(store_idx, reduce_max, mask);
if (firstbuf.buf)
firstbuf.store(store_idx, first, mask);
if (lastbuf.buf)
lastbuf.store(store_idx, last, mask);
if (countbuf.buf)
countbuf.store(store_idx, count, mask);
if (meanbuf.buf)
meanbuf.store(store_idx, reduce_add / count, mask);
store_idx++;
first = cur;
reduce_min = ReduceMin<T, simdlen>{};
reduce_max = ReduceMax<T, simdlen>{};
reduce_add = ReduceAdd<T, simdlen>{};
count = T{0};
}
count = count + T{1};
last = cur;
last_label = label;
reduce_max.step(cur, i);
reduce_min.step(cur, i);
reduce_add.step(cur, i);
}
if (sumbuf.buf)
sumbuf.store(store_idx, reduce_add, mask);
if (minbuf.buf)
minbuf.store(store_idx, reduce_min, mask);
if (maxbuf.buf)
maxbuf.store(store_idx, reduce_max, mask);
if (firstbuf.buf)
firstbuf.store(store_idx, first, mask);
if (lastbuf.buf)
lastbuf.store(store_idx, last, mask);
if (countbuf.buf)
countbuf.store(store_idx, count, mask);
if (meanbuf.buf)
meanbuf.store(store_idx, reduce_add / count, mask);
}

void aggregrationFloat(RuntimeStage *stage, size_t stock_idx,
size_t __total_time, size_t __start, size_t __length) {
aggregration<float, KUN_DEFAULT_FLOAT_SIMD_LEN>(
stage, stock_idx, __total_time, __start, __length);
}

void aggregrationDouble(RuntimeStage *stage, size_t stock_idx,
size_t __total_time, size_t __start, size_t __length) {
aggregration<double, KUN_DEFAULT_DOUBLE_SIMD_LEN>(
stage, stock_idx, __total_time, __start, __length);
}

} // namespace ops
} // namespace kun
14 changes: 12 additions & 2 deletions cpp/Kun/Base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#ifdef __cplusplus
namespace kun {
struct Context;
static constexpr size_t time_stride = 8;
constexpr size_t time_stride = 8;
} // namespace kun
#endif

Expand All @@ -25,4 +25,14 @@ static constexpr size_t time_stride = 8;
// g++ has an strange behavior, it needs T to be
// exported if we want to export func<T>
#define KUN_TEMPLATE_ARG KUN_API
#endif
#endif


#ifdef __AVX__
#define KUN_DEFAULT_FLOAT_SIMD_LEN 8
#define KUN_DEFAULT_DOUBLE_SIMD_LEN 4
#else
// neon
#define KUN_DEFAULT_FLOAT_SIMD_LEN 4
#define KUN_DEFAULT_DOUBLE_SIMD_LEN 2
#endif
8 changes: 4 additions & 4 deletions cpp/Kun/CorrWith.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@ namespace ops {
RuntimeStage * stage, size_t time_idx, size_t __total_time, \
size_t __start, size_t __length);

DEF_INSTANCE(CorrWith, MapperSTs<float, 8>)
DEF_INSTANCE(CorrWith, MapperTS<float, 8>)
DEF_INSTANCE(RankCorrWith, MapperSTs<float, 8>)
DEF_INSTANCE(RankCorrWith, MapperTS<float, 8>)
DEF_INSTANCE(CorrWith, MapperSTsFloat)
DEF_INSTANCE(CorrWith, MapperTSFloat)
DEF_INSTANCE(RankCorrWith, MapperSTsFloat)
DEF_INSTANCE(RankCorrWith, MapperTSFloat)

} // namespace ops
} // namespace kun
12 changes: 6 additions & 6 deletions cpp/Kun/CorrWith.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
namespace kun {
namespace ops {
template <typename INPUT>
void KUN_TEMPLATE_EXPORT CorrWith(RuntimeStage *stage, size_t time_idx,
void CorrWith(RuntimeStage *stage, size_t time_idx,
size_t __total_time, size_t __start,
size_t __length) {
auto num_stocks = stage->ctx->stock_count;
Expand Down Expand Up @@ -54,7 +54,7 @@ void KUN_TEMPLATE_EXPORT CorrWith(RuntimeStage *stage, size_t time_idx,
}

template <typename INPUT>
void KUN_TEMPLATE_EXPORT RankCorrWith(RuntimeStage *stage, size_t time_idx,
void RankCorrWith(RuntimeStage *stage, size_t time_idx,
size_t __total_time, size_t __start,
size_t __length) {
auto num_stocks = stage->ctx->stock_count;
Expand Down Expand Up @@ -120,22 +120,22 @@ void KUN_TEMPLATE_EXPORT RankCorrWith(RuntimeStage *stage, size_t time_idx,
}
}

extern template void CorrWith<MapperSTs<float, 8>>(RuntimeStage *stage,
extern template void CorrWith<MapperSTsFloat>(RuntimeStage *stage,
size_t time_idx,
size_t __total_time,
size_t __start,
size_t __length);
extern template void CorrWith<MapperTS<float, 8>>(RuntimeStage *stage,
extern template void CorrWith<MapperTSFloat>(RuntimeStage *stage,
size_t time_idx,
size_t __total_time,
size_t __start,
size_t __length);
extern template void RankCorrWith<MapperSTs<float, 8>>(RuntimeStage *stage,
extern template void RankCorrWith<MapperSTsFloat>(RuntimeStage *stage,
size_t time_idx,
size_t __total_time,
size_t __start,
size_t __length);
extern template void RankCorrWith<MapperTS<float, 8>>(RuntimeStage *stage,
extern template void RankCorrWith<MapperTSFloat>(RuntimeStage *stage,
size_t time_idx,
size_t __total_time,
size_t __start,
Expand Down
7 changes: 7 additions & 0 deletions cpp/Kun/LayoutMappers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ struct KUN_TEMPLATE_ARG MapperSTREAM {
}
};

using MapperSTsFloat = MapperSTs<float, KUN_DEFAULT_FLOAT_SIMD_LEN>;
using MapperTSFloat = MapperTS<float, KUN_DEFAULT_FLOAT_SIMD_LEN>;
using MapperSTREAMFloat = MapperSTREAM<float, KUN_DEFAULT_FLOAT_SIMD_LEN>;
using MapperSTsDouble = MapperSTs<double, KUN_DEFAULT_DOUBLE_SIMD_LEN>;
using MapperTSDouble = MapperTS<double, KUN_DEFAULT_DOUBLE_SIMD_LEN>;
using MapperSTREAMDouble = MapperSTREAM<double, KUN_DEFAULT_DOUBLE_SIMD_LEN>;

namespace {
template <typename Mapper>
struct ExtractInputBuffer {
Expand Down
10 changes: 5 additions & 5 deletions cpp/Kun/Rank.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ namespace ops {
RuntimeStage * stage, size_t time_idx, size_t __total_time, \
size_t __start, size_t __length);

DEF_INSTANCE(MapperSTs<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTs<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperTS<float, 8>)
DEF_INSTANCE(MapperTS<float, 8>, MapperSTs<float, 8>)
DEF_INSTANCE(MapperSTREAM<float, 8>, MapperSTREAM<float, 8>)
DEF_INSTANCE(MapperSTsFloat, MapperSTsFloat)
DEF_INSTANCE(MapperSTsFloat, MapperTSFloat)
DEF_INSTANCE(MapperTSFloat, MapperTSFloat)
DEF_INSTANCE(MapperTSFloat, MapperSTsFloat)
DEF_INSTANCE(MapperSTREAMFloat, MapperSTREAMFloat)

} // namespace ops
} // namespace kun
10 changes: 5 additions & 5 deletions cpp/Kun/Rank.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,19 @@ void KUN_TEMPLATE_EXPORT RankStocks(RuntimeStage *stage, size_t time_idx,
}
}

extern template void RankStocks<MapperSTs<float, 8>, MapperSTs<float, 8>>(
extern template void RankStocks<MapperSTsFloat, MapperSTsFloat>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
extern template void RankStocks<MapperSTs<float, 8>, MapperTS<float, 8>>(
extern template void RankStocks<MapperSTsFloat, MapperTSFloat>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
extern template void RankStocks<MapperTS<float, 8>, MapperTS<float, 8>>(
extern template void RankStocks<MapperTSFloat, MapperTSFloat>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
extern template void RankStocks<MapperTS<float, 8>, MapperSTs<float, 8>>(
extern template void RankStocks<MapperTSFloat, MapperSTsFloat>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);
extern template void RankStocks<MapperSTREAM<float, 8>, MapperSTREAM<float, 8>>(
extern template void RankStocks<MapperSTREAMFloat, MapperSTREAMFloat>(
RuntimeStage *stage, size_t time_idx, size_t __total_time, size_t __start,
size_t __length);

Expand Down
Loading