Skip to content

Commit d77866a

Browse files
committed
Merge branch 'master' of github.com:flexflow/flexflow-train into repo-refactor-fix-kernels-tests
2 parents d7d59a3 + 5e7f207 commit d77866a

32 files changed

+1705
-201
lines changed

.flake/pkgs/ffdb/default.nix

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{ lib
2+
, stdenv
3+
, makeWrapper
4+
, gdb
5+
, python3
6+
, proj
7+
}:
8+
9+
stdenv.mkDerivation rec {
10+
pname = "ffdb";
11+
version = "0.1";
12+
13+
pythonPath = with python3.pkgs; makePythonPath [
14+
proj
15+
];
16+
17+
dontBuild = true;
18+
19+
nativeBuildInputs = [ makeWrapper ];
20+
21+
src = ./.;
22+
23+
installPhase = ''
24+
mkdir -p $out/share/ffdb
25+
cp ffdb.py $out/share/ffdb
26+
makeWrapper ${gdb}/bin/gdb $out/bin/gdb \
27+
--add-flags "-q -x $out/share/ffdb/ffdb.py" \
28+
--set NIX_PYTHONPATH ${pythonPath} \
29+
--prefix PATH : ${lib.makeBinPath [
30+
python3
31+
]}
32+
cp $out/bin/gdb $out/bin/ffdb
33+
'';
34+
35+
nativeCheckInputs = [
36+
gdb
37+
python3
38+
proj
39+
];
40+
}

.flake/pkgs/ffdb/ffdb.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from proj.config_file import get_config_root
2+
from pathlib import Path
3+
import gdb
4+
5+
gdb.execute(f'directory {get_config_root(Path.cwd())}')
6+
gdb.prompt_hook = lambda x: '(ffdb) '
7+
gdb.execute('set history save on')

.github/runs-on.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ images:
88
runners:
99
gpu-nvidia:
1010
family: ["g4dn.xlarge"]
11-
image: dlami-x64
11+
image: dlami-x64

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# gdb history
2+
.gdb_history
3+
14
# dtgen files
25
*.dtg.cc
36
*.dtg.h

README.md

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,21 @@
1-
# FlexFlow
2-
![build](https://github.com/flexflow/flexflow/workflows/build/badge.svg?branch=master) ![gpu tests](https://github.com/flexflow/flexflow/workflows/gpu-ci/badge.svg?branch=master) ![multinode gpu tests](https://github.com/flexflow/flexflow/workflows/multinode-test/badge.svg?branch=master) ![docker](https://github.com/flexflow/flexflow/workflows/docker-build/badge.svg?branch=master) ![pip](https://github.com/flexflow/flexflow/workflows/pip-install/badge.svg?branch=master) ![shell-check](https://github.com/flexflow/flexflow/workflows/Shell%20Check/badge.svg?branch=master) ![clang-format](https://github.com/flexflow/flexflow/workflows/clang-format%20Check/badge.svg?branch=master) [![Documentation Status](https://readthedocs.org/projects/flexflow/badge/?version=latest)](https://flexflow.readthedocs.io/en/latest/?badge=latest)
1+
# flexflow-train
2+
[![clang-format Check](https://github.com/flexflow/flexflow-train/actions/workflows/clang-format-check.yml/badge.svg?branch=master)](https://github.com/flexflow/flexflow-train/actions/workflows/clang-format-check.yml)
3+
[![per-lib-checks](https://github.com/flexflow/flexflow-train/actions/workflows/per-lib-check.yml/badge.svg)](https://github.com/flexflow/flexflow-train/actions/workflows/per-lib-check.yml)
4+
[![shell-check](https://github.com/flexflow/flexflow-train/actions/workflows/shell-check.yml/badge.svg)](https://github.com/flexflow/flexflow-train/actions/workflows/shell-check.yml)
5+
[![Documentation Status](https://readthedocs.org/projects/flexflow/badge/?version=latest)](https://flexflow.readthedocs.io/en/latest/?badge=latest)
36

4-
FlexFlow is a deep learning framework that accelerates distributed DNN training by automatically searching for efficient parallelization strategies. FlexFlow provides a drop-in replacement for PyTorch and TensorFlow Keras. Running existing PyTorch and Keras programs in FlexFlow only requires [a few lines of changes to the program](https://flexflow.ai/keras).
7+
> [!WARNING]
8+
> The FlexFlow repository has been split into separate [flexflow-train](https://github.com/flexflow/flexflow-train) and [flexflow-serve](https://github.com/flexflow/flexflow-serve) repositories.
9+
> You are currently viewing [flexflow-train](https://github.com/flexflow/flexflow-train).
10+
> For anything inference/serving-related, go to [flexflow-serve](https://github.com/flexflow/flexflow-serve).
511
12+
FlexFlow is a deep learning framework that accelerates distributed DNN training by automatically searching for efficient parallelization strategies.
13+
14+
<!--
15+
FlexFlow provides a drop-in replacement for PyTorch and TensorFlow Keras. Running existing PyTorch and Keras programs in FlexFlow only requires [a few lines of changes to the program](https://flexflow.ai/keras).
16+
-->
17+
18+
<!--
619
## Install FlexFlow
720
To install FlexFlow from source code, please read the [instructions](INSTALL.md). If you would like to quickly try FlexFlow, we also provide pre-built Docker packages ([flexflow-cuda](https://github.com/flexflow/FlexFlow/pkgs/container/flexflow-cuda) with a CUDA backend, [flexflow-hip_rocm](https://github.com/flexflow/FlexFlow/pkgs/container/flexflow-hip_rocm) with a HIP-ROCM backend) with all dependencies pre-installed (N.B.: currently, the CUDA pre-built containers are only fully compatible with host machines that have CUDA 11.7 installed), together with [Dockerfiles](./docker) if you wish to build the containers manually. You can also use `conda` to install the FlexFlow Python package (coming soon).
821
@@ -67,10 +80,11 @@ Performance auto-tuning flags:
6780
* `--enable-parameter-parallel`: allow FlexFlow to explore parameter parallelism for performance auto-tuning. (By default FlexFlow only considers data and model parallelism.)
6881
* `--enable-attribute-parallel`: allow FlexFlow to explore attribute parallelism for performance auto-tuning. (By default FlexFlow only considers data and model parallelism.)
6982
For performance tuning related flags: see [performance autotuning](https://flexflow.ai/search).
83+
-->
7084

7185
## Contributing
7286

73-
Please let us know if you encounter any bugs or have any suggestions by [submitting an issue](https://github.com/flexflow/flexflow/issues).
87+
Please let us know if you encounter any bugs or have any suggestions by [submitting an issue](https://github.com/flexflow/flexflow-train/issues).
7488

7589
We welcome all contributions to FlexFlow from bug fixes to new features and extensions.
7690

cmake/flexflow-utils.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ function(ff_set_cxx_properties target)
3939
CXX_EXTENSIONS NO
4040
)
4141
target_compile_options(${target}
42-
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:> # add C++ compile flags here
42+
PRIVATE $<$<COMPILE_LANGUAGE:CXX>:> "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." # add C++ compile flags here
4343
)
4444
endfunction()
4545

flake.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,13 @@
4141
mkShell = pkgs.mkShell.override {
4242
stdenv = pkgs.cudaPackages.backendStdenv;
4343
};
44+
45+
proj = proj-repo.packages.${system}.proj;
4446
in
4547
{
4648
packages = {
4749
legion = pkgs.callPackage ./.flake/pkgs/legion.nix { };
50+
ffdb = pkgs.callPackage ./.flake/pkgs/ffdb { inherit proj; };
4851
hpp2plantuml = pkgs.python3Packages.callPackage ./.flake/pkgs/hpp2plantuml.nix { };
4952
rapidcheckFull = pkgs.symlinkJoin {
5053
name = "rapidcheckFull";
@@ -138,7 +141,6 @@
138141
gh-markdown-preview
139142
shellcheck
140143
plantuml
141-
gdb
142144
ruff
143145
compdb
144146
jq
@@ -158,6 +160,9 @@
158160
black
159161
toml
160162
])
163+
(with self.packages.${system}; [
164+
ffdb
165+
])
161166
];
162167
};
163168
};

lib/compiler/include/compiler/cost_estimator/cost_estimator.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define _FLEXFLOW_LIB_COMPILER_INCLUDE_COMPILER_COST_ESTIMATOR_COST_ESTIMATOR_H
33

44
#include "compiler/cost_estimator/op_cost_estimate_key.dtg.h"
5+
#include "compiler/cost_estimator/op_cost_metrics.dtg.h"
56
#include "compiler/cost_estimator/tensor_set_movement.dtg.h"
67
#include "op-attrs/parallel_tensor_shape.dtg.h"
78
#include "op-attrs/pcg_operator_attrs.dtg.h"
@@ -11,7 +12,7 @@
1112
namespace FlexFlow {
1213

1314
struct ICostEstimator {
14-
virtual float estimate_cost(OpCostEstimateKey const &) const = 0;
15+
virtual OpCostMetrics estimate_cost(OpCostEstimateKey const &) const = 0;
1516
virtual float estimate_cost(TensorSetMovement const &) const = 0;
1617

1718
ICostEstimator() = default;
@@ -23,7 +24,7 @@ struct ICostEstimator {
2324
CHECK_RC_COPY_VIRTUAL_COMPLIANT(ICostEstimator);
2425

2526
struct CostEstimator {
26-
float estimate_cost(OpCostEstimateKey const &k) const;
27+
OpCostMetrics estimate_cost(OpCostEstimateKey const &) const;
2728
float estimate_cost(TensorSetMovement const &m) const;
2829

2930
template <typename T, typename... Args>
lib/compiler/include/compiler/cost_estimator/op_cost_metrics.struct.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
namespace = "FlexFlow"
2+
name = "OpCostMetrics"
3+
features = [
4+
"eq",
5+
"fmt",
6+
"hash",
7+
]
8+
9+
includes = [
10+
]
11+
12+
[[fields]]
13+
name = "runtime"
14+
type = "float"
15+
16+
[[fields]]
17+
name = "memory"
18+
type = "size_t"

0 commit comments

Comments
 (0)