Skip to content

Commit 703b26e

Browse files
committed
add profiler, parallel_executor back
1 parent 935387f commit 703b26e

File tree

14 files changed

+293
-308
lines changed

14 files changed

+293
-308
lines changed

paddle/fluid/framework/CMakeLists.txt

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ function(windows_symbolic TARGET)
3131
endfunction()
3232

3333
add_subdirectory(ir)
34-
if (NOT WIN32)
3534
add_subdirectory(details)
36-
endif (NOT WIN32)
3735
# ddim lib
3836
proto_library(framework_proto SRCS framework.proto)
3937

@@ -118,13 +116,8 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
118116
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
119117
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
120118

121-
if (NOT WIN32)
122119
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
123120
shape_inference data_transform lod_tensor profiler)
124-
else()
125-
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
126-
shape_inference data_transform lod_tensor)
127-
endif(NOT WIN32)
128121

129122
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
130123

@@ -179,12 +172,10 @@ else()
179172
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
180173
endif()
181174

182-
if (NOT WIN32)
183175
cc_library(parallel_executor SRCS parallel_executor.cc DEPS
184176
threaded_ssa_graph_executor scope_buffered_ssa_graph_executor
185177
graph build_strategy
186178
fast_threaded_ssa_graph_executor)
187-
endif() # NOT WIN32
188179

189180
cc_library(prune SRCS prune.cc DEPS framework_proto)
190181
cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)

paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@
1313
// limitations under the License.
1414

1515
#pragma once
16+
#include <ThreadPool.h>
1617
#include <string>
1718
#include <vector>
18-
#include "ThreadPool.h"
1919
#include "paddle/fluid/framework/blocking_queue.h"
2020
#include "paddle/fluid/framework/details/exception_holder.h"
2121
#include "paddle/fluid/framework/details/execution_strategy.h"

paddle/fluid/memory/allocation/cpu_allocator.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717

1818
#ifdef _WIN32
1919
#define posix_memalign_free _aligned_free
20-
#define posix_memalign(p, a, s) (((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
20+
#define posix_memalign(p, a, s) \
21+
(((*(p)) = _aligned_malloc((s), (a))), *(p) ? 0 : errno)
2122
#endif
2223

2324
namespace paddle {

paddle/fluid/platform/CMakeLists.txt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,23 @@
1-
if (NOT WIN32)
21
proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto)
32
py_proto_compile(profiler_py_proto SRCS profiler.proto)
43

54
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
65

76
add_dependencies(profiler_py_proto profiler_py_proto_init)
87

8+
if (NOT WIN32)
99
add_custom_command(TARGET profiler_py_proto POST_BUILD
1010
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
1111
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
1212
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
1313
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
14+
else(NOT WIN32)
15+
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
16+
add_custom_command(TARGET profiler_py_proto POST_BUILD
17+
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
18+
COMMAND copy /Y *.py ${proto_dstpath}
19+
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
20+
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
1421
endif(NOT WIN32)
1522

1623
if(WITH_GPU)
@@ -60,12 +67,9 @@ cc_test(init_test SRCS init_test.cc DEPS device_context)
6067
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
6168
nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context)
6269

63-
64-
if (NOT WIN32)
6570
cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS})
6671
cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer)
6772
cc_test(profiler_test SRCS profiler_test.cc DEPS profiler)
68-
endif(NOT WIN32)
6973

7074
nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor)
7175
cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor)

paddle/fluid/platform/device_tracer.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,11 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414
#pragma once
1515

16-
#if !defined(_WIN32)
17-
#include <sys/time.h>
18-
#else
19-
#include <windows.h>
20-
#endif // !_WIN32
21-
22-
#include <time.h>
2316
#include <chrono> // NOLINT
2417
#include <string>
2518

2619
#include "paddle/fluid/platform/dynload/cupti.h"
20+
#include "paddle/fluid/platform/port.h"
2721
#include "paddle/fluid/platform/profiler.pb.h"
2822

2923
namespace paddle {
@@ -32,15 +26,11 @@ namespace platform {
3226
///////////////////////
3327
// WARN: Under Development. Don't depend on it yet.
3428
//////////////////////
35-
#if !defined(_WIN32)
3629
// Returns the current time reported by gettimeofday(), converted to
// nanoseconds. The reading itself is in seconds + microseconds, so the
// returned value is always a multiple of 1000.
inline uint64_t PosixInNsec() {
  struct timeval now;
  gettimeofday(&now, nullptr);
  // Fold seconds and microseconds into one microsecond count first ...
  const uint64_t total_usec =
      static_cast<uint64_t>(now.tv_sec) * 1000000 + now.tv_usec;
  // ... then scale microseconds up to nanoseconds.
  return 1000 * total_usec;
}
41-
#else
42-
inline uint64_t PosixInNsec() { return static_cast<uint64_t>(0); }
43-
#endif // !_WIN32
4434

4535
// DeviceTracer performs the following tasks:
4636
// 1. Register cuda callbacks for various events: kernel, memcpy, etc.

paddle/fluid/platform/enforce.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ struct EOFException : public std::exception {
134134
#define LIKELY(condition) __builtin_expect(static_cast<bool>(condition), 1)
135135
#else
136136
// there are no equivalent intrinsics in MSVC.
137-
#define LIKELY(condition) !(condition)
137+
#define LIKELY(condition) (condition)
138138
#endif
139139

140140
template <typename... Args>

paddle/fluid/platform/port.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <cstdio>
1818
#include <stdexcept>
1919

20+
#include <time.h>
2021
#include <memory>
2122
#include <string>
2223

@@ -27,6 +28,7 @@
2728
#include <dlfcn.h> // dladdr
2829
#include <execinfo.h> // backtrace
2930
#include <sys/stat.h>
31+
#include <sys/time.h>
3032
#include <algorithm> // std::accumulate
3133
#else
3234
#include <io.h> // _popen, _pclose
@@ -57,6 +59,25 @@ static void *dlopen(const char *filename, int flag) {
5759
return reinterpret_cast<void *>(hModule);
5860
}
5961

62+
static int gettimeofday(struct timeval *tp, void *tzp) {
63+
time_t clock;
64+
struct tm tm;
65+
SYSTEMTIME wtm;
66+
67+
GetLocalTime(&wtm);
68+
tm.tm_year = wtm.wYear - 1900;
69+
tm.tm_mon = wtm.wMonth - 1;
70+
tm.tm_mday = wtm.wDay;
71+
tm.tm_hour = wtm.wHour;
72+
tm.tm_min = wtm.wMinute;
73+
tm.tm_sec = wtm.wSecond;
74+
tm.tm_isdst = -1;
75+
clock = mktime(&tm);
76+
tp->tv_sec = clock;
77+
tp->tv_usec = wtm.wMilliseconds * 1000;
78+
79+
return (0);
80+
}
6081
#endif // !_WIN32
6182

6283
static void ExecShellCommand(const std::string &cmd, std::string *message) {

paddle/fluid/platform/profiler.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

1515
#include "paddle/fluid/platform/profiler.h"
16+
#include "paddle/fluid/platform/port.h"
1617

17-
#include <sys/time.h>
1818
#include <algorithm>
1919
#include <iomanip>
2020
#include <limits>
@@ -438,10 +438,10 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
438438
event_items[index].total_time += event_time;
439439
// min time
440440
event_items[index].min_time =
441-
std::min(event_time, event_items[index].min_time);
441+
(std::min)(event_time, event_items[index].min_time);
442442
// max time
443443
event_items[index].max_time =
444-
std::max(event_time, event_items[index].max_time);
444+
(std::max)(event_time, event_items[index].max_time);
445445
}
446446

447447
// remove the push marker from the list

paddle/fluid/platform/profiler.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
6969

7070
void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
7171

72-
#if !defined(_WIN32)
7372
struct RecordEvent {
7473
// dev_ctx can be set to nullptr if device is cpu.
7574
RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
@@ -106,15 +105,6 @@ struct RecordBlock {
106105
std::string name_;
107106
uint64_t start_ns_;
108107
};
109-
#else
110-
// windows do not support profiler temporarily.
111-
struct RecordEvent {
112-
RecordEvent(const std::string& name, const DeviceContext* dev_ctx) {}
113-
};
114-
struct RecordBlock {
115-
explicit RecordBlock(int block_id) {}
116-
};
117-
#endif
118108

119109
// Return the event list of all threads. Assumed the returned value calls
120110
// event_lists, event_lists[i][j] represents the j-th Event of i-th thread.

paddle/fluid/platform/stream_callback_manager.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,16 +45,15 @@ class StreamCallbackManager {
4545
inline void AddCallback(Callback &&callback) const {
4646
auto *stream_callback_context =
4747
new StreamCallbackContext(this, std::forward<Callback>(callback));
48-
PADDLE_ENFORCE(
4948
#if CUDA_VERSION >= 10000
50-
cudaLaunchHostFunc(stream_, StreamCallbackManager::StreamCallbackFunc,
51-
stream_callback_context)
49+
PADDLE_ENFORCE(cudaLaunchHostFunc(stream_,
50+
StreamCallbackManager::StreamCallbackFunc,
51+
stream_callback_context)); // NOLINT
5252
#else
53-
cudaStreamAddCallback(stream_,
54-
StreamCallbackManager::StreamCallbackFunc,
55-
stream_callback_context, 0)
53+
PADDLE_ENFORCE(cudaStreamAddCallback(
54+
stream_, StreamCallbackManager::StreamCallbackFunc,
55+
stream_callback_context, 0)); // NOLINT
5656
#endif
57-
); // NOLINT
5857
}
5958

6059
void Wait() const { thread_pool_.reset(new ThreadPool(1)); }

0 commit comments

Comments
 (0)