Commit 41bd1f9

fix gpu test, clean code and add cmake
1 parent a5feb77 commit 41bd1f9

File tree

5 files changed: 109 additions & 212 deletions

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
 option(GLIDE_INSTALL "Download and install go dependencies " ON)
 option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
 option(USE_EIGEN_FOR_BLAS "Use matrix multiplication in Eigen" OFF)
+option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF)
 
 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)

cmake/configure.cmake

Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,11 @@ if(WITH_DOUBLE)
     add_definitions(-DPADDLE_TYPE_DOUBLE)
 endif(WITH_DOUBLE)
 
+if(WITH_ARM_FP16)
+    add_definitions(-DPADDLE_ARM_FP16)
+    add_definitions("-march=armv8.2-a+fp16+simd")
+endif(WITH_ARM_FP16)
+
 if(WITH_TESTING)
     add_definitions(-DPADDLE_WITH_TESTING)
 endif(WITH_TESTING)
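
With these two CMake changes, configuring with -DWITH_ARM_FP16=ON defines the PADDLE_ARM_FP16 macro and compiles for armv8.2-a with the fp16 and simd extensions. Below is a minimal, purely illustrative C++ sketch of the kind of code such a macro can gate; half_sum is a hypothetical helper, not part of this commit, and it assumes a toolchain whose arm_neon.h provides the scalar half-precision intrinsics enabled by that -march flag.

// Illustrative sketch only: when PADDLE_ARM_FP16 is defined, the armv8.2-a
// scalar fp16 intrinsics are available; otherwise fall back to plain float.
#ifdef PADDLE_ARM_FP16
#include <arm_neon.h>  // float16_t and vaddh_f16 need -march=armv8.2-a+fp16
#endif

inline float half_sum(float a, float b) {
#ifdef PADDLE_ARM_FP16
  float16_t ha = static_cast<float16_t>(a);
  float16_t hb = static_cast<float16_t>(b);
  return static_cast<float>(vaddh_f16(ha, hb));  // native fp16 addition
#else
  return a + b;  // portable single-precision fallback
#endif
}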

paddle/math/float16.h

Lines changed: 61 additions & 156 deletions
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include <cstdint>
+#include <stdint.h>
 
 #ifdef PADDLE_WITH_CUDA
 #include <cuda.h>
@@ -71,6 +71,7 @@ struct PADDLE_ALIGN(2) float16 {
  public:
   uint16_t x;
 
+  // Constructors
   HOSTDEVICE inline float16() : x(0) {}
 
   HOSTDEVICE inline float16(const float16& h) : x(h.x) {}
@@ -89,8 +90,7 @@
 
 #ifdef PADDLE_WITH_NATIVE_FP16
   // __fp16 is a native half precision data type for arm cpu,
-  // float16_t is an alias for __fp16 in arm_fp16.h,
-  // which is included in arm_neon.h.
+  // float16_t is an alias for __fp16
   HOSTDEVICE inline explicit float16(const float16_t& h) {
     x = *reinterpret_cast<const uint16_t*>(&h);
   }
@@ -141,6 +141,7 @@ struct PADDLE_ALIGN(2) float16 {
     return *this;
   }
 
+  // Assignment operators
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline float16& operator=(const half& rhs) {
 #if CUDA_VERSION >= 9000
@@ -219,6 +220,7 @@ struct PADDLE_ALIGN(2) float16 {
     return *this;
   }
 
+  // Conversion operators
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline explicit operator half() const {
 #if CUDA_VERSION >= 9000
@@ -353,27 +355,54 @@ struct PADDLE_ALIGN(2) float16 {
 // CUDA 7.5 and 8.0 do not. The arithmetic operators defined here are
 // for users to write similar CUDA code in CUDA 7.5 and 8.0 as in
 // CUDA 9.0 regarding the half data type.
-#if defined(PADDLE_CUDA_FP16) && defined(__CUDA_ARCH__) && \
-    __CUDA_ARCH__ >= 530 && CUDA_VERSION < 9000
+#if defined(PADDLE_CUDA_FP16) && CUDA_VERSION < 9000
+
 DEVICE inline half operator+(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hadd(a, b);
+#else
+  float res = float(float16(a)) + float(float16(b));
+  return half(float16(res));
+#endif
 }
 
 DEVICE inline half operator-(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hsub(a, b);
+#else
+  float res = float(float16(a)) - float(float16(b));
+  return half(float16(res));
+#endif
 }
 
 DEVICE inline half operator*(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hmul(a, b);
+#else
+  float res = float(float16(a)) * float(float16(b));
+  return half(float16(res));
+#endif
 }
 
 DEVICE inline half operator/(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
   float num = __half2float(a);
   float denom = __half2float(b);
   return __float2half(num / denom);
+#else
+  float res = float(float16(a)) / float(float16(b));
+  return half(float16(res));
+#endif
 }
 
-DEVICE inline half operator-(const half& a) { return __hneg(a); }
+DEVICE inline half operator-(const half& a) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+  return __hneg(a);
+#else
+  float res = -float(float16(a));
+  return half(float16(res));
+#endif
+}
 
 DEVICE inline half& operator+=(half& a, const half& b) {
   a = a + b;
@@ -396,99 +425,57 @@ DEVICE inline half& operator/=(half& a, const half& b) {
 }
 
 DEVICE inline bool operator==(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __heq(a, b);
+#else
+  return float(float16(a)) == float(float16(b));
+#endif
 }
 
 DEVICE inline bool operator!=(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hne(a, b);
+#else
+  return float(float16(a)) != float(float16(b));
+#endif
 }
 
 DEVICE inline bool operator<(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hlt(a, b);
+#else
+  return float(float16(a)) < float(float16(b));
+#endif
 }
 
 DEVICE inline bool operator<=(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hle(a, b);
+#else
+  return float(float16(a)) <= float(float16(b));
+#endif
 }
 
 DEVICE inline bool operator>(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hgt(a, b);
+#else
+  return float(float16(a)) > float(float16(b));
+#endif
 }
 
 DEVICE inline bool operator>=(const half& a, const half& b) {
+#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
   return __hge(a, b);
+#else
+  return float(float16(a)) >= float(float16(b));
+#endif
 }
 
-/*
-DEVICE inline float16 operator+(const float16& a, const float16& b) {
-  return float16(__hadd(half(a), half(b)));
-}
-
-DEVICE inline float16 operator-(const float16& a, const float16& b) {
-  return float16(__hsub(half(a), half(b)));
-}
-
-DEVICE inline float16 operator*(const float16& a, const float16& b) {
-  return float16(__hmul(half(a), half(b)));
-}
-
-DEVICE inline float16 operator/(const float16& a, const float16& b) {
-  float num = __half2float(half(a));
-  float denom = __half2float(half(b));
-  return float16(num / denom);
-}
-
-DEVICE inline float16 operator-(const float16& a) {
-  return float16(__hneg(half(a)));
-}
-
-DEVICE inline float16& operator+=(float16& a, const float16& b) {
-  a = a + b;
-  return a;
-}
-
-DEVICE inline float16& operator-=(float16& a, const float16& b) {
-  a = a - b;
-  return a;
-}
-
-DEVICE inline float16& operator*=(float16& a, const float16& b) {
-  a = a * b;
-  return a;
-}
-
-DEVICE inline float16& operator/=(float16& a, const float16& b) {
-  a = a / b;
-  return a;
-}
-
-DEVICE inline bool operator==(const float16& a, const float16& b) {
-  return __heq(half(a), half(b));
-}
-
-DEVICE inline bool operator!=(const float16& a, const float16& b) {
-  return __hne(half(a), half(b));
-}
-
-DEVICE inline bool operator<(const float16& a, const float16& b) {
-  return __hlt(half(a), half(b));
-}
-
-DEVICE inline bool operator<=(const float16& a, const float16& b) {
-  return __hle(half(a), half(b));
-}
-
-DEVICE inline bool operator>(const float16& a, const float16& b) {
-  return __hgt(half(a), half(b));
-}
-
-DEVICE inline bool operator>=(const float16& a, const float16& b) {
-  return __hge(half(a), half(b));
-}
-*/
+#endif  // PADDLE_CUDA_FP16
 
 // Arithmetic operators on ARMv8.2-A CPU
-#elif defined(PADDLE_WITH_NATIVE_FP16)
+#if defined(PADDLE_WITH_NATIVE_FP16)
 HOST inline float16 operator+(const float16& a, const float16& b) {
   float16 res;
   asm volatile(
@@ -681,88 +668,6 @@ HOST inline bool operator>=(const float16& a, const float16& b) {
   return (res & 0xffff) != 0;
 }
 
-/*
-HOST inline float16 operator+(const float16& a, const float16& b) {
-  return float16(vaddh_f16(float16_t(a), float16_t(b)));
-}
-
-HOST inline float16 operator-(const float16& a, const float16& b) {
-  return float16(vsubh_f16(float16_t(a), float16_t(b)));
-}
-
-HOST inline float16 operator*(const float16& a, const float16& b) {
-  return float16(vmulh_f16(float16_t(a), float16_t(b)));
-}
-
-HOST inline float16 operator/(const float16& a, const float16& b) {
-  return float16(vdivh_f16(float16_t(a), float16_t(b)));
-}
-
-HOST inline float16 operator-(const float16& a) {
-  return float16(vnegh_f16(float16_t(a)));
-}
-
-HOST inline float16& operator+=(float16& a, const float16& b) {
-  a = a + b;
-  return a;
-}
-
-HOST inline float16& operator-=(float16& a, const float16& b) {
-  a = a - b;
-  return a;
-}
-
-HOST inline float16& operator*=(float16& a, const float16& b) {
-  a = a * b;
-  return a;
-}
-
-HOST inline float16& operator/=(float16& a, const float16& b) {
-  a = a / b;
-  return a;
-}
-
-HOST inline bool operator==(const float16& a, const float16& b) {
-  return static_cast<bool>(vceqh_f16(float16_t(a), float16_t(b)));
-}
-
-HOST inline bool operator!=(const float16& a, const float16& b) {
-  return !(a == b);
-}
-
-HOST inline bool operator<(const float16& a, const float16& b) {
-#ifdef PADDLE_NEON_64
-  return static_cast<bool>(vclth_f16(float16_t(a), float16_t(b)));
-#else
-  return float(a) < float(b);
-#endif  // PADDLE_NEON_64
-}
-
-HOST inline bool operator<=(const float16& a, const float16& b) {
-#ifdef PADDLE_NEON_64
-  return static_cast<bool>(vcleh_f16(float16_t(a), float16_t(b)));
-#else
-  return float(a) <= float(b);
-#endif  // PADDLE_NEON_64
-}
-
-HOST inline bool operator>(const float16& a, const float16& b) {
-#ifdef PADDLE_NEON_64
-  return static_cast<bool>(vcgth_f16(float16_t(a), float16_t(b)));
-#else
-  return float(a) > float(b);
-#endif  // PADDLE_NEON_64
-}
-
-HOST inline bool operator>=(const float16& a, const float16& b) {
-#ifdef PADDLE_NEON_64
-  return static_cast<bool>(vcgeh_f16(float16_t(a), float16_t(b)));
-#else
-  return float(a) >= float(b);
-#endif  // PADDLE_NEON_64
-}
-*/
-
 // Arithmetic operators, software emulated on other CPU
 #else
 HOSTDEVICE inline float16 operator+(const float16& a, const float16& b) {
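
Before this change, the whole block of half operators was only compiled when __CUDA_ARCH__ >= 530, so device code built for older GPUs under CUDA 7.5/8.0 had no half operators at all; now each operator checks the architecture itself and, below sm_53, converts through float16/float and back. The snippet below is a standalone sketch of the same dispatch idiom, independent of Paddle's float16 type; half_add and add_half_arrays are hypothetical names used only for illustration.

// Sketch (assumed names, not Paddle code): use the native fp16 intrinsic on
// sm_53 and newer, otherwise emulate the add by round-tripping through float,
// which every CUDA architecture supports.
#include <cuda_fp16.h>

__device__ inline half half_add(half a, half b) {
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
  return __hadd(a, b);  // hardware half-precision add
#else
  return __float2half(__half2float(a) + __half2float(b));  // fp32 emulation
#endif
}

__global__ void add_half_arrays(const half* x, const half* y, half* z, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    z[i] = half_add(x[i], y[i]);  // same source builds for old and new GPUs
  }
}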

paddle/math/tests/test_float16.cpp

Lines changed: 0 additions & 8 deletions
@@ -54,14 +54,6 @@ TEST(float16, conversion_cpu) {
   EXPECT_EQ(float16(true).x, 0x3c00);
   EXPECT_EQ(float16(false).x, 0x0000);
 
-  // Implicit conversion to and from Eigen::half
-  /*
-  Eigen::half tmp = float16(1.0f);
-  float16 v_conv = tmp;
-  EXPECT_EQ(tmp.x, 0x3c00);
-  EXPECT_EQ(v_conv.x, 0x3c00);
-  */
-
   // Default constructor
   float16 v_def;
   EXPECT_EQ(v_def.x, 0x0000);
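
The removed block was dead code (a commented-out check of implicit conversion to and from Eigen::half), so the test keeps only the active checks against known IEEE fp16 bit patterns. A few additional checks in the same style could look like the sketch below; the test name is hypothetical and these lines are not part of this commit.

// Hypothetical extra conversion checks, in the same EXPECT_EQ-against-bit-pattern
// style as TEST(float16, conversion_cpu) above.
#include <gtest/gtest.h>

#include "paddle/math/float16.h"

namespace paddle {

TEST(float16, more_conversion_cpu) {
  EXPECT_EQ(float16(1.0f).x, 0x3c00);   // 1.0f  -> 0x3c00
  EXPECT_EQ(float16(0.5f).x, 0x3800);   // 0.5f  -> 0x3800
  EXPECT_EQ(float16(-1.0f).x, 0xbc00);  // -1.0f -> 0xbc00
}

}  // namespace paddle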
