Skip to content

Commit f434474

Browse files
Chirag Khandelwalbryanpkc
authored andcommitted
[Flang2] Support for vector always loop directive
1 parent aa9a6ee commit f434474

File tree

6 files changed

+160
-21
lines changed

6 files changed

+160
-21
lines changed

test/directives/vector_directive1.f90

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
! RUN: %flang -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-CLAUSE
1+
! RUN: %flang -c %s 2>&1 -o - | FileCheck %s --check-prefix=CHECK-NO-CLAUSE
2+
! RUN: %flang -S -emit-llvm %s -o - | FileCheck %s --check-prefix=METADATA
3+
! RUN: %flang -S -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s
24

35
subroutine add(arr1,arr2,arr3,N)
46
integer :: i,N
@@ -11,4 +13,10 @@ subroutine add(arr1,arr2,arr3,N)
1113
arr3(i) = arr1(i) - arr2(i)
1214
end do
1315
end subroutine
14-
! CHECK-NO-CLAUSE: F90-W-0602-No clause specified for the vector directive. Note: Only the always clause is supported.
16+
! CHECK-NO-CLAUSE-NOT: F90-S-0602
17+
! CHECK-NO-CLAUSE-NOT: F90-S-0603
18+
19+
! METADATA: !"llvm.loop.vectorize.enable", i1 true
20+
! CHECK: load <[[VF:[0-9]+]] x i32>
21+
! CHECK: store <[[VF]] x i32>
22+
Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
1-
! RUN: %flang -c %s 2>&1 | FileCheck %s -allow-empty --check-prefix=CHECK
1+
!! check for pragma support for !dir$ vector always
2+
!RUN: %flang -S -O2 -emit-llvm %s -o - | FileCheck %s
3+
!CHECK: define void @sumsimd_{{.*$}}
4+
!CHECK: {{.*}}!llvm.access.group ![[ACCGRP:[0-9]+]]
5+
!CHECK: vector.ph:{{.*}}
6+
!CHECK: vector.body:{{.*}}
7+
!CHECK: {{.*}}shufflevector{{.*}}
8+
!CHECK: {{.*}}add <2 x i64>{{.*}}
9+
!CHECK: {{.*}}"llvm.loop.parallel_accesses", ![[ACCGRP]]}
10+
!CHECK: {{.*}}"llvm.loop.isvectorized", i32 1{{.*}}
11+
!CHECK: {{.*}}"llvm.loop.unroll.runtime.disable"{{.*}}
212

3-
subroutine add(arr1,arr2,arr3,N)
4-
integer :: i,N
5-
integer :: arr1(N)
6-
integer :: arr2(N)
7-
integer :: arr3(N)
13+
SUBROUTINE sumsimd(myarr1,myarr2,ub)
14+
INTEGER, POINTER :: myarr1(:)
15+
INTEGER, POINTER :: myarr2(:)
16+
INTEGER :: ub
817

9-
!dir$ vector always
10-
do i = 1, N
11-
arr3(i) = arr1(i) - arr2(i)
12-
end do
13-
end subroutine
14-
! CHECK-NOT: F90-S-0602
15-
! CHECK-NOT: F90-S-0603
18+
!DIR$ VECTOR ALWAYS
19+
DO i=1,ub
20+
myarr1(i) = myarr1(i)+myarr2(i)
21+
END DO
22+
END SUBROUTINE

tools/flang2/docs/xflag.n

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5110,6 +5110,8 @@ Turn on C++ prototype implementation of the gnu visibility attribute
51105110
"hidden"
51115111
.XB 0x02:
51125112
Enable "alwaysinline" attribute for a function, using "forceinline" pragma
5113+
.XB 0x04:
5114+
Enable vectorize always loop directive
51135115

51145116
.XF "192:"
51155117
More Accelerator flags
@@ -5308,7 +5310,7 @@ This makes it easier to compare two outputs from slightly different versions.
53085310
.XB 0x80000000:
53095311
Enable unified memory support for OpenACC
53105312

5311-
.XF "199:"
5313+
.XF "199:"
53125314
Non-zero value enable -Mvect=fastfuse. This flag is/must be passed only when
53135315
-fast is enabled. Value other than 0 represents the miximum number of blocks
53145316
to enable -Mvect=fastfuse. default value is 10.

tools/flang2/flang2exe/cgmain.cpp

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "main.h"
4242
#include "symfun.h"
4343
#include "ilidir.h"
44+
#include "fdirect.h"
4445

4546
#ifdef OMP_OFFLOAD_LLVM
4647
#include "ompaccel.h"
@@ -209,6 +210,7 @@ static struct {
209210
unsigned _fcmp_negate : 1;
210211
unsigned _last_stmt_is_branch : 1;
211212
unsigned _rw_no_dep_check : 1;
213+
unsigned _rw_acc_grp_check : 1;
212214
} CGMain;
213215

214216
#define new_ebb (CGMain._new_ebb)
@@ -220,6 +222,7 @@ static struct {
220222
#define fcmp_negate (CGMain._fcmp_negate)
221223
#define last_stmt_is_branch (CGMain._last_stmt_is_branch)
222224
#define rw_nodepcheck (CGMain._rw_no_dep_check)
225+
#define rw_access_group (CGMain._rw_acc_grp_check)
223226

224227
static int funcId;
225228
static int fnegcc[17] = LLCCF_NEG;
@@ -234,6 +237,7 @@ static hashmap_t sincos_imap;
234237
static LL_MDRef cached_loop_metadata;
235238
static LL_MDRef cached_unroll_enable_metadata;
236239
static LL_MDRef cached_unroll_disable_metadata;
240+
static LL_MDRef cached_access_group_metadata;
237241

238242
static bool CG_cpu_compile = false;
239243

@@ -799,6 +803,21 @@ clear_rw_nodepchk(void)
799803
cached_loop_metadata = ll_get_md_null();
800804
}
801805

806+
INLINE static void
807+
mark_rw_access_grp(int bih)
808+
{
809+
rw_access_group = 1;
810+
if (!BIH_NODEPCHK2(bih))
811+
cached_loop_metadata = ll_get_md_null();
812+
}
813+
814+
INLINE static void
815+
clear_rw_access_grp(void)
816+
{
817+
rw_access_group = 0;
818+
cached_loop_metadata = ll_get_md_null();
819+
}
820+
802821
void
803822
print_personality(void)
804823
{
@@ -949,6 +968,20 @@ assign_fortran_storage_classes(void)
949968
}
950969
} /* end assign_fortran_storage_classes() */
951970

971+
/*
972+
* when vector always pragma is specified, "llvm.loop.parallel_accesses" metadata has
973+
* to be generated along with "llvm.access.group" for each load/store instructions.
974+
*/
975+
INLINE static LL_MDRef
976+
cons_loop_parallel_accesses_metadata(void)
977+
{
978+
LL_MDRef lvcomp[2];
979+
980+
lvcomp[0] = ll_get_md_string(cpu_llvm_module, "llvm.loop.parallel_accesses");
981+
lvcomp[1] = cached_access_group_metadata;
982+
return ll_get_md_node(cpu_llvm_module, LL_PlainMDNode, lvcomp, 2);
983+
} // cons_loop_parallel_accesses_metadata
984+
952985
INLINE static LL_MDRef
953986
cons_novectorize_metadata(void)
954987
{
@@ -1289,6 +1322,21 @@ cons_no_depchk_metadata(void)
12891322
return cached_loop_metadata;
12901323
}
12911324

1325+
static LL_MDRef
1326+
cons_vec_always_metadata(void)
1327+
{
1328+
if (LL_MDREF_IS_NULL(cached_loop_metadata)) {
1329+
LL_MDRef vectorize = cons_vectorize_metadata();
1330+
LL_MDRef paraccess = cons_loop_parallel_accesses_metadata();
1331+
LL_MDRef md = ll_create_flexible_md_node(cpu_llvm_module);
1332+
ll_extend_md_node(cpu_llvm_module, md, md);
1333+
ll_extend_md_node(cpu_llvm_module, md, vectorize);
1334+
ll_extend_md_node(cpu_llvm_module, md, paraccess);
1335+
cached_loop_metadata = md;
1336+
}
1337+
return cached_loop_metadata;
1338+
}
1339+
12921340
static LL_MDRef
12931341
cons_unroll_metadata(void) //Calls the metadata for unroll
12941342
{
@@ -1344,6 +1392,36 @@ remove_dead_instrs(void)
13441392
}
13451393
}
13461394

1395+
/*
1396+
* Check if the branch instruction is having a loop pragma
1397+
* xbit/xflag pair.
1398+
*/
1399+
static bool check_for_loop_directive(int branch_line_number, int xbit, int xflag) {
1400+
int iter;
1401+
LPPRG *lpprg;
1402+
1403+
// Check if any loop pragmas are specified
1404+
if (direct.lpg.avail > 1) {
1405+
// Loop thru all the loop pragmas
1406+
for (iter = 1; iter < direct.lpg.avail; iter++) {
1407+
lpprg = direct.lpg.stgb + iter;
1408+
// check if xbit/xflag pair is available
1409+
if ((lpprg->dirset.x[xbit] & xflag)
1410+
&&
1411+
(branch_line_number == lpprg->end_line)) {
1412+
return true;
1413+
} // if
1414+
1415+
if (branch_line_number < lpprg->beg_line) {
1416+
// branch instruction is not having any pragma specified.
1417+
break;
1418+
} // if
1419+
} // for
1420+
} // if
1421+
1422+
return false;
1423+
} // check_for_loop_directive
1424+
13471425
/**
13481426
\brief process debug info of constants with parameter attribute.
13491427
*/
@@ -1538,6 +1616,9 @@ schedule(void)
15381616
bih = BIH_NEXT(0);
15391617
if ((XBIT(34, 0x200) || gbl.usekmpc) && !processHostConcur)
15401618
bih = gbl.entbih;
1619+
1620+
cached_access_group_metadata = ll_create_distinct_md_node(cpu_llvm_module, LL_PlainMDNode, NULL, 0);
1621+
15411622
/* construct the body of the function */
15421623
for (; bih; bih = BIH_NEXT(bih))
15431624
for (ilt = BIH_ILTFIRST(bih); ilt; ilt = ILT_NEXT(ilt))
@@ -1620,6 +1701,12 @@ schedule(void)
16201701
} else {
16211702
clear_rw_nodepchk();
16221703
}
1704+
if (XBIT(191, 0x4)) {
1705+
fix_nodepchk_flag(bih);
1706+
mark_rw_access_grp(bih);
1707+
} else {
1708+
clear_rw_access_grp();
1709+
}
16231710
if (flg.x[9] > 0)
16241711
unroll_factor = flg.x[9];
16251712
if (XBIT(11, 0x2) && unroll_factor)
@@ -1683,6 +1770,14 @@ schedule(void)
16831770
i->misc_metadata = loop_md;
16841771
}
16851772
}
1773+
if ((check_for_loop_directive(ILT_LINENO(ilt), 191, 0x4))) {
1774+
LL_MDRef loop_md = cons_vec_always_metadata();
1775+
INSTR_LIST *i = find_last_executable(llvm_info.last_instr);
1776+
if (i) {
1777+
i->flags |= LOOP_BACKEDGE_FLAG;
1778+
i->misc_metadata = loop_md;
1779+
}
1780+
}
16861781
if (BIH_UNROLL(bih)) {
16871782
LL_MDRef loop_md = cons_unroll_metadata();
16881783
INSTR_LIST *i = find_last_executable(llvm_info.last_instr);
@@ -2786,6 +2881,20 @@ write_no_depcheck_metadata(LL_Module *module, INSTR_LIST *insn)
27862881
}
27872882
}
27882883

2884+
INLINE static void
2885+
write_llaccgroup_metadata(LL_Module *module, INSTR_LIST *insn)
2886+
{
2887+
if (insn->flags & LDST_HAS_ACCESSGRP_METADATA) {
2888+
char buf[64];
2889+
int n;
2890+
DEBUG_ASSERT(insn->misc_metadata, "missing metadata");
2891+
n = snprintf(buf, 64, ", !llvm.access.group !%u",
2892+
LL_MDREF_value(cached_access_group_metadata));
2893+
DEBUG_ASSERT(n < 64, "buffer overrun");
2894+
print_token(buf);
2895+
}
2896+
}
2897+
27892898
/* write out the struct member types */
27902899
static void
27912900
write_verbose_type(LL_Type *ll_type)
@@ -3190,6 +3299,7 @@ write_instructions(LL_Module *module)
31903299
assert(p->next == NULL, "write_instructions(), bad next ptr", 0,
31913300
ERR_Fatal);
31923301
write_no_depcheck_metadata(module, instrs);
3302+
write_llaccgroup_metadata(module, instrs);
31933303
write_tbaa_metadata(module, instrs->ilix, instrs->operands,
31943304
instrs->flags);
31953305
break;
@@ -3210,6 +3320,7 @@ write_instructions(LL_Module *module)
32103320

32113321
write_memory_order_and_alignment(instrs);
32123322
write_no_depcheck_metadata(module, instrs);
3323+
write_llaccgroup_metadata(module, instrs);
32133324
write_tbaa_metadata(module, instrs->ilix, instrs->operands->next,
32143325
instrs->flags & VOLATILE_FLAG);
32153326
break;
@@ -3425,6 +3536,10 @@ mk_store_instr(OPERAND *val, OPERAND *addr)
34253536
insn->flags |= LDST_HAS_METADATA;
34263537
insn->misc_metadata = cons_no_depchk_metadata();
34273538
}
3539+
if (rw_access_group) {
3540+
insn->flags |= LDST_HAS_ACCESSGRP_METADATA;
3541+
insn->misc_metadata = cons_vec_always_metadata();
3542+
}
34283543
ad_instr(0, insn);
34293544
return insn;
34303545
}
@@ -3686,6 +3801,10 @@ ad_csed_instr(LL_InstrName instr_name, int ilix, LL_Type *ll_type,
36863801
flags |= LDST_HAS_METADATA;
36873802
instr->misc_metadata = cons_no_depchk_metadata();
36883803
}
3804+
if ((instr_name == I_LOAD) && rw_access_group) {
3805+
flags |= LDST_HAS_ACCESSGRP_METADATA;
3806+
instr->misc_metadata = cons_vec_always_metadata();
3807+
}
36893808
instr->flags = flags;
36903809
ad_instr(ilix, instr);
36913810
return operand;
@@ -6733,6 +6852,10 @@ make_load(int ilix, OPERAND *load_op, LL_Type *rslt_type, MSZ msz,
67336852
flags |= LDST_HAS_METADATA;
67346853
Curr_Instr->misc_metadata = cons_no_depchk_metadata();
67356854
}
6855+
if (rw_access_group) {
6856+
flags |= LDST_HAS_ACCESSGRP_METADATA;
6857+
Curr_Instr->misc_metadata = cons_vec_always_metadata();
6858+
}
67366859
Curr_Instr->flags = (LL_InstrListFlags)flags;
67376860
load_op->next = NULL;
67386861
ad_instr(ilix, Curr_Instr);

tools/flang2/flang2exe/llutil.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ typedef enum LL_InstrListFlags {
284284
NOUNSIGNEDWRAP = (1 << 12),
285285
FUNC_RETURN_IS_FUNC_PTR = (1 << 13),
286286
LDST_HAS_METADATA = (1 << 13), /**< I_LOAD, I_STORE only */
287+
LDST_HAS_ACCESSGRP_METADATA = (1 << 14), /**< I_LOAD, I_STORE only, for llvm.loop.parallel_accesses */
287288

288289
/* Information for atomic operations.
289290
This information overlaps 12 of the calling convention bits. In earlier

tools/shared/pragma.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -657,16 +657,14 @@ do_sw(void)
657657
if(craydir) {
658658
typ = gtok();
659659
if (typ != T_IDENT) {
660-
backup_nowarn = gbl.nowarn;
661-
gbl.nowarn = false;
662-
errwarn((error_code_t)602);
663-
gbl.nowarn = backup_nowarn;
660+
bclr(DIR_OFFSET(currdir, x[19]), 0x18);
661+
bset(DIR_OFFSET(currdir, x[19]), 0x400);
664662
break;
665663
}
666664
LCASE(ctok);
667665
if (strcmp(ctok, "always") == 0) {
668666
bclr(DIR_OFFSET(currdir, x[19]), 0x18);
669-
bset(DIR_OFFSET(currdir, x[19]), 0x400);
667+
bset(DIR_OFFSET(currdir, x[191]), 0x4);
670668
} else {
671669
backup_nowarn = gbl.nowarn;
672670
gbl.nowarn = false;

0 commit comments

Comments
 (0)