Vectorizer update: FDIV instructions enabled

esukhov · igcbot · commit 1fe31c8b87ef · 2025-02-20T15:38:33.000+01:00
Support for FDIV instructions inside IGCVectorizer has been added.
Due to the current emitter, we only support FDIV that is converted to INV.
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
@@ -4592,7 +4592,7 @@ void EmitPass::Mul64(CVariable* dst, CVariable* src[2], SIMDMode simdMode, bool
     m_encoder->Push();
 }
 
-static unsigned int getVectorSize(Instruction *I) {
+static unsigned int getVectorSize(Value *I) {
     IGCLLVM::FixedVectorType *VecType =
         llvm::dyn_cast<IGCLLVM::FixedVectorType>(I->getType());
     if (!VecType)
@@ -4663,7 +4663,9 @@ void EmitPass::Add(const SSource sources[2], const DstModifier& modifier)
             else
                 m_encoder->SetSrcSubVar(1, i);
 
-            m_encoder->SetDstSubVar(i);
+            if (src[0]->IsUniform() && src[1]->IsUniform()) m_encoder->SetDstSubReg(i);
+            else m_encoder->SetDstSubVar(i);
+
             m_encoder->Add(m_destination, src[0], src[1]);
             m_encoder->Push();
         }
@@ -4695,8 +4697,9 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier& modifier)
             else m_encoder->SetSrcSubVar(0, i);
             if (src[1]->IsUniform()) { m_encoder->SetSrcSubReg(1, i); }
             else m_encoder->SetSrcSubVar(1, i);
+            if (src[0]->IsUniform() && src[1]->IsUniform()) m_encoder->SetDstSubReg(i);
+            else m_encoder->SetDstSubVar(i);
 
-            m_encoder->SetDstSubVar(i);
             m_encoder->Mul(m_destination, src[0], src[1]);
             m_encoder->Push();
         }
@@ -4715,8 +4718,97 @@ void EmitPass::Mul(const SSource sources[2], const DstModifier& modifier)
     }
 }
 
+// Returns true only if V is a constant float vector whose every element is exactly 1.0f.
+bool isVectorOfOnes(llvm::Value* V) {
+    // Only a ConstantDataVector with float elements can match; anything else is rejected.
+    const auto* constVec = llvm::dyn_cast<llvm::ConstantDataVector>(V);
+    if (!constVec) return false;
+    if (!constVec->getType()->getElementType()->isFloatTy()) return false;
+
+    const unsigned numElements = constVec->getNumElements();
+    for (unsigned i = 0; i < numElements; ++i) {
+        const auto* constFloat = llvm::dyn_cast<llvm::ConstantFP>(constVec->getElementAsConstant(i));
+        // A single element that is not exactly 1.0f disqualifies the whole vector.
+        if (!constFloat || !constFloat->isExactlyValue(1.f)) return false;
+    }
+
+    return true;
+}
+// Emits one Div per element of a vector fdiv; does nothing unless both sources are vectors and EnableVectorEmitter is set.
+void EmitPass::Div(const SSource sources[2], const DstModifier& modifier)
+{
+    CVariable* src[2];
+    for (int i = 0; i < 2; ++i) src[i] = GetSrcVariable(sources[i]);
+
+    if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) && sources[0].value->getType()->isVectorTy() && sources[1].value->getType()->isVectorTy()) {
+
+        // The numerator may be a constant or an argument rather than an Instruction;
+        // getVectorSize() takes any Value, so the size must not be left at 0 for those.
+        const unsigned int VectorSize = getVectorSize(sources[0].value);
+
+        for (unsigned int i = 0; i < VectorSize; ++i) {
+            SetSourceModifiers(0, sources[0]);
+            SetSourceModifiers(1, sources[1]);
+
+            if (src[0]->IsUniform()) { m_encoder->SetSrcSubReg(0, i); }
+            else m_encoder->SetSrcSubVar(0, i);
+            if (src[1]->IsUniform()) { m_encoder->SetSrcSubReg(1, i); }
+            else m_encoder->SetSrcSubVar(1, i);
+            if (src[0]->IsUniform() && src[1]->IsUniform()) m_encoder->SetDstSubReg(i);
+            else m_encoder->SetDstSubVar(i);
+
+            m_encoder->Div(m_destination, src[0], src[1]);
+            m_encoder->Push();
+        }
+    }
+    // NOTE(review): `modifier` is never applied on this path - confirm that is intended.
+}
+
+// Emits one Inv of sources[1] per vector element; does nothing unless both sources are vectors and EnableVectorEmitter is set.
+void EmitPass::Inv(const SSource sources[2], const DstModifier& modifier) {
+
+    if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) &&
+            sources[0].value->getType()->isVectorTy() &&
+            sources[1].value->getType()->isVectorTy()) {
+
+        // The operand is already a Value, so no isa<Value>/cast<Value> round trip is
+        // needed before asking for its element count.
+        const unsigned int VectorSize = getVectorSize(sources[0].value);
+
+        // sources[0] is expected to be the all-ones numerator (FDiv checks this before
+        // calling), so only the denominator needs a variable.
+        CVariable* denominator = GetSrcVariable(sources[1]);
+
+        for (unsigned int i = 0; i < VectorSize; ++i) {
+            SetSourceModifiers(0, sources[1]);
+
+            if (denominator->IsUniform()) {
+                m_encoder->SetSrcSubReg(0, i);
+                m_encoder->SetDstSubReg(i);
+            }
+            else {
+                m_encoder->SetSrcSubVar(0, i);
+                m_encoder->SetDstSubVar(i);
+            }
+            m_encoder->Inv(m_destination, denominator);
+            m_encoder->Push();
+        }
+    }
+    // NOTE(review): `modifier` is never applied on this path - confirm that is intended.
+}
+
 void EmitPass::FDiv(const SSource sources[2], const DstModifier& modifier)
 {
+    if (IGC_IS_FLAG_ENABLED(EnableVectorEmitter) &&
+            sources[0].value->getType()->isVectorTy() &&
+            sources[1].value->getType()->isVectorTy()) {
+
+        if (isVectorOfOnes(sources[0].value)) Inv(sources, modifier);
+        else Div(sources,modifier);
+
+        return;
+    }
+
     if (isOne(sources[0].value))
     {
         Unary(EOPCODE_INV, &sources[1], modifier);
diff --git a/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp b/IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
@@ -101,6 +101,8 @@ class EmitPass : public llvm::FunctionPass
     void Select(const SSource sources[3], const DstModifier& modifier);
     void PredAdd(const SSource& pred, bool invert, const SSource sources[2], const DstModifier& modifier);
     void Mul(const SSource[2], const DstModifier& modifier);
+    void Div(const SSource[2], const DstModifier& modifier);
+    void Inv(const SSource[2], const DstModifier& modifier);
     void Add(const SSource[2], const DstModifier& modifier);
     void FPTrunc(const SSource[2], const DstModifier& modifier);
     void Powi(const SSource[2], const DstModifier& modifier);
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp b/IGC/Compiler/CISACodeGen/IGCVectorizer.cpp
@@ -179,14 +179,30 @@ unsigned int getVectorSize(Value *I) {
 }
 
 
+// Returns true when I is a scalar float fdiv and the VectorizerAllowFDIV flag is set.
+// NOTE(review): the "numerator is 1.0f" (INV-only) restriction is not enforced here.
+bool isFDivSafe(Instruction *I) {
+    if (!IGC_GET_FLAG_VALUE(VectorizerAllowFDIV)) return false;
+
+    // dyn_cast yields nullptr for anything that is not a BinaryOperator; reject it
+    // instead of dereferencing it.
+    auto* Binary = llvm::dyn_cast<BinaryOperator>(I);
+    if (!Binary) return false;
+
+    if (Binary->getOpcode() != Instruction::FDiv) return false;
+    if (!I->getType()->isFloatTy()) return false;
+    return true;
+}
+
 bool isBinarySafe(Instruction *I) {
 
     bool Result = false;
     auto* Binary = llvm::dyn_cast<BinaryOperator>(I);
     if (Binary) {
         auto OpCode = Binary->getOpcode();
-        Result  |=  OpCode == Instruction::FMul;
-        Result  |=  OpCode == Instruction::FAdd;
+        Result |=  OpCode == Instruction::FMul;
+        Result |=  (OpCode == Instruction::FAdd && IGC_GET_FLAG_VALUE(VectorizerAllowFADD));
+        Result |= isFDivSafe(I);
     }
     return Result;
 }
@@ -209,6 +225,11 @@ bool IGCVectorizer::handlePHI(VecArr &Slice, Type *VectorType) {
     if (!checkPHI(ScalarPhi, Slice))
         return false;
 
+    if (ScalarToVector.count(ScalarPhi)) {
+        PRINT_LOG_NL(" PHI was vectorized before, no bother ");
+        return true;
+    }
+
     PHINode *Phi = PHINode::Create(VectorType, 2);
     Phi->setName("vectorized_phi");
 
@@ -294,6 +315,7 @@ bool IGCVectorizer::handleInsertElement(VecArr &Slice, Instruction* Final) {
         return false;
 
     PRINT_LOG_NL("InsertElement substituted with vectorized instruction");
+    PRINT_LOG_NL("");
     Value *Compare = ScalarToVector[First->getOperand(1)];
     *(Final->use_begin()) = Compare;
     return true;
@@ -302,6 +324,11 @@ bool IGCVectorizer::handleInsertElement(VecArr &Slice, Instruction* Final) {
 
 InsertElementInst* IGCVectorizer::createVector(VecArr& Slice, Instruction* InsertPoint) {
 
+    if (llvm::isa<PHINode>(InsertPoint)) {
+        InsertPoint = InsertPoint->getParent()->getFirstNonPHI();
+        PRINT_LOG_NL("insertPoint moved to FirstNonPHI");
+    }
+
     llvm::Type* elementType = Slice[0]->getType();
     llvm::VectorType* vectorType = llvm::FixedVectorType::get(elementType, Slice.size());
     llvm::Value* UndefVector = llvm::UndefValue::get(vectorType);
@@ -384,6 +411,11 @@ bool IGCVectorizer::handleCastInstruction(VecArr &Slice) {
 
     Instruction *First = Slice.front();
 
+    if (ScalarToVector.count(First)) {
+        PRINT_LOG_NL("Cast was vectorized before by other slice");
+        return true;
+    }
+
     unsigned int OperNum = 0;
     Value* Vectorized = checkOperandsToBeVectorized(First, OperNum, Slice);
     if (!Vectorized) Vectorized = vectorizeSlice(Slice, OperNum);
@@ -526,7 +558,7 @@ void IGCVectorizer::buildTree(VecArr &V, VecOfSlices& Chain) {
             PRINT_DS("   check: ", LocalVector);
             if (IsSame) {
                 PRINT_LOG_NL("Pushed");
-                Chain.push_back({ OpNum, LocalVector, CurSlice});
+                Chain.push_back({OpNum, std::move(LocalVector), CurSlice});
                 BFSQ.push(&Chain.back());
             }
         }
@@ -719,6 +751,7 @@ bool IGCVectorizer::runOnFunction(llvm::Function &F) {
     M = F.getParent();
     CGCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
     initializeLogFile(F);
+    PRINT_LOG_NL("vectorizer: fadd, fdiv, fptrunc");
 
     VecArr ToProcess;
     // we collect operands that seem promising for vectorization
diff --git a/IGC/Compiler/CISACodeGen/IGCVectorizer.h b/IGC/Compiler/CISACodeGen/IGCVectorizer.h
@@ -37,7 +37,7 @@ class IGCVectorizer : public llvm::FunctionPass {
         Slice* Parent;
     };
 
-    typedef llvm::SmallVector<Slice, 8> VecOfSlices;
+    typedef llvm::SmallVector<Slice, 32> VecOfSlices;
     typedef llvm::SmallVector<VecOfSlices, 3> Tree;
     typedef std::unordered_map<Instruction*, VecArr*> InstructionToSliceMap;
 
diff --git a/IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-fdiv.ll b/IGC/Compiler/tests/EmitVISAPass/vectorizer-vector-emission-fdiv.ll
@@ -0,0 +1,69 @@
+; REQUIRES: pvc-supported, regkeys
+
+; RUN: igc_opt -S -dce -platformpvc -rev-id B -has-emulated-64-bit-insts -igc-emit-visa --regkey=DumpVISAASMToConsole=1 -simd-mode 16 < %s | FileCheck %s
+
+; CHECK: .decl vectorized_binary378 v_type=G type=f num_elts=8 align=dword
+; CHECK: .decl V0035 v_type=G type=f num_elts=8 align=wordx32
+; CHECK: .decl vectorized_binary402 v_type=G type=f num_elts=128 align=wordx32
+; CHECK: .decl V0036 v_type=G type=f num_elts=8 align=wordx32
+
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,0)<1> V0035(0,0)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,1)<1> V0035(0,1)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,2)<1> V0035(0,2)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,3)<1> V0035(0,3)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,4)<1> V0035(0,4)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,5)<1> V0035(0,5)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,6)<1> V0035(0,6)<0;1,0>
+; CHECK:     inv (M1_NM, 1) vectorized_binary378(0,7)<1> V0035(0,7)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(0,0)<1> V0032(0,0)<1;1,0> V0036(0,0)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(1,0)<1> V0032(1,0)<1;1,0> V0036(0,1)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(2,0)<1> V0032(2,0)<1;1,0> V0036(0,2)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(3,0)<1> V0032(3,0)<1;1,0> V0036(0,3)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(4,0)<1> V0032(4,0)<1;1,0> V0036(0,4)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(5,0)<1> V0032(5,0)<1;1,0> V0036(0,5)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(6,0)<1> V0032(6,0)<1;1,0> V0036(0,6)<0;1,0>
+; CHECK:     div (M1, 16) vectorized_binary402(7,0)<1> V0032(7,0)<1;1,0> V0036(0,7)<0;1,0>
+
+define spir_kernel void @_attn_fwd(half addrspace(1)* %0, half addrspace(1)* %1, half addrspace(1)* %2, float %3, i8 addrspace(1)* %4, float addrspace(1)* %5, <8 x i32> %r0) {
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge, %6
+  %7 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
+  br i1 false, label %._crit_edge, label %8
+
+8:                                                ; preds = %._crit_edge
+  %vectorized_binary378 = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, zeroinitializer
+  %vectorized_binary402 = fdiv <8 x float> %7, zeroinitializer
+  %9 = bitcast <8 x float> %vectorized_binary378 to <8 x i32>
+  call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %9)
+  %10 = bitcast <8 x float> %vectorized_binary402 to <8 x i32>
+  call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %10)
+  ret void
+}
+
+declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
+
+declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
+
+; uselistorder directives
+uselistorder void (i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)* @llvm.genx.GenISA.LSC2DBlockWrite.v8i32, { 1, 0 }
+
+!igc.functions = !{!0}
+
+!0 = !{void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)* bitcast (void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>)* @_attn_fwd to void (half addrspace(1)*, half addrspace(1)*, half addrspace(1)*, float, i8 addrspace(1)*, float addrspace(1)*, <8 x i32>, <8 x i32>, i32, i32, i32, i32, i32)*), !1}
+!1 = !{!2, !3, !16}
+!2 = !{!"function_type", i32 0}
+!3 = !{!"implicit_arg_desc", !4, !5, !6, !8, !10, !12, !14}
+!4 = !{i32 0}
+!5 = !{i32 1}
+!6 = !{i32 14, !7}
+!7 = !{!"explicit_arg_num", i32 0}
+!8 = !{i32 14, !9}
+!9 = !{!"explicit_arg_num", i32 1}
+!10 = !{i32 14, !11}
+!11 = !{!"explicit_arg_num", i32 2}
+!12 = !{i32 14, !13}
+!13 = !{!"explicit_arg_num", i32 4}
+!14 = !{i32 14, !15}
+!15 = !{!"explicit_arg_num", i32 5}
+!16 = !{!"sub_group_size", i32 16}
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-inv.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-inv.ll
@@ -0,0 +1,75 @@
+; UNSUPPORTED: system-windows
+; REQUIRES: regkeys
+
+; RUN: igc_opt -S  --igc-vectorizer -dce < %s 2>&1 | FileCheck %s
+
+; CHECK: %vectorized_binary = fdiv <8 x float>
+
+; ModuleID = 'reduced.ll'
+source_filename = "initial_test.ll"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
+target triple = "spir64-unknown-unknown"
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @_attn_fwd() #0 {
+  br label %._crit_edge
+
+._crit_edge:                                      ; preds = %._crit_edge, %0
+  %1 = phi float [ 1.000000e+00, %0 ], [ %35, %._crit_edge ]
+  %2 = phi float [ 1.000000e+00, %0 ], [ %36, %._crit_edge ]
+  %3 = phi float [ 1.000000e+00, %0 ], [ %37, %._crit_edge ]
+  %4 = phi float [ 1.000000e+00, %0 ], [ %38, %._crit_edge ]
+  %5 = phi float [ 1.000000e+00, %0 ], [ %39, %._crit_edge ]
+  %6 = phi float [ 1.000000e+00, %0 ], [ %40, %._crit_edge ]
+  %7 = phi float [ 1.000000e+00, %0 ], [ %41, %._crit_edge ]
+  %8 = phi float [ 1.000000e+00, %0 ], [ %42, %._crit_edge ]
+  %9 =  call float @llvm.exp2.f32(float %1)
+  %10 = call float @llvm.exp2.f32(float %2)
+  %11 = call float @llvm.exp2.f32(float %3)
+  %12 = call float @llvm.exp2.f32(float %4)
+  %13 = call float @llvm.exp2.f32(float %5)
+  %14 = call float @llvm.exp2.f32(float %6)
+  %15 = call float @llvm.exp2.f32(float %7)
+  %16 = call float @llvm.exp2.f32(float %8)
+  %17 = fdiv fast float 1.000000e+00, %9
+  %18 = fdiv fast float 1.000000e+00, %10
+  %19 = fdiv fast float 1.000000e+00, %11
+  %20 = fdiv fast float 1.000000e+00, %12
+  %21 = fdiv fast float 1.000000e+00, %13
+  %22 = fdiv fast float 1.000000e+00, %14
+  %23 = fdiv fast float 1.000000e+00, %15
+  %24 = fdiv fast float 1.000000e+00, %16
+  %25 = insertelement <8 x float> zeroinitializer, float %17, i64 0
+  %26 = insertelement <8 x float> %25, float %18, i64 1
+  %27 = insertelement <8 x float> %26, float %19, i64 2
+  %28 = insertelement <8 x float> %27, float %20, i64 3
+  %29 = insertelement <8 x float> %28, float %21, i64 4
+  %30 = insertelement <8 x float> %29, float %22, i64 5
+  %31 = insertelement <8 x float> %30, float %23, i64 6
+  %32 = insertelement <8 x float> %31, float %24, i64 7
+  %33 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %32, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
+  %34 = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %33, <8 x i16> zeroinitializer, <8 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0, i1 false)
+  %35 = extractelement <8 x float> %34, i64 0
+  %36 = extractelement <8 x float> %34, i64 1
+  %37 = extractelement <8 x float> %34, i64 2
+  %38 = extractelement <8 x float> %34, i64 3
+  %39 = extractelement <8 x float> %34, i64 4
+  %40 = extractelement <8 x float> %34, i64 5
+  %41 = extractelement <8 x float> %34, i64 6
+  %42 = extractelement <8 x float> %34, i64 7
+  br label %._crit_edge
+}
+
+; Function Attrs: convergent nounwind readnone willreturn
+declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1) #1
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare float @llvm.exp2.f32(float) #2
+
+; uselistorder directives
+uselistorder <8 x float> (<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)* @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32, { 1, 0 }
+uselistorder float (float)* @llvm.exp2.f32, { 7, 6, 5, 4, 3, 2, 1, 0 }
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone willreturn }
+attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
diff --git a/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-not-inv.ll b/IGC/Compiler/tests/IGCVectorizer/vectorizer-test-fdiv-not-inv.ll
diff --git a/IGC/common/igc_flags.h b/IGC/common/igc_flags.h