@@ -64,8 +64,10 @@ function vector_cost(ic::InstructionCost, Wshift, sizeof_T)
64
64
end
65
65
srt, sl, srp
66
66
end
67
- instruction_cost (instruction:: Symbol ) = get (COST, instruction, OPAQUE_INSTRUCTION)
68
- instruction_cost (instruction:: Instruction ) = instruction_cost (instruction. instr)
67
+ # instruction_cost(instruction::Symbol) = get(COST, instruction, OPAQUE_INSTRUCTION)
68
+ # instruction_cost(instruction::Instruction) = instruction_cost(instruction.instr)
69
+ # Cost of an `Instruction`: its entry in `COST`, or `OPAQUE_INSTRUCTION` when
+ # `COST` has no entry for it.
+ function instruction_cost(instruction::Instruction)
+     return get(COST, instruction, OPAQUE_INSTRUCTION)
+ end
70
+ # Convenience method: wrap a bare `Symbol` in an `Instruction` and defer to the
+ # `Instruction` method for the actual lookup.
+ function instruction_cost(instruction::Symbol)
+     return instruction_cost(Instruction(instruction))
+ end
69
71
# Scalar cost of `instr`: fetch its cost record with `instruction_cost` and pass that
# record to `scalar_cost`.
function scalar_cost(instr::Instruction)
    ic = instruction_cost(instr)
    return scalar_cost(ic)
end
70
72
# Vector cost of `instr`: fetch its cost record with `instruction_cost` and forward it,
# together with `Wshift` and `sizeof_T` unchanged, to `vector_cost`.
function vector_cost(instr::Instruction, Wshift, sizeof_T)
    ic = instruction_cost(instr)
    return vector_cost(ic, Wshift, sizeof_T)
end
71
73
function cost (instruction:: InstructionCost , Wshift, sizeof_T)
@@ -87,76 +89,78 @@ const OPAQUE_INSTRUCTION = InstructionCost(50, 50.0, -1.0, VectorizationBase.REG
87
89
# as a heuristic means of approximating register pressure, since many loads can be
88
90
# consolidated into a single register. The number of LICM-ed setindex!, on the other
89
91
# hand, should indicate how many registers we're keeping live for the sake of eventually storing.
90
- const COST = Dict {Symbol ,InstructionCost} (
91
- :getindex => InstructionCost (- 3.0 ,0.5 ,3 ,1 ),
92
- :setindex! => InstructionCost (- 3.0 ,1.0 ,3 ,0 ),
93
- :conditionalstore! => InstructionCost (- 3.0 ,1.0 ,3 ,0 ),
94
- :zero => InstructionCost (1 ,0.5 ),
95
- :one => InstructionCost (3 ,0.5 ),
96
- :(+ ) => InstructionCost (4 ,0.5 ),
97
- :(- ) => InstructionCost (4 ,0.5 ),
98
- :(* ) => InstructionCost (4 ,0.5 ),
99
- :(/ ) => InstructionCost (13 ,4.0 ,- 2.0 ),
100
- :vadd => InstructionCost (4 ,0.5 ),
101
- :vsub => InstructionCost (4 ,0.5 ),
102
- :vmul => InstructionCost (4 ,0.5 ),
103
- :vfdiv => InstructionCost (13 ,4.0 ,- 2.0 ),
104
- :evadd => InstructionCost (4 ,0.5 ),
105
- :evsub => InstructionCost (4 ,0.5 ),
106
- :evmul => InstructionCost (4 ,0.5 ),
107
- :evfdiv => InstructionCost (13 ,4.0 ,- 2.0 ),
108
- :reduced_add => InstructionCost (4 ,0.5 ),# ignoring reduction part of cost, might be nop
109
- :reduced_prod => InstructionCost (4 ,0.5 ),# ignoring reduction part of cost, might be nop
110
- :reduce_to_add => InstructionCost (0 ,0.0 ,0.0 ,0 ),
111
- :reduce_to_prod => InstructionCost (0 ,0.0 ,0.0 ,0 ),
112
- :abs2 => InstructionCost (4 ,0.5 ),
113
- :vabs2 => InstructionCost (4 ,0.5 ),
114
- :(== ) => InstructionCost (1 , 0.5 ),
115
- :isequal => InstructionCost (1 , 0.5 ),
116
- :(~ ) => InstructionCost (1 , 0.5 ),
117
- :(& ) => InstructionCost (1 , 0.5 ),
118
- :(| ) => InstructionCost (1 , 0.5 ),
119
- :(> ) => InstructionCost (1 , 0.5 ),
120
- :(< ) => InstructionCost (1 , 0.5 ),
121
- :(>= ) => InstructionCost (1 , 0.5 ),
122
- :(<= ) => InstructionCost (1 , 0.5 ),
123
- :ifelse => InstructionCost (1 , 0.5 ),
124
- :vifelse => InstructionCost (1 , 0.5 ),
125
- :inv => InstructionCost (13 ,4.0 ,- 2.0 ,1 ),
126
- :vinv => InstructionCost (13 ,4.0 ,- 2.0 ,1 ),
127
- :muladd => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
128
- :fma => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
129
- :vmuladd => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
130
- :vfma => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
131
- :vfmadd => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
132
- :vfmsub => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
133
- :vfnmadd => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
134
- :vfnmsub => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
135
- :vfmadd_fast => InstructionCost (4 ,0.5 ), # + and * will fuse into this, so much of the time they're not twice as expensive
136
- :vfmsub_fast => InstructionCost (4 ,0.5 ), # - and * will fuse into this, so much of the time they're not twice as expensive
137
- :vfnmadd_fast => InstructionCost (4 ,0.5 ), # + and -* will fuse into this, so much of the time they're not twice as expensive
138
- :vfnmsub_fast => InstructionCost (4 ,0.5 ), # - and -* will fuse into this, so much of the time they're not twice as expensive
139
- :sqrt => InstructionCost (15 ,4.0 ,- 2.0 ),
140
- :sqrt_fast => InstructionCost (15 ,4.0 ,- 2.0 ),
141
- :log => InstructionCost (20 ,20.0 ,40.0 ,20 ),
142
- :exp => InstructionCost (20 ,20.0 ,20.0 ,18 ),
143
- : ^ => InstructionCost (40 ,40.0 ,40.0 ,26 ), # FIXME
144
- :sin => InstructionCost (18 ,15.0 ,68.0 ,23 ),
145
- :cos => InstructionCost (18 ,15.0 ,68.0 ,26 ),
146
- :sincos => InstructionCost (25 ,22.0 ,70.0 ,26 ),
147
- :log_fast => InstructionCost (20 ,20.0 ,40.0 ,20 ),
148
- :exp_fast => InstructionCost (20 ,20.0 ,20.0 ,18 ),
149
- :sin_fast => InstructionCost (18 ,15.0 ,68.0 ,23 ),
150
- :cos_fast => InstructionCost (18 ,15.0 ,68.0 ,26 ),
151
- :sincos_fast => InstructionCost (25 ,22.0 ,70.0 ,26 ),
152
- :identity => InstructionCost (0 ,0.0 ,0.0 ,0 ),
153
- :adjoint => InstructionCost (0 ,0.0 ,0.0 ,0 ),
154
- :transpose => InstructionCost (0 ,0.0 ,0.0 ,0 ),
92
+ # Cost table keyed by `Instruction`. `instruction_cost` reads from it and falls back to
+ # `OPAQUE_INSTRUCTION` for anything that is not listed here.
+ # Entries are written as `Symbol => InstructionCost` pairs; each key is wrapped with
+ # `Instruction(...)` while the dictionary is built, in the order listed.
+ const COST = Dict{Instruction,InstructionCost}(
+     Instruction(op) => ic for (op, ic) in [
+         :getindex => InstructionCost(-3.0, 0.5, 3, 1),
+         :setindex! => InstructionCost(-3.0, 1.0, 3, 0),
+         :conditionalstore! => InstructionCost(-3.0, 1.0, 3, 0),
+         :zero => InstructionCost(1, 0.5),
+         :one => InstructionCost(3, 0.5),
+         :(+) => InstructionCost(4, 0.5),
+         :(-) => InstructionCost(4, 0.5),
+         :(*) => InstructionCost(4, 0.5),
+         :(/) => InstructionCost(13, 4.0, -2.0),
+         :vadd => InstructionCost(4, 0.5),
+         :vsub => InstructionCost(4, 0.5),
+         :vmul => InstructionCost(4, 0.5),
+         :vfdiv => InstructionCost(13, 4.0, -2.0),
+         :evadd => InstructionCost(4, 0.5),
+         :evsub => InstructionCost(4, 0.5),
+         :evmul => InstructionCost(4, 0.5),
+         :evfdiv => InstructionCost(13, 4.0, -2.0),
+         :reduced_add => InstructionCost(4, 0.5), # ignoring reduction part of cost, might be nop
+         :reduced_prod => InstructionCost(4, 0.5), # ignoring reduction part of cost, might be nop
+         :reduce_to_add => InstructionCost(0, 0.0, 0.0, 0),
+         :reduce_to_prod => InstructionCost(0, 0.0, 0.0, 0),
+         :abs2 => InstructionCost(4, 0.5),
+         :vabs2 => InstructionCost(4, 0.5),
+         :(==) => InstructionCost(1, 0.5),
+         :isequal => InstructionCost(1, 0.5),
+         :(~) => InstructionCost(1, 0.5),
+         :(&) => InstructionCost(1, 0.5),
+         :(|) => InstructionCost(1, 0.5),
+         :(>) => InstructionCost(1, 0.5),
+         :(<) => InstructionCost(1, 0.5),
+         :(>=) => InstructionCost(1, 0.5),
+         :(<=) => InstructionCost(1, 0.5),
+         :ifelse => InstructionCost(1, 0.5),
+         :vifelse => InstructionCost(1, 0.5),
+         :inv => InstructionCost(13, 4.0, -2.0, 1),
+         :vinv => InstructionCost(13, 4.0, -2.0, 1),
+         :muladd => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :fma => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :vmuladd => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :vfma => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :vfmadd => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :vfmsub => InstructionCost(4, 0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
+         :vfnmadd => InstructionCost(4, 0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
+         :vfnmsub => InstructionCost(4, 0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
+         :vfmadd_fast => InstructionCost(4, 0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
+         :vfmsub_fast => InstructionCost(4, 0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
+         :vfnmadd_fast => InstructionCost(4, 0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
+         :vfnmsub_fast => InstructionCost(4, 0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
+         :sqrt => InstructionCost(15, 4.0, -2.0),
+         :sqrt_fast => InstructionCost(15, 4.0, -2.0),
+         :log => InstructionCost(20, 20.0, 40.0, 20),
+         :exp => InstructionCost(20, 20.0, 20.0, 18),
+         :(^) => InstructionCost(40, 40.0, 40.0, 26), # FIXME
+         :sin => InstructionCost(18, 15.0, 68.0, 23),
+         :cos => InstructionCost(18, 15.0, 68.0, 26),
+         :sincos => InstructionCost(25, 22.0, 70.0, 26),
+         :log_fast => InstructionCost(20, 20.0, 40.0, 20),
+         :exp_fast => InstructionCost(20, 20.0, 20.0, 18),
+         :sin_fast => InstructionCost(18, 15.0, 68.0, 23),
+         :cos_fast => InstructionCost(18, 15.0, 68.0, 26),
+         :sincos_fast => InstructionCost(25, 22.0, 70.0, 26),
+         :identity => InstructionCost(0, 0.0, 0.0, 0),
+         :adjoint => InstructionCost(0, 0.0, 0.0, 0),
+         :transpose => InstructionCost(0, 0.0, 0.0, 0),
# Symbol("##CONSTANT##") => InstructionCost(0,0.0)
+     ]
)
157
159
158
- const KNOWNINSTRUCTIONS = keys (COST)
159
- instruction (f, m) = f ∈ KNOWNINSTRUCTIONS ? Instruction (:LoopVectorization , f) : Instruction (m, f)
160
+ # const KNOWNINSTRUCTIONS = keys(COST)
161
+ # instruction(f, m) = f ∈ KNOWNINSTRUCTIONS ? Instruction(:LoopVectorization, f) : Instruction(m, f)
162
+ # Resolve the function name `f` to an `Instruction`. When `Instruction(f)` has an entry
+ # in `COST` that instruction is returned; otherwise the name is qualified with `m` via
+ # `Instruction(m, f)`.
+ function instruction(f::Symbol, m)
+     known = Instruction(f)
+     return haskey(COST, known) ? known : Instruction(m, f)
+ end
163
+ # instruction(f, m) = get(COST, f, Instruction(m, f))
160
164
161
165
# for (k, v) ∈ COST # so we can look up Symbol(typeof(function))
162
166
# COST[Symbol("typeof(", lower(k), ")")] = v
0 commit comments