-
Notifications
You must be signed in to change notification settings - Fork 196
Open
Description
For the expression y(i) = A(i,j) * x(j) + z(i), the documentation suggests that z(i) is added exactly once for each i, outside the j loop. This is consistent with the result from taco "y(i) = A(i,j) * x(j) + z(i)" -f=y:d -f=A:dd -f=x:d -f=z:d:
/*
 * Kernel emitted by taco for y(i) = A(i,j) * x(j) + z(i).
 *
 * For every i in [0, z1_dimension) it stores
 *   y_vals[i] = (sum over j in [0, x1_dimension) of
 *                A_vals[i * A2_dimension + j] * x_vals[j]) + z_vals[i]
 * so z_vals[i] contributes exactly once per i.
 *
 * Reads the dimensions and vals fields of y, A, x and z; writes only through
 * y->vals. Always returns 0.
 */
int compute(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x, taco_tensor_t *z) {
// y1_dimension is read here but not used in the rest of the function.
int y1_dimension = (int)(y->dimensions[0]);
double* restrict y_vals = (double*)(y->vals);
// A1_dimension is likewise unused; only A2_dimension takes part in indexing.
int A1_dimension = (int)(A->dimensions[0]);
int A2_dimension = (int)(A->dimensions[1]);
double* restrict A_vals = (double*)(A->vals);
int x1_dimension = (int)(x->dimensions[0]);
double* restrict x_vals = (double*)(x->vals);
int z1_dimension = (int)(z->dimensions[0]);
double* restrict z_vals = (double*)(z->vals);
// Each i iteration writes only y_vals[i]; the accumulator is declared inside the loop body.
#pragma omp parallel for schedule(runtime)
// The i loop is bounded by z's dimension, not by y1_dimension or A1_dimension.
for (int32_t i = 0; i < z1_dimension; i++) {
// Accumulator for the reduction over j.
double tj_val = 0.0;
for (int32_t j = 0; j < x1_dimension; j++) {
// Flat offset of element (i, j) in A_vals, using A2_dimension as the row stride.
int32_t jA = i * A2_dimension + j;
tj_val += A_vals[jA] * x_vals[j];
}
// z_vals[i] is added after the j loop has finished, i.e. once per i.
y_vals[i] = tj_val + z_vals[i];
}
return 0;
}
However, adding any more terms pushes z(i) inside the j loop, so it is added once per j iteration instead of once per i. For example, taco "y(i) = A(i,j) * x(j) + z(i) + 0 " -f=y:d -f=A:dd -f=x:d -f=z:d generates:
/*
 * Kernel emitted by taco for y(i) = A(i,j) * x(j) + z(i) + 0.
 *
 * For every i in [0, z1_dimension) it stores
 *   y_vals[i] = sum over j in [0, x1_dimension) of
 *               (A_vals[i * A2_dimension + j] * x_vals[j] + z_vals[i])
 * which equals the A*x reduction plus x1_dimension * z_vals[i]: z_vals[i] is
 * accumulated once per j iteration rather than once per i. The literal 0 term
 * of the expression does not appear anywhere in the code.
 *
 * Reads the dimensions and vals fields of y, A, x and z; writes only through
 * y->vals. Always returns 0.
 */
int compute(taco_tensor_t *y, taco_tensor_t *A, taco_tensor_t *x, taco_tensor_t *z) {
// y1_dimension is read here but not used in the rest of the function.
int y1_dimension = (int)(y->dimensions[0]);
double* restrict y_vals = (double*)(y->vals);
// A1_dimension is likewise unused; only A2_dimension takes part in indexing.
int A1_dimension = (int)(A->dimensions[0]);
int A2_dimension = (int)(A->dimensions[1]);
double* restrict A_vals = (double*)(A->vals);
int x1_dimension = (int)(x->dimensions[0]);
double* restrict x_vals = (double*)(x->vals);
int z1_dimension = (int)(z->dimensions[0]);
double* restrict z_vals = (double*)(z->vals);
// Each i iteration writes only y_vals[i]; the accumulator is declared inside the loop body.
#pragma omp parallel for schedule(runtime)
// The i loop is bounded by z's dimension, not by y1_dimension or A1_dimension.
for (int32_t i = 0; i < z1_dimension; i++) {
// Accumulator for the reduction over j.
double tj_val = 0.0;
for (int32_t j = 0; j < x1_dimension; j++) {
// Flat offset of element (i, j) in A_vals, using A2_dimension as the row stride.
int32_t jA = i * A2_dimension + j;
// NOTE: z_vals[i] sits inside the reduction, so it is summed x1_dimension times.
tj_val += A_vals[jA] * x_vals[j] + z_vals[i];
}
// No z term is added here; if x1_dimension is 0 the loop never runs and y_vals[i] is 0.0.
y_vals[i] = tj_val;
}
return 0;
}
Similar behavior is seen when z(i) is simply added twice.
Metadata
Metadata
Assignees
Labels
No labels