[AVX2] SAD pattern detection is too strict

Reference code: [Zig Godbolt](https://zig.godbolt.org/z/c5ezdxojG)

Some opportunities for producing optimized sum of absolute differences (SAD) calculations are being missed. It looks like [prior support for this was overly restrictive](https://github.com/llvm/llvm-project/commit/6f879d9eb1a111a0c99f2a69e4ad30b220f4926a).

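Essentially, the absolute difference is being computed explicitly (`vpminub` / `vpmaxub` / `vpsubb`) and then summed with `vpsadbw` against a zero vector, when the whole computation should be handled by a single `vpsadbw` applied directly to the two inputs.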

Here's the code inline:

```zig
// Number of lanes loaded from each input buffer.
const block_width = 8;
// Element type of the compared vectors.
const T = u8;
// Vector of `block_width` elements of type `T`.
const VT = @Vector(block_width, T);

// Reduced reproducer: returns the sum of absolute differences between the
// first `block_width` bytes of `srcp` and the first `block_width` bytes of
// `refp`.
//
// `height` and `stride` are only used to bound the two slices; only the
// bytes at offset `0*stride` are actually read. Both pointers are declared
// `noalias`.
export fn sad(noalias srcp: [*]const u8, noalias refp: [*]const u8, height: usize, stride: usize) u32 {
    const src = srcp[0..height * stride];
    const ref = refp[0..height * stride];
    
    var sum: u32 = 0;

    // Load one `block_width`-wide vector from the start of each buffer.
    const s: VT = src[0*stride..][0..block_width].*;
    const r: VT = ref[0*stride..][0..block_width].*;

    // Variant 1 (active): unsigned absolute difference written as max - min.
    // Expected to lower to a single SAD instruction, but currently does not.
    // NOTE(review): `@reduce` over a u8 vector presumably yields a u8, so the
    // sum may be formed in 8 bits before being widened into `sum` — confirm.
    const absdiff = @max(s,r) - @min(s,r);
    sum += @reduce(.Add, absdiff);
        
    // Variant 2: widen to i16, subtract, take @abs.
    // Expected to lower to a single SAD instruction, but currently does not.
    //const VTI = @Vector(block_width, i16);
    //sum += @reduce(.Add, @abs(@as(VTI, s) - @as(VTI, r)));

    // Variant 3: same as variant 2 but widened to i32.
    // This one is recognized and does produce the SAD instruction.
    //const VTI = @Vector(block_width, i32);
    //sum += @reduce(.Add, @abs(@as(VTI, s) - @as(VTI, r)));
    
    return sum;
}
```

Which produces:

```asm
sad:
        push    rbp
        mov     rbp, rsp
        vmovq   xmm0, qword ptr [rdi]
        vmovq   xmm1, qword ptr [rsi]
        vpminub xmm2, xmm0, xmm1
        vpmaxub xmm0, xmm0, xmm1
        vpxor   xmm1, xmm1, xmm1
        vpsubb  xmm0, xmm0, xmm2
        vpsadbw xmm0, xmm0, xmm1
        vpextrb eax, xmm0, 0
        pop     rbp
        ret
```

But the expected codegen is:

```asm
sad:
        push    rbp
        mov     rbp, rsp
        vmovq   xmm0, qword ptr [rdi]
        vmovq   xmm1, qword ptr [rsi]
        vpsadbw xmm0, xmm0, xmm1
        vmovd   eax, xmm0
        pop     rbp
        ret
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AVX2] SAD pattern detection is too strict #143456

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[AVX2] SAD pattern detection is too strict #143456

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions