Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
.file "bilateralFilterKernel.c"
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0 # -- Begin function bilateralFilterKernel
.LCPI0_0:
.dword 0x4072000000000000 # double 288
.LCPI0_1:
.dword 0x408c463abeccb2bb # double 904.77868423386042
.LCPI0_2:
.dword 0xbf60000000000000 # double -0.001953125
.LCPI0_3:
.dword 0x409921fb54442d18 # double 1608.4954386379741
.text
.globl bilateralFilterKernel
.globl bilateralFilterKernel # -- Begin function bilateralFilterKernel
.p2align 5
.type bilateralFilterKernel,@function
bilateralFilterKernel: # @bilateralFilterKernel
Expand Down Expand Up @@ -38,32 +28,40 @@ bilateralFilterKernel: # @bilateralFilterKernel
move $s8, $a4
bstrpick.d $a4, $a4, 31, 31
add.w $a4, $s8, $a4
srai.d $s5, $a4, 1
sub.w $a0, $a0, $s5
srai.d $s0, $a4, 1
sub.w $a0, $a0, $s0
st.d $a3, $sp, 16 # 8-byte Folded Spill
st.d $a2, $sp, 32 # 8-byte Folded Spill
st.d $a0, $sp, 24 # 8-byte Folded Spill
bge $s5, $a0, .LBB0_11
bge $s0, $a0, .LBB0_11
# %bb.1: # %.preheader77.lr.ph
sub.w $a0, $a1, $s5
sub.w $a0, $a1, $s0
st.d $a0, $sp, 56 # 8-byte Folded Spill
bge $s5, $a0, .LBB0_11
bge $s0, $a0, .LBB0_11
# %bb.2: # %.preheader77.lr.ph
blez $s8, .LBB0_11
# %bb.3: # %.preheader77.us.us.preheader
pcalau12i $a0, %pc_hi20(.LCPI0_0)
fld.d $fs0, $a0, %pc_lo12(.LCPI0_0)
pcalau12i $a0, %pc_hi20(.LCPI0_1)
fld.d $fs1, $a0, %pc_lo12(.LCPI0_1)
pcalau12i $a0, %pc_hi20(.LCPI0_2)
fld.d $fs2, $a0, %pc_lo12(.LCPI0_2)
pcalau12i $a0, %pc_hi20(.LCPI0_3)
fld.d $fs3, $a0, %pc_lo12(.LCPI0_3)
bstrpick.d $a0, $a1, 31, 0
st.d $a0, $sp, 8 # 8-byte Folded Spill
movgr2fr.d $fs4, $zero
movgr2fr.d $fs0, $zero
ori $a0, $zero, 0
lu32i.d $a0, 131072
lu52i.d $a0, $a0, 1031
movgr2fr.d $fs1, $a0
lu12i.w $a0, -267061
ori $a0, $a0, 699
lu32i.d $a0, -244166
lu52i.d $a0, $a0, 1032
movgr2fr.d $fs2, $a0
lu52i.d $a0, $zero, -1034
movgr2fr.d $fs3, $a0
lu12i.w $a0, 345154
ori $a0, $a0, 3352
lu32i.d $a0, -450053
lu52i.d $a0, $a0, 1033
movgr2fr.d $fs4, $a0
ld.d $s7, $sp, 32 # 8-byte Folded Reload
move $a3, $s5
move $a3, $s0
.p2align 4, , 16
.LBB0_4: # %.preheader77.us.us
# =>This Loop Header: Depth=1
Expand All @@ -81,7 +79,7 @@ bilateralFilterKernel: # @bilateralFilterKernel
alsl.d $a0, $a0, $a1, 2
st.d $a0, $sp, 64 # 8-byte Folded Spill
st.d $s7, $sp, 48 # 8-byte Folded Spill
move $s0, $s5
move $s5, $s0
.p2align 4, , 16
.LBB0_5: # %.preheader76.us.us.us
# Parent Loop BB0_4 Depth=1
Expand All @@ -90,21 +88,21 @@ bilateralFilterKernel: # @bilateralFilterKernel
# Child Loop BB0_7 Depth 4
move $s3, $zero
ld.d $a0, $sp, 72 # 8-byte Folded Reload
alsl.d $s1, $s0, $a0, 2
alsl.d $s1, $s5, $a0, 2
st.d $s7, $sp, 80 # 8-byte Folded Spill
fmov.d $fs5, $fs4
fmov.d $fs6, $fs4
fmov.d $fs5, $fs0
fmov.d $fs6, $fs0
.p2align 4, , 16
.LBB0_6: # %.preheader.us.us.us.us
# Parent Loop BB0_4 Depth=1
# Parent Loop BB0_5 Depth=2
# => This Loop Header: Depth=3
# Child Loop BB0_7 Depth 4
sub.d $a0, $s5, $s3
sub.d $a0, $s0, $s3
mul.d $s4, $a0, $a0
move $fp, $s8
move $s6, $s7
move $s2, $s5
move $s2, $s0
.p2align 4, , 16
.LBB0_7: # Parent Loop BB0_4 Depth=1
# Parent Loop BB0_5 Depth=2
Expand All @@ -117,10 +115,10 @@ bilateralFilterKernel: # @bilateralFilterKernel
add.d $a0, $a1, $a0
movgr2fr.w $fa0, $a0
ffint.d.w $fa0, $fa0
fdiv.d $fa0, $fa0, $fs0
fdiv.d $fa0, $fa0, $fs1
pcaddu18i $ra, %call36(exp)
jirl $ra, $ra, 0
fdiv.d $fs7, $fa0, $fs1
fdiv.d $fs7, $fa0, $fs2
mul.d $a0, $s2, $s2
add.d $a0, $a0, $s4
bstrpick.d $a0, $a0, 31, 0
Expand All @@ -130,11 +128,11 @@ bilateralFilterKernel: # @bilateralFilterKernel
fcvt.s.d $fa0, $fa0
fmul.s $fa0, $fa0, $fa0
fcvt.d.s $fa0, $fa0
fmul.d $fa0, $fa0, $fs2
fmul.d $fa0, $fa0, $fs3
pcaddu18i $ra, %call36(exp)
jirl $ra, $ra, 0
ld.w $a0, $s6, 0
fdiv.d $fa0, $fa0, $fs3
fdiv.d $fa0, $fa0, $fs4
fmul.d $fa0, $fs7, $fa0
movgr2fr.w $fa1, $a0
ffint.d.w $fa1, $fa1
Expand All @@ -155,14 +153,14 @@ bilateralFilterKernel: # @bilateralFilterKernel
fdiv.d $fa0, $fs6, $fs5
ftintrz.w.d $fa0, $fa0
movfr2gr.s $a0, $fa0
slli.d $a1, $s0, 2
slli.d $a1, $s5, 2
ld.d $a2, $sp, 64 # 8-byte Folded Reload
stx.w $a0, $a2, $a1
addi.d $s0, $s0, 1
addi.d $s5, $s5, 1
ld.d $s7, $sp, 80 # 8-byte Folded Reload
addi.d $s7, $s7, 4
ld.d $a0, $sp, 56 # 8-byte Folded Reload
bne $s0, $a0, .LBB0_5
bne $s5, $a0, .LBB0_5
# %bb.10: # %._crit_edge.split.us.us.us
# in Loop: Header=BB0_4 Depth=1
ld.d $a3, $sp, 40 # 8-byte Folded Reload
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
.file "gaussianBlurKernel.c"
.section .rodata.cst4,"aM",@progbits,4
.p2align 2, 0x0 # -- Begin function gaussianBlurKernel
.LCPI0_0:
.word 0x43220000 # float 162
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0
.LCPI0_1:
.dword 0x407fcf0216a64912 # double 508.93800988154646
.text
.globl gaussianBlurKernel
.globl gaussianBlurKernel # -- Begin function gaussianBlurKernel
.p2align 5
.type gaussianBlurKernel,@function
gaussianBlurKernel: # @gaussianBlurKernel
Expand Down Expand Up @@ -44,11 +36,14 @@ gaussianBlurKernel: # @gaussianBlurKernel
movgr2fr.w $fs1, $zero
addi.w $s3, $zero, -4
addi.d $s4, $sp, 40
pcalau12i $a0, %pc_hi20(.LCPI0_0)
fld.s $fs4, $a0, %pc_lo12(.LCPI0_0)
pcalau12i $a0, %pc_hi20(.LCPI0_1)
fld.d $fs5, $a0, %pc_lo12(.LCPI0_1)
addi.w $s5, $zero, -16
lu12i.w $a0, 274976
movgr2fr.w $fs4, $a0
lu12i.w $a0, 92772
ori $a0, $a0, 2322
lu32i.d $a0, -12542
lu52i.d $a0, $a0, 1031
movgr2fr.d $fs5, $a0
addi.w $s6, $zero, -9
ori $s7, $zero, 5
move $s8, $s3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
.file "orderedDitherKernel.c"
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0 # -- Begin function orderedDitherKernel
.LCPI0_0:
.dword 0x406fe00000000000 # double 255
.section .rodata.cst16,"aM",@progbits,16
.p2align 4, 0x0
.LCPI0_1:
.p2align 4, 0x0 # -- Begin function orderedDitherKernel
.LCPI0_0:
.dword 2 # 0x2
.dword 3 # 0x3
.LCPI0_2:
.LCPI0_1:
.dword 0 # 0x0
.dword 1 # 0x1
.LCPI0_3:
.LCPI0_2:
.word 0 # 0x0
.word 1 # 0x1
.word 2 # 0x2
Expand Down Expand Up @@ -46,11 +42,10 @@ orderedDitherKernel: # @orderedDitherKernel
sltui $t4, $a1, 4
or $t3, $t4, $t3
andi $t3, $t3, 1
pcalau12i $t4, %pc_hi20(.LCPI0_0)
fld.d $fa0, $t4, %pc_lo12(.LCPI0_0)
ori $t4, $zero, 0
lu32i.d $t4, -8192
lu52i.d $t4, $t4, 1030
movgr2fr.d $fa0, $t4
vreplgr2vr.d $vr1, $t4
move $t4, $a4
b .LBB0_4
Expand Down Expand Up @@ -346,8 +341,8 @@ orderedDitherKernel: # @orderedDitherKernel
move $t0, $zero
ori $t1, $zero, 4
ori $t2, $zero, 255
pcalau12i $t3, %pc_hi20(.LCPI0_3)
vld $vr0, $t3, %pc_lo12(.LCPI0_3)
pcalau12i $t3, %pc_hi20(.LCPI0_2)
vld $vr0, $t3, %pc_lo12(.LCPI0_2)
vreplgr2vr.w $vr1, $a5
lu32i.d $a5, 0
vrepli.w $vr2, 3
Expand Down Expand Up @@ -455,10 +450,10 @@ orderedDitherKernel: # @orderedDitherKernel
addi.d $a5, $a5, %pc_lo12(.L__const.orderedDitherKernel.dither.3)
move $a6, $zero
addi.w $t0, $zero, -5
pcalau12i $t1, %pc_hi20(.LCPI0_0)
vld $vr0, $t1, %pc_lo12(.LCPI0_0)
pcalau12i $t1, %pc_hi20(.LCPI0_1)
vld $vr0, $t1, %pc_lo12(.LCPI0_1)
pcalau12i $t1, %pc_hi20(.LCPI0_2)
vld $vr1, $t1, %pc_lo12(.LCPI0_2)
vld $vr1, $t1, %pc_lo12(.LCPI0_1)
ori $t1, $zero, 255
vrepli.d $vr2, 7
vrepli.w $vr3, 255
Expand Down
Loading