Skip to content

Commit 02ea4a2

Browse files
committed
Performance optimizations
1 parent ac5141d commit 02ea4a2

File tree

2 files changed

+147
-96
lines changed

2 files changed

+147
-96
lines changed

buffer.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -221,10 +221,6 @@ func (b *Buffer) has(count int) bool {
221221
}
222222

223223
func (b *Buffer) read(count int) int {
224-
if !b.has(count) {
225-
return 0
226-
}
227-
228224
value := 0
229225
for count != 0 {
230226
currentByte := int(b.Bytes()[b.bitIndex>>3])
@@ -248,10 +244,6 @@ func (b *Buffer) read(count int) int {
248244
}
249245

250246
func (b *Buffer) read1() int {
251-
if !b.has(1) {
252-
return 0
253-
}
254-
255247
currentByte := int(b.Bytes()[b.bitIndex>>3])
256248

257249
shift := 7 - (b.bitIndex & 7)
@@ -287,8 +279,9 @@ func (b *Buffer) skipBytes(v byte) int {
287279
func (b *Buffer) nextStartCode() int {
288280
b.align()
289281

290-
for b.has(5 << 3) {
291-
data := b.Bytes()
282+
retry:
283+
for ((len(b.bytes) << 3) - b.bitIndex) >= (5 << 3) {
284+
data := b.bytes
292285
byteIndex := b.bitIndex >> 3
293286
if data[byteIndex] == 0x00 &&
294287
data[byteIndex+1] == 0x00 &&
@@ -301,6 +294,10 @@ func (b *Buffer) nextStartCode() int {
301294
b.bitIndex += 8
302295
}
303296

297+
if b.has(5 << 3) {
298+
goto retry
299+
}
300+
304301
return -1
305302
}
306303

@@ -329,7 +326,7 @@ func (b *Buffer) hasStartCode(code int) int {
329326
func (b *Buffer) findFrameSync() bool {
330327
var i int
331328
for i = b.bitIndex >> 3; i < len(b.bytes)-1; i++ {
332-
if b.Bytes()[i] == 0xFF && (b.Bytes()[i+1]&0xFE) == 0xFC {
329+
if b.bytes[i] == 0xFF && (b.bytes[i+1]&0xFE) == 0xFC {
333330
b.bitIndex = ((i + 1) << 3) + 3
334331

335332
return true

video.go

Lines changed: 139 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,95 +1039,153 @@ func (v *Video) decodeBlock(block int) {
10391039
// Overwrite (no prediction)
10401040
if n == 1 {
10411041
value := (s[0] + 128) >> 8
1042-
copyValueToDest(int(clamp(value)), d, di, scan)
1042+
copyValueToDest(clamp(value), d, di, scan)
10431043
s[0] = 0
10441044
} else {
1045-
v.idct(s)
1045+
v.idct(s, n)
10461046
copyBlockToDest(s, d, di, scan)
1047-
for i := range v.blockData {
1048-
v.blockData[i] = 0
1049-
}
1047+
clear(v.blockData)
10501048
}
10511049
} else {
10521050
// Add data to the predicted macroblock
10531051
if n == 1 {
10541052
value := (s[0] + 128) >> 8
1055-
addValueToDest(value, d, di, scan)
1053+
addValueToDest(byte(value), d, di, scan)
10561054
s[0] = 0
10571055
} else {
1058-
v.idct(s)
1056+
v.idct(s, n)
10591057
addBlockToDest(s, d, di, scan)
1060-
for i := range v.blockData {
1061-
v.blockData[i] = 0
1062-
}
1058+
clear(v.blockData)
10631059
}
10641060
}
10651061
}
10661062

1067-
func (v *Video) idct(block []int) {
1063+
func (v *Video) idct(block []int, maxIndex int) {
10681064
// See http://vsr.informatik.tu-chemnitz.de/~jan/MPEG/HTML/IDCT.html for more info.
10691065

10701066
var b1, b3, b4, b6, b7, tmp1, tmp2, m0,
10711067
x0, x1, x2, x3, x4, y3, y4, y5, y6, y7 int
10721068

1073-
// Transform columns
1074-
for i := 0; i < 8; i++ {
1075-
b1 = block[4*8+i]
1076-
b3 = block[2*8+i] + block[6*8+i]
1077-
b4 = block[5*8+i] - block[3*8+i]
1078-
tmp1 = block[1*8+i] + block[7*8+i]
1079-
tmp2 = block[3*8+i] + block[5*8+i]
1080-
b6 = block[1*8+i] - block[7*8+i]
1081-
b7 = tmp1 + tmp2
1082-
m0 = block[0*8+i]
1083-
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1084-
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1085-
x1 = m0 - b1
1086-
x2 = (((block[2*8+i]-block[6*8+i])*362 + 128) >> 8) - b3
1087-
x3 = m0 + b1
1088-
y3 = x1 + x2
1089-
y4 = x3 + b3
1090-
y5 = x1 - x2
1091-
y6 = x3 - b3
1092-
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1093-
block[0*8+i] = b7 + y4
1094-
block[1*8+i] = x4 + y3
1095-
block[2*8+i] = y5 - x0
1096-
block[3*8+i] = y6 - y7
1097-
block[4*8+i] = y6 + y7
1098-
block[5*8+i] = x0 + y5
1099-
block[6*8+i] = y3 - x4
1100-
block[7*8+i] = y4 - b7
1101-
}
1069+
if maxIndex < 10 { // much simpler calculations when the matrix is mostly empty
1070+
// max column is 4th and max row is 4th (at least 3/4 of the matrix is empty)
1071+
for i := 0; i < 4; i++ { // only need to do 4 columns because the rest result in all 0's
1072+
b1 = 0
1073+
b3 = block[2*8+i]
1074+
b4 = 0 - block[3*8+i]
1075+
tmp1 = block[1*8+i]
1076+
tmp2 = block[3*8+i]
1077+
b6 = block[1*8+i]
1078+
b7 = tmp1 + tmp2
1079+
m0 = block[0*8+i]
1080+
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1081+
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1082+
x1 = m0 - b1
1083+
x2 = (((block[2*8+i])*362 + 128) >> 8) - b3
1084+
x3 = m0 + b1
1085+
y3 = x1 + x2
1086+
y4 = x3 + b3
1087+
y5 = x1 - x2
1088+
y6 = x3 - b3
1089+
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1090+
block[0*8+i] = b7 + y4
1091+
block[1*8+i] = x4 + y3
1092+
block[2*8+i] = y5 - x0
1093+
block[3*8+i] = y6 - y7
1094+
block[4*8+i] = y6 + y7
1095+
block[5*8+i] = x0 + y5
1096+
block[6*8+i] = y3 - x4
1097+
block[7*8+i] = y4 - b7
1098+
}
1099+
1100+
// Transform rows
1101+
for i := 0; i < 64; i += 8 {
1102+
b1 = 0
1103+
b3 = block[2+i]
1104+
b4 = 0 - block[3+i]
1105+
tmp1 = block[1+i]
1106+
tmp2 = block[3+i]
1107+
b6 = block[1+i]
1108+
b7 = tmp1 + tmp2
1109+
m0 = block[0+i]
1110+
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1111+
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1112+
x1 = m0 - b1
1113+
x2 = (((block[2+i])*362 + 128) >> 8) - b3
1114+
x3 = m0 + b1
1115+
y3 = x1 + x2
1116+
y4 = x3 + b3
1117+
y5 = x1 - x2
1118+
y6 = x3 - b3
1119+
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1120+
block[0+i] = (b7 + y4 + 128) >> 8
1121+
block[1+i] = (x4 + y3 + 128) >> 8
1122+
block[2+i] = (y5 - x0 + 128) >> 8
1123+
block[3+i] = (y6 - y7 + 128) >> 8
1124+
block[4+i] = (y6 + y7 + 128) >> 8
1125+
block[5+i] = (x0 + y5 + 128) >> 8
1126+
block[6+i] = (y3 - x4 + 128) >> 8
1127+
block[7+i] = (y4 - b7 + 128) >> 8
1128+
}
1129+
} else {
1130+
// Transform columns
1131+
for i := 0; i < 8; i++ {
1132+
b1 = block[4*8+i]
1133+
b3 = block[2*8+i] + block[6*8+i]
1134+
b4 = block[5*8+i] - block[3*8+i]
1135+
tmp1 = block[1*8+i] + block[7*8+i]
1136+
tmp2 = block[3*8+i] + block[5*8+i]
1137+
b6 = block[1*8+i] - block[7*8+i]
1138+
b7 = tmp1 + tmp2
1139+
m0 = block[0*8+i]
1140+
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1141+
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1142+
x1 = m0 - b1
1143+
x2 = (((block[2*8+i]-block[6*8+i])*362 + 128) >> 8) - b3
1144+
x3 = m0 + b1
1145+
y3 = x1 + x2
1146+
y4 = x3 + b3
1147+
y5 = x1 - x2
1148+
y6 = x3 - b3
1149+
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1150+
block[0*8+i] = b7 + y4
1151+
block[1*8+i] = x4 + y3
1152+
block[2*8+i] = y5 - x0
1153+
block[3*8+i] = y6 - y7
1154+
block[4*8+i] = y6 + y7
1155+
block[5*8+i] = x0 + y5
1156+
block[6*8+i] = y3 - x4
1157+
block[7*8+i] = y4 - b7
1158+
}
11021159

1103-
// Transform rows
1104-
for i := 0; i < 64; i += 8 {
1105-
b1 = block[4+i]
1106-
b3 = block[2+i] + block[6+i]
1107-
b4 = block[5+i] - block[3+i]
1108-
tmp1 = block[1+i] + block[7+i]
1109-
tmp2 = block[3+i] + block[5+i]
1110-
b6 = block[1+i] - block[7+i]
1111-
b7 = tmp1 + tmp2
1112-
m0 = block[0+i]
1113-
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1114-
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1115-
x1 = m0 - b1
1116-
x2 = (((block[2+i]-block[6+i])*362 + 128) >> 8) - b3
1117-
x3 = m0 + b1
1118-
y3 = x1 + x2
1119-
y4 = x3 + b3
1120-
y5 = x1 - x2
1121-
y6 = x3 - b3
1122-
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1123-
block[0+i] = (b7 + y4 + 128) >> 8
1124-
block[1+i] = (x4 + y3 + 128) >> 8
1125-
block[2+i] = (y5 - x0 + 128) >> 8
1126-
block[3+i] = (y6 - y7 + 128) >> 8
1127-
block[4+i] = (y6 + y7 + 128) >> 8
1128-
block[5+i] = (x0 + y5 + 128) >> 8
1129-
block[6+i] = (y3 - x4 + 128) >> 8
1130-
block[7+i] = (y4 - b7 + 128) >> 8
1160+
// Transform rows
1161+
for i := 0; i < 64; i += 8 {
1162+
b1 = block[4+i]
1163+
b3 = block[2+i] + block[6+i]
1164+
b4 = block[5+i] - block[3+i]
1165+
tmp1 = block[1+i] + block[7+i]
1166+
tmp2 = block[3+i] + block[5+i]
1167+
b6 = block[1+i] - block[7+i]
1168+
b7 = tmp1 + tmp2
1169+
m0 = block[0+i]
1170+
x4 = ((b6*473 - b4*196 + 128) >> 8) - b7
1171+
x0 = x4 - (((tmp1-tmp2)*362 + 128) >> 8)
1172+
x1 = m0 - b1
1173+
x2 = (((block[2+i]-block[6+i])*362 + 128) >> 8) - b3
1174+
x3 = m0 + b1
1175+
y3 = x1 + x2
1176+
y4 = x3 + b3
1177+
y5 = x1 - x2
1178+
y6 = x3 - b3
1179+
y7 = -x0 - ((b4*473 + b6*196 + 128) >> 8)
1180+
block[0+i] = (b7 + y4 + 128) >> 8
1181+
block[1+i] = (x4 + y3 + 128) >> 8
1182+
block[2+i] = (y5 - x0 + 128) >> 8
1183+
block[3+i] = (y6 - y7 + 128) >> 8
1184+
block[4+i] = (y6 + y7 + 128) >> 8
1185+
block[5+i] = (x0 + y5 + 128) >> 8
1186+
block[6+i] = (y3 - x4 + 128) >> 8
1187+
block[7+i] = (y4 - b7 + 128) >> 8
1188+
}
11311189
}
11321190
}
11331191

@@ -1174,7 +1232,7 @@ func addBlockToDest(block []int, dest []byte, index, scan int) {
11741232
}
11751233
}
11761234

1177-
func copyValueToDest(value int, dest []byte, index, scan int) {
1235+
func copyValueToDest(value byte, dest []byte, index, scan int) {
11781236
val := clamp(value)
11791237
for n := 0; n < 64; n += 8 {
11801238
dest[index+0] = val
@@ -1190,16 +1248,16 @@ func copyValueToDest(value int, dest []byte, index, scan int) {
11901248
}
11911249
}
11921250

1193-
func addValueToDest(value int, dest []byte, index, scan int) {
1251+
func addValueToDest(value byte, dest []byte, index, scan int) {
11941252
for n := 0; n < 64; n += 8 {
1195-
dest[index+0] = clamp(int(dest[index+0]) + value)
1196-
dest[index+1] = clamp(int(dest[index+1]) + value)
1197-
dest[index+2] = clamp(int(dest[index+2]) + value)
1198-
dest[index+3] = clamp(int(dest[index+3]) + value)
1199-
dest[index+4] = clamp(int(dest[index+4]) + value)
1200-
dest[index+5] = clamp(int(dest[index+5]) + value)
1201-
dest[index+6] = clamp(int(dest[index+6]) + value)
1202-
dest[index+7] = clamp(int(dest[index+7]) + value)
1253+
dest[index+0] = clamp(dest[index+0] + value)
1254+
dest[index+1] = clamp(dest[index+1] + value)
1255+
dest[index+2] = clamp(dest[index+2] + value)
1256+
dest[index+3] = clamp(dest[index+3] + value)
1257+
dest[index+4] = clamp(dest[index+4] + value)
1258+
dest[index+5] = clamp(dest[index+5] + value)
1259+
dest[index+6] = clamp(dest[index+6] + value)
1260+
dest[index+7] = clamp(dest[index+7] + value)
12031261

12041262
index += scan + 8
12051263
}
@@ -1213,14 +1271,10 @@ func abs(x int) int {
12131271
return x
12141272
}
12151273

1216-
func clamp(n int) byte {
1217-
if n > 255 {
1218-
n = 255
1219-
} else if n < 0 {
1220-
n = 0
1221-
}
1274+
type number interface{ int | uint8 }
12221275

1223-
return byte(n)
1276+
func clamp[T number](n T) byte {
1277+
return byte(min(max(n, 0), 255))
12241278
}
12251279

12261280
func startIsSlice(c int) bool {

0 commit comments

Comments
 (0)