@@ -1039,95 +1039,153 @@ func (v *Video) decodeBlock(block int) {
10391039 // Overwrite (no prediction)
10401040 if n == 1 {
10411041 value := (s [0 ] + 128 ) >> 8
1042- copyValueToDest (int ( clamp (value ) ), d , di , scan )
1042+ copyValueToDest (clamp (value ), d , di , scan )
10431043 s [0 ] = 0
10441044 } else {
1045- v .idct (s )
1045+ v .idct (s , n )
10461046 copyBlockToDest (s , d , di , scan )
1047- for i := range v .blockData {
1048- v .blockData [i ] = 0
1049- }
1047+ clear (v .blockData )
10501048 }
10511049 } else {
10521050 // Add data to the predicted macroblock
10531051 if n == 1 {
10541052 value := (s [0 ] + 128 ) >> 8
1055- addValueToDest (value , d , di , scan )
1053+ addValueToDest (byte ( value ) , d , di , scan )
10561054 s [0 ] = 0
10571055 } else {
1058- v .idct (s )
1056+ v .idct (s , n )
10591057 addBlockToDest (s , d , di , scan )
1060- for i := range v .blockData {
1061- v .blockData [i ] = 0
1062- }
1058+ clear (v .blockData )
10631059 }
10641060 }
10651061}
10661062
1067- func (v * Video ) idct (block []int ) {
1063+ func (v * Video ) idct (block []int , maxIndex int ) {
10681064 // See http://vsr.informatik.tu-chemnitz.de/~jan/MPEG/HTML/IDCT.html for more info.
10691065
10701066 var b1 , b3 , b4 , b6 , b7 , tmp1 , tmp2 , m0 ,
10711067 x0 , x1 , x2 , x3 , x4 , y3 , y4 , y5 , y6 , y7 int
10721068
1073- // Transform columns
1074- for i := 0 ; i < 8 ; i ++ {
1075- b1 = block [4 * 8 + i ]
1076- b3 = block [2 * 8 + i ] + block [6 * 8 + i ]
1077- b4 = block [5 * 8 + i ] - block [3 * 8 + i ]
1078- tmp1 = block [1 * 8 + i ] + block [7 * 8 + i ]
1079- tmp2 = block [3 * 8 + i ] + block [5 * 8 + i ]
1080- b6 = block [1 * 8 + i ] - block [7 * 8 + i ]
1081- b7 = tmp1 + tmp2
1082- m0 = block [0 * 8 + i ]
1083- x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1084- x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1085- x1 = m0 - b1
1086- x2 = (((block [2 * 8 + i ]- block [6 * 8 + i ])* 362 + 128 ) >> 8 ) - b3
1087- x3 = m0 + b1
1088- y3 = x1 + x2
1089- y4 = x3 + b3
1090- y5 = x1 - x2
1091- y6 = x3 - b3
1092- y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1093- block [0 * 8 + i ] = b7 + y4
1094- block [1 * 8 + i ] = x4 + y3
1095- block [2 * 8 + i ] = y5 - x0
1096- block [3 * 8 + i ] = y6 - y7
1097- block [4 * 8 + i ] = y6 + y7
1098- block [5 * 8 + i ] = x0 + y5
1099- block [6 * 8 + i ] = y3 - x4
1100- block [7 * 8 + i ] = y4 - b7
1101- }
1069+ if maxIndex < 10 { // much simpler calculations when the matrix is mostly empty
1070+ // max column is 4th and max row is 4th (at least 3/4 of the matrix is empty)
1071+ for i := 0 ; i < 4 ; i ++ { // only need to do 4 columns because the rest result in all 0'sAdd commentMore actions
1072+ b1 = 0
1073+ b3 = block [2 * 8 + i ]
1074+ b4 = 0 - block [3 * 8 + i ]
1075+ tmp1 = block [1 * 8 + i ]
1076+ tmp2 = block [3 * 8 + i ]
1077+ b6 = block [1 * 8 + i ]
1078+ b7 = tmp1 + tmp2
1079+ m0 = block [0 * 8 + i ]
1080+ x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1081+ x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1082+ x1 = m0 - b1
1083+ x2 = (((block [2 * 8 + i ])* 362 + 128 ) >> 8 ) - b3
1084+ x3 = m0 + b1
1085+ y3 = x1 + x2
1086+ y4 = x3 + b3
1087+ y5 = x1 - x2
1088+ y6 = x3 - b3
1089+ y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1090+ block [0 * 8 + i ] = b7 + y4
1091+ block [1 * 8 + i ] = x4 + y3
1092+ block [2 * 8 + i ] = y5 - x0
1093+ block [3 * 8 + i ] = y6 - y7
1094+ block [4 * 8 + i ] = y6 + y7
1095+ block [5 * 8 + i ] = x0 + y5
1096+ block [6 * 8 + i ] = y3 - x4
1097+ block [7 * 8 + i ] = y4 - b7
1098+ }
1099+
1100+ // Transform rows
1101+ for i := 0 ; i < 64 ; i += 8 {
1102+ b1 = 0
1103+ b3 = block [2 + i ]
1104+ b4 = 0 - block [3 + i ]
1105+ tmp1 = block [1 + i ]
1106+ tmp2 = block [3 + i ]
1107+ b6 = block [1 + i ]
1108+ b7 = tmp1 + tmp2
1109+ m0 = block [0 + i ]
1110+ x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1111+ x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1112+ x1 = m0 - b1
1113+ x2 = (((block [2 + i ])* 362 + 128 ) >> 8 ) - b3
1114+ x3 = m0 + b1
1115+ y3 = x1 + x2
1116+ y4 = x3 + b3
1117+ y5 = x1 - x2
1118+ y6 = x3 - b3
1119+ y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1120+ block [0 + i ] = (b7 + y4 + 128 ) >> 8
1121+ block [1 + i ] = (x4 + y3 + 128 ) >> 8
1122+ block [2 + i ] = (y5 - x0 + 128 ) >> 8
1123+ block [3 + i ] = (y6 - y7 + 128 ) >> 8
1124+ block [4 + i ] = (y6 + y7 + 128 ) >> 8
1125+ block [5 + i ] = (x0 + y5 + 128 ) >> 8
1126+ block [6 + i ] = (y3 - x4 + 128 ) >> 8
1127+ block [7 + i ] = (y4 - b7 + 128 ) >> 8
1128+ }
1129+ } else {
1130+ // Transform columns
1131+ for i := 0 ; i < 8 ; i ++ {
1132+ b1 = block [4 * 8 + i ]
1133+ b3 = block [2 * 8 + i ] + block [6 * 8 + i ]
1134+ b4 = block [5 * 8 + i ] - block [3 * 8 + i ]
1135+ tmp1 = block [1 * 8 + i ] + block [7 * 8 + i ]
1136+ tmp2 = block [3 * 8 + i ] + block [5 * 8 + i ]
1137+ b6 = block [1 * 8 + i ] - block [7 * 8 + i ]
1138+ b7 = tmp1 + tmp2
1139+ m0 = block [0 * 8 + i ]
1140+ x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1141+ x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1142+ x1 = m0 - b1
1143+ x2 = (((block [2 * 8 + i ]- block [6 * 8 + i ])* 362 + 128 ) >> 8 ) - b3
1144+ x3 = m0 + b1
1145+ y3 = x1 + x2
1146+ y4 = x3 + b3
1147+ y5 = x1 - x2
1148+ y6 = x3 - b3
1149+ y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1150+ block [0 * 8 + i ] = b7 + y4
1151+ block [1 * 8 + i ] = x4 + y3
1152+ block [2 * 8 + i ] = y5 - x0
1153+ block [3 * 8 + i ] = y6 - y7
1154+ block [4 * 8 + i ] = y6 + y7
1155+ block [5 * 8 + i ] = x0 + y5
1156+ block [6 * 8 + i ] = y3 - x4
1157+ block [7 * 8 + i ] = y4 - b7
1158+ }
11021159
1103- // Transform rows
1104- for i := 0 ; i < 64 ; i += 8 {
1105- b1 = block [4 + i ]
1106- b3 = block [2 + i ] + block [6 + i ]
1107- b4 = block [5 + i ] - block [3 + i ]
1108- tmp1 = block [1 + i ] + block [7 + i ]
1109- tmp2 = block [3 + i ] + block [5 + i ]
1110- b6 = block [1 + i ] - block [7 + i ]
1111- b7 = tmp1 + tmp2
1112- m0 = block [0 + i ]
1113- x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1114- x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1115- x1 = m0 - b1
1116- x2 = (((block [2 + i ]- block [6 + i ])* 362 + 128 ) >> 8 ) - b3
1117- x3 = m0 + b1
1118- y3 = x1 + x2
1119- y4 = x3 + b3
1120- y5 = x1 - x2
1121- y6 = x3 - b3
1122- y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1123- block [0 + i ] = (b7 + y4 + 128 ) >> 8
1124- block [1 + i ] = (x4 + y3 + 128 ) >> 8
1125- block [2 + i ] = (y5 - x0 + 128 ) >> 8
1126- block [3 + i ] = (y6 - y7 + 128 ) >> 8
1127- block [4 + i ] = (y6 + y7 + 128 ) >> 8
1128- block [5 + i ] = (x0 + y5 + 128 ) >> 8
1129- block [6 + i ] = (y3 - x4 + 128 ) >> 8
1130- block [7 + i ] = (y4 - b7 + 128 ) >> 8
1160+ // Transform rows
1161+ for i := 0 ; i < 64 ; i += 8 {
1162+ b1 = block [4 + i ]
1163+ b3 = block [2 + i ] + block [6 + i ]
1164+ b4 = block [5 + i ] - block [3 + i ]
1165+ tmp1 = block [1 + i ] + block [7 + i ]
1166+ tmp2 = block [3 + i ] + block [5 + i ]
1167+ b6 = block [1 + i ] - block [7 + i ]
1168+ b7 = tmp1 + tmp2
1169+ m0 = block [0 + i ]
1170+ x4 = ((b6 * 473 - b4 * 196 + 128 ) >> 8 ) - b7
1171+ x0 = x4 - (((tmp1 - tmp2 )* 362 + 128 ) >> 8 )
1172+ x1 = m0 - b1
1173+ x2 = (((block [2 + i ]- block [6 + i ])* 362 + 128 ) >> 8 ) - b3
1174+ x3 = m0 + b1
1175+ y3 = x1 + x2
1176+ y4 = x3 + b3
1177+ y5 = x1 - x2
1178+ y6 = x3 - b3
1179+ y7 = - x0 - ((b4 * 473 + b6 * 196 + 128 ) >> 8 )
1180+ block [0 + i ] = (b7 + y4 + 128 ) >> 8
1181+ block [1 + i ] = (x4 + y3 + 128 ) >> 8
1182+ block [2 + i ] = (y5 - x0 + 128 ) >> 8
1183+ block [3 + i ] = (y6 - y7 + 128 ) >> 8
1184+ block [4 + i ] = (y6 + y7 + 128 ) >> 8
1185+ block [5 + i ] = (x0 + y5 + 128 ) >> 8
1186+ block [6 + i ] = (y3 - x4 + 128 ) >> 8
1187+ block [7 + i ] = (y4 - b7 + 128 ) >> 8
1188+ }
11311189 }
11321190}
11331191
@@ -1174,7 +1232,7 @@ func addBlockToDest(block []int, dest []byte, index, scan int) {
11741232 }
11751233}
11761234
1177- func copyValueToDest (value int , dest []byte , index , scan int ) {
1235+ func copyValueToDest (value byte , dest []byte , index , scan int ) {
11781236 val := clamp (value )
11791237 for n := 0 ; n < 64 ; n += 8 {
11801238 dest [index + 0 ] = val
@@ -1190,16 +1248,16 @@ func copyValueToDest(value int, dest []byte, index, scan int) {
11901248 }
11911249}
11921250
1193- func addValueToDest (value int , dest []byte , index , scan int ) {
1251+ func addValueToDest (value byte , dest []byte , index , scan int ) {
11941252 for n := 0 ; n < 64 ; n += 8 {
1195- dest [index + 0 ] = clamp (int ( dest [index + 0 ]) + value )
1196- dest [index + 1 ] = clamp (int ( dest [index + 1 ]) + value )
1197- dest [index + 2 ] = clamp (int ( dest [index + 2 ]) + value )
1198- dest [index + 3 ] = clamp (int ( dest [index + 3 ]) + value )
1199- dest [index + 4 ] = clamp (int ( dest [index + 4 ]) + value )
1200- dest [index + 5 ] = clamp (int ( dest [index + 5 ]) + value )
1201- dest [index + 6 ] = clamp (int ( dest [index + 6 ]) + value )
1202- dest [index + 7 ] = clamp (int ( dest [index + 7 ]) + value )
1253+ dest [index + 0 ] = clamp (dest [index + 0 ] + value )
1254+ dest [index + 1 ] = clamp (dest [index + 1 ] + value )
1255+ dest [index + 2 ] = clamp (dest [index + 2 ] + value )
1256+ dest [index + 3 ] = clamp (dest [index + 3 ] + value )
1257+ dest [index + 4 ] = clamp (dest [index + 4 ] + value )
1258+ dest [index + 5 ] = clamp (dest [index + 5 ] + value )
1259+ dest [index + 6 ] = clamp (dest [index + 6 ] + value )
1260+ dest [index + 7 ] = clamp (dest [index + 7 ] + value )
12031261
12041262 index += scan + 8
12051263 }
@@ -1213,14 +1271,10 @@ func abs(x int) int {
12131271 return x
12141272}
12151273
// number lists the integer types that clamp accepts.
type number interface{ int | uint8 }

// clamp converts n to a byte, saturating at the ends of the range 0..255:
// anything below 0 yields 0 and anything above 255 yields 255.
//
// For a uint8 argument neither bound can be exceeded, so the value is
// returned unchanged.
func clamp[T number](n T) byte {
	switch {
	case n < 0:
		return 0
	case n > 255:
		return 255
	}
	return byte(n)
}
12251279
12261280func startIsSlice (c int ) bool {
0 commit comments