Skip to content

Commit 2b57d16

Browse files
LIBPNG Upstream authored and blowekamp committed
PNG 2024-09-12 (a7e0c0c8)
Run the UpdateFromUpstream.sh script to extract upstream PNG
using the following shell commands.

$ git archive --prefix=upstream-png/ a7e0c0c8 -- png*.c png*.h arm/* LICENSE scripts/pnglibconf.h.prebuilt | tar x
$ git shortlog --perl-regexp --author='^((?!Kitware Robot).*)$' --no-merges --abbrev=8 --format='%h %s' 7a614829..a7e0c0c8

Ben Wagner (1):
      40878fd6 fix: Restore STDERR in pngtest.c

Benjamin Buch (2):
      7b888092 build: Mark the installed libpng headers as system headers in CMake
      0e204b73 build: Add a CMake config file compatible with the FindPNG module

Cosmin Truta (35):
      8120345c ci: Update (again) the ci_verify_*.sh scripts; update .shellcheckrc
      1cdde118 build: Update the makefiles for the benefit of cross-compilation
      63c715b7 ci: Remove the workaround for CI_AR from ci_verify_makefiles.sh
      fb65436e ci: Fix the check of `PNG_LIBPNG_VER_BUILD` in ci_verify_version.sh
      f74d5ecc ci: Pacify shellcheck version 0.8 and apply other linting improvements
      42c8fcbf Add a GitHub Action for linting
      4191872d chore: Update the .editorconfig files and pacify editorconfig-checker
      0fa3c0f6 chore: Clean up the spurious uses of `sizeof(png_byte)`; fix the manual
      72c4520d ci: Allow the user to force an in-tree cleanup before verification
      dddaf0c6 ci: Fix the reporting in ci_lint.sh
      6b5a2da0 Fix "ci: Fix the reporting in ci_lint.sh"
      aa95dee6 build: Update the CMake build options PNG_TOOLS and PNG_FRAMEWORK
      d165a20a build: Improve the search for an AWK processor in the CMake build
      29e31f62 build: Add an explicit declaration of the AWK variable to configure.ac
      14a348dd build: Checking for compiler support of LoongArch LSX should be guarded
      ec2e58c1 pngexif: Import pngexifinfo as an externally-contributed project
      e05ebfba doc: Update the README file
      3b9a73ed doc: Review the libpng history and update scripts/cmake/AUTHORS.md
      89023102 chore: Delete comments and config settings and stuff from here and there
      80691b9d test: Fix a compiler warning in pngtest.c
      e1fa61da ci: Add the libpng release tags to the list of exclusions
      ed217e3e Release libpng version 1.6.43
      f1848a3b Bump version to 1.6.44.git
      e4a31f02 arm: Add a placeholder file in lieu of the former `filter_neon.S`
      532fec02 ci: Fix the verification of the msys2 toolchain on AppVeyor CI
      33ef48b6 cmake: Fix the handling of PNG_HARDWARE_OPTIMIZATIONS on FreeBSD/amd64
      7e18d142 ci: Add the targets/ subdirectory to facilitate cross-platform testing
      22208658 chore: Pacify editorconfig-checker version 3.0
      fcdec9c6 chore: Delete contrib/tools/chkfmt.sh
      53a7f4e3 ci: Simplify the Travis CI configuration matrix
      1964d560 Deprecate PNGARG and remove all of its remaining uses
      843dbb75 Revert "cmake: Fix an error in the declaration of target include directories"
      3117b5f9 oss-fuzz: Update the README file, the Docker file and the build script
      88ab4f59 chore: Rerun `./autogen.sh --maintainer`
      f5e92d76 Release libpng version 1.6.44

Dan Rosser (1):
      e7ba9c0d build: Fix a CMake build regression introduced in version 1.6.41

Eric Riff (1):
      43d6ad3e cmake: Honor CMAKE_SYSROOT if set

Green Sky (2):
      1d1cc9ae cmake: Fix an error in the declaration of target include directories
      8cc22a8c cmake: Fix an error in the declaration of target include directories

John Bowler (3):
      ceed2a3c SECURITY: disable build of filter_neon.S on arm
      20f819c2 fix: Remove cHRM check to accomodate ACES AP1
      5a7e87fc fix: Prevent overflow in chromaticity calculations

Mikhail Khachayants (1):
      68ba4f1f oss-fuzz: Add fuzzing targets for simplified READ API

Ross Burton (1):
      9e538750 arm: Remove obsolete assembler implementation filter_neon.S

Change-Id: I5a75ec7e571b5b316397bd60c5d4e1038945b447
1 parent 55292c9 commit 2b57d16

File tree

9 files changed

+144
-369
lines changed

9 files changed

+144
-369
lines changed

arm/filter_neon.S

Lines changed: 37 additions & 229 deletions
Original file line numberDiff line numberDiff line change
@@ -1,253 +1,61 @@
11

2-
/* filter_neon.S - NEON optimised filter functions
2+
/* filter_neon.S - placeholder file
33
*
4-
* Copyright (c) 2018 Cosmin Truta
5-
* Copyright (c) 2014,2017 Glenn Randers-Pehrson
6-
* Written by Mans Rullgard, 2011.
4+
* Copyright (c) 2024 Cosmin Truta
75
*
86
* This code is released under the libpng license.
97
* For conditions of distribution and use, see the disclaimer
108
* and license in png.h
119
*/
1210

11+
/* IMPORTANT NOTE:
12+
*
13+
* Historically, the hand-coded assembler implementation of Neon optimizations
14+
* in this module had not been in sync with the intrinsics-based implementation
15+
* in filter_neon_intrinsics.c and palette_neon_intrinsics.c, at least since
16+
* the introduction of riffled palette optimizations. Moreover, the assembler
17+
* code used to work on 32-bit ARM only, and it caused problems, even if empty,
18+
* on 64-bit ARM.
19+
*
20+
* All references to this module from our internal build scripts and projects
21+
* have been removed.
22+
*
23+
* For the external projects that might still expect this module to be present,
24+
* we leave this stub in place, for the remaining lifetime of libpng-1.6.x.
25+
* Everything should continue to function normally, as long as there are no
26+
* deliberate attempts to use the old hand-made assembler code. A build error
27+
* will be raised otherwise.
28+
*/
29+
1330
/* This is required to get the symbol renames, which are #defines, and the
1431
* definitions (or not) of PNG_ARM_NEON_OPT and PNG_ARM_NEON_IMPLEMENTATION.
1532
*/
1633
#define PNG_VERSION_INFO_ONLY
1734
#include "../pngpriv.h"
1835

19-
#if (defined(__linux__) || defined(__FreeBSD__)) && defined(__ELF__)
20-
.section .note.GNU-stack,"",%progbits /* mark stack as non-executable */
21-
#endif
22-
2336
#ifdef PNG_READ_SUPPORTED
24-
25-
/* Assembler NEON support - only works for 32-bit ARM (i.e. it does not work for
26-
* ARM64). The code in arm/filter_neon_intrinsics.c supports ARM64, however it
27-
* only works if -mfpu=neon is specified on the GCC command line. See pngpriv.h
28-
* for the logic which sets PNG_USE_ARM_NEON_ASM:
29-
*/
3037
#if PNG_ARM_NEON_IMPLEMENTATION == 2 /* hand-coded assembler */
31-
3238
#if PNG_ARM_NEON_OPT > 0
3339

34-
#ifdef __ELF__
35-
# define ELF
40+
#if defined(__clang__)
41+
#define GNUC_VERSION 0 /* not gcc, although it might pretend to be */
42+
#elif defined(__GNUC__)
43+
#define GNUC_MAJOR (__GNUC__ + 0)
44+
#define GNUC_MINOR (__GNUC_MINOR__ + 0)
45+
#define GNUC_PATCHLEVEL (__GNUC_PATCHLEVEL__ + 0)
46+
#define GNUC_VERSION (GNUC_MAJOR * 10000 + GNUC_MINOR * 100 + GNUC_PATCHLEVEL)
3647
#else
37-
# define ELF @
48+
#define GNUC_VERSION 0 /* not gcc */
3849
#endif
3950

40-
.arch armv7-a
41-
.fpu neon
42-
43-
.macro func name, export=0
44-
.macro endfunc
45-
ELF .size \name, . - \name
46-
.endfunc
47-
.purgem endfunc
48-
.endm
49-
.text
50-
51-
/* Explicitly specifying alignment here because some versions of
52-
* GAS don't align code correctly. This is harmless in correctly
53-
* written versions of GAS.
54-
*/
55-
.align 2
56-
57-
.if \export
58-
.global \name
59-
.endif
60-
ELF .type \name, STT_FUNC
61-
.func \name
62-
\name:
63-
.endm
64-
65-
func png_read_filter_row_sub4_neon, export=1
66-
ldr r3, [r0, #4] @ rowbytes
67-
vmov.i8 d3, #0
68-
1:
69-
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
70-
vadd.u8 d0, d3, d4
71-
vadd.u8 d1, d0, d5
72-
vadd.u8 d2, d1, d6
73-
vadd.u8 d3, d2, d7
74-
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
75-
subs r3, r3, #16
76-
bgt 1b
77-
78-
bx lr
79-
endfunc
80-
81-
func png_read_filter_row_sub3_neon, export=1
82-
ldr r3, [r0, #4] @ rowbytes
83-
vmov.i8 d3, #0
84-
mov r0, r1
85-
mov r2, #3
86-
mov r12, #12
87-
vld1.8 {q11}, [r0], r12
88-
1:
89-
vext.8 d5, d22, d23, #3
90-
vadd.u8 d0, d3, d22
91-
vext.8 d6, d22, d23, #6
92-
vadd.u8 d1, d0, d5
93-
vext.8 d7, d23, d23, #1
94-
vld1.8 {q11}, [r0], r12
95-
vst1.32 {d0[0]}, [r1,:32], r2
96-
vadd.u8 d2, d1, d6
97-
vst1.32 {d1[0]}, [r1], r2
98-
vadd.u8 d3, d2, d7
99-
vst1.32 {d2[0]}, [r1], r2
100-
vst1.32 {d3[0]}, [r1], r2
101-
subs r3, r3, #12
102-
bgt 1b
103-
104-
bx lr
105-
endfunc
106-
107-
func png_read_filter_row_up_neon, export=1
108-
ldr r3, [r0, #4] @ rowbytes
109-
1:
110-
vld1.8 {q0}, [r1,:128]
111-
vld1.8 {q1}, [r2,:128]!
112-
vadd.u8 q0, q0, q1
113-
vst1.8 {q0}, [r1,:128]!
114-
subs r3, r3, #16
115-
bgt 1b
116-
117-
bx lr
118-
endfunc
119-
120-
func png_read_filter_row_avg4_neon, export=1
121-
ldr r12, [r0, #4] @ rowbytes
122-
vmov.i8 d3, #0
123-
1:
124-
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
125-
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
126-
vhadd.u8 d0, d3, d16
127-
vadd.u8 d0, d0, d4
128-
vhadd.u8 d1, d0, d17
129-
vadd.u8 d1, d1, d5
130-
vhadd.u8 d2, d1, d18
131-
vadd.u8 d2, d2, d6
132-
vhadd.u8 d3, d2, d19
133-
vadd.u8 d3, d3, d7
134-
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
135-
subs r12, r12, #16
136-
bgt 1b
137-
138-
bx lr
139-
endfunc
140-
141-
func png_read_filter_row_avg3_neon, export=1
142-
push {r4,lr}
143-
ldr r12, [r0, #4] @ rowbytes
144-
vmov.i8 d3, #0
145-
mov r0, r1
146-
mov r4, #3
147-
mov lr, #12
148-
vld1.8 {q11}, [r0], lr
149-
1:
150-
vld1.8 {q10}, [r2], lr
151-
vext.8 d5, d22, d23, #3
152-
vhadd.u8 d0, d3, d20
153-
vext.8 d17, d20, d21, #3
154-
vadd.u8 d0, d0, d22
155-
vext.8 d6, d22, d23, #6
156-
vhadd.u8 d1, d0, d17
157-
vext.8 d18, d20, d21, #6
158-
vadd.u8 d1, d1, d5
159-
vext.8 d7, d23, d23, #1
160-
vld1.8 {q11}, [r0], lr
161-
vst1.32 {d0[0]}, [r1,:32], r4
162-
vhadd.u8 d2, d1, d18
163-
vst1.32 {d1[0]}, [r1], r4
164-
vext.8 d19, d21, d21, #1
165-
vadd.u8 d2, d2, d6
166-
vhadd.u8 d3, d2, d19
167-
vst1.32 {d2[0]}, [r1], r4
168-
vadd.u8 d3, d3, d7
169-
vst1.32 {d3[0]}, [r1], r4
170-
subs r12, r12, #12
171-
bgt 1b
172-
173-
pop {r4,pc}
174-
endfunc
175-
176-
.macro paeth rx, ra, rb, rc
177-
vaddl.u8 q12, \ra, \rb @ a + b
178-
vaddl.u8 q15, \rc, \rc @ 2*c
179-
vabdl.u8 q13, \rb, \rc @ pa
180-
vabdl.u8 q14, \ra, \rc @ pb
181-
vabd.u16 q15, q12, q15 @ pc
182-
vcle.u16 q12, q13, q14 @ pa <= pb
183-
vcle.u16 q13, q13, q15 @ pa <= pc
184-
vcle.u16 q14, q14, q15 @ pb <= pc
185-
vand q12, q12, q13 @ pa <= pb && pa <= pc
186-
vmovn.u16 d28, q14
187-
vmovn.u16 \rx, q12
188-
vbsl d28, \rb, \rc
189-
vbsl \rx, \ra, d28
190-
.endm
191-
192-
func png_read_filter_row_paeth4_neon, export=1
193-
ldr r12, [r0, #4] @ rowbytes
194-
vmov.i8 d3, #0
195-
vmov.i8 d20, #0
196-
1:
197-
vld4.32 {d4[],d5[],d6[],d7[]}, [r1,:128]
198-
vld4.32 {d16[],d17[],d18[],d19[]},[r2,:128]!
199-
paeth d0, d3, d16, d20
200-
vadd.u8 d0, d0, d4
201-
paeth d1, d0, d17, d16
202-
vadd.u8 d1, d1, d5
203-
paeth d2, d1, d18, d17
204-
vadd.u8 d2, d2, d6
205-
paeth d3, d2, d19, d18
206-
vmov d20, d19
207-
vadd.u8 d3, d3, d7
208-
vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r1,:128]!
209-
subs r12, r12, #16
210-
bgt 1b
211-
212-
bx lr
213-
endfunc
214-
215-
func png_read_filter_row_paeth3_neon, export=1
216-
push {r4,lr}
217-
ldr r12, [r0, #4] @ rowbytes
218-
vmov.i8 d3, #0
219-
vmov.i8 d4, #0
220-
mov r0, r1
221-
mov r4, #3
222-
mov lr, #12
223-
vld1.8 {q11}, [r0], lr
224-
1:
225-
vld1.8 {q10}, [r2], lr
226-
paeth d0, d3, d20, d4
227-
vext.8 d5, d22, d23, #3
228-
vadd.u8 d0, d0, d22
229-
vext.8 d17, d20, d21, #3
230-
paeth d1, d0, d17, d20
231-
vst1.32 {d0[0]}, [r1,:32], r4
232-
vext.8 d6, d22, d23, #6
233-
vadd.u8 d1, d1, d5
234-
vext.8 d18, d20, d21, #6
235-
paeth d2, d1, d18, d17
236-
vext.8 d7, d23, d23, #1
237-
vld1.8 {q11}, [r0], lr
238-
vst1.32 {d1[0]}, [r1], r4
239-
vadd.u8 d2, d2, d6
240-
vext.8 d19, d21, d21, #1
241-
paeth d3, d2, d19, d18
242-
vst1.32 {d2[0]}, [r1], r4
243-
vmov d4, d19
244-
vadd.u8 d3, d3, d7
245-
vst1.32 {d3[0]}, [r1], r4
246-
subs r12, r12, #12
247-
bgt 1b
51+
#if (GNUC_VERSION > 0) && (GNUC_VERSION < 40300)
52+
#error "PNG_ARM_NEON is not supported with gcc versions earlier than 4.3.0"
53+
#elif GNUC_VERSION == 40504
54+
#error "PNG_ARM_NEON is not supported with gcc version 4.5.4"
55+
#else
56+
#error "Please use 'arm/*_neon_intrinsics.c' for PNG_ARM_NEON support"
57+
#endif
24858

249-
pop {r4,pc}
250-
endfunc
25159
#endif /* PNG_ARM_NEON_OPT > 0 */
252-
#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 (assembler) */
60+
#endif /* PNG_ARM_NEON_IMPLEMENTATION == 2 */
25361
#endif /* READ */

0 commit comments

Comments
 (0)