1+ <?php
2+
3+ namespace Backstage \Fields \Services ;
4+
class ContentCleaningService
{
    /**
     * Options applied by cleanHtmlContent() when the caller does not override them.
     */
    private const DEFAULT_OPTIONS = [
        'removeFigcaption' => true,
        'unwrapImages' => true,
        'removeEmptyFigures' => true,
        'preserveCustomCaptions' => false, // If true, only remove default captions
    ];

    /**
     * Clean rich-editor HTML using the default options: remove figcaption
     * elements, unwrap img tags from anchor links and drop/unwrap figure tags
     * that are empty or contain nothing but an img.
     *
     * Equivalent to calling cleanHtmlContent() without options; it delegates so
     * both entry points always run the same cleaning steps.
     *
     * @param string|null $content HTML to clean.
     *
     * @return string|null The cleaned HTML; null or '' is returned unchanged.
     */
    public static function cleanRichEditorContent(?string $content): ?string
    {
        return self::cleanHtmlContent($content);
    }

    /**
     * Clean HTML content with more specific options.
     *
     * Steps run in a fixed order: figcaption removal, image unwrapping, then
     * figure clean-up, so that a figure emptied by an earlier step is still
     * handled by the last one.
     *
     * @param string|null $content HTML to clean.
     * @param array{
     *     removeFigcaption?: bool,
     *     unwrapImages?: bool,
     *     removeEmptyFigures?: bool,
     *     preserveCustomCaptions?: bool
     * } $options Overrides merged on top of the defaults (all steps enabled,
     *            preserveCustomCaptions disabled).
     *
     * @return string|null The cleaned HTML; null or '' is returned unchanged.
     */
    public static function cleanHtmlContent(?string $content, array $options = []): ?string
    {
        if ($content === null || $content === '') {
            return $content;
        }

        $options = array_merge(self::DEFAULT_OPTIONS, $options);

        if ($options['removeFigcaption']) {
            if ($options['preserveCustomCaptions']) {
                // Only remove a figcaption whose content is exactly the default pair of
                // spans (attachment__name followed by attachment__size, i.e. filename and size).
                $content = self::replacePattern(
                    '/<figcaption[^>]*>\s*<span[^>]*class="[^"]*attachment__name[^"]*"[^>]*>.*?<\/span>\s*<span[^>]*class="[^"]*attachment__size[^"]*"[^>]*>.*?<\/span>\s*<\/figcaption>/is',
                    '',
                    $content
                );
            } else {
                // Remove all figcaption elements.
                $content = self::replacePattern('/<figcaption[^>]*>.*?<\/figcaption>/is', '', $content);
            }
        }

        if ($options['unwrapImages']) {
            // Unwrap img tags from anchor links, keeping only the img tag.
            // The opening tag must be exactly "<a>" or "<a" followed by whitespace, so
            // other elements whose name starts with "a" (abbr, article, aside, address)
            // are not mistaken for an anchor.
            $content = self::replacePattern('/<a(?:\s[^>]*)?>(<img[^>]*>).*?<\/a>/is', '$1', $content);
        }

        if ($options['removeEmptyFigures']) {
            // Remove figure tags that are empty or contain only whitespace.
            $content = self::replacePattern('/<figure[^>]*>\s*<\/figure>/is', '', $content);

            // Replace figure tags that contain nothing but an img with that img.
            $content = self::replacePattern('/<figure[^>]*>(<img[^>]*>)<\/figure>/is', '$1', $content);
        }

        return $content;
    }

    /**
     * Run a single regex replacement without ever losing the content.
     *
     * preg_replace() returns null when PCRE fails (for example when the
     * backtrack limit is hit); in that case the input is returned untouched
     * instead of letting the content turn into null.
     */
    private static function replacePattern(string $pattern, string $replacement, string $content): string
    {
        return preg_replace($pattern, $replacement, $content) ?? $content;
    }
}
0 commit comments