@@ -20,8 +20,13 @@ package com.ichi2.libanki
2020import androidx.annotation.WorkerThread
2121import com.google.protobuf.kotlin.toByteString
2222import com.ichi2.libanki.exception.EmptyMediaException
23+ import com.ichi2.libanki.template.TemplateFilters
2324import timber.log.Timber
2425import java.io.File
26+ import java.util.Locale
27+ import java.util.TreeSet
28+ import java.util.regex.Matcher
29+ import java.util.regex.Pattern
2530
2631/* *
2732 * Media manager - handles the addition and removal of media files from the media directory (collection.media) and
@@ -65,15 +70,135 @@ open class Media(
6570 * @param string The string to scan for media filenames ([sound:...] or <img...>).
6671 * @return A list containing all the sound and image filenames found in the input string.
6772 */
68- fun filesInStr (string : String ): List <String > =
69- col.backend
70- .extractAvTags(string, true )
71- .avTagsList
72- .filter {
73- it.hasSoundOrVideo()
74- }.map {
75- it.soundOrVideo
/**
 * Collects the media filenames referenced by the rendered question and answer of [currentCard].
 *
 * The question and answer text are concatenated; for cloze note types whose text still contains
 * `{{c`, the text is first expanded with [expandClozes]. Every resulting string is passed through
 * [LaTeX.mungeQA] and then scanned with each pattern in [REGEXPS] and for `[anki:play:q:N]` /
 * `[anki:play:a:N]` references, which are resolved against the AV tags of the render output.
 *
 * @param currentCard the card whose rendered output is scanned
 * @param includeRemote when `true`, names matching [fRemotePattern] (http, https or ftp URLs) are
 * returned as well; by default only local names are returned
 * @return every matching filename in discovery order; duplicates are not removed
 * @throws IllegalStateException if the note type of the card cannot be resolved
 */
fun filesInStr(
    currentCard: Card,
    includeRemote: Boolean = false,
): List<String> {
    val model = checkNotNull(currentCard.noteType(col)) { "note type of the card could not be resolved" }
    val renderOutput = currentCard.renderOutput(col)
    val rendered = renderOutput.questionText + renderOutput.answerText

    val variants: List<String?> =
        if (model.isCloze && rendered.contains("{{c")) {
            // Expand clozes if necessary
            expandClozes(rendered)
        } else {
            listOf(rendered)
        }

    // Loop-invariant values: computed once instead of once per expanded string.
    val svg = model.optBoolean("latexsvg", false)
    val ankiPlayPattern = Pattern.compile("\\[anki:play:(q|a):(\\d+)]")
    val found = ArrayList<String>()

    // Single place for the local/remote filter shared by both extraction passes.
    fun addIfWanted(fname: String) {
        val isLocal = !fRemotePattern.matcher(fname.lowercase(Locale.ROOT)).find()
        if (isLocal || includeRemote) {
            found.add(fname)
        }
    }

    for (variant in variants) {
        if (variant == null) continue
        // Handle LaTeX
        val text = LaTeX.mungeQA(variant, col, svg)

        // Extract filenames from the strings using regex patterns
        for (p in REGEXPS) {
            // The [sound:] pattern and both unquoted-attribute patterns define only two groups and
            // keep the filename in group 2; the quoted-attribute patterns keep it in group 3.
            // Asking an unquoted pattern for group 3 throws IndexOutOfBoundsException.
            val fnameIdx =
                when (p) {
                    fSoundRegexps, fImgAudioRegExpU, fObjectRegExpU -> 2
                    else -> 3
                }
            val m = p.matcher(text)
            while (m.find()) {
                val fname = m.group(fnameIdx) ?: continue
                addIfWanted(fname)
            }
        }

        // Resolve [anki:play:<side>:<index>] references through the rendered AV tags.
        val playMatcher = ankiPlayPattern.matcher(text)
        while (playMatcher.find()) {
            // \d+ is unbounded, so the digits may not fit in an Int; skip such references.
            val index = playMatcher.group(2)?.toIntOrNull() ?: continue
            val avTags =
                if (playMatcher.group(1) == "q") {
                    renderOutput.questionAvTags
                } else {
                    renderOutput.answerAvTags
                }
            // An index past the end of the tag list is ignored rather than treated as an error.
            val avTag = avTags.getOrNull(index) ?: continue
            extractFilenameFromAvTag(avTag)?.let { addIfWanted(it) }
        }
    }

    return found
}
150+
/**
 * Expands the cloze deletions of [string] into one variant per cloze ordinal plus a final
 * variant in which every cloze is revealed.
 *
 * For each ordinal found in the text, the clozes with that ordinal are replaced by `[hint]` when
 * a non-empty hint is present and by [TemplateFilters.CLOZE_DELETION_REPLACEMENT] otherwise; all
 * remaining clozes in that variant are replaced by their content. The last element of the result
 * is [string] with every cloze replaced by its content.
 *
 * @param string text containing `{{cN::content}}` or `{{cN::content::hint}}` markers
 * @return the expanded variants; the list always contains at least the fully revealed variant
 */
private fun expandClozes(string: String): MutableList<String?> {
    // TreeSet de-duplicates the ordinals and iterates them in string (lexicographic) order.
    val ords: MutableSet<String> = TreeSet()
    val ordMatcher = Pattern.compile("\\{\\{c(\\d+)::.+?\\}\\}").matcher(string)
    while (ordMatcher.find()) {
        ordMatcher.group(1)?.let { ords.add(it) }
    }

    // Group 2 = cloze content, group 4 = optional hint; %s is the ordinal (or a wildcard).
    val clozeReg = "(?si)\\{\\{(c)%s::(.*?)(::(.*?))?\\}\\}"
    // Matches a cloze of any ordinal; built once because it does not depend on the loop.
    val anyCloze = String.format(Locale.US, clozeReg, ".+?").toRegex()

    val strings = ArrayList<String?>(ords.size + 1)
    for (ord in ords) {
        val buf = StringBuffer()
        val m = Pattern.compile(String.format(Locale.US, clozeReg, ord)).matcher(string)

        while (m.find()) {
            val hint = m.group(4)
            if (!hint.isNullOrEmpty()) {
                // The hint is user text: quote it so '$' and '\' are inserted literally instead
                // of being interpreted as group references or escapes by appendReplacement.
                m.appendReplacement(buf, Matcher.quoteReplacement("[$hint]"))
            } else {
                m.appendReplacement(buf, TemplateFilters.CLOZE_DELETION_REPLACEMENT)
            }
        }
        m.appendTail(buf)

        // Reveal the clozes that belong to other ordinals.
        strings.add(buf.toString().replace(anyCloze, "\$2"))
    }

    // Final variant: every cloze revealed.
    strings.add(string.replace(anyCloze, "\$2"))

    return strings
}
192+
/**
 * Extracts the filename from the string representation of [avTag].
 *
 * NOTE(review): this parses `avTag.toString()` and assumes a shape like `Name(filename=<value>)`
 * with `filename` as the last property - confirm against the [AvTag] implementations.
 *
 * @return the text between `filename=` and the last `)`, or `null` when the representation has no
 * `filename=` marker or the extracted text is empty
 */
private fun extractFilenameFromAvTag(avTag: AvTag): String? {
    val marker = "filename="
    val tagString = avTag.toString()

    // substringAfter returns the whole input when the marker is absent, which would turn the
    // representation of a tag without a filename into a bogus "filename"; bail out instead.
    if (marker !in tagString) {
        return null // Could not extract filename
    }

    // Cut at the last ')' so that names which themselves contain ')' are not truncated.
    return tagString
        .substringAfter(marker)
        .substringBeforeLast(')')
        .takeIf { it.isNotEmpty() }
}
77202
/**
 * Runs [check] and maps each reported unused file name to a [File] located in [dir].
 *
 * @return one [File] per unused file name, in the order reported by the check
 */
fun findUnusedMediaFiles(): List<File> {
    val unusedNames = check().unusedFileNames
    return unusedNames.map { name -> File(dir, name) }
}
79204
@@ -136,6 +261,52 @@ open class Media(
/** Delegates to the backend's `restoreTrash` call. */
private fun restoreTrash() {
    val backend = col.backend
    backend.restoreTrash()
}
264+
companion object {
    /**
     * Characters treated as illegal in media filenames.
     *
     * Upstream illegal chars are defined on disallowed_char()
     * in https://github.com/ankitects/anki/blob/main/rslib/src/media/files.rs
     */
    private val fIllegalCharReg = Pattern.compile("[\\[\\]><:\"/?*^\\\\|\\x00\\r\\n]")

    /** Matches the scheme prefix of a remote (http, https or ftp) reference; expects lowercase input. */
    private val fRemotePattern = Pattern.compile("(https?|ftp)://")

    /*
     * A note about the regular expressions below: the python code uses named groups for the image
     * and sound patterns. The patterns here use numbered groups instead: the group names
     * (e.g., ?P<fname>) have been stripped and the comment above each pattern gives the index of
     * the group in the original. Refer to these indexes whenever the python code makes use of a
     * named group.
     *
     * Regexes defined on
     * https://github.com/ankitects/anki/blob/b403f20cae8fcdd7c3ff4c8d21766998e8efaba0/pylib/anki/media.py#L34-L45
     */

    /**
     * `[sound:...]` tag.
     *
     * Group 1 = the whole `[sound:]` tag
     * Group 2 = "fname"
     */
    private val fSoundRegexps = Pattern.compile("(?i)(\\[sound:([^]]+)])")

    // src/data attribute, quoted case

    /**
     * `<img>` / `<audio>` tag with a quoted `src` attribute.
     *
     * Group 1 = the whole `<img>|<audio>` tag
     * Group 2 = "str" (the quote character)
     * Group 3 = "fname"
     * Group 4 = Backreference to "str" (i.e., same type of quote character)
     */
    private val fImgAudioRegExpQ =
        Pattern.compile("(?i)(<(?:img|audio)\\b[^>]* src=([\"'])([^>]+?)(\\2)[^>]*>)")

    /**
     * `<object>` tag with a quoted `data` attribute.
     *
     * Group 1 = the whole `<object>` tag
     * Group 2 = "str" (the quote character)
     * Group 3 = "fname"
     * Group 4 = Backreference to "str" (i.e., same type of quote character)
     */
    private val fObjectRegExpQ =
        Pattern.compile("(?i)(<object\\b[^>]* data=([\"'])([^>]+?)(\\2)[^>]*>)")

    // src/data attribute, unquoted case

    /**
     * `<img>` / `<audio>` tag with an unquoted `src` attribute.
     *
     * Group 1 = the whole `<img>|<audio>` tag
     * Group 2 = "fname"
     */
    private val fImgAudioRegExpU =
        Pattern.compile("(?i)(<(?:img|audio)\\b[^>]* src=(?!['\"])([^ >]+)[^>]*?>)")

    /**
     * `<object>` tag with an unquoted `data` attribute.
     *
     * Group 1 = the whole `<object>` tag
     * Group 2 = "fname"
     */
    private val fObjectRegExpU =
        Pattern.compile("(?i)(<object\\b[^>]* data=(?!['\"])([^ >]+)[^>]*?>)")

    /**
     * Every media-reference pattern, in the order in which they are applied. The filename group
     * differs per pattern: 2 for [fSoundRegexps] and the unquoted patterns, 3 for the quoted ones.
     */
    val REGEXPS =
        listOf(
            fSoundRegexps,
            fImgAudioRegExpQ,
            fImgAudioRegExpU,
            fObjectRegExpQ,
            fObjectRegExpU,
        )
}
139310}
140311
/**
 * Derives the media path that belongs to a collection file by replacing a trailing `.anki2`
 * extension with `.media`.
 *
 * @param collectionPath path of the collection file
 * @return [collectionPath] with its trailing `.anki2` replaced by `.media`; the input is returned
 * unchanged when it does not end in `.anki2`
 */
fun getCollectionMediaPath(collectionPath: String): String =
    // The suffix pattern and the replacement must not contain stray whitespace: with it the
    // pattern never matches an ordinary path and the replacement would gain a leading space.
    collectionPath.replaceFirst("\\.anki2$".toRegex(), ".media")
0 commit comments