Skip to content

Commit 7af49dd

Browse files
ArnyminerZrfc2822
andauthored
Pre-process iCalendars line by line to avoid OOM on large files (#92)
* Fix OOM issues * Remove reset * Get rid of Android's Log * Fix function calling * Ignore exception in try-catch * Fix tests * Get rid of `regexpForProblem` in `StreamPreprocessor` * Improve KDoc * Rename argument * Move warning * Closing readers * Make it clear why we use LF * Use Guava's `CharSource` and get rid of `SequenceReader` * Optimize imports Signed-off-by: Arnau Mora <[email protected]> * Disable inspection Signed-off-by: Arnau Mora <[email protected]> * Added Spotbugs annotations Signed-off-by: Arnau Mora <[email protected]> * Add annotations Signed-off-by: Arnau Mora <[email protected]> * [WIP] Process line by line * KDoc * Move ICalPreprocessorInstrumentedTest to unit tests * KDoc * Add test * Make some fields internal * KDoc --------- Signed-off-by: Arnau Mora <[email protected]> Co-authored-by: Ricki Hirner <[email protected]>
1 parent 0bc15ea commit 7af49dd

File tree

9 files changed

+215
-93
lines changed

9 files changed

+215
-93
lines changed

gradle/libs.versions.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ kotlin = "2.2.21"
1515
mockk = "1.14.6"
1616
roboelectric = "4.16"
1717
slf4j = "2.0.17"
18-
spotbugs-annotations = "4.9.8"
18+
spotbugs = "4.9.8"
1919

2020
[libraries]
2121
android-desugar = { module = "com.android.tools:desugar_jdk_libs", version.ref = "android-desugar" }
@@ -32,7 +32,7 @@ mockk = { module = "io.mockk:mockk", version.ref = "mockk" }
3232
mockk-android = { module = "io.mockk:mockk-android", version.ref = "mockk" }
3333
roboelectric = { module = "org.robolectric:robolectric", version.ref = "roboelectric" }
3434
slf4j-jdk = { module = "org.slf4j:slf4j-jdk14", version.ref = "slf4j" }
35-
spotbugs-annotations = { module = "com.github.spotbugs:spotbugs-annotations", version.ref = "spotbugs-annotations" }
35+
spotbugs-annotations = { module = "com.github.spotbugs:spotbugs-annotations", version.ref = "spotbugs" }
3636

3737
[plugins]
3838
android-library = { id = "com.android.library", version.ref = "agp" }

lib/build.gradle.kts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ dependencies {
131131
// synctools.test package also provide test rules
132132
implementation(libs.androidx.test.rules)
133133

134+
// Useful annotations
135+
api(libs.spotbugs.annotations)
136+
134137
// instrumented tests
135138
androidTestImplementation(libs.androidx.test.rules)
136139
androidTestImplementation(libs.androidx.test.runner)

lib/src/main/kotlin/at/bitfire/synctools/icalendar/validation/FixInvalidDayOffsetPreprocessor.kt

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@
66

77
package at.bitfire.synctools.icalendar.validation
88

9+
import androidx.annotation.VisibleForTesting
10+
911
/**
1012
* Fixes durations with day offsets with the 'T' prefix.
1113
* See also https://github.com/bitfireAT/ical4android/issues/77
1214
*/
13-
class FixInvalidDayOffsetPreprocessor : StreamPreprocessor() {
15+
class FixInvalidDayOffsetPreprocessor : StreamPreprocessor {
1416

15-
override fun regexpForProblem() = Regex(
17+
@VisibleForTesting
18+
internal val regexpForProblem = Regex(
1619
// Examples:
1720
// TRIGGER:-P2DT
1821
// TRIGGER:-PT2D
@@ -21,11 +24,11 @@ class FixInvalidDayOffsetPreprocessor : StreamPreprocessor() {
2124
setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE)
2225
)
2326

24-
override fun fixString(original: String): String {
25-
var iCal: String = original
27+
override fun repairLine(line: String): String {
28+
var iCal: String = line
2629

2730
// Find all instances matching the defined expression
28-
val found = regexpForProblem().findAll(iCal).toList()
31+
val found = regexpForProblem.findAll(iCal).toList()
2932

3033
// ... and repair them. Use reversed order so that already replaced occurrences don't interfere with the following matches.
3134
for (match in found.reversed()) {

lib/src/main/kotlin/at/bitfire/synctools/icalendar/validation/FixInvalidUtcOffsetPreprocessor.kt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,28 @@
66

77
package at.bitfire.synctools.icalendar.validation
88

9+
import androidx.annotation.VisibleForTesting
910
import java.util.logging.Level
1011
import java.util.logging.Logger
1112

1213

1314
/**
1415
* Some servers modify UTC offsets in TZOFFSET(FROM,TO) like "+005730" to an invalid "+5730".
1516
*
16-
* Rewrites values of all TZOFFSETFROM and TZOFFSETTO properties which match [TZOFFSET_REGEXP]
17+
* Rewrites values of all TZOFFSETFROM and TZOFFSETTO properties which match [regexpForProblem]
1718
* so that an hour value of 00 is inserted.
1819
*/
19-
class FixInvalidUtcOffsetPreprocessor: StreamPreprocessor() {
20+
class FixInvalidUtcOffsetPreprocessor: StreamPreprocessor {
2021

2122
private val logger
2223
get() = Logger.getLogger(javaClass.name)
2324

24-
private val TZOFFSET_REGEXP = Regex("^(TZOFFSET(FROM|TO):[+\\-]?)((18|19|[2-6]\\d)\\d\\d)$",
25+
@VisibleForTesting
26+
internal val regexpForProblem = Regex("^(TZOFFSET(FROM|TO):[+\\-]?)((18|19|[2-6]\\d)\\d\\d)$",
2527
setOf(RegexOption.MULTILINE, RegexOption.IGNORE_CASE))
2628

27-
override fun regexpForProblem() = TZOFFSET_REGEXP
28-
29-
override fun fixString(original: String) =
30-
original.replace(TZOFFSET_REGEXP) {
29+
override fun repairLine(line: String) =
30+
line.replace(regexpForProblem) {
3131
logger.log(Level.FINE, "Applying Synology WebDAV fix to invalid utc-offset", it.value)
3232
"${it.groupValues[1]}00${it.groupValues[3]}"
3333
}

lib/src/main/kotlin/at/bitfire/synctools/icalendar/validation/ICalPreprocessor.kt

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,18 @@
77
package at.bitfire.synctools.icalendar.validation
88

99
import androidx.annotation.VisibleForTesting
10+
import com.google.common.io.CharSource
1011
import net.fortuna.ical4j.model.Calendar
1112
import net.fortuna.ical4j.model.Property
1213
import net.fortuna.ical4j.transform.rfc5545.CreatedPropertyRule
1314
import net.fortuna.ical4j.transform.rfc5545.DateListPropertyRule
1415
import net.fortuna.ical4j.transform.rfc5545.DatePropertyRule
1516
import net.fortuna.ical4j.transform.rfc5545.Rfc5545PropertyRule
17+
import java.io.BufferedReader
1618
import java.io.Reader
1719
import java.util.logging.Level
1820
import java.util.logging.Logger
21+
import javax.annotation.WillNotClose
1922

2023
/**
2124
* Applies some rules to increase compatibility of parsed (incoming) iCalendars:
@@ -40,18 +43,50 @@ class ICalPreprocessor {
4043
FixInvalidDayOffsetPreprocessor() // fix things like DURATION:PT2D
4144
)
4245

46+
/**
47+
* Applies [streamPreprocessors] to a given iCalendar [line].
48+
*
49+
* @param line original line (taken from an iCalendar)
50+
* @return the potentially repaired iCalendar line
51+
*/
52+
@VisibleForTesting
53+
fun applyPreprocessors(line: String): String {
54+
var newLine = line
55+
for (preprocessor in streamPreprocessors)
56+
newLine = preprocessor.repairLine(newLine)
57+
return newLine
58+
}
59+
4360
/**
4461
* Applies [streamPreprocessors] to a given [Reader] that reads an iCalendar object
4562
* in order to repair some things that must be fixed before parsing.
4663
*
47-
* @param original original iCalendar object
48-
* @return the potentially repaired iCalendar object
64+
* The original reader content is processed line by line to avoid loading
65+
* the whole content into memory at once.
66+
*
67+
* This method works in a streaming way, so **[original] must not be closed before
68+
* the result of this method is consumed** like that:
69+
*
70+
* ~~~
71+
* someSource.reader().use { original ->
72+
* val repaired = preprocessStream(original)
73+
* // closing original here would render repaired unusable, too
74+
* parse(repaired)
75+
* } // use will close original
76+
* ~~~
77+
*
78+
* @param original original iCalendar object (must be closed by caller _after_ consuming the result of this method)
79+
* @return potentially repaired iCalendar object (doesn't need to be closed separately)
4980
*/
50-
fun preprocessStream(original: Reader): Reader {
51-
var reader = original
52-
for (preprocessor in streamPreprocessors)
53-
reader = preprocessor.preprocess(reader)
54-
return reader
81+
fun preprocessStream(@WillNotClose original: Reader): Reader {
82+
val repairedLines = BufferedReader(original)
83+
.lineSequence()
84+
.map { line -> // BufferedReader provides line without line break
85+
val fixed = applyPreprocessors(line)
86+
CharSource.wrap(fixed + "\r\n") // iCalendar uses CR+LF
87+
}
88+
.asIterable()
89+
return CharSource.concat(repairedLines).openStream()
5590
}
5691

5792

lib/src/main/kotlin/at/bitfire/synctools/icalendar/validation/StreamPreprocessor.kt

Lines changed: 11 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -6,51 +6,20 @@
66

77
package at.bitfire.synctools.icalendar.validation
88

9-
import java.io.IOException
10-
import java.io.Reader
11-
import java.io.StringReader
12-
import java.util.Scanner
13-
14-
abstract class StreamPreprocessor {
15-
16-
abstract fun regexpForProblem(): Regex?
9+
/**
10+
* This is a pre-processor for iCalendar lines that can detect and repair errors which
11+
* cannot be repaired on a higher level (because parsing alone would cause syntax
12+
* or other unrecoverable errors).
13+
*/
14+
interface StreamPreprocessor {
1715

1816
/**
19-
* Fixes an iCalendar string.
17+
* Validates and potentially repairs an iCalendar string.
2018
*
21-
* @param original The complete iCalendar string
22-
* @return The complete iCalendar string, but fixed
19+
* @param line full line of an iCalendar lines to validate / fix (without line break)
20+
*
21+
* @return the potentially fixed version of [line] (without line break)
2322
*/
24-
abstract fun fixString(original: String): String
25-
26-
fun preprocess(reader: Reader): Reader {
27-
var result: String? = null
28-
29-
val resetSupported = try {
30-
reader.reset()
31-
true
32-
} catch(_: IOException) {
33-
false
34-
}
35-
36-
if (resetSupported) {
37-
val regex = regexpForProblem()
38-
// reset is supported, no need to copy the whole stream to another String (unless we have to fix the TZOFFSET)
39-
if (regex == null || Scanner(reader).findWithinHorizon(regex.toPattern(), 0) != null) {
40-
reader.reset()
41-
result = fixString(reader.readText())
42-
}
43-
} else
44-
// reset not supported, always generate a new String that will be returned
45-
result = fixString(reader.readText())
46-
47-
if (result != null)
48-
// modified or reset not supported, return new stream
49-
return StringReader(result)
50-
51-
// not modified, return original iCalendar
52-
reader.reset()
53-
return reader
54-
}
23+
fun repairLine(line: String): String
5524

5625
}

lib/src/test/kotlin/at/bitfire/synctools/icalendar/validation/FixInvalidDayOffsetPreprocessorTest.kt

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ class FixInvalidDayOffsetPreprocessorTest {
2525
*/
2626
private fun assertFixedEquals(expected: String, testValue: String, parseDuration: Boolean = true) {
2727
// Fix the duration string
28-
val fixed = processor.fixString(testValue)
28+
val fixed = processor.repairLine(testValue)
2929

3030
// Test the duration can now be parsed
3131
if (parseDuration)
@@ -39,15 +39,15 @@ class FixInvalidDayOffsetPreprocessorTest {
3939
}
4040

4141
@Test
42-
fun test_FixString_NoOccurrence() {
42+
fun test_repairLine_NoOccurrence() {
4343
assertEquals(
4444
"Some String",
45-
processor.fixString("Some String"),
45+
processor.repairLine("Some String"),
4646
)
4747
}
4848

4949
@Test
50-
fun test_FixString_SucceedsAsValueOnCorrectProperties() {
50+
fun test_repairLine_SucceedsAsValueOnCorrectProperties() {
5151
// By RFC 5545 the only properties allowed to hold DURATION as a VALUE are:
5252
// DURATION, REFRESH, RELATED, TRIGGER
5353
assertFixedEquals("DURATION;VALUE=DURATION:P1D", "DURATION;VALUE=DURATION:PT1D")
@@ -57,18 +57,18 @@ class FixInvalidDayOffsetPreprocessorTest {
5757
}
5858

5959
@Test
60-
fun test_FixString_FailsAsValueOnWrongProperty() {
60+
fun test_repairLine_FailsAsValueOnWrongProperty() {
6161
// The update from RFC 2445 to RFC 5545 disallows using DURATION as a VALUE in FREEBUSY
6262
assertFixedEquals("FREEBUSY;VALUE=DURATION:PT1D", "FREEBUSY;VALUE=DURATION:PT1D", parseDuration = false)
6363
}
6464

6565
@Test
66-
fun test_FixString_FailsIfNotAtStartOfLine() {
66+
fun test_repairLine_FailsIfNotAtStartOfLine() {
6767
assertFixedEquals("xxDURATION;VALUE=DURATION:PT1D", "xxDURATION;VALUE=DURATION:PT1D", parseDuration = false)
6868
}
6969

7070
@Test
71-
fun test_FixString_DayOffsetFrom_Invalid() {
71+
fun test_repairLine_DayOffsetFrom_Invalid() {
7272
assertFixedEquals("DURATION:-P1D", "DURATION:-PT1D")
7373
assertFixedEquals("TRIGGER:-P2D", "TRIGGER:-PT2D")
7474

@@ -77,38 +77,38 @@ class FixInvalidDayOffsetPreprocessorTest {
7777
}
7878

7979
@Test
80-
fun test_FixString_DayOffsetFrom_Valid() {
80+
fun test_repairLine_DayOffsetFrom_Valid() {
8181
assertFixedEquals("DURATION:-PT12H", "DURATION:-PT12H")
8282
assertFixedEquals("TRIGGER:-PT12H", "TRIGGER:-PT12H")
8383
}
8484

8585
@Test
86-
fun test_FixString_DayOffsetFromMultiple_Invalid() {
86+
fun test_repairLine_DayOffsetFromMultiple_Invalid() {
8787
assertFixedEquals("DURATION:-P1D\nTRIGGER:-P2D", "DURATION:-PT1D\nTRIGGER:-PT2D")
8888
assertFixedEquals("DURATION:-P1D\nTRIGGER:-P2D", "DURATION:-P1DT\nTRIGGER:-P2DT")
8989
}
9090

9191
@Test
92-
fun test_FixString_DayOffsetFromMultiple_Valid() {
92+
fun test_repairLine_DayOffsetFromMultiple_Valid() {
9393
assertFixedEquals("DURATION:-PT12H\nTRIGGER:-PT12H", "DURATION:-PT12H\nTRIGGER:-PT12H")
9494
}
9595

9696
@Test
97-
fun test_FixString_DayOffsetFromMultiple_Mixed() {
97+
fun test_repairLine_DayOffsetFromMultiple_Mixed() {
9898
assertFixedEquals("DURATION:-P1D\nDURATION:-PT12H\nTRIGGER:-P2D", "DURATION:-PT1D\nDURATION:-PT12H\nTRIGGER:-PT2D")
9999
assertFixedEquals("DURATION:-P1D\nDURATION:-PT12H\nTRIGGER:-P2D", "DURATION:-P1DT\nDURATION:-PT12H\nTRIGGER:-P2DT")
100100
}
101101

102102
@Test
103103
fun test_RegexpForProblem_DayOffsetTo_Invalid() {
104-
val regex = processor.regexpForProblem()
104+
val regex = processor.regexpForProblem
105105
assertTrue(regex.matches("DURATION:PT2D"))
106106
assertTrue(regex.matches("TRIGGER:PT1D"))
107107
}
108108

109109
@Test
110110
fun test_RegexpForProblem_DayOffsetTo_Valid() {
111-
val regex = processor.regexpForProblem()
111+
val regex = processor.regexpForProblem
112112
assertFalse(regex.matches("DURATION:-PT12H"))
113113
assertFalse(regex.matches("TRIGGER:-PT15M"))
114114
}

lib/src/test/kotlin/at/bitfire/synctools/icalendar/validation/FixInvalidUtcOffsetPreprocessorTest.kt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,46 +16,46 @@ class FixInvalidUtcOffsetPreprocessorTest {
1616
private val processor = FixInvalidUtcOffsetPreprocessor()
1717

1818
@Test
19-
fun test_FixString_NoOccurrence() {
19+
fun test_repairLine_NoOccurrence() {
2020
assertEquals(
2121
"Some String",
22-
processor.fixString("Some String"))
22+
processor.repairLine("Some String"))
2323
}
2424

2525
@Test
26-
fun test_FixString_TzOffsetFrom_Invalid() {
26+
fun test_repairLine_TzOffsetFrom_Invalid() {
2727
assertEquals("TZOFFSETFROM:+005730",
28-
processor.fixString("TZOFFSETFROM:+5730"))
28+
processor.repairLine("TZOFFSETFROM:+5730"))
2929
}
3030

3131
@Test
32-
fun test_FixString_TzOffsetFrom_Valid() {
32+
fun test_repairLine_TzOffsetFrom_Valid() {
3333
assertEquals("TZOFFSETFROM:+005730",
34-
processor.fixString("TZOFFSETFROM:+005730"))
34+
processor.repairLine("TZOFFSETFROM:+005730"))
3535
}
3636

3737
@Test
38-
fun test_FixString_TzOffsetTo_Invalid() {
38+
fun test_repairLine_TzOffsetTo_Invalid() {
3939
assertEquals("TZOFFSETTO:+005730",
40-
processor.fixString("TZOFFSETTO:+5730"))
40+
processor.repairLine("TZOFFSETTO:+5730"))
4141
}
4242

4343
@Test
44-
fun test_FixString_TzOffsetTo_Valid() {
44+
fun test_repairLine_TzOffsetTo_Valid() {
4545
assertEquals("TZOFFSETTO:+005730",
46-
processor.fixString("TZOFFSETTO:+005730"))
46+
processor.repairLine("TZOFFSETTO:+005730"))
4747
}
4848

4949

5050
@Test
5151
fun test_RegexpForProblem_TzOffsetTo_Invalid() {
52-
val regex = processor.regexpForProblem()
52+
val regex = processor.regexpForProblem
5353
assertTrue(regex.matches("TZOFFSETTO:+5730"))
5454
}
5555

5656
@Test
5757
fun test_RegexpForProblem_TzOffsetTo_Valid() {
58-
val regex = processor.regexpForProblem()
58+
val regex = processor.regexpForProblem
5959
assertFalse(regex.matches("TZOFFSETTO:+005730"))
6060
}
6161

0 commit comments

Comments
 (0)