Skip to content

Commit 40e1c5b

Browse files
committed
Instead of using separate urls, fields, and objects maps, use a single metadata map that can contain different types of metadata.
1 parent c3c2fd9 commit 40e1c5b

File tree

16 files changed

+55
-88
lines changed

16 files changed

+55
-88
lines changed

src/main/kotlin/com/chimbori/crux/api/Fields.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.chimbori.crux.api
22

3-
/** Well-known keys to use in [Resource.fields] & [Resource.urls]. */
3+
/** Well-known keys to use in [Resource.metadata]. */
44
public object Fields {
55
public const val TITLE: String = "title"
66
public const val DESCRIPTION: String = "description"

src/main/kotlin/com/chimbori/crux/api/Plugins.kt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@ public fun interface Rewriter : Plugin {
1717
* provided a fully-parsed HTML DOM to extract fields from, and can also make additional HTTP requests if necessary to
1818
* retrieve additional metadata or to follow redirects.
1919
*
20-
* Text fields can be set via the [Resource.fields] property, and URLs via the [Resource.urls] property. Plugins
21-
* can also rewrite the canonical URL, and can provide an updated DOM tree if the canonical URL is changed. The
22-
* updated URL and DOM tree will be passed on to the next plugin in sequence, so the exact ordering of plugins is
23-
* important.
20+
* Metadata fields can be set via the [Resource.metadata] property. Plugins can also rewrite the canonical URL, and can
21+
* provide an updated DOM tree if the canonical URL is changed. The updated URL and DOM tree will be passed on to the
22+
* next plugin in sequence, so the exact ordering of plugins is important.
2423
*/
2524
public interface Extractor : Plugin {
2625
/**

src/main/kotlin/com/chimbori/crux/api/Resource.kt

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,11 @@ public data class Resource(
1818
*/
1919
val article: Element? = null,
2020

21-
/**
22-
* Text fields extracted from this resource, stored as key-value pairs. It is recommended to use well-defined keys
23-
* from [com.chimbori.crux.Fields] for all standard fields. Custom fields are also supported, in case none of the
24-
* pre-defined keys are applicable.
25-
*/
26-
val fields: Map<String, String?> = emptyMap(),
27-
28-
/**
29-
* URL fields extracted from this resource. Storing these as key-value pairs of [HttpUrl]s avoids re-parsing the same
30-
* URLs multiple times. URLs can also be retrieved as strings via the [get] indexed accessor.
31-
*/
32-
val urls: Map<String, HttpUrl?> = emptyMap(),
33-
3421
/** Metadata for this resource, stored as key-value pairs. Values may be of any type, including custom objects that library users may want to use. */
35-
val objects: Map<String, Any?> = emptyMap(),
22+
val metadata: Map<String, Any?> = emptyMap(),
3623
) {
37-
/**
38-
* @return value of a named field. If there’s no named [String] field corresponding to this key in [Resource.fields],
39-
* but a [HttpUrl] exists in [Resource.urls], the latter will be stringified and returned instead.
40-
*/
41-
public operator fun get(key: String): String? = fields[key] ?: urls[key]?.toString()
24+
/** @return value of a named field in [Resource.metadata]. */
25+
public operator fun get(key: String): Any? = metadata[key]
4226

4327
/**
4428
* Merges non-null fields from another [Resource] with this object, and returns a new immutable object. Prefer to use
@@ -48,19 +32,12 @@ public data class Resource(
4832
url = anotherResource?.url ?: url,
4933
document = anotherResource?.document ?: document,
5034
article = anotherResource?.article ?: article,
51-
fields = if (anotherResource?.fields == null) fields else fields + anotherResource.fields,
52-
urls = if (anotherResource?.urls == null) urls else urls + anotherResource.urls,
53-
objects = if (anotherResource?.objects == null) objects else objects + anotherResource.objects,
35+
metadata = if (anotherResource?.metadata == null) metadata else metadata + anotherResource.metadata,
5436
)
5537

56-
/**
57-
* Removes an immutable copy of this [Resource] that only contains non-null values for each key in both [fields]
58-
* and [urls].
59-
*/
38+
/** Returns an immutable copy of this [Resource] that only contains non-null values for each key in [metadata]. */
6039
public fun removeNullValues(): Resource = copy(
61-
fields = fields.filterValues { !it.isNullOrBlank() },
62-
urls = urls.filterValues { it != null },
63-
objects = objects.filterValues { it != null },
40+
metadata = metadata.filterValues { it != null },
6441
)
6542

6643
/** For any potential extension functions to be defined on the companion object. */

src/main/kotlin/com/chimbori/crux/plugins/AmpRedirector.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public class AmpRedirector(
2828
return if (refetchContentFromCanonicalUrl && canonicalUrl != null) {
2929
Resource.fetchFromUrl(url = canonicalUrl, okHttpClient = okHttpClient)
3030
} else {
31-
Resource(url = canonicalUrl, urls = mapOf(CANONICAL_URL to canonicalUrl))
31+
Resource(url = canonicalUrl, metadata = mapOf(CANONICAL_URL to canonicalUrl))
3232
}
3333
}
3434
}

src/main/kotlin/com/chimbori/crux/plugins/ArticleExtractor.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ public class ArticleExtractor(private val okHttpClient: OkHttpClient) : Extracto
4646

4747
val extractedDoc = PostprocessHelpers.postprocess(bestMatchElement)
4848
return Resource(
49-
objects = mapOf(DURATION_MS to extractedDoc.text().estimatedReadingTimeMs()),
49+
metadata = mapOf(DURATION_MS to extractedDoc.text().estimatedReadingTimeMs()),
5050
article = extractedDoc
5151
)
5252
}

src/main/kotlin/com/chimbori/crux/plugins/FaviconExtractor.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,6 @@ public class FaviconExtractor : Extractor {
1414

1515
override suspend fun extract(request: Resource): Resource {
1616
val canonicalUrl = request.document?.extractCanonicalUrl()?.let { request.url?.resolve(it) } ?: request.url
17-
return Resource(urls = mapOf(FAVICON_URL to request.document?.extractFaviconUrl(canonicalUrl))).removeNullValues()
17+
return Resource(metadata = mapOf(FAVICON_URL to request.document?.extractFaviconUrl(canonicalUrl))).removeNullValues()
1818
}
1919
}

src/main/kotlin/com/chimbori/crux/plugins/HtmlMetadataExtractor.kt

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,15 @@ public class HtmlMetadataExtractor(private val okHttpClient: OkHttpClient) : Ext
6060
return Resource(
6161
url = if (resourceToUse.url != request.url) resourceToUse.url else null,
6262
document = resourceToUse.document,
63-
fields = mapOf(
63+
metadata = mapOf(
64+
CANONICAL_URL to canonicalUrl,
6465
TITLE to resourceToUse.document?.extractTitle(),
6566
DESCRIPTION to resourceToUse.document?.extractDescription(),
6667
SITE_NAME to resourceToUse.document?.extractSiteName(),
6768
THEME_COLOR_HEX to resourceToUse.document?.extractThemeColor(),
6869
PUBLISHED_AT to resourceToUse.document?.extractPublishedAt(),
6970
MODIFIED_AT to resourceToUse.document?.extractModifiedAt(),
7071
KEYWORDS_CSV to resourceToUse.document?.extractKeywords()?.joinToString(separator = ","),
71-
),
72-
urls = mapOf(
73-
CANONICAL_URL to canonicalUrl,
7472
NEXT_PAGE_URL to resourceToUse.document?.extractPaginationUrl(resourceToUse.url, "next"),
7573
PREVIOUS_PAGE_URL to resourceToUse.document?.extractPaginationUrl(resourceToUse.url, "prev"),
7674
BANNER_IMAGE_URL to resourceToUse.document?.extractImageUrl(canonicalUrl),

src/main/kotlin/com/chimbori/crux/plugins/WebAppManifestParser.kt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -45,17 +45,15 @@ public class WebAppManifestParser(private val okHttpClient: OkHttpClient) : Extr
4545
val themeColorHtml = manifest.element("theme_color")
4646
val backgroundColorHtml = manifest.element("background_color")
4747
return Resource(
48-
fields = mapOf(
48+
metadata = mapOf(
49+
WEB_APP_MANIFEST_URL to webAppManifestUrl,
4950
TITLE to manifest.element("name"),
5051
LANGUAGE to manifest.element("lang"),
5152
DISPLAY to manifest.element("display"),
5253
ORIENTATION to manifest.element("orientation"),
54+
FAVICON_URL to getLargestIconUrl(webAppManifestUrl, manifest?.array<JsonObject>("icons")),
5355
(if (themeColorHtml?.startsWith("#") == true) THEME_COLOR_HEX else THEME_COLOR_HTML) to themeColorHtml,
5456
(if (backgroundColorHtml?.startsWith("#") == true) BACKGROUND_COLOR_HEX else BACKGROUND_COLOR_HTML) to backgroundColorHtml,
55-
),
56-
urls = mapOf(
57-
WEB_APP_MANIFEST_URL to webAppManifestUrl,
58-
FAVICON_URL to getLargestIconUrl(webAppManifestUrl, manifest?.array<JsonObject>("icons"))
5957
)
6058
).removeNullValues()
6159
}

src/test/kotlin/com/chimbori/crux/CruxTest.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@ class CruxTest {
111111
override suspend fun extract(request: Resource) =
112112
Resource(
113113
url = request.url?.newBuilder()?.encodedPath("/bar")?.build(),
114-
fields = mapOf(TITLE to "Foo Title")
114+
metadata = mapOf(TITLE to "Foo Title")
115115
)
116116
}
117117

118118
val generateTitleForBarPlugin = object : Extractor {
119119
override fun canExtract(url: HttpUrl) = url.encodedPath == "/bar"
120-
override suspend fun extract(request: Resource) = Resource(fields = mapOf(TITLE to "Bar Title"))
120+
override suspend fun extract(request: Resource) = Resource(metadata = mapOf(TITLE to "Bar Title"))
121121
}
122122

123123
// Test Foo before Bar.

src/test/kotlin/com/chimbori/crux/api/ResourceTest.kt

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,15 @@ class ResourceTest {
1414
fun testResourceMetadataApiExamples() {
1515
val resource = Resource(
1616
url = "https://chimbori.com/".toHttpUrl(),
17-
fields = mapOf(
17+
metadata = mapOf(
1818
TITLE to "Life, the Universe, and Everything",
19-
DESCRIPTION to "42"
20-
), urls = mapOf(
19+
DESCRIPTION to "42",
2120
CANONICAL_URL to "https://chimbori.com/".toHttpUrl()
2221
)
2322
)
2423
assertEquals("Life, the Universe, and Everything", resource[TITLE])
2524
assertEquals("42", resource[DESCRIPTION])
26-
assertEquals("https://chimbori.com/".toHttpUrl(), resource.urls[CANONICAL_URL])
27-
assertNull(resource.urls[BANNER_IMAGE_URL])
25+
assertEquals("https://chimbori.com/".toHttpUrl(), resource[CANONICAL_URL])
26+
assertNull(resource[BANNER_IMAGE_URL])
2827
}
2928
}

0 commit comments

Comments
 (0)