From b37ad5bc8ccd77e1e996c03427c06a7e32654933 Mon Sep 17 00:00:00 2001 From: Pablo Duboue Date: Tue, 13 Dec 2022 22:01:09 -0800 Subject: [PATCH] Added localized date time parsers (fixes #65). It provides localized datetime parsers in the extraction API similar to the handling of local time zones. A test case is also provided. The pattern and example come from ruippeixotog/ebay-snipe-server #7. --- .../scalascraper/scraper/ContentParsers.scala | 9 +++++++++ core/src/test/resources/test2.html | 3 +++ .../scalascraper/dsl/DSLExtractingSpec.scala | 9 +++++++++ 3 files changed, 21 insertions(+) diff --git a/core/src/main/scala/net/ruippeixotog/scalascraper/scraper/ContentParsers.scala b/core/src/main/scala/net/ruippeixotog/scalascraper/scraper/ContentParsers.scala index 0bd6ab26c..0aeb300f8 100644 --- a/core/src/main/scala/net/ruippeixotog/scalascraper/scraper/ContentParsers.scala +++ b/core/src/main/scala/net/ruippeixotog/scalascraper/scraper/ContentParsers.scala @@ -110,6 +110,15 @@ object ContentParsers { * a content parser for parsing text content as a `DateTime` using `tz` as default time zone. */ def withZone(tz: DateTimeZone): String => DateTime = formatter.withZone(tz).parseDateTime(_).withZone(tz) + + /** Parses text content as a `DateTime` using a provided locale. + * + * @param locale + * the locale to be used by the parser + * @return + * a content parser for parsing text content as a `DateTime` using `locale` as the locale. + */ + def withLocale(locale: java.util.Locale): String => DateTime = formatter.withLocale(locale).parseDateTime(_) } /** A content parser with extra options for the retrieval of the first match of a regex. diff --git a/core/src/test/resources/test2.html b/core/src/test/resources/test2.html index f4f1de148..9ca13603a 100644 --- a/core/src/test/resources/test2.html +++ b/core/src/test/resources/test2.html @@ -64,6 +64,9 @@

Section 3 h3

+
+ okt. 1, 2022 15:22:10 PDT +