Skip to content

Commit 8269bd9

Browse files
authored
Merge pull request #30 from cedardevs/parsing_additions
Parsing additions
2 parents 795bd3a + a20520e commit 8269bd9

File tree

5 files changed

+59
-17
lines changed

5 files changed

+59
-17
lines changed

schemas-analyze/src/main/java/org/cedar/schemas/analyze/Analyzers.java

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -26,10 +26,14 @@ public class Analyzers {
2626
.appendOptional(DateTimeFormatter.ISO_LOCAL_DATE_TIME) // e.g. 2010-12-30T00:00:00
2727
.appendOptional(DateTimeFormatter.ISO_LOCAL_DATE) // e.g. 2010-12-30
2828
.appendOptional(new DateTimeFormatterBuilder()
29-
.appendValue(ChronoField.YEAR)
30-
.appendPattern("-MM-dd").toFormatter()) // e.g. -200-01-01
31-
.appendOptional(new DateTimeFormatterBuilder()
32-
.appendValue(ChronoField.YEAR).toFormatter()) // e.g. -200
29+
.appendValue(ChronoField.YEAR) // e.g. -200
30+
.optionalStart()
31+
.appendPattern("-MM") // e.g. -200-10
32+
.optionalEnd()
33+
.optionalStart()
34+
.appendPattern("-dd") // e.g. -200-01-01
35+
.optionalEnd()
36+
.toFormatter())
3337
.toFormatter()
3438
.withResolverStyle(ResolverStyle.STRICT);
3539

@@ -256,6 +260,7 @@ private static TemporalAccessor parseDate(String date) {
256260
ZonedDateTime::from,
257261
LocalDateTime::from,
258262
LocalDate::from,
263+
YearMonth::from,
259264
Year::from);
260265
} catch (Exception e) {
261266
return null;
@@ -298,13 +303,19 @@ static String utcDateTimeString(TemporalAccessor parsedDate, boolean start) {
298303
if (parsedDate instanceof Year) {
299304
LocalDateTime yearDate = start ?
300305
((Year) parsedDate).atMonth(1).atDay(1).atStartOfDay() :
301-
((Year) parsedDate).atMonth(12).atEndOfMonth().atTime(23, 59, 59);
306+
((Year) parsedDate).atMonth(12).atEndOfMonth().atTime(23, 59, 59, 999000000);
302307
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(yearDate.atZone(ZoneOffset.UTC));
303308
}
309+
if (parsedDate instanceof YearMonth) {
310+
LocalDateTime yearMonthDate = start ?
311+
((YearMonth) parsedDate).atDay(1).atStartOfDay() :
312+
((YearMonth) parsedDate).atEndOfMonth().atTime(23, 59, 59, 999000000);
313+
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format((yearMonthDate.atZone(ZoneOffset.UTC)));
314+
}
304315
if (parsedDate instanceof LocalDate) {
305316
LocalDateTime localDate = start ?
306317
((LocalDate) parsedDate).atStartOfDay() :
307-
((LocalDate) parsedDate).atTime(23, 59, 59);
318+
((LocalDate) parsedDate).atTime(23, 59, 59, 999000000);
308319
return DateTimeFormatter.ISO_ZONED_DATE_TIME.format(localDate.atZone(ZoneOffset.UTC));
309320
}
310321
if (parsedDate instanceof LocalDateTime) {
@@ -318,7 +329,7 @@ static String utcDateTimeString(TemporalAccessor parsedDate, boolean start) {
318329
}
319330

320331
static String utcDateTimeString(Long year, boolean start) {
321-
return start ? year.toString() + "-01-01T00:00:00Z" : year.toString() + "-12-31T23:59:59Z";
332+
return start ? year.toString() + "-01-01T00:00:00Z" : year.toString() + "-12-31T23:59:59.999Z";
322333
}
323334

324335
static TimeRangeDescriptor rangeDescriptor(DateInfo beginInfo, DateInfo endInfo, DateInfo instantInfo) {

schemas-analyze/src/test/groovy/org/cedar/schemas/analyze/AnalyzersSpec.groovy

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,7 @@ class AnalyzersSpec extends Specification {
7979
endPrecision : ChronoUnit.DAYS.toString(),
8080
endIndexable : true,
8181
endZoneSpecified : null,
82-
endUtcDateTimeString : '2010-10-01T23:59:59Z',
82+
endUtcDateTimeString : '2010-10-01T23:59:59.999Z',
8383
instantDescriptor : ValidDescriptor.UNDEFINED,
8484
instantPrecision : null,
8585
instantIndexable : true,
@@ -132,6 +132,7 @@ class AnalyzersSpec extends Specification {
132132
def 'extracts date info from date strings'() {
133133
when:
134134
def result = new Analyzers.DateInfo(input, start)
135+
println(result.utcDateTimeString)
135136

136137
then:
137138
result.descriptor == descriptor
@@ -144,11 +145,16 @@ class AnalyzersSpec extends Specification {
144145
input | start || descriptor | precision | indexable | zone | string
145146
'2042-04-02T00:42:42Z' | false || ValidDescriptor.VALID | 'Nanos' | true | 'Z' | '2042-04-02T00:42:42Z'
146147
'2042-04-02T00:42:42' | false || ValidDescriptor.VALID | 'Nanos' | true | null | '2042-04-02T00:42:42Z'
147-
'2042-04-02' | false || ValidDescriptor.VALID | 'Days' | true | null | '2042-04-02T23:59:59Z'
148+
'2042-04-02' | false || ValidDescriptor.VALID | 'Days' | true | null | '2042-04-02T23:59:59.999Z'
148149
'2042-04-02' | true || ValidDescriptor.VALID | 'Days' | true | null | '2042-04-02T00:00:00Z'
150+
'2042-05' | true || ValidDescriptor.VALID | 'Months' | true | null | '2042-05-01T00:00:00Z'
151+
'-2042-05' | false || ValidDescriptor.VALID | 'Months' | true | null | '-2042-05-31T23:59:59.999Z'
149152
'2042' | true || ValidDescriptor.VALID | 'Years' | true | null | '2042-01-01T00:00:00Z'
153+
'1965' | false || ValidDescriptor.VALID | 'Years' | true | null | '1965-12-31T23:59:59.999Z'
150154
'-5000' | true || ValidDescriptor.VALID | 'Years' | true | null | '-5000-01-01T00:00:00Z'
155+
'-3000' | false || ValidDescriptor.VALID | 'Years' | true | null | '-3000-12-31T23:59:59.999Z'
151156
'-100000001' | true || ValidDescriptor.VALID | 'Years' | false | null | '-100000001-01-01T00:00:00Z'
157+
'-100000002' | false || ValidDescriptor.VALID | 'Years' | false | null | '-100000002-12-31T23:59:59.999Z'
152158
'ABC' | true || ValidDescriptor.INVALID | null | false | null | null
153159
'' | true || ValidDescriptor.UNDEFINED | null | true | null | null
154160
null | true || ValidDescriptor.UNDEFINED | null | true | null | null

schemas-parse/src/main/groovy/org/cedar/schemas/parse/ISOParser.groovy

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -425,19 +425,27 @@ class ISOParser {
425425
}
426426

427427
static List<Link> parseLinks(GPathResult metadata) {
428-
def linkNodes = metadata.distributionInfo.MD_Distribution.'**'.findAll {
428+
def allLinkNodes = metadata.distributionInfo.MD_Distribution.'**'.findAll {
429429
it.name() == 'CI_OnlineResource'
430430
}
431-
def uniqueLinks = linkNodes.collect(ISOParser.&parseLink).findAll() as Set
432-
return uniqueLinks.toList()
431+
def allUniqueLinks = allLinkNodes.collect(ISOParser.&parseLink).findAll() as Set
432+
433+
// Find all contact links and remove them
434+
def allContactLinkNodes = metadata.distributionInfo.MD_Distribution.distributor.MD_Distributor.distributorContact.'**'.findAll {
435+
it.name() == 'CI_OnlineResource'
436+
}
437+
def allUniqueContactLinks = allContactLinkNodes.collect(ISOParser.&parseLink).findAll() as Set
438+
439+
allUniqueLinks.removeAll(allUniqueContactLinks)
440+
return allUniqueLinks.toList()
433441
}
434442

435443
static Link parseLink(GPathResult node) {
436444
if (!node) { return null }
437445
def builder = Link.newBuilder()
438446
builder.linkName = node.name?.CharacterString?.text()?.trim() ?: null
439447
builder.linkProtocol = node.protocol?.CharacterString?.text()?.trim() ?: null
440-
builder.linkUrl = node.linkage?.URL?.text() ? StringEscapeUtils.unescapeXml(node.linkage.URL.text()) : null
448+
builder.linkUrl = StringEscapeUtils.unescapeXml(node.linkage?.URL?.text()?.trim()) ?: null
441449
builder.linkDescription = node.description?.CharacterString?.text()?.trim() ?: null
442450
builder.linkFunction = node.function?.CI_OnLineFunctionCode?.@codeListValue?.text()?.trim() ?: null
443451
return builder.build()

schemas-parse/src/test/groovy/org/cedar/schemas/parse/ISOParserSpec.groovy

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
package org.cedar.schemas.parse
22

3-
import org.cedar.schemas.avro.psi.Discovery
43
import org.cedar.schemas.avro.geojson.LineStringType
54
import org.cedar.schemas.avro.geojson.PointType
65
import org.cedar.schemas.avro.geojson.PolygonType
@@ -370,10 +369,10 @@ class ISOParserSpec extends Specification {
370369
then:
371370
links instanceof List
372371
links.every { it instanceof Link }
373-
links.size() == 1
372+
links.size() == 1 // Distributor Contact link should not appear
374373
links[0].linkName == 'Super Important Access Link'
375374
links[0].linkProtocol == 'HTTP'
376-
links[0].linkUrl == 'http://www.example.com'
375+
links[0].linkUrl == 'http://www.example.com' // Whitespace needs to be cleaned up
377376
links[0].linkDescription == 'Everything Important, All In One Place'
378377
links[0].linkFunction == 'search'
379378
}

schemas-parse/src/test/resources/test-iso-metadata.xml

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1359,7 +1359,7 @@
13591359
<gmd:onLine>
13601360
<gmd:CI_OnlineResource>
13611361
<gmd:linkage>
1362-
<gmd:URL>http://www.example.com</gmd:URL>
1362+
<gmd:URL> http://www.example.com </gmd:URL>
13631363
</gmd:linkage>
13641364
<gmd:protocol>
13651365
<gco:CharacterString>HTTP</gco:CharacterString>
@@ -1380,6 +1380,24 @@
13801380
</gmd:onLine>
13811381
</gmd:MD_DigitalTransferOptions>
13821382
</gmd:distributorTransferOptions>
1383+
<gmd:distributorContact>
1384+
<gmd:CI_ResponsibleParty>
1385+
<gmd:contactInfo>
1386+
<gmd:CI_Contact>
1387+
<gmd:onlineResource>
1388+
<gmd:CI_OnlineResource>
1389+
<gmd:linkage>
1390+
<gmd:URL>http://www.contact-example.com</gmd:URL>
1391+
</gmd:linkage>
1392+
<gmd:name>
1393+
<gco:CharacterString>Contact Link That Shouldn't Appear</gco:CharacterString>
1394+
</gmd:name>
1395+
</gmd:CI_OnlineResource>
1396+
</gmd:onlineResource>
1397+
</gmd:CI_Contact>
1398+
</gmd:contactInfo>
1399+
</gmd:CI_ResponsibleParty>
1400+
</gmd:distributorContact>
13831401
</gmd:MD_Distributor>
13841402
</gmd:distributor>
13851403
<gmd:distributionFormat>

0 commit comments

Comments
 (0)