Skip to content

Commit 9e6c0cd

Browse files
committed
Refactored Global Directives so that they can be expressed in any order
and also added some further tests around Global Directives
1 parent 60718d8 commit 9e6c0cd

File tree

3 files changed

+149
-9
lines changed

3 files changed

+149
-9
lines changed

csv-validator-core/src/main/scala/uk/gov/nationalarchives/csv/validator/schema/SchemaParser.scala

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ trait SchemaParser extends RegexParsers {
6464
// Matches the literal "version", then Schema.version, then eol; `~>` and `<~` drop the keyword and the eol so only the Schema.version result is returned.
// A failure of this parser is reported with the explicit message given to withFailureMessage.
def versionDecl: Parser[String] = ("version" ~> Schema.version <~ eol).withFailureMessage(s"Schema version declaration 'version ${Schema.version}' missing or incorrect")
6565

6666
/**
67-
* [4] GlobalDirectives ::= SeparatorDirective? QuotedDirective? TotalColumnsDirective? (NoHeaderDirective | IgnoreColumnNameCaseDirective)?
67+
* [4] GlobalDirectives ::= SeparatorDirective? QuotedDirective? TotalColumnsDirective? (NoHeaderDirective | IgnoreColumnNameCaseDirective)? /* expr: unordered */
6868
*/
69-
def globalDirectives: Parser[List[GlobalDirective]] = rep(positioned((separatorDirective | quotedDirective | totalColumnsDirective | (noHeaderDirective | ignoreColumnNameCaseDirective)) <~ opt(eol)))
69+
// Accepts the global directives in any order by handing four "slots" to mingle:
// separator, quoted, totalColumns, and (noHeader | ignoreColumnNameCase).
// The last two share one slot, so no accepted ordering contains both of them.
// Each slot parser is wrapped in positioned(...) and may be followed by an optional eol.
// opt(...) together with getOrElse(List.empty) yields an empty list when no directive matches.
// NOTE(review): withFailureMessage is applied to the mingled parser as a whole, so a failure inside an
// individual directive (e.g. "@totalColumns invalid") is reported as "Invalid global directive" instead.
def globalDirectives: Parser[List[GlobalDirective]] = opt(mingle(List(separatorDirective, quotedDirective, totalColumnsDirective, noHeaderDirective | ignoreColumnNameCaseDirective).map(positioned(_) <~ opt(eol)))
70+
.withFailureMessage("Invalid global directive")) ^^ { _.getOrElse(List.empty) }
7071

7172
/**
7273
* [5] DirectivePrefix ::= "@"
@@ -96,17 +97,20 @@ trait SchemaParser extends RegexParsers {
9697
/**
9798
* [10] TotalColumnsDirective ::= DirectivePrefix "totalColumns" PositiveNonZeroIntegerLiteral
9899
*/
100+
//def totalColumnsDirective: Parser[TotalColumns] = (directivePrefix ~> "totalColumns" ~> positiveNonZeroIntegerLiteral ^^ { TotalColumns(_) }).withFailureMessage("@totalColumns invalid")
99101
// Drops the directive prefix and the "totalColumns" keyword, then wraps the parsed literal in TotalColumns.
// NOTE(review): when this parser is reached through globalDirectives, the "@totalColumns invalid" message set here is replaced by that parser's own failure message.
def totalColumnsDirective: Parser[TotalColumns] = (directivePrefix ~> "totalColumns" ~> positiveNonZeroIntegerLiteral ^^ { TotalColumns(_) }).withFailureMessage("@totalColumns invalid")
100102

101103
/**
102104
* [11] NoHeaderDirective ::= DirectivePrefix "noHeader"
103105
*/
106+
// def noHeaderDirective: Parser[NoHeader] = directivePrefix ~> "noHeader" ^^^ NoHeader()
104107
// Matches the directive prefix followed by the literal "noHeader"; `^^^` discards the matched text and yields NoHeader().
def noHeaderDirective: Parser[NoHeader] = directivePrefix ~> "noHeader" ^^^ NoHeader()
105108

106109
/**
107110
* [12] IgnoreColumnNameCaseDirective ::= DirectivePrefix "ignoreColumnNameCase"
108111
*/
109112
// Matches the directive prefix followed by the literal "ignoreColumnNameCase"; `^^^` discards the matched text and yields IgnoreColumnNameCase().
def ignoreColumnNameCaseDirective: Parser[IgnoreColumnNameCase] = directivePrefix ~> "ignoreColumnNameCase" ^^^ IgnoreColumnNameCase()
113+
//def ignoreColumnNameCaseDirective: Parser[IgnoreColumnNameCase] = "ignoreColumnNameCase" ^^^ IgnoreColumnNameCase()
110114

111115
/**
112116
* [13] Body ::= BodyPart+
@@ -418,6 +422,58 @@ trait SchemaParser extends RegexParsers {
418422
val nonBreakingCharPattern = """[^\r\n\f]"""
419423
//</editor-fold>
420424

425+
/**
 * Builds a parser that accepts any non-empty selection of the
 * given parsers, applied one after another in any order, with
 * each parser used at most once.
 *
 * Internally every permutation of every combination of
 * `parsers` is turned into a sequential parser, and those are
 * tried as alternatives, longest sequence first. The number of
 * alternatives grows factorially with the size of the list
 * (64 alternatives for four parsers), so this is only suitable
 * for short lists.
 *
 * @tparam T the result type of each individual parser
 * @tparam U unused; retained so the existing signature is unchanged
 * @param parsers the parsers to mingle
 * @return a parser yielding the results of the matched parsers in
 *         the order in which they matched. If `parsers` is empty
 *         the returned parser always fails, instead of this
 *         method throwing while it is being built.
 */
private def mingle[T, U](parsers : List[Parser[T]]): Parser[List[T]] = {

  /**
   * Every permutation of every non-empty combination of `items`.
   *
   * Sizes are generated from largest to smallest so that the
   * longest sequences come first; this gives the same order as
   * generating by ascending size and then stable-sorting by
   * descending length, without the extra sort.
   */
  def orderings[A](items: List[A]): List[List[A]] =
    (items.length to 1 by -1).toList.flatMap { size =>
      items.combinations(size).flatMap(_.permutations)
    }

  /**
   * Chains the parsers one after another, in the same manner
   * as p1 ~ p2 ~ ... pN, collecting their results into a List.
   */
  def sequence[A](ps: List[Parser[A]]): Parser[List[A]] =
    ps.foldRight(success(List.empty[A])) { (p, rest) =>
      p.flatMap(head => rest.map(head :: _))
    }

  // `|` takes the first alternative that succeeds, so the longest
  // sequences must be tried first; otherwise a shorter sequence
  // would succeed early and leave later directives unconsumed.
  orderings(parsers)
    .map(sequence(_))
    .reduceLeftOption[Parser[List[T]]](_ | _)
    .getOrElse(failure("No parsers were supplied to mingle"))
}
476+
421477

422478
def parseAndValidate(reader: Reader): ValidationNel[FailMessage, Schema] = {
423479

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/**
2+
* Copyright (c) 2013, The National Archives <[email protected]>
3+
* http://www.nationalarchives.gov.uk
4+
*
5+
* This Source Code Form is subject to the terms of the Mozilla Public
6+
* License, v. 2.0. If a copy of the MPL was not distributed with this
7+
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
8+
*/
9+
package uk.gov.nationalarchives.csv.validator.schema
10+
11+
import org.specs2.mutable._
12+
import scalaz.{Failure => FailureZ}
13+
import java.io.StringReader
14+
15+
16+
class SchemaParserGlobalDirectivesSpec extends Specification {

  /** Parser under test: no path substitutions, and enforceCaseSensitivePathChecks switched off. */
  object TestSchemaParser extends SchemaParser {
    val pathSubstitutions = List[(String,String)]()
    val enforceCaseSensitivePathChecks = false
  }

  import TestSchemaParser._

  /** Runs the parser under test over the given schema text. */
  private def parseSchema(schemaText: String) = parse(new StringReader(schemaText))

  "Schema" should {

    // A single global directive ahead of the column definitions.
    "succeed for a @totalColumns global directive" in {
      parseSchema(
        """version 1.0
          |@totalColumns 1
          |column1: """.stripMargin
      ) must beLike { case Success(Schema(List(TotalColumns(_)), List(ColumnDefinition("column1", Nil, Nil))),_) => ok }
    }

    // Two directives, separated either by a line break or by whitespace only.
    "with @totalColumns and @noHeader global directives" should {
      "succeed on seperate lines" in {
        parseSchema(
          """version 1.0
            |@totalColumns 1
            |@noHeader
            |column1: """.stripMargin
        ) must beLike { case Success(Schema(List(TotalColumns(_), NoHeader()), List(ColumnDefinition("column1", Nil, Nil))), _) => ok}
      }

      "succeed on same line" in {
        parseSchema(
          """version 1.0
            |@totalColumns 1 @noHeader
            |column1: """.stripMargin
        ) must beLike { case Success(Schema(List(TotalColumns(_), NoHeader()), List(ColumnDefinition("column1", Nil, Nil))), _) => ok}
      }
    }

    // Both examples below are marked pending until the error messages are improved.
    "@noHeader and @ignoreColumnNameCase global directives (mutually exclusive)" should {
      "fail for @noHeader followed by @ignoreColumnNameCase" >> pending("Need to improve error messages") {
        parseSchema(
          """version 1.0
            |@noHeader
            |@ignoreColumnNameCase
            |column1: """.stripMargin
        ) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
      }

      "fail for @ignoreColumnNameCase followed by @noHeader" >> pending("Need to improve error messages") {
        parseSchema(
          """version 1.0
            |@ignoreColumnNameCase
            |@noHeader
            |column1: """.stripMargin
        ) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
      }
    }

    // Global directives are optional: a schema may go straight from the version line to the columns.
    "succeed with no global directives" in {
      parseSchema(
        """version 1.0
          |column1: """.stripMargin
      ) must beLike { case Success(Schema(Nil, List(ColumnDefinition("column1", Nil, Nil))), _) => ok}
    }
  }
}

csv-validator-core/src/test/scala/uk/gov/nationalarchives/csv/validator/schema/SchemaParserTotalColumnsSpec.scala

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,35 +20,35 @@ class SchemaParserTotalColumnsSpec extends Specification {
2020
"Schema" should {
2121

2222
"fail for TotalColumns with missing value" in {
23-
parse(new StringReader("version 1.0\n@totalColumns")) must beLike { case Failure(message, _) => message mustEqual "@totalColumns invalid" }
23+
parse(new StringReader("version 1.0\n@totalColumns")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
2424
}
2525

2626
"fail for incorrect TotalColumns field name" in {
27-
parse(new StringReader("version 1.0\n@ToalColumns 23")) must beLike { case Failure(message, _) => message mustEqual "Invalid column definition" }
27+
parse(new StringReader("version 1.0\n@ToalColumns 23")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
2828
}
2929

3030
"fail for incorrect TotalColumns field name with no value" in {
31-
parse(new StringReader("version 1.0\n@TtalColumn")) must beLike { case Failure(message, _) => message mustEqual "Invalid column definition" }
31+
parse(new StringReader("version 1.0\n@TtalColumn")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
3232
}
3333

3434
"fail for TotalColumns field name incorrect case" in {
35-
parse(new StringReader("version 1.0\n@TotalColumns 65")) must beLike { case Failure(message, _) => message mustEqual "Invalid column definition" }
35+
parse(new StringReader("version 1.0\n@TotalColumns 65")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
3636
}
3737

3838
"fail for TotalColumns of zero" in {
39-
parse(new StringReader("version 1.0\n@totalColumns 0")) must beLike { case Failure(message, _) => message mustEqual "@totalColumns invalid" }
39+
parse(new StringReader("version 1.0\n@totalColumns 0")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
4040
}
4141

4242
"fail for TotalColumns with negative integer" in {
43-
parse(new StringReader("version 1.0\n@totalColumns -23")) must beLike { case Failure(message, _) => message mustEqual "@totalColumns invalid" }
43+
parse(new StringReader("version 1.0\n@totalColumns -23")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
4444
}
4545

4646
"fail for TotalColumns with non integer" in {
4747
parse(new StringReader("version 1.0\n@totalColumns 132.45")) must beLike { case Failure(message, _) => message mustEqual "Invalid column definition" }
4848
}
4949

5050
"fail for TotalColumns with non numeric" in {
51-
parse(new StringReader("version 1.0\n@totalColumns blah")) must beLike { case Failure(message, _) => message mustEqual "@totalColumns invalid" }
51+
parse(new StringReader("version 1.0\n@totalColumns blah")) must beLike { case Failure(message, _) => message mustEqual "Invalid global directive" }
5252
}
5353
}
5454
}

0 commit comments

Comments
 (0)