99package uk .gov .nationalarchives .csv .validator
1010
1111
12- import uk .gov .nationalarchives .utf8 .validator .{Utf8Validator , ValidationHandler }
13-
14- import scala .language .{postfixOps , reflectiveCalls }
15- import scala .util .{Try , Using }
16-
17- import java .io .{BufferedInputStream , IOException , FileInputStream => JFileInputStream , InputStreamReader => JInputStreamReader , LineNumberReader => JLineNumberReader , Reader => JReader }
18- import java .nio .charset .{Charset , StandardCharsets }
19- import uk .gov .nationalarchives .csv .validator .schema ._
20- import uk .gov .nationalarchives .csv .validator .metadata .Cell
21- import org .apache .commons .io .input .BOMInputStream
22- import com .univocity .parsers .common .TextParsingException
12+ import cats .data .{Chain , Validated , ValidatedNel }
13+ import cats .syntax .all ._
2314import com .univocity .parsers .csv .{CsvParser , CsvParserSettings }
24- import uk .gov .nationalarchives .csv .validator .metadata .Row
25-
26- import scala .annotation .tailrec
15+ import org .apache .commons .io .input .BOMInputStream
2716import uk .gov .nationalarchives .csv .validator .api .TextFile
17+ import uk .gov .nationalarchives .csv .validator .metadata .{Cell , Row }
18+ import uk .gov .nationalarchives .csv .validator .schema ._
19+ import uk .gov .nationalarchives .utf8 .validator .{Utf8Validator , ValidationHandler }
2820
21+ import java .io .{BufferedInputStream , IOException , InputStreamReader => JInputStreamReader , LineNumberReader => JLineNumberReader , Reader => JReader }
22+ import java .nio .charset .{Charset , StandardCharsets }
2923import java .nio .file .{Files , Path }
30- import cats .data .ValidatedNel
31- import cats .syntax .all ._
24+ import scala .annotation .tailrec
25+ import scala .language .{postfixOps , reflectiveCalls }
26+ import scala .util .{Try , Using }
3227
3328// error reporting classes
/**
 * Root of the error-reporting classes used by this validator.
 *
 * The trait is sealed, so all of its members are declared in this file
 * (they fall outside the visible excerpt).
 */
3429sealed trait ErrorType
@@ -57,11 +52,39 @@ case class ProgressFor(rowsToValidate: Int, progress: ProgressCallback)
5752trait MetaDataValidator {
5853 // Helper functions for checking if a result contains a warning or error.
/**
 * Tells whether a validation result carries at least one error.
 *
 * @param e the validation result to inspect
 * @return `true` when `e` is invalid and one of its failure messages is
 *         matched by `FailMessage.isError`; `false` for a valid result
 */
def containsErrors(e: MetaDataValidation[Any]): Boolean =
  e match {
    case Validated.Invalid(failures) => failures.collectFirst(FailMessage.isError).isDefined
    case Validated.Valid(_)          => false
  }
55+
/**
 * Tells whether a validation result carries at least one warning.
 *
 * @param e the validation result to inspect
 * @return `true` when `e` is invalid and one of its failure messages is
 *         matched by `FailMessage.isWarning`; `false` for a valid result
 */
def containsWarnings(e: MetaDataValidation[Any]): Boolean =
  e match {
    case Validated.Invalid(failures) => failures.collectFirst(FailMessage.isWarning).isDefined
    case Validated.Valid(_)          => false
  }
6157
/**
 * Result type used throughout the validator: either a non-empty list of
 * [[FailMessage]]s (invalid) or a value of type `S` (valid).
 */
6258 type MetaDataValidation [S ] = ValidatedNel [FailMessage , S ]
6359
64- def validate (csv : JReader , schema : Schema , progress : Option [ProgressCallback ]): MetaDataValidation [Any ] = {
/**
 * Legacy entry point kept for callers that expect one aggregated result.
 *
 * Delegates to `validateReader` with a callback that records the failure
 * messages of every invalid result it is handed, then folds everything that
 * was recorded into a single validation. The `Boolean` returned by
 * `validateReader` is not used here.
 *
 * @param csv      reader over the CSV content
 * @param schema   schema to validate against
 * @param progress optional progress callback, passed through unchanged
 * @return a valid unit when no failures were recorded, otherwise an invalid
 *         result holding all recorded failure messages in arrival order
 */
@deprecated(" use validateReader or validateCsvFile")
def validate(csv: JReader, schema: Schema, progress: Option[ProgressCallback]): MetaDataValidation[Any] = {
  // One list of failures per invalid callback invocation, in arrival order.
  var collected: Chain[List[FailMessage]] = Chain.empty

  val recordFailures: MetaDataValidation[Any] => Unit = {
    case Validated.Invalid(failures) => collected = collected.append(failures.toList)
    case _                           => ()
  }

  validateReader(csv, schema, progress, recordFailures)

  collected.toList.flatten.toNel
    .fold[MetaDataValidation[Any]](().valid)(failures => Validated.invalid(failures))
}
81+
82+ def validateReader (
83+ csv : JReader ,
84+ schema : Schema ,
85+ progress : Option [ProgressCallback ],
86+ rowCallback : MetaDataValidation [Any ] => Unit
87+ ): Boolean = {
6588 // try to find the number of rows for the
6689 // purposes of reporting progress
6790 // can only do that if we can reset()
@@ -78,10 +101,15 @@ trait MetaDataValidator {
78101 None
79102 }
80103
81- validateKnownRows(csv, schema, pf)
104+ validateKnownRows(csv, schema, pf, rowCallback )
82105 }
83106
84- def validateKnownRows (csv : JReader , schema : Schema , progress : Option [ProgressFor ]): MetaDataValidation [Any ] = {
107+ def validateKnownRows (
108+ csv : JReader ,
109+ schema : Schema ,
110+ progress : Option [ProgressFor ],
111+ rowCallback : MetaDataValidation [Any ] => Unit
112+ ): Boolean = {
85113
86114 val separator : Char = schema.globalDirectives.collectFirst {
87115 case Separator (sep) =>
@@ -107,7 +135,7 @@ trait MetaDataValidator {
107135 // format.setLineSeparator(CSV_RFC1480_LINE_SEPARATOR) // CRLF
108136
109137 // we need a better CSV Reader!
110- val result : Try [MetaDataValidation [ Any ] ] = Using {
138+ val result : Try [Boolean ] = Using {
111139 val parser = new CsvParser (settings)
112140 parser.beginParsing(csv)
113141 parser
@@ -146,20 +174,21 @@ trait MetaDataValidator {
146174
147175 maybeNoData match {
148176 case Some (noData) =>
149- noData
177+ rowCallback(noData)
178+ false
150179 case None =>
151- validateRows(rowIt, schema)
180+ validateRows(rowIt, schema, rowCallback )
152181 }
153182
154183 } (_.stopParsing());
155184
156185 result match {
157186 case util.Success (metadataValidation) =>
158187 metadataValidation
159-
160188 case util.Failure (ts) =>
161189 // TODO(AR) emit all errors not just first!
162- FailMessage (ValidationError , ts.toString).invalidNel[Any ]
190+ rowCallback(FailMessage (ValidationError , ts.toString).invalidNel[Any ])
191+ false
163192// ts.toList.map(t => FailMessage(ValidationError, t.toString).failureNel[Any]).sequence[MetaDataValidation, Any]
164193 }
165194 }
@@ -177,9 +206,11 @@ trait MetaDataValidator {
177206
/**
 * Reads the value of one cell of a row.
 *
 * @param row        the row to read from
 * @param titleIndex index of the cell within `row.cells`
 * @return the `value` of the cell at `titleIndex`
 */
def filename(row: Row, titleIndex: Int): String = {
  val titleCell = row.cells(titleIndex)
  titleCell.value
}
179208
180-
181- def validateRows (rows : Iterator [Row ], schema : Schema ): MetaDataValidation [Any ]
182-
/**
 * Validates the supplied rows against the schema; implemented by concrete
 * validators.
 *
 * @param rows        iterator over the rows to validate
 * @param schema      schema to validate against
 * @param rowCallback receives validation results as they are produced
 *                    (presumably one per row; confirm against implementations)
 * @return NOTE(review): presumably `true` when validation passed; the
 *         visible callers return `false` on their own failure paths, so
 *         confirm the exact meaning against the implementations
 */
def validateRows(rows: Iterator[Row], schema: Schema, rowCallback: MetaDataValidation[Any] => Unit): Boolean
183214
184215 def validateHeader (header : Row , schema : Schema ): Option [MetaDataValidation [Any ]] = {
185216 val icnc : Option [IgnoreColumnNameCase ] = schema.globalDirectives.collectFirst {case i @ IgnoreColumnNameCase () => i }
@@ -200,7 +231,7 @@ trait MetaDataValidator {
/**
 * Validates a single row by combining the column-count check with the
 * rule checks.
 *
 * @param row       the row to validate
 * @param schema    schema to validate against
 * @param mayBeLast forwarded unchanged to `rules`
 * @return when both checks are valid, the column-count result prepended to
 *         the rule results; otherwise the combined failures of both checks
 */
def validateRow(row: Row, schema: Schema, mayBeLast: Option[Boolean] = None): MetaDataValidation[Any] = {
  val columnCountCheck = totalColumns(row, schema)
  val ruleChecks = rules(row, schema, mayBeLast)
  (columnCountCheck, ruleChecks).mapN((countResult, ruleResults) => countResult :: ruleResults)
}
205236
206237 def validateUtf8Encoding (file : Path ): MetaDataValidation [Any ] = {
0 commit comments