Skip to content

Commit 3fc13c6

Browse files
committed
updated tests, fixed row counter and headers
1 parent 378626b commit 3fc13c6

File tree

5 files changed

+75
-39
lines changed

5 files changed

+75
-39
lines changed

README.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,21 @@ from pathlib import Path
2828
fn main():
2929
var csv_path = Path("path/to/csv/file.csv")
3030
var reader = CsvReader(csv_path, delimiter="|", quotation_mark='*')
31-
for i in range(len(reader.elements)):
31+
for i in range(len(reader.elements)):
3232
print(reader.elements[i])
3333
```
3434

3535
### Attributes
3636

3737
```mojo
38-
reader.raw
39-
reader.headers
40-
reader.row_count
41-
reader.column_count
42-
reader.length
38+
reader.raw : String # raw csv string
39+
reader.raw_length : Int # total number of Chars
40+
reader.headers : List[String] # first row of csv file
41+
reader.row_count : Int # total number of rows T->B
42+
reader.col_count : Int # total number of columns L->R
43+
reader.elements : List[String] # all delimited elements
44+
reader.length : Int # total number of elements
4345
```
46+
47+
##### Indexing
48+

src/csv_reader.mojo

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,59 @@
11
from collections import Dict, List
2-
from pathlib import Path, cwd
3-
from sys import argv, exit
2+
from pathlib import Path
3+
from sys import exit
44
from testing import assert_true
55

6-
7-
struct CsvReader[]:
6+
# https://www.rfc-editor.org/rfc/rfc4180
7+
#
8+
struct CsvReader:
89
# var data: Dict[String,String]
910
var headers: List[String]
1011
var elements: List[String]
1112
var raw: String
12-
var length: Int
13+
var raw_length: Int
1314
var delimiter: String
14-
var escape: String
15+
# var escape: String
1516
var CR: String
1617
var LFCR: String
1718
var QM: String
1819
var row_count: Int
1920
var col_count: Int
21+
var length: Int
2022

2123
fn __init__(
2224
out self,
2325
owned in_csv: Path,
2426
owned delimiter: String = ",",
2527
owned quotation_mark: String = '"',
26-
):
28+
) raises:
2729
self.raw = ""
30+
self.raw_length = 0
2831
self.length = 0
2932
self.QM = quotation_mark
3033
self.delimiter = delimiter
31-
self.escape = "\\"
34+
#self.escape = "\\"
3235
self.CR = "\n"
3336
self.LFCR = "\r\n"
3437
self.row_count = 0
3538
self.col_count = 0
3639
self.elements = List[String]()
3740
self.headers = List[String]()
3841
self._open(in_csv)
39-
self.length = self.raw.__len__()
42+
self.raw_length = self.raw.__len__()
4043
self._create_reader()
44+
self.length = self.elements.__len__()
45+
# Just always treat the first row as optional headers
46+
self.headers = self.elements[0:self.col_count]
4147

4248
fn _create_reader(mut self):
43-
var col: Int = 0
4449
var col_start: Int = 0
4550
var in_quotes: Bool = False
4651
var skip: Bool = False
47-
for pos in range(self.length):
52+
for pos in range(self.raw_length):
53+
# StringSlice is still not a Char
4854
var char: String = self.raw[pos]
4955
# --------
50-
56+
# Handle bypasses/escapes
5157
if skip:
5258
skip = False
5359
continue
@@ -62,51 +68,65 @@ struct CsvReader[]:
6268
in_quotes = True
6369
continue
6470
# --------
65-
71+
# Delimiter
6672
if char == self.delimiter:
6773
self.elements.append(self.raw[col_start:pos])
6874
col_start = pos + 1
6975

7076
if self.row_count == 0:
7177
self.col_count += 1
7278

73-
if pos + 1 < self.length:
79+
# handle trailing delimiter
80+
if pos + 1 <= self.raw_length:
7481
if (
7582
self.raw[pos + 1] == self.CR
7683
or self.raw[pos + 1] == self.LFCR
7784
):
7885
skip = True
7986
col_start = pos + 2
87+
self.row_count += 1
8088
else:
8189
break
8290

8391
# --------
84-
# case end of row
92+
# end of row no trailing delimiter
8593
elif char == self.CR or char == self.LFCR:
8694
self.elements.append(self.raw[col_start:pos])
8795

8896
if self.row_count == 0:
8997
self.col_count += 1
9098

91-
if pos + 1 < self.length:
99+
if pos + 1 <= self.raw_length:
92100
self.row_count += 1
93101
col_start = pos + 1
94-
95-
elif pos == self.length:
102+
# end of file, even if not CR :: Spec #2
103+
elif pos == self.raw_length:
96104
self.elements.append(self.raw[col_start:pos])
105+
self.row_count += 1
97106
# -------
98107
# -------------
99108

100-
fn _open(mut self, in_csv: Path):
109+
fn _open(mut self, in_csv: Path) raises:
101110
try:
102111
assert_true(in_csv.exists())
103112
self.raw = in_csv.read_text()
104113
assert_true(self.raw != "")
105114
except AssertionError:
106115
print("Error opening file:", in_csv)
107-
exit()
116+
raise AssertionError
117+
118+
fn __getitem__(self, index: Int) raises -> String:
119+
if index < 0 or index >= self.row_count:
120+
raise Error("Index out of range")
121+
return self.elements[index]
122+
123+
# fn __repr__(self) -> String:
124+
# var out = String("[")
125+
# for el in self.elements:
126+
# out.append(el)
127+
# return out
108128

109-
# fn __copyinit__(mut self, existing: _Self) -> CsvReader:
129+
# fn __copyinit__(out self) -> CsvReader:
110130
# self.raw = existing.raw
111131
# self.delimiter = existing.delimiter
112132
# self.QM = existing.QM

test.csv

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
item1,item2,"ite,em3"
2-
pic, pi c,pic,
3-
r_i_1,r_i_2,r_i_3,
2+
"p""ic", pi c,pic,
3+
r_i_1,"r_i_2""",r_i_3,

test.mojo

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from collections import Dict, List
21
from pathlib import Path, cwd
32
from sys import argv, exit
43
from testing import assert_true
@@ -9,22 +8,22 @@ var VALID = List[String](
98
"item1",
109
"item2",
1110
'"ite,em3"',
12-
"pic",
11+
'"p""ic"',
1312
" pi c",
1413
"pic",
1514
"r_i_1",
16-
"r_i_2",
15+
'"r_i_2"""',
1716
"r_i_3",
1817
)
1918

2019

2120
fn main() raises:
2221
var in_csv: Path = Path(argv()[1])
2322
var rd = CsvReader(in_csv)
24-
print(in_csv)
25-
print("columns:", rd.col_count)
23+
print("parsing:", in_csv)
2624
print("----------")
2725
try:
26+
assert_true(rd.col_count == 3)
2827
for x in range(len(rd.elements)):
2928
print(rd.elements[x])
3029
assert_true(
@@ -33,8 +32,15 @@ fn main() raises:
3332
rd.elements[x], VALID[x], x
3433
),
3534
)
35+
print("----------")
36+
# print(rd)
37+
print("columns:", rd.col_count, "of 3")
38+
print("rows:", rd.row_count, "of 3")
39+
assert_true(rd.row_count == 3)
40+
print("elements:", rd.length, "of 9")
3641
assert_true(len(rd.elements) == 9)
3742
except AssertionError:
43+
print(AssertionError)
3844
raise AssertionError
3945
print("----------")
4046
print("parse successful")

test_pack.mojo

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,22 @@ var VALID = List[String](
99
"item1",
1010
"item2",
1111
'"ite,em3"',
12-
"pic",
12+
'"p""ic"',
1313
" pi c",
1414
"pic",
1515
"r_i_1",
16-
"r_i_2",
16+
'"r_i_2"""',
1717
"r_i_3",
1818
)
1919

2020

2121
fn main() raises:
2222
var in_csv: Path = Path(argv()[1])
2323
var rd = CsvReader(in_csv)
24-
print(in_csv)
25-
print("columns:", rd.col_count)
24+
print("parsing:", in_csv)
2625
print("----------")
2726
try:
27+
assert_true(rd.col_count == 3)
2828
for x in range(len(rd.elements)):
2929
print(rd.elements[x])
3030
assert_true(
@@ -33,9 +33,14 @@ fn main() raises:
3333
rd.elements[x], VALID[x], x
3434
),
3535
)
36+
print("----------")
37+
print("columns:", rd.col_count, "of 3")
38+
print("rows:", rd.row_count, "of 3")
39+
assert_true(rd.row_count == 3)
40+
print("elements:", rd.length, "of 9")
3641
assert_true(len(rd.elements) == 9)
3742
except AssertionError:
43+
print(AssertionError)
3844
raise AssertionError
3945
print("----------")
4046
print("parse successful")
41-

0 commit comments

Comments
 (0)