12
12
{-# LANGUAGE RecordWildCards #-}
13
13
{-# LANGUAGE ScopedTypeVariables #-}
14
14
{-# LANGUAGE StrictData #-}
15
- {-# LANGUAGE TemplateHaskell #-}
16
15
{-# LANGUAGE TypeApplications #-}
17
16
{-# LANGUAGE UndecidableInstances #-}
18
17
@@ -39,7 +38,6 @@ module Codec.Xlsx.Parser.Stream
39
38
runXlsxM ,
40
39
WorkbookInfo (.. ),
41
40
SheetInfo (.. ),
42
- wiSheets ,
43
41
getOrParseSharedStringss ,
44
42
getWorkbookInfo ,
45
43
CellRow ,
@@ -54,13 +52,9 @@ module Codec.Xlsx.Parser.Stream
54
52
55
53
-- ** SheetItem
56
54
SheetItem (.. ),
57
- si_sheet_index ,
58
- si_row ,
59
55
60
56
-- ** Row
61
57
Row (.. ),
62
- ri_row_index ,
63
- ri_cell_row ,
64
58
65
59
-- * Errors
66
60
SheetErrors (.. ),
@@ -136,24 +130,21 @@ type CellRow = IntMap Cell
136
130
-- The current sheet at a time, every sheet is constructed of these items.
137
131
data SheetItem = MkSheetItem
138
132
{ -- | The sheet number
139
- _si_sheet_index :: Int ,
140
- _si_row :: ~ Row
133
+ si_sheet_index :: Int ,
134
+ si_row :: ~ Row
141
135
}
142
136
deriving stock (Generic , Show )
143
137
deriving anyclass (NFData )
144
138
145
139
data Row = MkRow
146
140
{ -- | Row number
147
- _ri_row_index :: RowIndex ,
141
+ ri_row_index :: RowIndex ,
148
142
-- | Row itself
149
- _ri_cell_row :: ~ CellRow
143
+ ri_cell_row :: ~ CellRow
150
144
}
151
145
deriving stock (Generic , Show )
152
146
deriving anyclass (NFData )
153
147
154
- makeLenses 'MkSheetItem
155
- makeLenses 'MkRow
156
-
157
148
type SharedStringsMap = V. Vector Text
158
149
159
150
-- | Type of the excel value
@@ -179,43 +170,39 @@ data ExcelValueType
179
170
-- | State for parsing sheets
180
171
data SheetState = MkSheetState
181
172
{ -- | Current row
182
- _ps_row :: ~ CellRow ,
173
+ ps_row :: ~ CellRow ,
183
174
-- | Current sheet ID (AKA 'sheetInfoSheetId')
184
- _ps_sheet_index :: Int ,
175
+ ps_sheet_index :: Int ,
185
176
-- | Current row number
186
- _ps_cell_row_index :: RowIndex ,
177
+ ps_cell_row_index :: RowIndex ,
187
178
-- | Current column number
188
- _ps_cell_col_index :: ColumnIndex ,
189
- _ps_cell_style :: Maybe Int ,
179
+ ps_cell_col_index :: ColumnIndex ,
180
+ ps_cell_style :: Maybe Int ,
190
181
-- | Flag for indexing wheter the parser is in value or not
191
- _ps_is_in_val :: Bool ,
182
+ ps_is_in_val :: Bool ,
192
183
-- | Shared string map
193
- _ps_shared_strings :: SharedStringsMap ,
184
+ ps_shared_strings :: SharedStringsMap ,
194
185
-- | The last detected value type
195
- _ps_type :: ExcelValueType ,
186
+ ps_type :: ExcelValueType ,
196
187
-- | for hexpat only, which can break up char data into multiple events
197
- _ps_text_buf :: Text ,
188
+ ps_text_buf :: Text ,
198
189
-- | For hexpat only, which can throw errors right at the end of the sheet
199
190
-- rather than ending gracefully.
200
- _ps_worksheet_ended :: Bool
191
+ ps_worksheet_ended :: Bool
201
192
}
202
193
deriving stock (Generic , Show )
203
194
204
- makeLenses 'MkSheetState
205
-
206
195
-- | State for parsing shared strings
207
196
data SharedStringsState = MkSharedStringsState
208
197
{ -- | String we are parsing
209
198
-- TODO: At the moment SharedStrings can be used only to create CellText values.
210
199
-- We should add support for CellRich values.
211
- _ss_string :: TB. Builder ,
200
+ ss_string :: TB. Builder ,
212
201
-- | list of shared strings
213
- _ss_list :: DL. DList Text
202
+ ss_list :: DL. DList Text
214
203
}
215
204
deriving stock (Generic , Show )
216
205
217
- makeLenses 'MkSharedStringsState
218
-
219
206
type HasSheetState = MonadState SheetState
220
207
221
208
type HasSharedStringsState = MonadState SharedStringsState
@@ -234,17 +221,16 @@ data SheetInfo = SheetInfo
234
221
-- | Information about the workbook contained in xl/workbook.xml
235
222
-- (currently a subset)
236
223
data WorkbookInfo = WorkbookInfo
237
- { _wiSheets :: [SheetInfo ]
224
+ { wiSheets :: [SheetInfo ]
238
225
}
239
- deriving (Show )
240
-
241
- makeLenses 'WorkbookInfo
226
+ deriving (Show , Generic )
242
227
243
228
data XlsxMState = MkXlsxMState
244
- { _xs_shared_strings :: Memoized (V. Vector Text ),
245
- _xs_workbook_info :: Memoized WorkbookInfo ,
246
- _xs_relationships :: Memoized Relationships
229
+ { xs_shared_strings :: Memoized (V. Vector Text ),
230
+ xs_workbook_info :: Memoized WorkbookInfo ,
231
+ xs_relationships :: Memoized Relationships
247
232
}
233
+ deriving (Show , Generic )
248
234
249
235
newtype XlsxM a = XlsxM { _unXlsxM :: ReaderT XlsxMState Zip. ZipArchive a }
250
236
deriving newtype
@@ -264,24 +250,24 @@ newtype XlsxM a = XlsxM {_unXlsxM :: ReaderT XlsxMState Zip.ZipArchive a}
264
250
initialSheetState :: SheetState
265
251
initialSheetState =
266
252
MkSheetState
267
- { _ps_row = mempty ,
268
- _ps_sheet_index = 0 ,
269
- _ps_cell_row_index = 0 ,
270
- _ps_cell_col_index = 0 ,
271
- _ps_is_in_val = False ,
272
- _ps_shared_strings = mempty ,
273
- _ps_type = Untyped ,
274
- _ps_text_buf = mempty ,
275
- _ps_worksheet_ended = False ,
276
- _ps_cell_style = Nothing
253
+ { ps_row = mempty ,
254
+ ps_sheet_index = 0 ,
255
+ ps_cell_row_index = 0 ,
256
+ ps_cell_col_index = 0 ,
257
+ ps_is_in_val = False ,
258
+ ps_shared_strings = mempty ,
259
+ ps_type = Untyped ,
260
+ ps_text_buf = mempty ,
261
+ ps_worksheet_ended = False ,
262
+ ps_cell_style = Nothing
277
263
}
278
264
279
265
-- | Initial parsing state
280
266
initialSharedStrings :: SharedStringsState
281
267
initialSharedStrings =
282
268
MkSharedStringsState
283
- { _ss_string = mempty ,
284
- _ss_list = mempty
269
+ { ss_string = mempty ,
270
+ ss_list = mempty
285
271
}
286
272
287
273
-- | Parse shared string entry from xml event and return it once
@@ -295,19 +281,19 @@ parseSharedStrings ::
295
281
m (Maybe Text )
296
282
parseSharedStrings = \ case
297
283
-- TODO: Add parsing of text styles to further create CellRich values.
298
- StartElement " si" _ -> Nothing <$ (ss_string .= mempty )
299
- EndElement " si" -> Just . LT. toStrict . TB. toLazyText <$> gets _ss_string
300
- CharacterData txt -> Nothing <$ (ss_string <>= TB. fromText txt)
284
+ StartElement " si" _ -> Nothing <$ (# ss_string .= mempty )
285
+ EndElement " si" -> Just . LT. toStrict . TB. toLazyText <$> gets ss_string
286
+ CharacterData txt -> Nothing <$ (# ss_string <>= TB. fromText txt)
301
287
_ -> pure Nothing
302
288
303
289
-- | Run a series of actions on an Xlsx file
304
290
runXlsxM :: (MonadIO m ) => FilePath -> XlsxM a -> m a
305
291
runXlsxM xlsxFile (XlsxM act) = liftIO $ do
306
292
-- TODO: don't run the withArchive multiple times but use liftWith or runInIO instead
307
- _xs_workbook_info <- memoizeRef (Zip. withArchive xlsxFile readWorkbookInfo)
308
- _xs_relationships <-
293
+ xs_workbook_info <- memoizeRef (Zip. withArchive xlsxFile readWorkbookInfo)
294
+ xs_relationships <-
309
295
memoizeRef (Zip. withArchive xlsxFile readWorkbookRelationships)
310
- _xs_shared_strings <- memoizeRef (Zip. withArchive xlsxFile parseSharedStringss)
296
+ xs_shared_strings <- memoizeRef (Zip. withArchive xlsxFile parseSharedStringss)
311
297
Zip. withArchive xlsxFile $ runReaderT act $ MkXlsxMState {.. }
312
298
313
299
liftZip :: Zip. ZipArchive a -> XlsxM a
@@ -325,12 +311,12 @@ parseSharedStringss = do
325
311
st <- liftIO $ runExpat state0 byteSrc $ \ evs -> forM_ evs $ \ ev -> do
326
312
mTxt <- parseSharedStrings ev
327
313
for_ mTxt $ \ txt ->
328
- ss_list %= (`DL.snoc` txt)
329
- pure $ V. fromList $ DL. toList $ _ss_list st
314
+ # ss_list %= (`DL.snoc` txt)
315
+ pure $ V. fromList $ DL. toList $ ss_list st
330
316
331
317
{-# SCC getOrParseSharedStringss #-}
332
318
getOrParseSharedStringss :: XlsxM (V. Vector Text )
333
- getOrParseSharedStringss = runMemoized =<< asks _xs_shared_strings
319
+ getOrParseSharedStringss = runMemoized =<< asks xs_shared_strings
334
320
335
321
readWorkbookInfo :: Zip. ZipArchive WorkbookInfo
336
322
readWorkbookInfo = do
@@ -354,7 +340,7 @@ lookupBy fields attrs = maybe (throwM $ LookupError attrs fields) pure $ lookup
354
340
-- xl/workbook.xml. The result is cached so the XML will only be
355
341
-- decompressed and parsed once inside a larger XlsxM action.
356
342
getWorkbookInfo :: XlsxM WorkbookInfo
357
- getWorkbookInfo = runMemoized =<< asks _xs_workbook_info
343
+ getWorkbookInfo = runMemoized =<< asks xs_workbook_info
358
344
359
345
readWorkbookRelationships :: Zip. ZipArchive Relationships
360
346
readWorkbookRelationships = do
@@ -380,7 +366,7 @@ readWorkbookRelationships = do
380
366
-- The relationships xml file will only be parsed once when called
381
367
-- multiple times within a larger XlsxM action.
382
368
getWorkbookRelationships :: XlsxM Relationships
383
- getWorkbookRelationships = runMemoized =<< asks _xs_relationships
369
+ getWorkbookRelationships = runMemoized =<< asks xs_relationships
384
370
385
371
type HexpatEvent = SAXEvent ByteString Text
386
372
@@ -463,14 +449,14 @@ runExpatForSheet ::
463
449
runExpatForSheet initState byteSource inner =
464
450
void $ liftIO $ runExpat initState byteSource handler
465
451
where
466
- sheetName = _ps_sheet_index initState
452
+ sheetName = ps_sheet_index initState
467
453
handler evs = forM_ evs $ \ ev -> do
468
454
parseRes <- runExceptT $ matchHexpatEvent ev
469
455
case parseRes of
470
456
Left err -> throwM err
471
457
Right (Just cellRow)
472
458
| not (IntMap. null cellRow) -> do
473
- rowNum <- use ps_cell_row_index
459
+ rowNum <- use # ps_cell_row_index
474
460
liftIO $ inner $ MkSheetItem sheetName $ MkRow rowNum cellRow
475
461
_ -> pure ()
476
462
@@ -505,7 +491,7 @@ makeIndexFromName sheetName = do
505
491
-- names differ only in alphabetic case (at least for ascii...)
506
492
let sheetNameCI = T. toLower sheetName
507
493
findRes :: Maybe SheetInfo
508
- findRes = find ((== sheetNameCI) . T. toLower . sheetInfoName) $ _wiSheets wi
494
+ findRes = find ((== sheetNameCI) . T. toLower . sheetInfoName) $ wiSheets wi
509
495
pure $ makeIndex . sheetInfoSheetId <$> findRes
510
496
511
497
readSheet ::
@@ -524,9 +510,9 @@ readSheet (MkSheetIndex sheetId) inner = do
524
510
sharedStrs <- getOrParseSharedStringss
525
511
let sheetState0 =
526
512
initialSheetState
527
- & ps_shared_strings
513
+ & # ps_shared_strings
528
514
.~ sharedStrs
529
- & ps_sheet_index
515
+ & # ps_sheet_index
530
516
.~ sheetId
531
517
runExpatForSheet sheetState0 sourceSheetXml inner
532
518
pure True
@@ -549,8 +535,8 @@ countRowsInSheet (MkSheetIndex sheetId) = do
549
535
-- | Return row from the state and empty it
550
536
popRow :: (HasSheetState m ) => m CellRow
551
537
popRow = do
552
- row <- use ps_row
553
- ps_row .= mempty
538
+ row <- use # ps_row
539
+ # ps_row .= mempty
554
540
pure row
555
541
556
542
data AddCellErrors
@@ -606,25 +592,25 @@ addCellToRow ::
606
592
m ()
607
593
addCellToRow txt = do
608
594
st <- get
609
- style <- use ps_cell_style
610
- when (_ps_is_in_val st) $ do
595
+ style <- use # ps_cell_style
596
+ when (ps_is_in_val st) $ do
611
597
val <-
612
598
liftEither $
613
599
first ParseCellError $
614
- parseValue (_ps_shared_strings st) txt (_ps_type st)
600
+ parseValue (ps_shared_strings st) txt (ps_type st)
615
601
put $
616
602
st
617
- { _ps_row =
603
+ { ps_row =
618
604
IntMap. insert
619
- (unColumnIndex $ _ps_cell_col_index st)
605
+ (unColumnIndex $ ps_cell_col_index st)
620
606
( Cell
621
607
{ _cellStyle = style,
622
608
_cellValue = Just val,
623
609
_cellComment = Nothing ,
624
610
_cellFormula = Nothing
625
611
}
626
612
)
627
- $ _ps_row st
613
+ $ ps_row st
628
614
}
629
615
630
616
data SheetErrors
@@ -680,14 +666,14 @@ matchHexpatEvent ev = case ev of
680
666
CharacterData txt ->
681
667
{-# SCC "handle_CharData" #-}
682
668
do
683
- inVal <- use ps_is_in_val
669
+ inVal <- use # ps_is_in_val
684
670
when inVal $
685
- {-# SCC "append_text_buf" #-} (ps_text_buf <>= txt)
671
+ {-# SCC "append_text_buf" #-} (# ps_text_buf <>= txt)
686
672
pure Nothing
687
673
StartElement " c" attrs -> Nothing <$ (setCoord attrs *> setType attrs *> setStyle attrs)
688
- StartElement " is" _ -> Nothing <$ (ps_is_in_val .= True )
674
+ StartElement " is" _ -> Nothing <$ (# ps_is_in_val .= True )
689
675
EndElement " is" -> Nothing <$ finaliseCellValue
690
- StartElement " v" _ -> Nothing <$ (ps_is_in_val .= True )
676
+ StartElement " v" _ -> Nothing <$ (# ps_is_in_val .= True )
691
677
EndElement " v" -> Nothing <$ finaliseCellValue
692
678
-- If beginning of row, empty the state and return nothing.
693
679
-- We don't know if there is anything in the state, the user may have
@@ -697,15 +683,15 @@ matchHexpatEvent ev = case ev of
697
683
-- If at the end of the row, we have collected the whole row into
698
684
-- the current state. Empty the state and return the row.
699
685
EndElement " row" -> Just <$> popRow
700
- StartElement " worksheet" _ -> ps_worksheet_ended .= False >> pure Nothing
701
- EndElement " worksheet" -> ps_worksheet_ended .= True >> pure Nothing
686
+ StartElement " worksheet" _ -> # ps_worksheet_ended .= False >> pure Nothing
687
+ EndElement " worksheet" -> # ps_worksheet_ended .= True >> pure Nothing
702
688
-- Skip everything else, e.g. the formula elements <f>
703
689
FailDocument err -> do
704
690
-- this event is emitted at the end the xml stream (possibly
705
691
-- because the xml files in xlsx archives don't end in a
706
692
-- newline, but that's a guess), so we use state to determine if
707
693
-- it's expected.
708
- finished <- use ps_worksheet_ended
694
+ finished <- use # ps_worksheet_ended
709
695
unless finished $
710
696
throwError $
711
697
HexpatParseError err
@@ -716,12 +702,12 @@ matchHexpatEvent ev = case ev of
716
702
finaliseCellValue ::
717
703
(MonadError SheetErrors m , HasSheetState m ) => m ()
718
704
finaliseCellValue = do
719
- txt <- gets _ps_text_buf
705
+ txt <- gets ps_text_buf
720
706
addCellToRow txt
721
707
modify' $ \ st ->
722
708
st
723
- { _ps_is_in_val = False ,
724
- _ps_text_buf = mempty
709
+ { ps_is_in_val = False ,
710
+ ps_text_buf = mempty
725
711
}
726
712
727
713
-- | Update state coordinates accordingly to @parseCoordinates@
@@ -734,8 +720,8 @@ setCoord ::
734
720
m ()
735
721
setCoord list = do
736
722
coordinates <- liftEither $ first ParseCoordinateError $ parseCoordinates list
737
- ps_cell_col_index .= (coordinates ^. _2)
738
- ps_cell_row_index .= (coordinates ^. _1)
723
+ # ps_cell_col_index .= (coordinates ^. _2)
724
+ # ps_cell_row_index .= (coordinates ^. _1)
739
725
740
726
-- | Parse type from values and update state accordingly
741
727
setType ::
@@ -746,7 +732,7 @@ setType ::
746
732
m ()
747
733
setType list = do
748
734
type' <- liftEither $ first ParseTypeError $ parseType list
749
- ps_type .= type'
735
+ # ps_type .= type'
750
736
751
737
-- | Find sheet value by its name
752
738
findName :: ByteString -> SheetValues -> Maybe SheetValue
@@ -756,7 +742,7 @@ findName name = find ((name ==) . fst)
756
742
setStyle :: (MonadError SheetErrors m , HasSheetState m ) => SheetValues -> m ()
757
743
setStyle list = do
758
744
style <- liftEither $ first ParseStyleErrors $ parseStyle list
759
- ps_cell_style .= style
745
+ # ps_cell_style .= style
760
746
761
747
data StyleError = InvalidStyleRef { seInput :: Text , seErrorMsg :: String }
762
748
deriving (Show )
0 commit comments