Skip to content

Commit aa4e4db

Browse files
authored
Merge pull request #364 from poseidon-framework/versionedcsvparsing
.janno & .ssf field parsing depending on a package's Poseidon version
2 parents 0abb550 + fa035f8 commit aa4e4db

File tree

47 files changed

+394
-373
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+394
-373
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
- V X.X.X.X:
2+
- Introduced smart .janno field construction based on the relevant Poseidon version.
23
- Changes to .janno columns according to Poseidon v3.0.0:
34
- Replaced column `Source_Tissue` with column `Source_Material`.
45
- New column `Individual_ID`.

src/Poseidon/CLI/Jannocoalesce.hs

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,29 @@
33

44
module Poseidon.CLI.Jannocoalesce where
55

6-
import Poseidon.Janno (JannoRow (..), JannoRows (..),
7-
parseJannoRowFromNamedRecord,
8-
readJannoFile, writeJannoFile)
9-
import Poseidon.Package (PackageReadOptions (..),
10-
defaultPackageReadOptions,
11-
getJointJanno,
12-
readPoseidonPackageCollection)
13-
import Poseidon.Utils (PoseidonException (..), PoseidonIO,
14-
logDebug, logInfo, logWarning)
6+
import Poseidon.Janno (JannoRow (..), JannoRows (..),
7+
parseJannoRowFromNamedRecord,
8+
readJannoFile, writeJannoFile)
9+
import Poseidon.Package (PackageReadOptions (..),
10+
defaultPackageReadOptions,
11+
getJointJanno,
12+
readPoseidonPackageCollection)
13+
import Poseidon.Utils (PoseidonException (..), PoseidonIO,
14+
logDebug, logInfo, logWarning)
1515

16-
import Control.Monad (filterM, forM_, when)
17-
import Control.Monad.Catch (MonadThrow, throwM)
18-
import Control.Monad.IO.Class (liftIO)
19-
import qualified Data.ByteString.Char8 as BSC
20-
import qualified Data.Csv as Csv
21-
import qualified Data.HashMap.Strict as HM
22-
import qualified Data.IORef as R
23-
import Data.List ((\\))
24-
import Data.Text (pack, replace, unpack)
25-
import System.Directory (createDirectoryIfMissing)
26-
import System.FilePath (takeDirectory)
27-
import Text.Regex.TDFA ((=~))
16+
import Control.Monad (filterM, forM_, when)
17+
import Control.Monad.Catch (MonadThrow, throwM)
18+
import Control.Monad.IO.Class (liftIO)
19+
import qualified Data.ByteString.Char8 as BSC
20+
import qualified Data.Csv as Csv
21+
import qualified Data.HashMap.Strict as HM
22+
import qualified Data.IORef as R
23+
import Data.List ((\\))
24+
import Data.Text (pack, replace, unpack)
25+
import Poseidon.PoseidonVersion (latestPoseidonVersion)
26+
import System.Directory (createDirectoryIfMissing)
27+
import System.FilePath (takeDirectory)
28+
import Text.Regex.TDFA ((=~))
2829

2930
-- the source can be a single janno file, or a set of base directories as usual.
3031
data JannoSourceSpec = JannoSourceSingle FilePath | JannoSourceBaseDirs [FilePath]
@@ -48,7 +49,7 @@ data JannoCoalesceOptions = JannoCoalesceOptions
4849
runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
4950
runJannocoalesce (JannoCoalesceOptions sourceSpec target outSpec fields overwrite sKey tKey maybeStrip) = do
5051
JannoRows sourceRows <- case sourceSpec of
51-
JannoSourceSingle sourceFile -> readJannoFile [] sourceFile
52+
JannoSourceSingle sourceFile -> readJannoFile latestPoseidonVersion [] sourceFile
5253
JannoSourceBaseDirs sourceDirs -> do
5354
let pacReadOpts = defaultPackageReadOptions {
5455
_readOptIgnoreChecksums = True
@@ -57,7 +58,7 @@ runJannocoalesce (JannoCoalesceOptions sourceSpec target outSpec fields overwrit
5758
, _readOptOnlyLatest = True
5859
}
5960
getJointJanno <$> readPoseidonPackageCollection pacReadOpts sourceDirs
60-
JannoRows targetRows <- readJannoFile [] target
61+
JannoRows targetRows <- readJannoFile latestPoseidonVersion [] target
6162

6263
newJanno <- makeNewJannoRows sourceRows targetRows fields overwrite sKey tKey maybeStrip
6364

@@ -123,7 +124,7 @@ mergeRow cp targetRow sourceRow fields overwrite sKey tKey = do
123124
-- fill in the target row with dummy values for desired fields that might not be present yet
124125
targetComplete = HM.union targetRowRecord (HM.fromList $ map (, BSC.empty) sourceKeysDesired)
125126
newRowRecord = HM.mapWithKey fillFromSource targetComplete
126-
parseResult = Csv.runParser . parseJannoRowFromNamedRecord [] $ newRowRecord
127+
parseResult = Csv.runParser . parseJannoRowFromNamedRecord latestPoseidonVersion [] $ newRowRecord
127128
logInfo $ "matched target " ++ BSC.unpack (targetComplete HM.! BSC.pack tKey) ++
128129
" with source " ++ BSC.unpack (sourceRowRecord HM.! BSC.pack sKey)
129130
case parseResult of

src/Poseidon/CLI/Validate.hs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import qualified Data.ByteString.Char8 as Bchs
2727
import Data.List (groupBy, intercalate, sortOn)
2828
import Data.Yaml (decodeEither')
2929
import Poseidon.EntityTypes (IndividualInfo (..))
30+
import Poseidon.PoseidonVersion (latestPoseidonVersion)
3031
import System.Exit (exitFailure, exitSuccess)
3132

3233
-- | A datatype representing command line options for the validate command
@@ -106,12 +107,12 @@ runValidate (ValidateOptions (ValPlanGeno geno) _ _ noExitCode _) = do
106107
conclude True noExitCode
107108
runValidate (ValidateOptions (ValPlanJanno path) mandatoryJannoCols _ noExitCode _) = do
108109
logInfo $ "Validating: " ++ path
109-
(JannoRows entries) <- readJannoFile mandatoryJannoCols path
110+
(JannoRows entries) <- readJannoFile latestPoseidonVersion mandatoryJannoCols path
110111
logInfo $ "All " ++ show (length entries) ++ " entries are valid"
111112
conclude True noExitCode
112113
runValidate (ValidateOptions (ValPlanSSF path) _ mandatorySSFCols noExitCode _) = do
113114
logInfo $ "Validating: " ++ path
114-
(SeqSourceRows entries) <- readSeqSourceFile mandatorySSFCols path
115+
(SeqSourceRows entries) <- readSeqSourceFile latestPoseidonVersion mandatorySSFCols path
115116
logInfo $ "All " ++ show (length entries) ++ " entries are valid"
116117
conclude True noExitCode
117118
runValidate (ValidateOptions (ValPlanBib path) _ _ noExitCode _) = do

0 commit comments

Comments
 (0)