diff --git a/glean/lang/scip/indexer/Glean/Indexer/SCIP.hs b/glean/lang/scip/indexer/Glean/Indexer/SCIP.hs index 55e13a633..d2535f8b0 100644 --- a/glean/lang/scip/indexer/Glean/Indexer/SCIP.hs +++ b/glean/lang/scip/indexer/Glean/Indexer/SCIP.hs @@ -25,11 +25,41 @@ import qualified Glean import System.Directory (doesFileExist) import Util.OptParse (maybeStrOption) --- | A generic SCIP indexer, for existing scip files -newtype SCIP = SCIP - { scipIndexFile :: Maybe FilePath +data SCIP = SCIP + { scipFile :: FilePath -- ^ input file + , outputFile :: FilePath -- ^ output file + , scipLanguage :: Maybe LanguageId -- ^ a default language if known + , inferLanguage :: Bool -- ^ default False, infer language using file suffix + , scipPathPrefix :: Maybe FilePath -- ^ optional path to prefix file paths + , stripPathPrefix :: Maybe FilePath -- ^ optional prefix to drop from paths } - -- no options currently + +options :: Parser SCIP +options = do + scipFile <- option str $ long "input" <> + metavar "PATH" <> + help "Path to a specific SCIP file to convert" + outputFile <- option str $ long "output" + <> metavar "PATH" + <> help "Output filepath to write encoded schema info" + scipLanguage <- option (Just <$> readLanguage) $ long "language" <> + metavar "LANGUAGE" <> + value Nothing <> + help "Default language of files in the index" + inferLanguage <- switch $ + short 'i' <> + long "infer-language" <> + help ("Infer symbol language based on file suffix " <> + "(when set this takes precedence over --language)") + scipPathPrefix <- option (Just <$> str) $ long "root-prefix" <> + metavar "PATH" <> + value Nothing <> + help "Path to prepend to file path data" + stripPathPrefix <- option (Just <$> str) $ long "strip-prefix" <> + metavar "PATH" <> + value Nothing <> + help "Path prefix to strip from path data" + return SCIP{..} options :: Parser SCIP options = @@ -59,6 +89,22 @@ indexer = Indexer { sendJsonBatches backend repo "scip" val derive backend repo } + +-- If the indexer 
doesn't set the language Id of the files, we +-- can assert it here. Otherwise Glean won't know what language the +-- symbols are in +readLanguage :: ReadM LanguageId +readLanguage = do + ln <- Text.toLower <$> str + case ln of + "typescript" -> return TypeScript + "rust" -> return Rust + "go" -> return Go + "java" -> return Java + "kotlin" -> return Kotlin + "csharp" -> return CSharp + "swift" -> return Swift + _ -> readerError "Unrecognized SCIP language" -- | Derive any SCIP stored predicates derive :: Glean.Backend b => b -> Glean.Repo -> IO ()