-
Notifications
You must be signed in to change notification settings - Fork 18
DRAFT - add support for private github URLs #531
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| java temurin-11.0.21+9 | ||
| sbt 1.9.7 | ||
| python 3.11.6 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,7 +14,8 @@ import dx.core.languages.wdl.WdlOptions | |
| import dx.dxni.DxNativeInterface | ||
| import dx.translator.{Extras, TranslatorFactory} | ||
| import dx.util.protocols.DxFileAccessProtocol | ||
| import dx.util.{Enum, FileSourceResolver, FileUtils, Logger, TraceLevel} | ||
| import dx.util.{Enum, FileAccessProtocol, FileSourceResolver, FileUtils, LocalFileAccessProtocol, Logger, TraceLevel} | ||
| import dx.core.io.AuthenticatedHttpFileAccessProtocol | ||
| import spray.json.{JsNull, JsValue} | ||
| import wdlTools.types.TypeCheckingRegime | ||
|
|
||
|
|
@@ -67,17 +68,25 @@ object Main { | |
| * - creates a FileSourceResolver that looks for local files in any configured -imports | ||
| * directories and has a DxFileAccessProtocol | ||
| * - initializes a Logger | ||
| * - configures authenticated HTTP imports if WDL_IMPORT_TOKEN is set | ||
| * @param options parsed options | ||
| * @return (FileSourceResolver, Logger) | ||
| */ | ||
| private def initCommon(options: Options): (FileSourceResolver, Logger) = { | ||
| val logger = initLogger(options) | ||
| val imports: Vector[Path] = options.getList[Path]("imports") | ||
| val fileResolver = FileSourceResolver.create( | ||
| imports, | ||
| Vector(DxFileAccessProtocol()), | ||
| logger | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is right and the order look like it will be the same (imports, now http then dx file access) Here's the code for LocalFileAccessProtocol and FileSourceResolver |
||
| // Create authenticated HTTP protocol for importing from private repositories | ||
| val httpProtocol = AuthenticatedHttpFileAccessProtocol.fromEnvironment(logger) | ||
|
|
||
| // Build protocol list - order matters, first matching protocol wins | ||
| val protocols: Vector[FileAccessProtocol] = Vector( | ||
| LocalFileAccessProtocol(imports, logger), | ||
| httpProtocol, | ||
| DxFileAccessProtocol() | ||
| ) | ||
|
|
||
| val fileResolver = FileSourceResolver(protocols) | ||
| FileSourceResolver.set(fileResolver) | ||
| (fileResolver, logger) | ||
| } | ||
|
|
@@ -877,7 +886,8 @@ object Main { | |
| | input values may only be specified for the top-level workflow. | ||
| | -leaveWorkflowsOpen Leave created workflows open (otherwise they are closed). | ||
| | -p | -imports <string> Directory to search for imported WDL or CWL files. May be specified | ||
| | multiple times. | ||
| | multiple times. For HTTP imports from private repositories, | ||
| | set the WDL_IMPORT_TOKEN environment variable (see below). | ||
| | -projectWideReuse Look for existing applets/workflows in the entire project | ||
| | before generating new ones. The default search scope is the | ||
| | target folder only. | ||
|
|
@@ -926,6 +936,12 @@ object Main { | |
| | -verboseKey <module> Print verbose output only for a specific module. May be | ||
| | specified multiple times. | ||
| | -logFile <path> File to use for logging output; defaults to stderr. | ||
| | | ||
| |Environment variables | ||
| | WDL_IMPORT_TOKEN Bearer token for authenticated HTTP imports (e.g., GitHub PAT | ||
| | for private repositories). Token is sent only to allowed domains. | ||
| | WDL_IMPORT_TOKEN_DOMAINS Comma-separated list of domains to send the token to. | ||
| | Defaults to: github.com,raw.githubusercontent.com | ||
| |""".stripMargin | ||
|
|
||
| def main(args: Vector[String]): Unit = { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| package dx.core.io | ||
|
|
||
| import dx.util.{FileAccessProtocol, FileUtils, Logger} | ||
| import java.net.URI | ||
| import java.nio.charset.Charset | ||
|
|
||
| /** | ||
| * HTTP file access protocol with Bearer token authentication support. | ||
| * | ||
| * Reads authentication token from WDL_IMPORT_TOKEN environment variable. | ||
| * Only sends tokens to allowed domains (configurable via WDL_IMPORT_TOKEN_DOMAINS) | ||
| * to prevent credential leakage to untrusted servers. | ||
| * | ||
| * @param token Optional Bearer token (defaults to WDL_IMPORT_TOKEN env var) | ||
| * @param allowedDomains Set of domains to send auth token to | ||
| * @param encoding Character encoding for file content | ||
| * @param logger Logger for trace/debug output | ||
| */ | ||
| case class AuthenticatedHttpFileAccessProtocol( | ||
| token: Option[String] = None, | ||
| allowedDomains: Set[String] = AuthenticatedHttpFileAccessProtocol.defaultAllowedDomains, | ||
| encoding: Charset = FileUtils.DefaultEncoding, | ||
| logger: Logger = Logger.Quiet | ||
| ) extends FileAccessProtocol { | ||
|
|
||
| override val schemes: Vector[String] = Vector(FileUtils.HttpScheme, FileUtils.HttpsScheme) | ||
| override val supportsDirectories: Boolean = true | ||
|
|
||
| /** | ||
| * Determines if authentication should be used for the given URI. | ||
| * Only returns true if a token is configured AND the domain is in the allowed list. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. checking my understanding here, if the import belongs to one of the allowed domains AND a token is set in the env then this protocol will be used. Is this done on an import-by-import basis? What happens if we have imports from multiple sources? e.g. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If I'm reading this correctly, it does it per source in the resolver (above). Likely worth just trying it just to make sure but it looks like that's what it's doing. |
||
| */ | ||
| private def shouldAuthenticate(uri: URI): Boolean = { | ||
| token.isDefined && Option(uri.getHost).exists(host => | ||
| allowedDomains.exists(_.equalsIgnoreCase(host)) | ||
| ) | ||
| } | ||
|
|
||
| override def resolve(address: String): AuthenticatedHttpFileSource = { | ||
| val uri = URI.create(address) | ||
| val useAuth = shouldAuthenticate(uri) | ||
| if (useAuth) { | ||
| logger.trace(s"Using authenticated HTTP for import from: ${uri.getHost}") | ||
| } | ||
| AuthenticatedHttpFileSource(uri, encoding, isDirectory = false, if (useAuth) token else None)(address) | ||
| } | ||
|
|
||
| override def resolveDirectory(address: String): AuthenticatedHttpFileSource = { | ||
| val uri = URI.create(address) | ||
| val useAuth = shouldAuthenticate(uri) | ||
| if (useAuth) { | ||
| logger.trace(s"Using authenticated HTTP for directory import from: ${uri.getHost}") | ||
| } | ||
| AuthenticatedHttpFileSource(uri, encoding, isDirectory = true, if (useAuth) token else None)(address) | ||
| } | ||
| } | ||
|
|
||
| object AuthenticatedHttpFileAccessProtocol { | ||
|
|
||
| /** Environment variable name for the Bearer token */ | ||
| val TokenEnvVar: String = "WDL_IMPORT_TOKEN" | ||
|
|
||
| /** Environment variable name for custom allowed domains */ | ||
| val DomainsEnvVar: String = "WDL_IMPORT_TOKEN_DOMAINS" | ||
|
|
||
| /** Default allowed domains that will receive the auth token */ | ||
| val defaultDomains: Set[String] = Set( | ||
| "github.com", | ||
| "raw.githubusercontent.com" | ||
| ) | ||
|
|
||
| /** | ||
| * Gets the allowed domains from environment variable or defaults. | ||
| * WDL_IMPORT_TOKEN_DOMAINS should be a comma-separated list of domains. | ||
| */ | ||
| lazy val defaultAllowedDomains: Set[String] = { | ||
| sys.env.get(DomainsEnvVar) match { | ||
| case Some(domains) => | ||
| domains.split(",").map(_.trim.toLowerCase).filter(_.nonEmpty).toSet | ||
| case None => | ||
| defaultDomains | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Creates an instance with configuration from environment variables. | ||
| * | ||
| * @param logger Logger for trace output (token values are never logged) | ||
| * @return AuthenticatedHttpFileAccessProtocol configured from environment | ||
| */ | ||
| def fromEnvironment(logger: Logger = Logger.Quiet): AuthenticatedHttpFileAccessProtocol = { | ||
| val tokenOpt = sys.env.get(TokenEnvVar) | ||
| if (tokenOpt.isDefined) { | ||
| logger.trace( | ||
| s"${TokenEnvVar} found; authenticated HTTP imports enabled for domains: ${defaultAllowedDomains.mkString(", ")}" | ||
| ) | ||
| } | ||
| AuthenticatedHttpFileAccessProtocol( | ||
| token = tokenOpt, | ||
| allowedDomains = defaultAllowedDomains, | ||
| logger = logger | ||
| ) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: DX may ask to revert this however I personally appreciate it