-
Notifications
You must be signed in to change notification settings - Fork 774
Data lineage programmatic API #6003
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
bbc21d8
fbdbeef
3aa4f0f
cdd9e89
62a2cfa
3e37728
ba309cd
6abae3c
00f7f2e
584c4ee
4974bee
b5a2671
117ef30
321c02a
224d92d
9136ccc
6bf2c75
689cdcc
9d6b77a
e77a9c0
064349a
251c06e
8b4ca1e
880461c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
| package nextflow.extension | ||
|
|
||
| import groovyx.gpars.dataflow.DataflowWriteChannel | ||
| import nextflow.Session | ||
|
|
||
| /** | ||
| * Contract for the lineage channel operations. | ||
| * It is implemented by {@code nextflow.lineage.LinChanneExImpl} in this change set. | ||
| */ | ||
| interface LinChannelEx { | ||
| /** | ||
| * Emit into {@code channel} the lineage content addressed by {@code uri}. | ||
| * The implementation in this change set binds the query results and then the stop signal. | ||
| * | ||
| * @param session the current Nextflow session | ||
| * @param channel the channel receiving the results | ||
| * @param uri the URI passed to the lineage query | ||
| */ | ||
| void viewLineage(Session session, DataflowWriteChannel channel, URI uri) | ||
|
|
||
| /** | ||
| * Emit into {@code channel} the lineage entries matching {@code query}. | ||
| * The implementation in this change set binds one lineage path per matching key and then the stop signal. | ||
| * | ||
| * @param session the current Nextflow session | ||
| * @param channel the channel receiving the results | ||
| * @param query the query string handed to the lineage store search | ||
| */ | ||
| void queryLineage(Session session, DataflowWriteChannel channel, String query) | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -66,7 +66,7 @@ class PathVisitor { | |
| applyRegexPattern0(filePattern) | ||
|
|
||
| else if( filePattern != null ) | ||
| applyGlobPattern0(filePattern as Path) | ||
| applyPathPattern0(filePattern as Path) | ||
|
|
||
| else | ||
| throw new IllegalArgumentException("Missing file pattern argument") | ||
|
|
@@ -103,6 +103,27 @@ class PathVisitor { | |
| target.bind(STOP) | ||
| } | ||
|
|
||
| /** | ||
| * Route a path pattern to the matching strategy: paths carrying a query are | ||
| * resolved through {@code applyQueryablePath0}, every other path is handled as a glob. | ||
| * | ||
| * @param filePattern the path pattern to visit | ||
| */ | ||
| private void applyPathPattern0(Path filePattern) { | ||
| // plain paths take the glob route and leave early | ||
| if( !isQuery(filePattern) ) { | ||
| applyGlobPattern0(filePattern) | ||
| return | ||
| } | ||
| applyQueryablePath0(filePattern as QueryablePath) | ||
| } | ||
|
|
||
| /** | ||
| * Tell whether the given path must be resolved as a query. | ||
| * | ||
| * @param filePattern the path to inspect | ||
| * @return {@code true} only when the path is a {@code QueryablePath} and reports a query | ||
| */ | ||
| private static boolean isQuery(Path filePattern) { | ||
| log.debug("Checking if query: $filePattern.class ") | ||
| // anything that is not a QueryablePath can never hold a query | ||
| if( !(filePattern instanceof QueryablePath) ) | ||
| return false | ||
| return (filePattern as QueryablePath).hasQuery() | ||
| } | ||
|
|
||
| /** | ||
| * Resolve the query held by {@code path}, emit every resulting path and close the target. | ||
| * | ||
| * The method only produces side effects, so it is declared {@code void}; the previous | ||
| * {@code boolean} return type never carried a meaningful value. | ||
| * | ||
| * @param path the queryable path to resolve | ||
| * @throws FileNotFoundException when the query resolves to no paths (null or empty result) | ||
| */ | ||
| private void applyQueryablePath0(QueryablePath path) { | ||
| final paths = path.resolveQuery() | ||
| if( !paths ) | ||
| throw new FileNotFoundException("No files found for ${path}") | ||
|
|
||
| for( Path resolved : paths ) | ||
| emit0(resolved) | ||
| close0() | ||
| } | ||
|
|
||
| private void applyGlobPattern0(Path filePattern) { | ||
|
|
||
| final glob = opts?.containsKey('glob') ? opts.glob as boolean : true | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| package nextflow.file | ||
|
|
||
| import java.nio.file.Path | ||
|
|
||
| /** | ||
| * Interface to indicate a Path could contain a query that is resolved to several real paths. | ||
| * | ||
| * @author Jorge Ejarque <jorge.ejarque@seqera.io> | ||
| */ | ||
| interface QueryablePath { | ||
| /** | ||
| * @return {@code true} when this path carries a query to be resolved | ||
| */ | ||
| boolean hasQuery(); | ||
| /** | ||
| * Resolve the query held by this path. | ||
| * | ||
| * @return the paths the query resolves to; NOTE(review): PathVisitor treats a null or empty result as "no files found" | ||
| */ | ||
| List<Path> resolveQuery(); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| /* | ||
| * Copyright 2013-2025, Seqera Labs | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package nextflow.lineage | ||
|
|
||
| import groovy.transform.CompileStatic | ||
| import groovy.util.logging.Slf4j | ||
| import groovyx.gpars.dataflow.DataflowWriteChannel | ||
| import nextflow.Channel | ||
| import nextflow.Session | ||
| import nextflow.extension.LinChannelEx | ||
| import nextflow.lineage.fs.LinPath | ||
| import nextflow.lineage.fs.LinPathFactory | ||
| import nextflow.lineage.serde.LinSerializable | ||
|
|
||
| /** | ||
| * Lineage channel extensions | ||
| * | ||
| * Implements {@link LinChannelEx} on top of the lineage store of the session: the results | ||
| * are bound to the target channel, which is then terminated with {@code Channel.STOP}. | ||
| * | ||
| * @author Jorge Ejarque <jorge.ejarque@seqera.io> | ||
| */ | ||
| @CompileStatic | ||
| @Slf4j | ||
| class LinChanneExImpl implements LinChannelEx { | ||
|
|
||
| /** | ||
| * Query the lineage store with {@code uri}, bind every encoded result to | ||
| * {@code channel} and finally bind {@code Channel.STOP}. | ||
| * | ||
| * @param session the session used to look up the lineage store | ||
| * @param channel the channel receiving the results | ||
| * @param uri the URI handed to {@code LinUtils.query} | ||
| * @throws IllegalStateException when no lineage store is available | ||
| */ | ||
| @Override | ||
| void viewLineage(Session session, DataflowWriteChannel channel, URI uri) { | ||
| final store = getStore(session) | ||
| emitResults(channel, LinUtils.query(store, uri)) | ||
| channel.bind(Channel.STOP) | ||
| } | ||
|
|
||
| /** | ||
| * Search the lineage store with {@code query}, bind one lineage path per matching | ||
| * key to {@code channel} and finally bind {@code Channel.STOP}. | ||
| * | ||
| * @param session the session used to look up the lineage store | ||
| * @param channel the channel receiving the results | ||
| * @param query the query string handed to the store search | ||
| * @throws IllegalStateException when no lineage store is available | ||
| */ | ||
| @Override | ||
| void queryLineage(Session session, DataflowWriteChannel channel, String query) { | ||
| final store = getStore(session) | ||
| emitSearchResults(channel, store.search(query)) | ||
| channel.bind(Channel.STOP) | ||
| } | ||
|
|
||
| /** | ||
| * Look up the lineage store bound to the session. | ||
| * | ||
| * @param session the current session | ||
| * @return the store returned by {@code LinStoreFactory.getOrCreate} | ||
| * @throws IllegalStateException when the factory yields no store | ||
| */ | ||
| protected LinStore getStore(Session session) { | ||
| final store = LinStoreFactory.getOrCreate(session) | ||
| if( !store ) { | ||
| // a specific unchecked type instead of a raw Exception; still caught by catch(Exception) | ||
| throw new IllegalStateException("Lineage store not found - Check Nextflow configuration") | ||
| } | ||
| return store | ||
| } | ||
|
|
||
| /** | ||
| * Bind the encoded form of every result to the channel. | ||
| * A null or empty collection emits nothing. | ||
| * | ||
| * @param channel the channel receiving the results | ||
| * @param results the query results, possibly nested | ||
| */ | ||
| private static void emitResults(DataflowWriteChannel channel, Collection results) { | ||
| if( !results ) { | ||
| return | ||
| } | ||
| // Remove nested collections of a single element | ||
| if( results.size() == 1 ) { | ||
| final entry = results[0] | ||
| if( entry instanceof Collection ) { | ||
| emitResults(channel, entry) | ||
| } else { | ||
| channel.bind(LinUtils.encodeSearchOutputs(entry)) | ||
| } | ||
| } else { | ||
| results.forEach { channel.bind(LinUtils.encodeSearchOutputs(it)) } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Bind to the channel a lineage path for every key of the search results. | ||
| * Each path is created from {@code LinPath.LID_PROT} followed by the key. | ||
| * A null or empty map emits nothing. | ||
| * | ||
| * @param channel the channel receiving the paths | ||
| * @param results the search results keyed by lineage identifier | ||
| */ | ||
| private static void emitSearchResults(DataflowWriteChannel channel, Map<String, LinSerializable> results) { | ||
| if( !results ) { | ||
| return | ||
| } | ||
| results.keySet().forEach { channel.bind(LinPathFactory.create(LinPath.LID_PROT + it)) } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's just use a Map here instead of a query string. I would apply the same change to the
`find` command as well. There is no need to add the extra complexity of URL encoding.