Skip to content

Commit 8067145

Browse files
bentsherman and jorgee authored
Allow collection-type params to be loaded from files (#6675)
Co-authored-by: Jorge Ejarque <[email protected]>
1 parent e723425 commit 8067145

File tree

3 files changed

+248
-10
lines changed

3 files changed

+248
-10
lines changed

docs/workflow.md

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,7 @@ params {
5353
}
5454
```
5555

56-
The following types can be used for parameters:
57-
58-
- {ref}`stdlib-types-boolean`
59-
- {ref}`stdlib-types-float`
60-
- {ref}`stdlib-types-integer`
61-
- {ref}`stdlib-types-path`
62-
- {ref}`stdlib-types-string`
56+
All {ref}`standard types <stdlib-types>` except for the dataflow types (`Channel` and `Value`) can be used for parameters.
6357

6458
Parameters can be used in the entry workflow:
6559

@@ -75,7 +69,16 @@ As a best practice, parameters should only be referenced in the entry workflow o
7569

7670
The default value can be overridden by the command line, params file, or config file. Parameters from multiple sources are resolved in the order described in {ref}`cli-params`. Parameters specified on the command line are converted to the appropriate type based on the corresponding type annotation.
7771

78-
A parameter that doesn't specify a default value is a *required* param. If a required param is not given a value at runtime, the run will fail.
72+
A parameter that doesn't specify a default value is a *required* parameter. If a required parameter is not given a value at runtime, the run will fail.
73+
74+
:::{versionadded} 26.04.0
75+
:::
76+
77+
Parameters with a collection type (i.e., `List`, `Set`, or `Bag`) can be supplied a file path instead of a literal collection. The file must be CSV, JSON, or YAML. Nextflow will parse the file contents and assign the resulting collection to the parameter. An error is thrown if the file contents do not match the parameter type.
78+
79+
:::{note}
80+
When supplying a CSV file to a collection parameter, the CSV file must contain a header row and must use a comma (`,`) as the column separator.
81+
:::
7982

8083
(workflow-params-legacy)=
8184

modules/nextflow/src/main/groovy/nextflow/script/ParamsDsl.groovy

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,22 @@ package nextflow.script
1818

1919
import java.nio.file.Path
2020

21+
import groovy.json.JsonSlurper
22+
import groovy.yaml.YamlSlurper
2123
import groovy.transform.Canonical
2224
import groovy.transform.CompileStatic
2325
import groovy.util.logging.Slf4j
2426
import nextflow.Session
2527
import nextflow.file.FileHelper
2628
import nextflow.exception.ScriptRuntimeException
29+
import nextflow.script.types.Bag
2730
import nextflow.script.types.Types
31+
import nextflow.splitter.CsvSplitter
32+
import nextflow.util.ArrayBag
33+
import nextflow.util.Duration
34+
import nextflow.util.MemoryUnit
35+
import org.codehaus.groovy.runtime.typehandling.DefaultTypeTransformation
36+
import org.codehaus.groovy.runtime.typehandling.GroovyCastException
2837
/**
2938
* Implements the DSL for defining workflow params
3039
*
@@ -102,6 +111,18 @@ class ParamsDsl {
102111
if( str.isBigDecimal() ) return str.toBigDecimal()
103112
}
104113

114+
if( decl.type == Duration ) {
115+
return Duration.of(str)
116+
}
117+
118+
if( decl.type == MemoryUnit ) {
119+
return MemoryUnit.of(str)
120+
}
121+
122+
if( Collection.class.isAssignableFrom(decl.type) ) {
123+
return resolveFromFile(decl.name, decl.type, FileHelper.asPath(str))
124+
}
125+
105126
if( decl.type == Path ) {
106127
return FileHelper.asPath(str)
107128
}
@@ -113,12 +134,41 @@ class ParamsDsl {
113134
if( value == null )
114135
return null
115136

116-
if( decl.type == Path && value instanceof CharSequence )
117-
return FileHelper.asPath(value.toString())
137+
if( value !instanceof CharSequence )
138+
return value
139+
140+
final str = value.toString()
141+
142+
if( Collection.class.isAssignableFrom(decl.type) )
143+
return resolveFromFile(decl.name, decl.type, FileHelper.asPath(str))
144+
145+
if( decl.type == Path )
146+
return FileHelper.asPath(str)
118147

119148
return value
120149
}
121150

151+
private Object resolveFromFile(String name, Class type, Path file) {
152+
final ext = file.getExtension()
153+
final value = switch( ext ) {
154+
case 'csv' -> new CsvSplitter().options(header: true, sep: ',').target(file).list()
155+
case 'json' -> new JsonSlurper().parse(file)
156+
case 'yaml' -> new YamlSlurper().parse(file)
157+
case 'yml' -> new YamlSlurper().parse(file)
158+
default -> throw new ScriptRuntimeException("Unrecognized file format '${ext}' for input file '${file}' supplied for parameter `${name}` -- should be CSV, JSON, or YAML")
159+
}
160+
161+
try {
162+
if( Bag.class.isAssignableFrom(type) && value instanceof Collection )
163+
return new ArrayBag(value)
164+
return DefaultTypeTransformation.castToType(value, type)
165+
}
166+
catch( GroovyCastException e ) {
167+
final actualType = value.getClass()
168+
throw new ScriptRuntimeException("Parameter `${name}` with type ${Types.getName(type)} cannot be assigned to contents of '${file}' [${Types.getName(actualType)}]")
169+
}
170+
}
171+
122172
private boolean isAssignableFrom(Class target, Class source) {
123173
if( target == Float.class )
124174
return Number.class.isAssignableFrom(source)

modules/nextflow/src/test/groovy/nextflow/script/ParamsDslTest.groovy

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package nextflow.script
22

3+
import java.nio.file.Files
34
import java.nio.file.Path
45

56
import nextflow.Session
67
import nextflow.file.FileHelper
78
import nextflow.exception.ScriptRuntimeException
9+
import nextflow.script.types.Bag
810
import spock.lang.Specification
911
import spock.lang.Unroll
1012
/**
@@ -128,4 +130,187 @@ class ParamsDslTest extends Specification {
128130
DEF_VALUE << [ 100i, 100l, 100g ]
129131
}
130132

133+
def 'should load collection param from CSV file'() {
134+
given:
135+
def csvFile = Files.createTempFile('test', '.csv')
136+
csvFile.text = '''\
137+
id,name,value
138+
1,sample1,100
139+
2,sample2,200
140+
3,sample3,300
141+
'''.stripIndent()
142+
def cliParams = [samples: csvFile.toString()]
143+
def session = new Session()
144+
session.init(null, null, cliParams, [:])
145+
146+
when:
147+
def dsl = new ParamsDsl()
148+
dsl.declare('samples', List, false)
149+
dsl.apply(session)
150+
151+
then:
152+
def samples = session.binding.getParams().samples
153+
samples instanceof List
154+
samples.size() == 3
155+
samples[0].id == '1'
156+
samples[0].name == 'sample1'
157+
samples[0].value == '100'
158+
samples[1].id == '2'
159+
samples[2].id == '3'
160+
161+
cleanup:
162+
csvFile?.delete()
163+
}
164+
165+
def 'should load collection param from JSON file'() {
166+
given:
167+
def jsonFile = Files.createTempFile('test', '.json')
168+
jsonFile.text = '''\
169+
[
170+
{"id": 1, "name": "sample1", "value": 100},
171+
{"id": 2, "name": "sample2", "value": 200},
172+
{"id": 3, "name": "sample3", "value": 300}
173+
]
174+
'''.stripIndent()
175+
def cliParams = [
176+
samplesList: jsonFile.toString(),
177+
samplesBag: jsonFile.toString(),
178+
samplesSet: jsonFile.toString()
179+
]
180+
def session = new Session()
181+
session.init(null, null, cliParams, [:])
182+
183+
when:
184+
def dsl = new ParamsDsl()
185+
dsl.declare('samplesList', List, false)
186+
dsl.declare('samplesBag', Bag, false)
187+
dsl.declare('samplesSet', Set, false)
188+
dsl.apply(session)
189+
190+
then:
191+
def samplesList = session.binding.getParams().samplesList
192+
samplesList instanceof List
193+
samplesList.size() == 3
194+
samplesList[0].id == 1
195+
samplesList[0].name == 'sample1'
196+
samplesList[0].value == 100
197+
samplesList[1].id == 2
198+
samplesList[2].id == 3
199+
200+
def samplesBag = session.binding.getParams().samplesBag
201+
samplesBag instanceof Bag
202+
samplesBag.size() == 3
203+
204+
def samplesSet = session.binding.getParams().samplesSet
205+
samplesSet instanceof Set
206+
samplesSet.size() == 3
207+
208+
cleanup:
209+
jsonFile?.delete()
210+
}
211+
212+
def 'should load collection param from YAML file'() {
213+
given:
214+
def yamlFile = Files.createTempFile('test', '.yml')
215+
yamlFile.text = '''\
216+
- id: 1
217+
name: sample1
218+
value: 100
219+
- id: 2
220+
name: sample2
221+
value: 200
222+
- id: 3
223+
name: sample3
224+
value: 300
225+
'''.stripIndent()
226+
def cliParams = [samples: yamlFile.toString()]
227+
def session = new Session()
228+
session.init(null, null, cliParams, [:])
229+
230+
when:
231+
def dsl = new ParamsDsl()
232+
dsl.declare('samples', List, false)
233+
dsl.apply(session)
234+
235+
then:
236+
def samples = session.binding.getParams().samples
237+
samples instanceof List
238+
samples.size() == 3
239+
samples[0].id == 1
240+
samples[0].name == 'sample1'
241+
samples[0].value == 100
242+
samples[1].id == 2
243+
samples[2].id == 3
244+
245+
cleanup:
246+
yamlFile?.delete()
247+
}
248+
249+
def 'should load collection param from file specified in config'() {
250+
given:
251+
def jsonFile = Files.createTempFile('test', '.json')
252+
jsonFile.text = '[{"x": 1}, {"x": 2}]'
253+
def configParams = [items: jsonFile.toString()]
254+
def session = new Session()
255+
session.init(null, null, [:], configParams)
256+
257+
when:
258+
def dsl = new ParamsDsl()
259+
dsl.declare('items', List, false)
260+
dsl.apply(session)
261+
262+
then:
263+
def items = session.binding.getParams().items
264+
items instanceof List
265+
items.size() == 2
266+
items[0].x == 1
267+
items[1].x == 2
268+
269+
cleanup:
270+
jsonFile?.delete()
271+
}
272+
273+
def 'should report error for unrecognized file format'() {
274+
given:
275+
def txtFile = Files.createTempFile('test', '.txt')
276+
txtFile.text = 'some text'
277+
def cliParams = [items: txtFile.toString()]
278+
def session = new Session()
279+
session.init(null, null, cliParams, [:])
280+
281+
when:
282+
def dsl = new ParamsDsl()
283+
dsl.declare('items', List, false)
284+
dsl.apply(session)
285+
286+
then:
287+
def e = thrown(ScriptRuntimeException)
288+
e.message.contains("Unrecognized file format 'txt'")
289+
e.message.contains("supplied for parameter `items` -- should be CSV, JSON, or YAML")
290+
291+
cleanup:
292+
txtFile?.delete()
293+
}
294+
295+
def 'should report error for invalid file content type'() {
296+
given:
297+
def jsonFile = Files.createTempFile('test', '.json')
298+
jsonFile.text = '{"not": "a list"}'
299+
def cliParams = [items: jsonFile.toString()]
300+
def session = new Session()
301+
session.init(null, null, cliParams, [:])
302+
303+
when:
304+
def dsl = new ParamsDsl()
305+
dsl.declare('items', List, false)
306+
dsl.apply(session)
307+
308+
then:
309+
def e = thrown(ScriptRuntimeException)
310+
e.message.contains('Parameter `items` with type List cannot be assigned to contents of')
311+
312+
cleanup:
313+
jsonFile?.delete()
314+
}
315+
131316
}

0 commit comments

Comments
 (0)