55from dffml .df .types import DataFlow , Input , Definition
66
77
class InvalidCustomRunDataFlowContext(Exception):
    """
    Raised when the custom inputs given to dffml.dataflow.run do not list an
    input whose primitive is "string" as the first input. The first input's
    value is used as the subflow's context string, so it must be a string.
    """
14+
class InvalidCustomRunDataFlowOutputs(Exception):
    """
    Thrown when outputs for a custom dffml.dataflow.run do not match those of
    its subflow.
    """
21+
@config
class RunDataFlowConfig:
    """
    Configuration for the dffml.dataflow.run operation. Holds the subflow
    (dataflow) that the operation will execute.
    """

    dataflow: DataFlow
1125
1226
# The input signature dffml.dataflow.run ships with. When the operation has
# not been redefined by the caller, its inputs are exactly this mapping, which
# is how run() decides between the default and custom execution paths.
DEFAULT_INPUTS = {
    "inputs": Definition(name="flow_inputs", primitive="Dict[str,Any]")
}
30+
31+
1332@op (
1433 name = "dffml.dataflow.run" ,
15- inputs = {
16- "inputs" : Definition (name = "flow_inputs" , primitive = "Dict[str,Any]" )
17- },
34+ inputs = DEFAULT_INPUTS ,
1835 outputs = {
1936 "results" : Definition (name = "flow_results" , primitive = "Dict[str,Any]" )
2037 },
@@ -40,6 +57,8 @@ class run_dataflow(OperationImplementationContext):
4057 Examples
4158 ++++++++
4259
60+ The following shows how to use run dataflow in its default behavior.
61+
4362 >>> URL = Definition(name="URL", primitive="string")
4463 >>>
4564 >>> subflow = DataFlow.auto(GetSingle)
@@ -80,10 +99,63 @@ class run_dataflow(OperationImplementationContext):
8099 >>>
81100 >>> asyncio.run(main())
82101 {'flow_results': {'dffml': {'URL': 'https://github.com/intel/dffml'}}}
102+
103+ The following shows how to use run dataflow with custom inputs and outputs.
104+ This allows you to run a subflow as if it were an operation.
105+
106+ >>> URL = Definition(name="URL", primitive="string")
107+ >>>
108+ >>> @op(
109+ ... inputs={"url": URL},
110+ ... outputs={"last": Definition("last_element_in_path", primitive="string")},
111+ ... )
112+ ... def last_path(url):
113+ ... return {"last": url.split("/")[-1]}
114+ >>>
115+ >>> subflow = DataFlow.auto(last_path, GetSingle)
116+ >>> subflow.seed.append(
117+ ... Input(
118+ ... value=[last_path.op.outputs["last"].name],
119+ ... definition=GetSingle.op.inputs["spec"],
120+ ... )
121+ ... )
122+ >>>
123+ >>> dataflow = DataFlow.auto(run_dataflow, GetSingle)
124+ >>> dataflow.operations[run_dataflow.op.name] = run_dataflow.op._replace(
125+ ... inputs={"URL": URL},
126+ ... outputs={last_path.op.outputs["last"].name: last_path.op.outputs["last"]},
127+ ... expand=[],
128+ ... )
129+ >>> dataflow.configs[run_dataflow.op.name] = RunDataFlowConfig(subflow)
130+ >>> dataflow.seed.append(
131+ ... Input(
132+ ... value=[last_path.op.outputs["last"].name],
133+ ... definition=GetSingle.op.inputs["spec"],
134+ ... )
135+ ... )
136+ >>> dataflow.update(auto_flow=True)
137+ >>>
138+ >>> async def main():
139+ ... async for ctx, results in MemoryOrchestrator.run(
140+ ... dataflow,
141+ ... {
142+ ... "run_subflow": [
143+ ... Input(value="https://github.com/intel/dffml", definition=URL)
144+ ... ]
145+ ... },
146+ ... ):
147+ ... print(results)
148+ >>>
149+ >>> asyncio.run(main())
150+ {'last_element_in_path': 'dffml'}
83151 """
84152
85- async def run (self , inputs : Dict [str , Any ]) -> Dict [str , Any ]:
86- inputs = inputs ["inputs" ]
153+ async def run_default (self , inputs : Dict [str , Any ]) -> Dict [str , Any ]:
154+ """
155+ The default implementation for the dataflow.run operation is the uctx
156+ mode. This mode is when we map unique strings to a list of inputs to be
157+ given to the respective string's context.
158+ """
87159 inputs_created = {}
88160 definitions = self .config .dataflow .definitions
89161
@@ -102,3 +174,36 @@ async def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
102174 ]
103175
104176 return {"results" : results }
177+
async def run_custom(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the configured subflow as if it were an operation. Used when the
    caller has redefined this operation's inputs and outputs.

    The value of the operation's first input is used as the subflow context
    string, so the first input's definition must have "string" as its
    primitive. The subflow's results must expose exactly the output names
    declared on the redefined operation.

    Raises
    ------
    InvalidCustomRunDataFlowContext
        If the first input's definition does not have a "string" primitive.
    InvalidCustomRunDataFlowOutputs
        If the subflow's result keys do not match the operation's outputs.
    """
    # TODO Move string primitive validation into init of
    # an OperationImplementation (and then keep this as the context).
    context_name, context_definition = next(iter(self.parent.op.inputs.items()))

    if context_definition.primitive != "string":
        raise InvalidCustomRunDataFlowContext(context_definition.export())

    # The first input's value doubles as the subflow context string. Every
    # input (including the context input itself) is forwarded to the subflow.
    context_string = inputs[context_name]
    subflow_inputs = {
        context_string: [
            Input(value=value, definition=self.parent.op.inputs[name])
            for name, value in inputs.items()
        ]
    }

    expected_outputs = sorted(self.parent.op.outputs.keys())

    async with self.subflow(self.config.dataflow) as octx:
        async for _ctx, result in octx.run(subflow_inputs):
            if expected_outputs != sorted(result.keys()):
                raise InvalidCustomRunDataFlowOutputs(
                    context_definition.export()
                )
            return result
203+
async def run(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
    """
    Dispatch to the appropriate implementation.

    Supports redefinition of the operation: if the operation still has its
    shipped input signature (DEFAULT_INPUTS), run in default mode; otherwise
    the caller redefined the inputs/outputs and the custom path is taken.
    """
    if self.parent.op.inputs != DEFAULT_INPUTS:
        return await self.run_custom(inputs)
    return await self.run_default(inputs["inputs"])
0 commit comments