Skip to content

Commit 25d55b0

Browse files
authored
Add support for multi task operators and stream context (#59)
* Add support for multi-task operators and stream contexts. * Update comment. * Rename method.
1 parent 1b108c6 commit 25d55b0

File tree

3 files changed

+149
-38
lines changed

3 files changed

+149
-38
lines changed

examples/multitask/multitask.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#
2+
# Copyright IBM Corporation 2021
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
#
16+
17+
import ray
18+
import rayvens
19+
import sys
20+
21+
# Send message to Slack sink using a multi-tasking operator. The operator
22+
# will be a Ray Task which spawns three other Ray Tasks. Each of the three
23+
# spawned tasks will send their event to the sinks attached to the stream.
24+
25+
# Command line arguments and validation:
26+
if len(sys.argv) < 4:
27+
print(f'usage: {sys.argv[0]} <slack_channel> <slack_webhook> <run_mode>')
28+
sys.exit(1)
29+
slack_channel = sys.argv[1]
30+
slack_webhook = sys.argv[2]
31+
run_mode = sys.argv[3]
32+
if run_mode not in ['local', 'mixed', 'operator']:
33+
raise RuntimeError(f'Invalid run mode provided: {run_mode}')
34+
35+
# Initialize ray either on the cluster or locally otherwise.
36+
if run_mode == 'operator':
37+
ray.init(address='auto')
38+
else:
39+
ray.init()
40+
41+
# Start rayvens in the run mode requested on the command line.
42+
rayvens.init(mode=run_mode)
43+
44+
# Create stream.
45+
stream = rayvens.Stream('slack')
46+
47+
48+
# Operator sub-task:
49+
@ray.remote
def sub_task(context, intermediate_data):
    """Build one sub-task event, print it, and publish it via the context.

    The event is the string "sub-task " followed by ``intermediate_data``;
    it is handed to ``context.publish``.
    """
    outgoing = "sub-task " + intermediate_data
    print(outgoing)
    context.publish(outgoing)
54+
55+
56+
# Operator task:
57+
@ray.remote
def multi_part_task(context, incoming_event):
    """Print the incoming event, then launch three ``sub_task`` invocations.

    Every sub-task receives the same ``context`` and one of the payloads
    "sub-event0", "sub-event1", "sub-event2". The values returned by the
    ``.remote`` calls are discarded.
    """
    print("multi-part-task:", incoming_event)
    for payload in map("sub-event{}".format, range(3)):
        sub_task.remote(context, payload)
62+
63+
64+
# Event sink config.
65+
sink_config = dict(kind='slack-sink',
66+
route='/toslack',
67+
channel=slack_channel,
68+
webhook_url=slack_webhook)
69+
70+
# Add sink to stream.
71+
sink = stream.add_sink(sink_config)
72+
73+
# Add multi-task operator to stream.
74+
stream.add_multitask_operator(multi_part_task)
75+
76+
# Sends message to all sinks attached to this stream.
77+
stream << f'Sending message to Slack sink in run mode {run_mode}.'
78+
79+
# Disconnect any sources or sinks attached to the stream 2 seconds after
80+
# the stream is idle (i.e. no events were propagated by the stream).
81+
stream.disconnect_all(after_idle_for=2)

rayvens/api.py

Lines changed: 65 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def append(self, data):
5656
def add_operator(self, operator):
5757
return ray.get(self.actor.add_operator.remote(operator))
5858

59+
def add_multi_operator(self, operator):
    """Register ``operator`` as this stream's multi-task operator.

    Forwards the operator to the stream actor and waits for the call to
    finish with ``ray.get``. The actor-side method is named
    ``add_multitask_operator``, so that is the name invoked here.
    """
    return ray.get(self.actor.add_multitask_operator.remote(operator))

# Alias under the name used by the actor and by examples/multitask.
add_multitask_operator = add_multi_operator
61+
5962
def add_source(self, source_config):
6063
return ray.get(self.actor.add_source.remote(self, source_config))
6164

@@ -112,45 +115,72 @@ def _idle_time(self):
112115
return time.time() - latest_timestamp
113116

114117

118+
class StreamContext:
    """Routing state of a stream: subscribers, sink restrictions, counters.

    An instance carries everything ``publish`` needs to deliver an event,
    plus the bookkeeping fields (latest timestamp, event counter, flags)
    that the owning stream actor reads and updates.
    """

    def __init__(self):
        # Sink name -> collection of types that sink accepts.
        self.sink_restrictions = {}
        # Subscriber name -> subscriber to which events are forwarded.
        self.subscribers = {}
        self.latest_sent_event_timestamp = None
        self.event_counter = 0
        self.limit_subscribers = False
        self.is_multi_operator = False

    def publish(self, data):
        """Forward ``data`` to every subscriber that accepts its type.

        ``None`` is never published. A subscriber whose name has an entry
        in ``sink_restrictions`` only receives the data when
        ``_accepts_data_type`` approves it; all other subscribers always
        receive it.
        """
        if data is None:
            return
        for name, subscriber in self.subscribers.items():
            unrestricted = name not in self.sink_restrictions
            if unrestricted or self._accepts_data_type(
                    data, self.sink_restrictions[name]):
                _eval(self, subscriber, data)

    def _accepts_data_type(self, data, type_restrictions):
        """Return True when ``data`` may be routed given the restrictions.

        An empty ``type_restrictions`` accepts everything; otherwise the
        data must be an instance of at least one of the listed types.
        """
        if len(type_restrictions) == 0:
            return True
        return any(
            isinstance(data, allowed) for allowed in type_restrictions)
147+
148+
115149
@ray.remote(num_cpus=0)
116150
class StreamActor:
117151
def __init__(self, name, operator=None):
118152
self.name = name
119-
self._subscribers = {}
120153
self._operator = operator
121154
self._sources = {}
122155
self._sinks = {}
123-
self._latest_sent_event_timestamp = None
124-
self._limit_subscribers = False
125-
self._event_counter = 0
156+
self.context = StreamContext()
126157

127158
def send_to(self, subscriber, name=None):
128-
if self._limit_subscribers:
159+
if self.context.limit_subscribers:
129160
return
130-
if name in self._subscribers:
161+
if name in self.context.subscribers:
131162
raise RuntimeError(
132163
f'Stream {self.name} already has a subscriber named {name}.')
133164
if name is None:
134165
name = object()
135-
self._subscribers[name] = subscriber
166+
self.context.subscribers[name] = subscriber
136167

137168
def append(self, data):
138169
if data is None:
139170
return
140171
if self._operator is not None:
141-
data = _eval(self._operator, data)
142-
for name, subscriber in self._subscribers.items():
143-
if name in self._sinks:
144-
integration = self._sinks[name]
145-
if not integration.accepts_data_type(data):
146-
continue
147-
_eval(subscriber, data)
148-
self._latest_sent_event_timestamp = time.time()
149-
self._event_counter += 1
172+
data = _eval(self.context, self._operator, data)
173+
self.context.publish(data)
174+
self.context.latest_sent_event_timestamp = time.time()
175+
self.context.event_counter += 1
150176

151177
def add_operator(self, operator):
    """Install ``operator`` as the stream's regular (single-task) operator.

    The context's ``is_multi_operator`` flag is cleared as well. Without
    that, a regular remote-function operator installed after a multi-task
    operator would still be invoked with the stream context as an extra
    first argument.
    """
    self._operator = operator
    self.context.is_multi_operator = False
153179

180+
def add_multitask_operator(self, operator):
181+
self._operator = operator
182+
self.context.is_multi_operator = True
183+
154184
def add_source(self, stream, source_config):
155185
source_config["integration_type"] = 'source'
156186
source_name = name_source(source_config)
@@ -170,13 +200,15 @@ def add_sink(self, stream, sink_config):
170200
f'Stream {self.name} already has a sink named {sink_name}.')
171201
self._sinks[sink_name] = _global_camel.add_sink(
172202
stream, sink_config, sink_name)
203+
self.context.sink_restrictions[sink_name] = self._sinks[
204+
sink_name].get_restricted_data_type()
173205
return sink_name
174206

175207
def unsubscribe(self, subscriber_name):
    """Remove the subscriber registered under ``subscriber_name``.

    Subscribers are stored on ``self.context.subscribers``, so both the
    membership check and the removal use that mapping.

    Raises:
        RuntimeError: if the stream has no subscriber with that name.
    """
    subscribers = self.context.subscribers
    if subscriber_name not in subscribers:
        raise RuntimeError(f'Stream {self.name} has no subscriber named'
                           f' {subscriber_name}.')
    subscribers.pop(subscriber_name)
180212

181213
def disconnect_source(self, source_name):
182214
if source_name not in self._sources:
@@ -191,7 +223,8 @@ def disconnect_sink(self, sink_name):
191223
f'Stream {self.name} has no sink named {sink_name}.')
192224
_global_camel.disconnect(self._sinks[sink_name])
193225
self._sinks.pop(sink_name)
194-
self._subscribers.pop(sink_name)
226+
self.context.sink_restrictions.pop(sink_name)
227+
self.context.subscribers.pop(sink_name)
195228

196229
def disconnect_all(self, stream_drain_timeout):
197230
for source_name in dict(self._sources):
@@ -201,29 +234,35 @@ def disconnect_all(self, stream_drain_timeout):
201234
self.disconnect_sink(sink_name)
202235

203236
def event_count(self):
204-
return self.event_count
237+
return self.context.event_counter
205238

206239
def _meta(self, action, *args, **kwargs):
207240
return verify_do(self, _global_camel, action, *args, **kwargs)
208241

209242
def _get_latest_timestamp(self):
210-
return self._latest_sent_event_timestamp
243+
return self.context.latest_sent_event_timestamp
211244

212245
def _fetch_processors(self):
213-
self._limit_subscribers = True
214-
return self._subscribers, self._operator
246+
self.context.limit_subscribers = True
247+
return self.context.subscribers, self._operator
215248

216249
def _update_timestamp(self, timestamp):
217-
self._latest_sent_event_timestamp = timestamp
250+
self.context.latest_sent_event_timestamp = timestamp
218251

219252

220-
def _eval(context, f, data):
    """Apply the processor ``f`` to ``data`` according to the kind of ``f``.

    Dispatch rules:
      * ``Stream``: ``f.append(data)``.
      * Ray actor handle: ``f.append.remote(data)``.
      * Ray actor method: ``f.remote(data)``.
      * Ray remote function: ``f.remote(data)``, unless the context is
        flagged as multi-operator, in which case the function is launched
        as ``f.remote(context, data)`` and ``None`` is returned so that the
        caller publishes nothing itself.
      * Anything else: called directly as ``f(data)``.

    Raises:
        RuntimeError: when a multi-task operator is about to be launched
            but the context has no subscribers to publish to.
    """
    if isinstance(f, Stream):
        return f.append(data)
    if isinstance(f, ray.actor.ActorHandle):
        return f.append.remote(data)
    if isinstance(f, ray.actor.ActorMethod):
        return f.remote(data)
    if isinstance(f, ray.remote_function.RemoteFunction):
        if context.is_multi_operator:
            # ``subscribers`` starts out as an empty dict, never None, so
            # test for emptiness to make this guard effective.
            if not context.subscribers:
                raise RuntimeError('No subscribers or sinks provided.')
            # NOTE(review): this branch is also taken for remote-function
            # *subscribers* reached through StreamContext.publish while the
            # flag is set -- confirm that is intended.
            f.remote(context, data)
            return None
        return f.remote(data)
    return f(data)

rayvens/core/integration.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -132,19 +132,10 @@ def disconnect(self, mode):
132132
# process that runs it.
133133
self.invocation.kill()
134134

135-
# Check if the sink we are routing the message to has any restrictions
136-
# in terms of message type. A message will only be routed to a sink
137-
# if the sink accepts its type.
138-
def accepts_data_type(self, data):
135+
# Get any type restrictions:
136+
def get_restricted_data_type(self):
139137
# The restrictions are returned as-is; the caller decides whether data is accepted:
140-
restricted_message_types = self.input_restrictions[
141-
'restricted_message_types']
142-
if len(restricted_message_types) == 0:
143-
return True
144-
for restricted_type in restricted_message_types:
145-
if isinstance(data, restricted_type):
146-
return True
147-
return False
138+
return self.input_restrictions['restricted_message_types']
148139

149140
# Method that checks if, based on the configuration, the integration
150141
# requires something to be run or created before the integration is run.

0 commit comments

Comments
 (0)