#
# Copyright IBM Corporation 2021
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json
import sys
import time

import ray
import rayvens

# This example demonstrates how to receive events from a file-watch source
# and read the file named in a received event into a Ray dataset.

# Parse command-line arguments: the first positional argument is the run
# mode and is mandatory.
if len(sys.argv) < 2:
    print(f'usage: {sys.argv[0]} <run_mode>')
    sys.exit(1)

# Check run mode: only 'local' and 'operator' are accepted.
run_mode = sys.argv[1]
if run_mode not in ('local', 'operator'):
    raise RuntimeError(f'Invalid run mode provided: {run_mode}')

# Initialize ray either on the cluster or locally otherwise.
if run_mode == 'operator':
    ray.init(address='auto')
else:
    ray.init()

# Start rayvens in the desired mode.
rayvens.init(mode=run_mode)

# Create an object stream:
stream = rayvens.Stream('bucket')

# Configure the source: a 'file-watch-source' with path 'test_files' and
# event type 'CREATE'.
source_config = dict(kind='file-watch-source',
                     path='test_files',
                     events='CREATE')

# Run the source:
source = stream.add_source(source_config)


@ray.remote
class Filename:
    """Ray actor that stores a single file name.

    The stored value starts out as ``None`` and is overwritten by every
    call to ``set_filename``.
    """

    def __init__(self):
        # ``None`` means that no file name has been stored yet.
        self.filename = None

    def set_filename(self, filename):
        """Store ``filename``, replacing any previously stored value."""
        self.filename = filename

    def get_filename(self):
        """Return the stored file name, or ``None`` if none was stored."""
        return self.filename


# Actor handle shared between the event handler, which stores the received
# file name in it, and the main script, which polls it for that name.
filename_obj = Filename.remote()


def process_file(event, filename_obj):
    """Print a summary of a file event and record its file name.

    Args:
        event: JSON-encoded event text. The decoded object must contain the
            keys ``filename`` and ``event_type``.
        filename_obj: Handle exposing ``set_filename.remote(name)``; it is
            called once with the file name taken from the event.

    Raises:
        json.JSONDecodeError: If ``event`` is not valid JSON.
        KeyError: If the decoded event lacks ``filename`` or ``event_type``.
    """
    print(f'received {len(event)} bytes')
    json_event = json.loads(event)
    print("Contents:")
    filename = json_event['filename']
    print("Filename:", filename)
    print("Event type:", json_event['event_type'])

    # WARNING: Cannot pickle Ray itself so we cannot read a file using the
    # Datasets API in response to an event. The file name is handed over
    # through ``filename_obj`` instead of being read here.
    filename_obj.set_filename.remote(filename)


# Process incoming file name.
stream >> (lambda event: process_file(event, filename_obj))

# Create a data set and write the csv file using datasets.
# TODO: Ray 1.5.1 uses pandas to write a CSV file so we avoid
# using this method for writing CSV files.
# test_ds = ray.experimental.data.range(100)
# test_ds.write_csv("test_files/test.csv")

# Read JSON file to Ray dataset:
# Poll the actor until the event handler has stored a file name. The sleep
# happens only before a re-poll, so the script continues as soon as a name
# is available; the maximum wait is max_polls * poll_interval_seconds.
max_polls = 100
poll_interval_seconds = 1
filename = ray.get(filename_obj.get_filename.remote())
for _ in range(max_polls):
    if filename is not None:
        break
    time.sleep(poll_interval_seconds)
    filename = ray.get(filename_obj.get_filename.remote())

if filename is not None:
    ds = ray.experimental.data.read_json([filename])
    print(ds)
    print("Dataset constructed correctly")
else:
    print("No file was received")

# Run while events are still being received then stop if not.
stream.disconnect_all(after_idle_for=2)
0 commit comments