|
| 1 | +import os |
| 2 | +import socket |
| 3 | +import signal |
| 4 | +import logging |
| 5 | +from typing import Tuple |
| 6 | + |
| 7 | +from djl_python.service_loader import get_annotated_function, load_model_service, has_function_in_module |
| 8 | +from djl_python.inputs import Input |
| 9 | +from djl_python.outputs import Output |
| 10 | + |
# Seconds the listening socket waits in accept() for the frontend to connect
# (applied via sock.settimeout before accepting the client connection).
SOCKET_ACCEPT_TIMEOUT = 30.0
| 12 | + |
| 13 | + |
class PythonSyncEngine(object):
    """
    Backend engine to run python code.

    Binds a unix-domain or TCP socket, accepts a single connection from the
    DJL frontend, and serves inference requests synchronously in a loop.
    """

    def __init__(self, args, service):
        """
        :param args: parsed command-line arguments (model_dir, sock_type,
            sock_name, port, device_id, parallel degrees, entry points)
        :param service: loaded model service whose handler functions are
            invoked per request
        :raises ValueError: if required socket arguments are missing or
            sock_type is invalid
        """
        # Map Open MPI launcher variables onto the torch.distributed
        # conventions so MPI-launched workers behave like torchrun ones.
        world_size = os.getenv('OMPI_COMM_WORLD_SIZE')
        if world_size:
            os.environ["WORLD_SIZE"] = world_size
        local_rank = os.getenv('OMPI_COMM_WORLD_LOCAL_RANK')
        if local_rank:
            os.environ["LOCAL_RANK"] = local_rank
        rank = os.environ.get("OMPI_COMM_WORLD_RANK")
        if rank:
            os.environ["RANK"] = rank

        self.model_dir = args.model_dir
        self.sock_type = args.sock_type
        self.sock_name = args.sock_name
        self.port = args.port
        self.service = service
        self.device_id = args.device_id
        self.tensor_parallel_degree = args.tensor_parallel_degree
        self.pipeline_parallel_degree = args.pipeline_parallel_degree
        self.cluster_size = args.cluster_size
        self.entry_point = args.entry_point
        self.recommended_entry_point = args.recommended_entry_point
        # Optional user-annotated formatters discovered in the model dir.
        self.output_formatter = get_annotated_function(args.model_dir,
                                                       "is_output_formatter")
        self.input_formatter = get_annotated_function(args.model_dir,
                                                      "is_input_formatter")
        # Entry-point verification is deferred to the first request.
        self.is_entry_point_verified = False

        if self.sock_type == "unix":
            if self.sock_name is None:
                raise ValueError("Missing sock-name argument.")
            # Each MPI rank gets its own socket file.
            self.sock_name = f"{args.sock_name}.{rank}" if rank else args.sock_name

            self.clean_up()
        elif self.sock_type == "tcp":
            if self.sock_name is None:
                self.sock_name = "0.0.0.0"
            if self.port is None:
                raise ValueError("Missing port argument.")
            # Each MPI rank listens on its own port, offset by its rank.
            self.port = int(self.port) + int(rank) if rank else self.port
        else:
            raise ValueError(f"Invalid socket-type: {self.sock_type}.")

        socket_family = socket.AF_INET if self.sock_type == "tcp" else socket.AF_UNIX
        self.sock = socket.socket(socket_family, socket.SOCK_STREAM)
        self.sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.sock.settimeout(SOCKET_ACCEPT_TIMEOUT)
        self.cl_socket = None

    def clean_up(self):
        """
        Clean up leftovers from a previous run of this unix socket.

        Kills a dangling worker recorded in the pid file (if still alive),
        writes the current pid to the pid file, and removes a stale socket
        file so the subsequent bind() can succeed.
        """
        pid_file = f"{self.sock_name}.pid"
        if os.path.exists(pid_file):
            with open(pid_file, "r") as f:
                pid = f.readline()
                if pid:
                    try:
                        os.kill(int(pid), signal.SIGKILL)
                        logging.warning(
                            f"{self.sock_name} - kill dangling process: {pid}")
                    except ProcessLookupError:
                        # Previous process already exited; nothing to kill.
                        pass

        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        if os.path.exists(self.sock_name):
            os.remove(self.sock_name)

    def _prepare_inputs(self) -> Tuple[Input, str]:
        """
        Read the next request from the client socket and enrich its
        properties with the engine configuration (parallel degrees, device,
        formatters).

        On the first request, verifies that the configured entry point
        actually provides the requested handler; otherwise reloads the
        service from the LMI recommended entry point.

        :return: tuple of (request inputs, handler function name)
        """
        inputs = Input()
        inputs.read(self.cl_socket)
        prop = inputs.get_properties()
        if self.tensor_parallel_degree:
            prop["tensor_parallel_degree"] = self.tensor_parallel_degree
        if self.pipeline_parallel_degree:
            prop["pipeline_parallel_degree"] = self.pipeline_parallel_degree
        if self.cluster_size:
            prop["cluster_size"] = self.cluster_size
        prop["device_id"] = self.device_id

        if "output_formatter" in prop:
            if hasattr(self.service, prop["output_formatter"]):
                # TODO: custom output_formatter in serving.properties is
                #  deprecated. Remove once users are migrated.
                prop["output_formatter"] = getattr(self.service,
                                                   prop["output_formatter"])
        elif self.output_formatter:
            prop["output_formatter"] = self.output_formatter

        if self.input_formatter:
            prop["input_formatter"] = self.input_formatter
        function_name = inputs.get_function_name()
        if not self.is_entry_point_verified:
            if self.recommended_entry_point:
                if not has_function_in_module(self.service.module,
                                              function_name):
                    self.service = load_model_service(
                        self.model_dir, self.recommended_entry_point,
                        self.device_id)
                    # Note the space after the period: the two f-strings are
                    # concatenated into one log message.
                    logging.info(
                        f"{self.entry_point} file has no handler function {function_name}. "
                        f"Hence choosing the LMI recommended entry point {self.recommended_entry_point}"
                    )
            self.is_entry_point_verified = True
        return inputs, function_name

    def _create_cl_socket(self):
        """
        Bind the configured socket, listen, and accept a single client
        connection from the DJL engine, storing it on self.cl_socket.
        """
        if self.sock_type == "unix":
            self.sock.bind(self.sock_name)
        else:
            logging.info(
                f"Socket bind on address: {self.sock_name}:{self.port}")
            self.sock.bind((self.sock_name, int(self.port)))

        self.sock.listen(128)
        logging.info("Python engine started.")

        (cl_socket, _) = self.sock.accept()
        # workaround error(35, 'Resource temporarily unavailable') on OSX
        cl_socket.setblocking(True)
        self.cl_socket = cl_socket

    def run_server(self):
        """
        Run the backend worker process and listen on a socket.

        Serves requests forever: each iteration reads one request, invokes
        the handler, and sends the Output back. Handler exceptions are
        converted to error Outputs (HTTP 507 for out-of-memory conditions)
        instead of crashing the loop.
        :return:
        """
        self._create_cl_socket()

        while True:
            inputs, function_name = self._prepare_inputs()
            try:
                outputs = self.service.invoke_handler(function_name, inputs)
                if outputs is None:
                    outputs = Output(code=204, message="No content")
                elif not isinstance(outputs, Output):
                    outputs = Output().error(
                        f"Invalid output type: {type(outputs)}")
            except Exception as e:
                logging.exception("Failed invoke service.invoke_handler()")
                # Out-of-memory conditions (framework-specific exception
                # names or well-known message fragments) map to HTTP 507
                # so the frontend can react (e.g. scale down concurrency).
                if (type(e).__name__ in ("OutOfMemoryError", "MemoryError")
                        or "No available memory for the cache blocks" in str(e)
                        or "CUDA error: out of memory" in str(e)):
                    outputs = Output(code=507, message=str(e))
                else:
                    outputs = Output().error(str(e))

            outputs.send(self.cl_socket)
            logging.debug("Outputs is sent to DJL engine.")
            try:
                # finalize runs after the response is sent; its failure must
                # not take down the serving loop.
                outputs.execute_finalize()
            except Exception as e:
                logging.exception(f"Failed on finalize function: {e}")