|
13 | 13 | # See the License for the specific language governing permissions and |
14 | 14 | # limitations under the License. |
15 | 15 | import logging |
16 | | -import os |
17 | 16 |
|
18 | 17 | import torch |
19 | 18 | import torch.nn.functional as F |
@@ -98,15 +97,6 @@ def __init__( |
98 | 97 |
|
99 | 98 | self.model_cls = model_cls |
100 | 99 | self.model = None |
101 | | - self.is_compiled = False |
102 | | - self.serialize_base_path = None |
103 | | - |
104 | | - base_compile_work_dir = os.environ.get("BASE_COMPILE_WORK_DIR", "/tmp/nxd_model/") |
105 | | - self.compiler_workdir = os.path.join(base_compile_work_dir, self.tag) |
106 | | - |
107 | | - def load_state_dict(self, state_dict, strict: bool = True, assign: bool = False): |
108 | | - self.model = self.model_cls(self.config, self.neuron_config) |
109 | | - self.model.load_state_dict(state_dict, strict=strict, assign=assign) |
110 | 100 |
|
111 | 101 | def input_generator( |
112 | 102 | self, |
@@ -226,10 +216,6 @@ def pad_to_max_compiled_seq(self, *args): |
226 | 216 |
|
227 | 217 | return args |
228 | 218 |
|
229 | | - def _get_async_output(self, ranked_async_tensor): |
230 | | - outputs = [[async_tensor[0].cpu()] for async_tensor in ranked_async_tensor] |
231 | | - return outputs[0][0] |
232 | | - |
233 | 219 | def forward(self, input_ids, attention_mask, position_ids, seq_ids, sampling_params): |
234 | 220 | input_ids, attention_mask, position_ids, seq_ids = self.convert_int64_to_int32( |
235 | 221 | input_ids, attention_mask, position_ids, seq_ids |
|
0 commit comments