remove: serialization (#1)

IgnatovFedor · web-flow · commit c8264bf82eaa · 2022-09-12T11:25:55.000+03:00
diff --git a/deeppavlov/core/commands/infer.py b/deeppavlov/core/commands/infer.py
@@ -29,14 +29,10 @@
 
 
 def build_model(config: Union[str, Path, dict], mode: str = 'infer',
-                load_trained: bool = False, download: bool = False,
-                serialized: Optional[bytes] = None) -> Chainer:
+                load_trained: bool = False, download: bool = False) -> Chainer:
     """Build and return the model described in corresponding configuration file."""
     config = parse_config(config)
 
-    if serialized:
-        serialized: list = pickle.loads(serialized)
-
     if download:
         deep_download(config)
 
@@ -54,12 +50,7 @@ def build_model(config: Union[str, Path, dict], mode: str = 'infer',
                 log.warning('No "save_path" parameter for the {} component, so "load_path" will not be renewed'
                             .format(component_config.get('class_name', component_config.get('ref', 'UNKNOWN'))))
 
-        if serialized and 'in' in component_config:
-            component_serialized = serialized.pop(0)
-        else:
-            component_serialized = None
-
-        component = from_params(component_config, mode=mode, serialized=component_serialized)
+        component = from_params(component_config, mode=mode)
 
         if 'id' in component_config:
             model._components_dict[component_config['id']] = component
diff --git a/deeppavlov/core/common/chainer.py b/deeppavlov/core/common/chainer.py
@@ -299,16 +299,3 @@ def destroy(self):
         if hasattr(self, 'pipe'):
             self.pipe.clear()
         super().destroy()
-
-    def serialize(self) -> bytes:
-        data = []
-        for in_params, out_params, component in self.train_pipe:
-            serialized = component.serialize() if isinstance(component, Component) else None
-            data.append(serialized)
-        return pickle.dumps(data, protocol=4)
-
-    def deserialize(self, data: bytes) -> None:
-        data = pickle.loads(data)
-        for (in_params, out_params, component), component_data in zip(self.train_pipe, data):
-            if isinstance(component, Component):
-                component.deserialize(component_data)
diff --git a/deeppavlov/core/common/params.py b/deeppavlov/core/common/params.py
@@ -55,18 +55,15 @@ def _init_param(param, mode):
     return param
 
 
-def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwargs) -> Union[Component, FunctionType]:
+def from_params(params: Dict, mode: str = 'infer', **kwargs) -> Union[Component, FunctionType]:
     """Builds and returns the Component from corresponding dictionary of parameters."""
     # what is passed in json:
     config_params = {k: _resolve(v) for k, v in params.items()}
 
     # get component by reference (if any)
     if 'ref' in config_params:
         try:
-            component = _refs[config_params['ref']]
-            if serialized is not None:
-                component.deserialize(serialized)
-            return component
+            return _refs[config_params['ref']]
         except KeyError:
             e = ConfigError('Component with id "{id}" was referenced but not initialized'
                             .format(id=config_params['ref']))
@@ -78,7 +75,7 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa
         refs = _refs.copy()
         _refs.clear()
         config = parse_config(expand_path(config_params['config_path']), config_params.get('overwrite'))
-        model = build_model(config, serialized=serialized)
+        model = build_model(config)
         _refs.clear()
         _refs.update(refs)
         try:
@@ -97,7 +94,6 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa
     if inspect.isclass(obj):
         # find the submodels params recursively
         config_params = {k: _init_param(v, mode) for k, v in config_params.items()}
-
         try:
             spec = inspect.getfullargspec(obj)
             if 'mode' in spec.args + spec.kwonlyargs or spec.varkw is not None:
@@ -111,9 +107,6 @@ def from_params(params: Dict, mode: str = 'infer', serialized: Any = None, **kwa
         except Exception:
             log.exception("Exception in {}".format(obj))
             raise
-
-        if serialized is not None:
-            component.deserialize(serialized)
     else:
         component = obj
 
diff --git a/deeppavlov/core/data/simple_vocab.py b/deeppavlov/core/data/simple_vocab.py
@@ -105,9 +105,6 @@ def save(self):
                 cnt = self.freqs[token]
                 f.write('{}\t{:d}\n'.format(token, cnt))
 
-    def serialize(self) -> List[Tuple[str, int]]:
-        return [(token, self.freqs[token]) for token in self._i2t]
-
     def load(self):
         self.reset()
         if self.load_path:
@@ -125,12 +122,6 @@ def load(self):
         else:
             raise ConfigError("`load_path` for {} is not provided!".format(self))
 
-    def deserialize(self, data: List[Tuple[str, int]]) -> None:
-        self.reset()
-        if data:
-            tokens, counts = zip(*data)
-            self._add_tokens_with_freqs(tokens, counts)
-
     def load_line(self, ln):
         if self.freq_drop_load:
             token = ln.strip().split()[0]
diff --git a/deeppavlov/core/models/component.py b/deeppavlov/core/models/component.py
@@ -36,17 +36,3 @@ def destroy(self):
             if hasattr(attr, 'destroy'):
                 attr.destroy()
             delattr(self, attr_name)
-
-    def serialize(self):
-        from deeppavlov.core.models.serializable import Serializable
-        if isinstance(self, Serializable):
-            log.warning(f'Method for {self.__class__.__name__} serialization is not implemented!'
-                        f' Will not be able to load without using load_path')
-        return None
-
-    def deserialize(self, data):
-        from deeppavlov.core.models.serializable import Serializable
-        if isinstance(self, Serializable):
-            log.warning(f'Method for {self.__class__.__name__} deserialization is not implemented!'
-                        f' Please, use traditional load_path for this component')
-        pass
diff --git a/deeppavlov/core/models/serializable.py b/deeppavlov/core/models/serializable.py
@@ -23,10 +23,7 @@
 
 
 class Serializable(metaclass=ABCMeta):
-    """
-    :class:`deeppavlov.models.model.serializable.Serializable` is an abstract base class that expresses the interface
-    for all models that can serialize data to a path.
-    """
+    """Abstract base class that expresses the interface for all models that can serialize data to a path."""
 
     def __init__(self, save_path: Optional[Union[str, Path]], load_path: Optional[Union[str, Path]] = None,
                  mode: str = 'infer',
diff --git a/tests/test_quick_start.py b/tests/test_quick_start.py
@@ -46,10 +46,9 @@
 
 TEST_MODES = ['IP',  # test_inferring_pretrained_model
               'TI',  # test_consecutive_training_and_inferring
-              'SR',  # test_serialization
               ]
 
-ALL_MODES = ('IP', 'TI', 'SR')
+ALL_MODES = ('IP', 'TI')
 
 ONE_ARGUMENT_INFER_CHECK = ('Dummy text', None)
 TWO_ARGUMENTS_INFER_CHECK = ('Dummy text', 'Dummy text', None)
@@ -376,11 +375,6 @@ def teardown_module():
         shutil.rmtree(str(cache_dir), ignore_errors=True)
 
 
-def _serialize(config):
-    chainer = build_model(config, download=True)
-    return chainer.serialize()
-
-
 def _infer(config, inputs, download=False):
     chainer = build_model(config, download=download)
     if inputs:
@@ -392,18 +386,6 @@ def _infer(config, inputs, download=False):
     return prediction
 
 
-def _deserialize(config, raw_bytes, examples):
-    chainer = build_model(config, serialized=raw_bytes)
-    for *query, expected_response in examples:
-        query = [[q] for q in query]
-        actual_response = chainer(*query)
-        if expected_response is not None:
-            if actual_response is not None and len(actual_response) > 0:
-                actual_response = actual_response[0]
-            assert expected_response == str(actual_response), \
-                f"Error in interacting with {model_dir} ({conf_file}): {query}"
-
-
 @pytest.mark.parametrize("model,conf_file,model_dir,mode", TEST_GRID, scope='class')
 class TestQuickStart(object):
     @staticmethod
@@ -555,28 +537,6 @@ def test_inferring_pretrained_model_socket(self, model, conf_file, model_dir, mo
         else:
             pytest.skip(f"Unsupported mode: {mode}")
 
-    def test_serialization(self, model, conf_file, model_dir, mode):
-        if 'SR' not in mode:
-            return pytest.skip("Unsupported mode: {}".format(mode))
-
-        config_file_path = test_configs_path / conf_file
-
-        with ProcessPoolExecutor(max_workers=1) as executor:
-            f = executor.submit(_serialize, config_file_path)
-        raw_bytes = f.result()
-
-        serialized: list = pickle.loads(raw_bytes)
-        if not any(serialized):
-            pytest.skip("Serialization not supported: {}".format(conf_file))
-            return
-        serialized.clear()
-
-        with ProcessPoolExecutor(max_workers=1) as executor:
-            f = executor.submit(_deserialize, config_file_path, raw_bytes, PARAMS[model][(conf_file, model_dir, mode)])
-
-        exc = f.exception()
-        if exc is not None:
-            raise exc
 
     def test_consecutive_training_and_inferring(self, model, conf_file, model_dir, mode):
         if 'TI' in mode: