diff --git a/.gitignore b/.gitignore
index f4876f8..58c271d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,12 @@
 # Python cache
 *.pyc
 
+# Ignore results
+*.pb
+*.pbtxt
+*.mat
+.tmp/
+
+# Ignore development settings
+.vscode/
+.ipynb_checkpoints/
diff --git a/README.md b/README.md
index f1ce6be..21d3bf6 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,24 @@
 
 Convert [Caffe](https://github.com/BVLC/caffe/) models to [TensorFlow](https://github.com/tensorflow/tensorflow).
 
+## Usage with standalone model
+
+### 1 - Install caffe-tensorflow
+    git clone https://github.com/linkfluence/caffe-tensorflow
+    # Optional: create a Python 2.7 env and activate it
+    # This fork has only been tested with Python 2.7
+
+### 2 - (Optional) Switch to TensorFlow CPU
+You might run into memory issues during conversion. In that case, uninstall `tensorflow-gpu` and install `tensorflow`.
+
+### 3 - Convert your model
+    python convert.py --caffemodel ./model.caffemodel ./model.prototxt --data-output-path ./output.mat --code-output-path ./output.py --standalone-output-path ./standalone.pb
+
+### 4 - (Optional) Re-install TensorFlow GPU
+
+### 5 - Use the standalone.pb file
+It contains the weights and the architecture of the network.
+
 ## Usage
 
 Run `convert.py` to convert an existing Caffe model to TensorFlow.
@@ -13,6 +31,8 @@ The output consists of two files:
 1. A data file (in NumPy's native format) containing the model's learned parameters.
 2. A Python class that constructs the model's graph.
 
+Alternatively, you can save a standalone GraphDef model file containing both the model's graph and its learned parameters.
+
 ### Examples
 See the [examples](examples/) folder for more details.
diff --git a/convert.py b/convert.py
index 30dcd4a..e5fd09d 100755
--- a/convert.py
+++ b/convert.py
@@ -2,8 +2,14 @@
 import os
 import sys
-import numpy as np
 import argparse
+
+import shutil
+import pickle
+import tensorflow as tf
+from tensorflow.python.tools.freeze_graph import freeze_graph
+from tensorflow.python.tools import optimize_for_inference_lib
+
 from kaffe import KaffeError, print_stderr
 from kaffe.tensorflow import TensorFlowTransformer
 
@@ -16,25 +22,113 @@ def fatal_error(msg):
 def validate_arguments(args):
     if (args.data_output_path is not None) and (args.caffemodel is None):
         fatal_error('No input data path provided.')
-    if (args.caffemodel is not None) and (args.data_output_path is None):
+    if (args.caffemodel is not None) and (args.data_output_path is None) and \
+            (args.standalone_output_path is None):
         fatal_error('No output data path provided.')
-    if (args.code_output_path is None) and (args.data_output_path is None):
+    if (args.code_output_path is None) and (args.data_output_path is None) and \
+            (args.standalone_output_path is None):
         fatal_error('No output path specified.')
 
 
-def convert(def_path, caffemodel_path, data_output_path, code_output_path, phase):
+def convert(def_path, caffemodel_path, data_output_path, code_output_path, standalone_output_path,
+            phase, freeze):
     try:
+        sess = tf.InteractiveSession()
         transformer = TensorFlowTransformer(def_path, caffemodel_path, phase=phase)
         print_stderr('Converting data...')
-        if caffemodel_path is not None:
+        if data_output_path is not None:
             data = transformer.transform_data()
             print_stderr('Saving data...')
-            with open(data_output_path, 'wb') as data_out:
-                np.save(data_out, data)
-        if code_output_path:
+            with open(data_output_path, 'wb') as handle:
+                pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
+        if code_output_path is not None:
             print_stderr('Saving source...')
             with open(code_output_path, 'wb') as src_out:
                 src_out.write(transformer.transform_source())
+
+        if standalone_output_path:
+            filename, _ = os.path.splitext(os.path.basename(standalone_output_path))
+            temp_folder = os.path.join(os.path.dirname(standalone_output_path), '.tmp')
+            if os.path.exists(temp_folder):
+                shutil.rmtree(temp_folder)  # Delete old graphs
+            os.makedirs(temp_folder)
+
+            if data_output_path is None:
+                data = transformer.transform_data()
+                print_stderr('Saving data...')
+                data_output_path = os.path.join(temp_folder, filename) + '.npy'
+                with open(data_output_path, 'wb') as handle:
+                    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)
+
+            if code_output_path is None:
+                print_stderr('Saving source...')
+                code_output_path = os.path.join(temp_folder, filename) + '.py'
+                with open(code_output_path, 'wb') as src_out:
+                    src_out.write(transformer.transform_source())
+
+            checkpoint_path = os.path.join(temp_folder, filename + '.ckpt')
+            graph_name = os.path.basename(standalone_output_path)
+            graph_folder = os.path.dirname(standalone_output_path)
+            input_node = transformer.graph.nodes[0].name
+            output_node = transformer.graph.nodes[-1].name
+            tensor_shape = transformer.graph.get_node(input_node).output_shape
+            tensor_shape_list = [tensor_shape.batch_size, tensor_shape.height,
+                                 tensor_shape.width, tensor_shape.channels]
+
+            sys.path.append(os.path.dirname(code_output_path))
+            module = os.path.splitext(os.path.basename(code_output_path))[0]
+            class_name = transformer.graph.name
+            KaffeNet = getattr(__import__(module), class_name)
+
+            data_placeholder = tf.compat.v1.placeholder(
+                tf.float32, tensor_shape_list, name=input_node)
+            net = KaffeNet({input_node: data_placeholder})
+
+            # Load the weights stored in pickle format
+            net.load(data_output_path, sess)
+
+            print_stderr('Saving checkpoint...')
+            saver = tf.compat.v1.train.Saver()
+            saver.save(sess, checkpoint_path)
+
+            print_stderr('Saving graph definition as protobuf...')
+            tf.io.write_graph(sess.graph.as_graph_def(), graph_folder, graph_name, False)
+            writer = tf.compat.v1.summary.FileWriter('.tmp', sess.graph)
+            writer.close()
+
+            input_graph_path = standalone_output_path
+            input_saver_def_path = ""
+            input_binary = True
+            input_checkpoint_path = checkpoint_path
+            output_node_names = output_node
+            restore_op_name = 'save/restore_all'
+            filename_tensor_name = 'save/Const:0'
+            output_graph_path = standalone_output_path
+            clear_devices = True
+
+            print_stderr('Saving standalone model...')
+            output_node_names = '{0}/{0}'.format(output_node_names)
+            if freeze == 'freeze_graph':
+                freeze_graph(input_graph_path, input_saver_def_path,
+                             input_binary, input_checkpoint_path,
+                             output_node_names, restore_op_name,
+                             filename_tensor_name, output_graph_path,
+                             clear_devices, '')
+            elif freeze == 'optimize_for_inference':
+                graph_def = sess.graph.as_graph_def()
+                graph_def = tf.graph_util.convert_variables_to_constants(
+                    sess, graph_def, [output_node_names])
+                graph_def_f32 = optimize_for_inference_lib.optimize_for_inference(
+                    graph_def, ['data'], [output_node_names], tf.float32.as_datatype_enum)
+                tf.train.write_graph(
+                    graph_def_f32, "", standalone_output_path.rsplit('.', 1)[0] + '.pb', as_text=False)
+                tf.train.write_graph(
+                    graph_def_f32, "", standalone_output_path.rsplit('.', 1)[0] + '.pbtxt', as_text=True)
+
         print_stderr('Done.')
     except KaffeError as err:
         fatal_error('Error encountered: {}'.format(err))
@@ -46,14 +140,21 @@ def main():
     parser.add_argument('--caffemodel', help='Model data (.caffemodel) path')
     parser.add_argument('--data-output-path', help='Converted data output path')
     parser.add_argument('--code-output-path', help='Save generated source to this path')
+    parser.add_argument('--standalone-output-path',
+                        help='Save generated standalone tensorflow model to this path')
     parser.add_argument('-p',
                         '--phase',
                         default='test',
                         help='The phase to convert: test (default) or train')
+    parser.add_argument('-fz',
+                        '--freeze',
+                        default=None,
+                        help="""Freeze option for inference: No (default),
+                        freeze_graph or optimize_for_inference (e.g. for OpenCV)""")
     args = parser.parse_args()
     validate_arguments(args)
     convert(args.def_path, args.caffemodel, args.data_output_path, args.code_output_path,
-            args.phase)
+            args.standalone_output_path, args.phase, args.freeze)
 
 
 if __name__ == '__main__':
diff --git a/examples/mnist/README.md b/examples/mnist/README.md
index bdda642..cc0ba7f 100644
--- a/examples/mnist/README.md
+++ b/examples/mnist/README.md
@@ -34,3 +34,11 @@ with tf.Session() as sesh:
     # Forward pass
     output = sesh.run(net.get_output(), ...)
 ```
+
+#### Standalone model file:
+
+You can save a standalone GraphDef model file as follows:
+
+    $ ./convert.py examples/mnist/lenet.prototxt --caffemodel examples/mnist/lenet_iter_10000.caffemodel --standalone-output-path=mynet.pb
+
+This generates a protobuf file named `mynet.pb` containing the model's graph and parameters. The [TensorFlow Image Recognition tutorial](https://www.tensorflow.org/versions/r0.11/tutorials/image_recognition/index.html) shows how to use models constructed in this way in [Python](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/image/imagenet) or [C++](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/label_image).
\ No newline at end of file
diff --git a/kaffe/caffe/caffepb.py b/kaffe/caffe/caffe_pb2.py
similarity index 100%
rename from kaffe/caffe/caffepb.py
rename to kaffe/caffe/caffe_pb2.py
diff --git a/kaffe/caffe/resolver.py b/kaffe/caffe/resolver.py
index b9580a7..3de3ea2 100644
--- a/kaffe/caffe/resolver.py
+++ b/kaffe/caffe/resolver.py
@@ -14,8 +14,8 @@ def import_caffe(self):
             self.caffe = caffe
         except ImportError:
             # Fall back to the protobuf implementation
-            from . import caffepb
-            self.caffepb = caffepb
+            from . import caffe_pb2
+            self.caffepb = caffe_pb2
             show_fallback_warning()
         if self.caffe:
             # Use the protobuf code from the imported distribution.
diff --git a/kaffe/graph.py b/kaffe/graph.py
index bec2b3a..045c4d8 100644
--- a/kaffe/graph.py
+++ b/kaffe/graph.py
@@ -119,7 +119,12 @@ def __str__(self):
         for node in self.topologically_sorted():
             # If the node has learned parameters, display the first one's shape.
             # In case of convolutions, this corresponds to the weights.
-            data_shape = node.data[0].shape if node.data else '--'
+            if node.data is None:
+                data_shape = '--'
+            elif isinstance(node.data, dict):
+                data_shape = node.data['weights'].shape
+            else:
+                data_shape = node.data[0].shape
             out_shape = node.output_shape or '--'
             s.append('{:<20} {:<30} {:>20} {:>20}'.format(node.kind, node.name, data_shape,
                                                           tuple(out_shape)))
diff --git a/kaffe/layers.py b/kaffe/layers.py
index c3c5955..bdcf26f 100644
--- a/kaffe/layers.py
+++ b/kaffe/layers.py
@@ -38,6 +38,7 @@
     'Pooling': shape_pool,
     'Power': shape_identity,
     'ReLU': shape_identity,
+    'PReLU': shape_identity,
     'Scale': shape_identity,
     'Sigmoid': shape_identity,
     'SigmoidCrossEntropyLoss': shape_scalar,
@@ -81,7 +82,7 @@ class NodeDispatch(object):
 
     @staticmethod
     def get_handler_name(node_kind):
-        if len(node_kind) <= 4:
+        if len(node_kind) <= 4 or node_kind == 'PReLU':
             # A catch-all for things like ReLU and tanh
             return node_kind.lower()
         # Convert from CamelCase to under_scored
diff --git a/kaffe/tensorflow/network.py b/kaffe/tensorflow/network.py
index 6f3b153..a9c1bf0 100644
--- a/kaffe/tensorflow/network.py
+++ b/kaffe/tensorflow/network.py
@@ -1,6 +1,11 @@
 import numpy as np
+import pickle
 import tensorflow as tf
 
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+
 DEFAULT_PADDING = 'SAME'
 
@@ -41,9 +46,9 @@ def __init__(self, inputs, trainable=True):
         # If true, the resulting variables are set as trainable
         self.trainable = trainable
         # Switch variable for dropout
-        self.use_dropout = tf.placeholder_with_default(tf.constant(1.0),
-                                                       shape=[],
-                                                       name='use_dropout')
+        self.use_dropout = tf.compat.v1.placeholder_with_default(tf.constant(1.0),
+                                                                 shape=[],
+                                                                 name='use_dropout')
         self.setup()
 
     def setup(self):
@@ -56,16 +61,26 @@ def load(self, data_path, session, ignore_missing=False):
         session: The current TensorFlow session
         ignore_missing: If true, serialized weights for missing layers are ignored.
         '''
-        data_dict = np.load(data_path).item()
+        with open(data_path, 'rb') as handle:
+            data_dict = pickle.load(handle)
         for op_name in data_dict:
-            with tf.variable_scope(op_name, reuse=True):
-                for param_name, data in data_dict[op_name].iteritems():
+            with tf.compat.v1.variable_scope(op_name, reuse=True):
+                # TODO not sure why name mapping does not work
+                if 'relu' in op_name:
                     try:
-                        var = tf.get_variable(param_name)
-                        session.run(var.assign(data))
+                        var = tf.compat.v1.get_variable(op_name)
+                        session.run(var.assign(data_dict[op_name][0]))
                     except ValueError:
                         if not ignore_missing:
                             raise
+                else:
+                    for param_name, data in data_dict[op_name].iteritems():
+                        try:
+                            var = tf.compat.v1.get_variable(param_name)
+                            session.run(var.assign(data))
+                        except ValueError:
+                            if not ignore_missing:
+                                raise
 
     def feed(self, *args):
         '''Set the input(s) for the next operation by replacing the terminal nodes.
@@ -95,15 +110,34 @@ def get_unique_name(self, prefix):
 
     def make_var(self, name, shape):
         '''Creates a new TensorFlow variable.'''
-        return tf.get_variable(name, shape, trainable=self.trainable)
+        return tf.compat.v1.get_variable(name, shape, trainable=self.trainable)
 
     def validate_padding(self, padding):
         '''Verifies that the padding is one of the supported ones.'''
         assert padding in ('SAME', 'VALID')
 
+    def prelu_layer(self, x, weights, biases, name=None):
+        """Computes prelu(matmul(x, weights) + biases).
+
+        Args:
+          x: a 2D tensor.  Dimensions typically: batch, in_units
+          weights: a 2D tensor.  Dimensions typically: in_units, out_units
+          biases: a 1D tensor.  Dimensions: out_units
+          name: A name for the operation (optional).  If not specified,
+            "prelu_layer" is used.
+
+        Returns:
+          A 2-D Tensor computing prelu(matmul(x, weights) + biases).
+          Dimensions typically: batch, out_units.
+        """
+        with ops.name_scope(name, "prelu_layer", [x, weights, biases]) as name:
+            x = ops.convert_to_tensor(x, name="x")
+            weights = ops.convert_to_tensor(weights, name="weights")
+            biases = ops.convert_to_tensor(biases, name="biases")
+            xw_plus_b = nn_ops.bias_add(math_ops.matmul(x, weights), biases)
+            return self.parametric_relu(xw_plus_b, name=name)
+
     @layer
     def conv(self,
-             input,
+             inputs,
              k_h,
              k_w,
              c_o,
@@ -111,26 +145,27 @@ def conv(self,
              s_w,
              name,
              relu=True,
+             prelu=False,
              padding=DEFAULT_PADDING,
             group=1,
             biased=True):
         # Verify that the padding is acceptable
         self.validate_padding(padding)
         # Get the number of channels in the input
-        c_i = input.get_shape()[-1]
+        c_i = inputs.get_shape()[-1]
         # Verify that the grouping parameter is valid
         assert c_i % group == 0
         assert c_o % group == 0
         # Convolution for a given input and kernel
         convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
-        with tf.variable_scope(name) as scope:
+        with tf.compat.v1.variable_scope(name) as scope:
             kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
             if group == 1:
                 # This is the common-case. Convolve the input without any further complications.
-                output = convolve(input, kernel)
+                output = convolve(inputs, kernel)
             else:
                 # Split the input into groups and then convolve each of them independently
-                input_groups = tf.split(3, group, input)
+                input_groups = tf.split(3, group, inputs)
                 kernel_groups = tf.split(3, group, kernel)
                 output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
                 # Concatenate the groups
@@ -142,33 +177,66 @@ def conv(self,
         if relu:
             # ReLU non-linearity
             output = tf.nn.relu(output, name=scope.name)
+        elif prelu:
+            output = self.parametric_relu(output, scope=scope)
         return output
 
     @layer
-    def relu(self, input, name):
-        return tf.nn.relu(input, name=name)
+    def relu(self, x, name):
+        return tf.nn.relu(x, name=name)
+
+    @layer
+    def prelu(self, x, name):
+        return self.parametric_relu(x, name=name)
+
+    def parametric_relu(self, x, scope=None, name="PReLU"):
+        """ PReLU.
+
+        Parametric Rectified Linear Unit.  Based on:
+        https://github.com/tflearn/tflearn/blob/5c23566de6e614a36252a5828d107d001a0d0482/tflearn/activations.py#L188
+
+        Arguments:
+            x: A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`,
+                `int16`, or `int8`.
+            name: A name for this activation op (optional).
+
+        Returns:
+            A `Tensor` with the same type as `x`.
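+
+        The op computes `relu(x) + alphas * (x - abs(x)) * 0.5`, which is
+        equivalent to `max(0, x) + alphas * min(0, x)` with one learned
+        `alphas` coefficient per input channel.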
+        """
+        with tf.compat.v1.variable_scope(scope, default_name=name, values=[x]) as scope:
+            # NOTE: the alphas are created with make_var's default initializer;
+            # a constant initializer (e.g. zeros) could be used here instead.
+            alphas = self.make_var(name, x.get_shape()[-1])
+            x = tf.nn.relu(x) + tf.multiply(alphas, (x - tf.abs(x))) * 0.5
+
+            x.scope = scope
+            x.alphas = alphas
+            return x
 
     @layer
-    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
+    def max_pool(self, x, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
         self.validate_padding(padding)
-        return tf.nn.max_pool(input,
-                              ksize=[1, k_h, k_w, 1],
-                              strides=[1, s_h, s_w, 1],
-                              padding=padding,
-                              name=name)
+        return tf.nn.max_pool2d(x,
+                                ksize=[1, k_h, k_w, 1],
+                                strides=[1, s_h, s_w, 1],
+                                padding=padding,
+                                name=name)
 
     @layer
-    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
+    def avg_pool(self, x, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING):
         self.validate_padding(padding)
-        return tf.nn.avg_pool(input,
+        return tf.nn.avg_pool(x,
                               ksize=[1, k_h, k_w, 1],
                               strides=[1, s_h, s_w, 1],
                               padding=padding,
                               name=name)
 
     @layer
-    def lrn(self, input, radius, alpha, beta, name, bias=1.0):
-        return tf.nn.local_response_normalization(input,
+    def lrn(self, x, radius, alpha, beta, name, bias=1.0):
+        return tf.nn.local_response_normalization(x,
                                                   depth_radius=radius,
                                                   alpha=alpha,
                                                   beta=beta,
@@ -177,55 +245,60 @@ def lrn(self, input, radius, alpha, beta, name, bias=1.0):
 
     @layer
     def concat(self, inputs, axis, name):
-        return tf.concat(concat_dim=axis, values=inputs, name=name)
+        return tf.concat(values=inputs, axis=axis, name=name)
 
     @layer
     def add(self, inputs, name):
         return tf.add_n(inputs, name=name)
 
     @layer
-    def fc(self, input, num_out, name, relu=True):
-        with tf.variable_scope(name) as scope:
-            input_shape = input.get_shape()
+    def fc(self, x, num_out, name, relu=True, prelu=False):
+        with tf.compat.v1.variable_scope(name) as scope:
+            input_shape = x.get_shape()
             if input_shape.ndims == 4:
                 # The input is spatial. Vectorize it first.
                 dim = 1
                 for d in input_shape[1:].as_list():
                     dim *= d
-                feed_in = tf.reshape(input, [-1, dim])
+                feed_in = tf.reshape(x, [-1, dim])
             else:
-                feed_in, dim = (input, input_shape[-1].value)
+                feed_in, dim = (x, input_shape[-1].value)
             weights = self.make_var('weights', shape=[dim, num_out])
             biases = self.make_var('biases', [num_out])
-            op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
+            if relu:
+                op = tf.nn.relu_layer
+            elif prelu:
+                op = self.prelu_layer
+            else:
+                op = tf.compat.v1.nn.xw_plus_b
             fc = op(feed_in, weights, biases, name=scope.name)
             return fc
 
     @layer
-    def softmax(self, input, name):
-        input_shape = map(lambda v: v.value, input.get_shape())
+    def softmax(self, x, name):
+        input_shape = map(lambda v: v.value, x.get_shape())
         if len(input_shape) > 2:
             # For certain models (like NiN), the singleton spatial dimensions
             # need to be explicitly squeezed, since they're not broadcast-able
             # in TensorFlow's NHWC ordering (unlike Caffe's NCHW).
             if input_shape[1] == 1 and input_shape[2] == 1:
-                input = tf.squeeze(input, squeeze_dims=[1, 2])
+                x = tf.squeeze(x, squeeze_dims=[1, 2])
             else:
                 raise ValueError('Rank 2 tensor input expected for softmax!')
-        return tf.nn.softmax(input, name=name)
+        return tf.nn.softmax(x, name=name)
 
     @layer
-    def batch_normalization(self, input, name, scale_offset=True, relu=False):
+    def batch_normalization(self, x, name, scale_offset=True, relu=False, prelu=False):
         # NOTE: Currently, only inference is supported
-        with tf.variable_scope(name) as scope:
-            shape = [input.get_shape()[-1]]
+        with tf.compat.v1.variable_scope(name) as scope:
+            shape = [x.get_shape()[-1]]
             if scale_offset:
                 scale = self.make_var('scale', shape=shape)
                 offset = self.make_var('offset', shape=shape)
             else:
                 scale, offset = (None, None)
             output = tf.nn.batch_normalization(
-                input,
+                x,
                 mean=self.make_var('mean', shape=shape),
                 variance=self.make_var('variance', shape=shape),
                 offset=offset,
@@ -236,9 +309,11 @@ def batch_normalization(self, input, name, scale_offset=True, relu=False):
                 name=name)
             if relu:
                 output = tf.nn.relu(output)
+            elif prelu:
+                output = self.parametric_relu(output, name=scope.name)
             return output
 
     @layer
-    def dropout(self, input, keep_prob, name):
+    def dropout(self, x, keep_prob, name):
         keep = 1 - self.use_dropout + (self.use_dropout * keep_prob)
-        return tf.nn.dropout(input, keep, name=name)
+        return tf.nn.dropout(x, keep, name=name)
diff --git a/kaffe/tensorflow/transformer.py b/kaffe/tensorflow/transformer.py
index 34bfc9a..2fdb96e 100644
--- a/kaffe/tensorflow/transformer.py
+++ b/kaffe/tensorflow/transformer.py
@@ -3,7 +3,7 @@
 from ..errors import KaffeError, print_stderr
 from ..graph import GraphBuilder, NodeMapper
 from ..layers import NodeKind
-from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser,
+from ..transformers import (DataInjector, DataReshaper, NodeRenamer, ReLUFuser, PReLUFuser,
                             BatchNormScaleBiasFuser, BatchNormPreprocessor, ParameterNamer)
 from . import network
 
@@ -69,6 +69,8 @@ def __init__(self, node, default=True):
         self.inject_kwargs = {}
         if node.metadata.get('relu', False) != default:
             self.inject_kwargs['relu'] = not default
+        if node.metadata.get('prelu'):
+            self.inject_kwargs['prelu'] = node.metadata.get('prelu')
 
     def __call__(self, *args, **kwargs):
         kwargs.update(self.inject_kwargs)
@@ -103,6 +105,9 @@ def map_convolution(self, node):
 
     def map_relu(self, node):
         return TensorFlowNode('relu')
+
+    def map_prelu(self, node):
+        return TensorFlowNode('prelu')
 
     def map_pooling(self, node):
         pool_type = node.parameters.pool
@@ -229,7 +234,7 @@ def load(self, def_path, data_path, phase):
         if data_path is not None:
             # Load and associate learned parameters
             graph = DataInjector(def_path, data_path)(graph)
-
+
         # Transform the graph
         transformers = [
             # Fuse split batch normalization layers
@@ -240,6 +245,8 @@ def load(self, def_path, data_path, phase):
             # any arbitrary operation to be optionally activated.
             ReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct,
                                             NodeKind.BatchNorm]),
+            # PReLUFuser(allowed_parent_types=[NodeKind.Convolution, NodeKind.InnerProduct,
+            #                                  NodeKind.BatchNorm]),
 
             # Rename nodes
             # Slashes are used for scoping in TensorFlow. Replace slashes
@@ -263,7 +270,10 @@
                     NodeKind.Convolution: (2, 3, 1, 0),
 
                     # (c_o, c_i) -> (c_i, c_o)
-                    NodeKind.InnerProduct: (1, 0)
+                    NodeKind.InnerProduct: (1, 0),
+
+                    # one dimensional
+                    NodeKind.PReLU: (0,)
                 }),
 
                 # Pre-process batch normalization data
@@ -281,5 +291,7 @@ def transform_source(self):
         mapper = TensorFlowMapper(self.graph)
         chains = mapper.map()
         emitter = TensorFlowEmitter()
+        if not self.graph.name:
+            self.graph.name = 'MyNet'
         self.source = emitter.emit(self.graph.name, chains)
         return self.source
diff --git a/kaffe/transformers.py b/kaffe/transformers.py
index cd8a07d..44c54d4 100644
--- a/kaffe/transformers.py
+++ b/kaffe/transformers.py
@@ -51,7 +51,7 @@ def load_using_pb(self):
     def normalize_pb_data(self, layer):
         transformed = []
         for blob in layer.blobs:
-            if len(blob.shape.dim):
+            if blob.shape.dim:
                 dims = blob.shape.dim
                 c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims))
             else:
@@ -122,6 +122,7 @@ def __call__(self, graph):
             # Check for 2+ dimensional data
             if any(len(tensor.shape) > 1 for tensor in node.data):
                 print_stderr('Warning: parmaters not reshaped for node: {}'.format(node))
+                print('Some infos', node.kind, self.reshaped_node_types)
                 continue
             transpose_order = self.map(node.kind)
             weights = node.data[0]
@@ -205,6 +206,20 @@ def is_eligible_pair(self, parent, child):
 
     def merge(self, parent, _):
         parent.metadata['relu'] = True
 
+
+class PReLUFuser(SubNodeFuser):
+    """ Fuses parametric rectified linear units with their parent nodes.
+
+    See ReLUFuser for reference.
+    """
+
+    def __init__(self, allowed_parent_types=None):
+        self.allowed_parent_types = allowed_parent_types
+
+    def is_eligible_pair(self, parent, child):
+        return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
+                child.kind == NodeKind.PReLU)
+
+    def merge(self, parent, _):
+        parent.metadata['prelu'] = True
+
 
 class BatchNormScaleBiasFuser(SubNodeFuser):
     '''
@@ -282,6 +297,27 @@ def __call__(self, graph):
                 names = ('mean', 'variance')
                 if len(node.data) == 4:
                     names += ('scale', 'offset')
+            elif node.kind == NodeKind.PReLU:
+                names = ('weights',)
+                # TODO: Not sure how to handle PReLUParameter shapes;
+                # an example caffe model is still needed to test this:
+                # https://caffe.berkeleyvision.org/tutorial/layers/prelu.html
+                #
+                # Relevant PReLUParameter fields for future handling:
+                #   optional FillerParameter filler = 1;
+                #     (filler.type, .value, .min, .max, .mean, .std, .sparse, .variance_norm)
+                #   optional bool channel_shared = 2 [default = false];
+                continue
             else:
                 print_stderr('WARNING: Unhandled parameters: {}'.format(node.kind))
                 continue
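Note on using the exported `standalone.pb`: as the code above is written, the weights are baked into the file only when one of the `--freeze` options (`freeze_graph` or `optimize_for_inference`) is used; otherwise the file holds just the graph structure. The snippet below is an illustrative sketch of loading the frozen graph for inference, not part of the converter itself. It assumes a `tf.compat.v1` runtime, and the tensor names `data:0` and `prob/prob:0` are hypothetical placeholders (the output name follows the `{node}/{node}` pattern applied before freezing in `convert.py`).

```python
import numpy as np
import tensorflow as tf

# Read the frozen GraphDef written by convert.py --standalone-output-path
with tf.io.gfile.GFile('standalone.pb', 'rb') as f:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

# Import it into a fresh graph
graph = tf.Graph()
with graph.as_default():
    tf.compat.v1.import_graph_def(graph_def, name='')

with tf.compat.v1.Session(graph=graph) as sess:
    # Hypothetical tensor names -- inspect the graph to find the real ones,
    # e.g. [op.name for op in graph.get_operations()]
    input_tensor = graph.get_tensor_by_name('data:0')
    output_tensor = graph.get_tensor_by_name('prob/prob:0')
    batch = np.zeros((1, 28, 28, 1), dtype=np.float32)  # one dummy MNIST-sized image
    predictions = sess.run(output_tensor, feed_dict={input_tensor: batch})
    print(predictions.shape)
```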