Include id in features

Neural-Link Team · tensorflow-copybara · commit 596b71019308 · 2020-05-07T15:03:42.000-07:00
PiperOrigin-RevId: 310444078
diff --git a/neural_structured_learning/examples/graph_keras_mlp_cora.py b/neural_structured_learning/examples/graph_keras_mlp_cora.py
@@ -125,13 +125,16 @@ def parse_example(example_proto):
         nbr_feature_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'words')
         nbr_weight_key = '{}{}{}'.format(NBR_FEATURE_PREFIX, i,
                                          NBR_WEIGHT_SUFFIX)
+        nbr_id_key = '{}{}_{}'.format(NBR_FEATURE_PREFIX, i, 'id')
         feature_spec[nbr_feature_key] = tf.io.FixedLenFeature(
             [hparams.max_seq_length],
             tf.int64,
             default_value=tf.constant(
                 0, dtype=tf.int64, shape=[hparams.max_seq_length]))
         feature_spec[nbr_weight_key] = tf.io.FixedLenFeature(
             [1], tf.float32, default_value=tf.constant([0.0]))
+        feature_spec[nbr_id_key] = tf.io.FixedLenFeature(
+            (), tf.string, default_value='')
 
     features = tf.io.parse_single_example(example_proto, feature_spec)
 
diff --git a/neural_structured_learning/examples/preprocess/cora/preprocess_cora_dataset.py b/neural_structured_learning/examples/preprocess/cora/preprocess_cora_dataset.py
@@ -95,6 +95,12 @@ def _int64_feature(*value):
   return tf.train.Feature(int64_list=tf.train.Int64List(value=list(value)))
 
 
+def _bytes_feature(value):
+  """Returns a bytes tf.train.Feature with the UTF-8 encoding of `value`."""
+  return tf.train.Feature(
+      bytes_list=tf.train.BytesList(value=[value.encode('utf-8')]))
+
+
 def parse_cora_content(in_file, train_percentage):
   """Converts the Cora content (in TSV) to `tf.train.Example` instances.
 
@@ -132,13 +138,14 @@ def parse_cora_content(in_file, train_percentage):
       entries = line.rstrip('\n').split('\t')
       # entries contains [ID, Word1, Word2, ..., Label]; 'Words' are 0/1 values.
       words = map(int, entries[1:-1])
+      example_id = entries[0]
       features = {
+          'id': _bytes_feature(example_id),
           'words': _int64_feature(*words),
           'label': _int64_feature(label_index[entries[-1]]),
       }
       example_features = tf.train.Example(
           features=tf.train.Features(feature=features))
-      example_id = entries[0]
       if random.uniform(0, 1) <= train_percentage:  # for train/test split.
         train_examples[example_id] = example_features
       else: