Commit 21aea3d

Revert "Two new classes:"
This reverts commit 44af5a4.
1 parent 2d6d581 commit 21aea3d

File tree

16 files changed: +2 −1394 lines
WORKSPACE

Lines changed: 0 additions & 14 deletions
@@ -94,20 +94,6 @@ http_archive(
     ],
 )
 
-# NOTE: according to
-# https://docs.bazel.build/versions/master/external.html#transitive-dependencies
-# we should list the transitive dependencies of @org_tensorflow_hub in this
-# WORKSPACE file. Still, all of them are already listed by tf_workspace() which
-# is called later in this file.
-http_archive(
-    name = "org_tensorflow_hub",
-    strip_prefix = "hub-0.8.0",
-    sha256 = "968af30c448d51c36501b68df2c916fb4a61007db3240adc9248fa3a9be2da6f",
-    urls = [
-        "https://github.com/tensorflow/hub/archive/v0.8.0.zip"
-    ],
-)
-
 http_archive(
     name = "org_tensorflow",
     strip_prefix = "tensorflow-2.2.0",
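The reverted http_archive rule pins the @org_tensorflow_hub archive to a sha256 digest, so Bazel rejects any download whose bytes differ from what was vetted. As an illustrative aside (not part of the commit), the same check can be reproduced by hand with the URL and digest copied from the rule above; note that GitHub-generated source archives are not guaranteed to remain byte-identical forever:

    # Minimal sketch: reproduce Bazel's sha256 pin check by hand.
    # URL and digest are taken verbatim from the reverted rule.
    import hashlib
    import urllib.request

    URL = "https://github.com/tensorflow/hub/archive/v0.8.0.zip"
    EXPECTED = "968af30c448d51c36501b68df2c916fb4a61007db3240adc9248fa3a9be2da6f"

    archive = urllib.request.urlopen(URL).read()
    digest = hashlib.sha256(archive).hexdigest()
    print("OK" if digest == EXPECTED else "mismatch: " + digest)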

oss_scripts/model_server/save_models.py

Lines changed: 2 additions & 23 deletions
@@ -115,30 +115,10 @@ def __call__(self, x):
     (sentencepiece, _, _) = sp_tokenizer.tokenize_with_offsets(sentencepiece)
     sentencepiece_size = sp_tokenizer.vocab_size()
     sentencepiece_id = sp_tokenizer.id_to_string(1)
-    # Split merge tokenizer
+    # Split merge tokenizer - not in this version
     sm_tokenizer = text.SplitMergeTokenizer()
     split_merge = sm_tokenizer.tokenize(b'IloveFlume!',
                                         [0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0])
-    # Split merge from logits tokenizer
-    smfl_tokenizer = text.SplitMergeFromLogitsTokenizer()
-    split_merge_from_logits = smfl_tokenizer.tokenize(
-        b'IloveFlume!',
-        # One pair of logits for each Unicode character from the text.  Each
-        # pair indicates a "split" action if the first component is greater than
-        # the second one, and a "merge" otherwise.
-        [
-            [2.7, -0.3],   # I: split
-            [4.1, 0.82],   # l: split
-            [-2.3, 4.3],   # o: merge
-            [3.1, 12.2],   # v: merge
-            [-3.0, 4.7],   # e: merge
-            [2.7, -0.7],   # F: split
-            [0.7, 15.0],   # l: merge
-            [1.6, 23.0],   # u: merge
-            [2.1, 11.0],   # m: merge
-            [0.0, 20.0],   # e: merge
-            [18.0, 0.7],   # !: split
-        ])
     # Confirm TF unicode_script op that requires ICU works
     tf_unicode_script = tf.strings.unicode_script(
         [ord('a'), 0x0411, 0x82b8, ord(',')])
@@ -174,7 +154,6 @@ def assert_check(tensor):
     sentencepiece_id_assert = assert_check(sentencepiece_id)
     sentencepiece_size_assert = assert_check(sentencepiece_size)
     split_merge_assert = assert_check(split_merge)
-    split_merge_from_logits_assert = assert_check(split_merge_from_logits)
     tf_unicode_script_assert = assert_check(tf_unicode_script)
     unicode_script_assert = assert_check(unicode_script.to_tensor())
     whitespace_assert = assert_check(whitespace.to_tensor())
@@ -191,7 +170,6 @@ def assert_check(tensor):
         sentencepiece_id_assert,
         sentencepiece_size_assert,
         split_merge_assert,
-        split_merge_from_logits_assert,
         tf_unicode_script_assert,
         unicode_script_assert,
         whitespace_assert,
@@ -224,3 +202,4 @@ def assert_check(tensor):
       continue
     os.remove(dst_file)
   shutil.move(src_file, dst_dir)
+
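The deleted block above also documents the SplitMergeFromLogitsTokenizer contract: one logit pair per Unicode character, read as a "split" action when the first component is greater than the second and as a "merge" otherwise. A minimal sketch (an illustration, not part of the commit, assuming tensorflow_text is importable as text) shows that collapsing those pairs to 0/1 labels reproduces exactly the labels fed to the SplitMergeTokenizer call that survives the revert:

    # Reduce the deleted logit pairs to split/merge labels: 0 starts a
    # new token, 1 merges the character into the previous token.
    import tensorflow_text as text

    logits = [[2.7, -0.3], [4.1, 0.82], [-2.3, 4.3], [3.1, 12.2],
              [-3.0, 4.7], [2.7, -0.7], [0.7, 15.0], [1.6, 23.0],
              [2.1, 11.0], [0.0, 20.0], [18.0, 0.7]]
    labels = [0 if split > merge else 1 for split, merge in logits]
    assert labels == [0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0]

    tokens = text.SplitMergeTokenizer().tokenize(b'IloveFlume!', labels)
    print(tokens)  # [b'I' b'love' b'Flume' b'!']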

tensorflow_text/BUILD

Lines changed: 0 additions & 62 deletions
@@ -32,7 +32,6 @@ cc_library(
         ":regex_split_ops_cc",
         ":sentence_breaking_ops_cc",
         ":sentencepiece_tokenizer_cc",
-        ":split_merge_from_logits_tokenizer_cc",
         ":split_merge_tokenizer_cc",
         ":text_similarity_metric_ops_cc",
         ":unicode_script_tokenizer_cc",
@@ -57,7 +56,6 @@ py_library(
         ":bert_tokenizer",
         ":create_feature_bitmask_op",
         ":greedy_constrained_sequence_op",
-        ":hub_module_tokenizer",
         ":mst_ops",
         ":ngrams_op",
         ":normalize_ops",
@@ -67,7 +65,6 @@ py_library(
         ":sentence_breaking_ops",
         ":sentencepiece_tokenizer",
         ":sliding_window_op",
-        ":split_merge_from_logits_tokenizer",
         ":split_merge_tokenizer",
         ":string_ops",
         ":text_similarity_metric_ops",
@@ -700,65 +697,6 @@ py_test(
     ],
 )
 
-py_library(
-    name = "hub_module_tokenizer",
-    srcs = ["python/ops/hub_module_tokenizer.py"],
-    deps = [
-        ":tokenization",
-        "@org_tensorflow_hub//tensorflow_hub",
-        # python:array_ops tensorflow dep,
-        # python/ops/ragged:ragged_tensor tensorflow dep,
-    ],
-)
-
-py_test(
-    name = "hub_module_tokenizer_test",
-    size = "large",
-    srcs = ["python/ops/hub_module_tokenizer_test.py"],
-    data = [
-        ":python/ops/test_data/segmenter_hub_module",
-    ],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":ops",
-        # python:client_testlib tensorflow dep,
-        # python:framework_ops tensorflow dep,
-        # python:framework_test_lib tensorflow dep,
-        # python:lookup_ops tensorflow dep,
-        # python:variables tensorflow dep,
-        # python/ops/ragged:ragged_factory_ops tensorflow dep,
-    ],
-)
-
-py_tf_text_library(
-    name = "split_merge_from_logits_tokenizer",
-    srcs = ["python/ops/split_merge_from_logits_tokenizer.py"],
-    cc_op_defs = ["core/ops/tokenizer_from_logits_op.cc"],
-    cc_op_kernels = [
-        "//tensorflow_text/core/kernels:tokenizer_from_logits_kernel",
-    ],
-    deps = [
-        ":tokenization",
-        # python:dtypes tensorflow dep,
-        # python:framework_ops tensorflow dep,
-        # python/ops/ragged tensorflow dep,
-    ],
-)
-
-py_test(
-    name = "split_merge_from_logits_tokenizer_test",
-    size = "small",
-    srcs = ["python/ops/split_merge_from_logits_tokenizer_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":split_merge_from_logits_tokenizer",
-        # python:client_testlib tensorflow dep,
-        # python:errors tensorflow dep,
-        # python:framework_test_lib tensorflow dep,
-        # python/ops/ragged:ragged_factory_ops tensorflow dep,
-    ],
-)
-
 py_library(
     name = "unicode_char_tokenizer",
     srcs = ["python/ops/unicode_char_tokenizer.py"],

tensorflow_text/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -51,9 +51,7 @@
     "UnicodeScriptTokenizer",
     "viterbi_constrained_sequence",
     "WhitespaceTokenizer",
-    "HubModuleTokenizer",
     "SplitMergeTokenizer",
-    "SplitMergeFromLogitsTokenizer",
     "wordshape",
     "WordShape",
     "WordpieceTokenizer",

tensorflow_text/core/kernels/BUILD

Lines changed: 0 additions & 12 deletions
@@ -433,18 +433,6 @@ tf_kernel_library(
     ],
 )
 
-tf_kernel_library(
-    name = "tokenizer_from_logits_kernel",
-    srcs = ["tokenizer_from_logits_kernel.cc"],
-    deps = OSS_DEPS + [
-        "@icu//:common",
-        # absl/strings dep
-        "@com_google_absl//absl/base:core_headers",
-        # tf:framework tensorflow dep,
-        # tf:lib tensorflow dep,
-    ],
-)
-
 cc_library(
     name = "wordpiece_tokenizer",
     srcs = ["wordpiece_tokenizer.cc"],