Skip to content
This repository was archived by the owner on Jul 31, 2023. It is now read-only.

Commit 684d2db

Browse files
committed
Adding support for dataflow.
Change-Id: I7945a29f27b6af54c51b651471c3c70133778cbd
1 parent dc8e657 commit 684d2db

File tree

9 files changed: +417 additions, -354 deletions (lines changed)

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,4 +8,5 @@ nose >= 1.3.7
88
pylint >= 2.5.3
99
fire >= 0.3.1
1010
jupyter >= 1.0.0
11-
tensorflow >= 2.2.0
11+
tensorflow >= 2.2.0
12+
gcsfs >= 0.6.2

samples/Basic - Using TFRUtil.ipynb

Lines changed: 18 additions & 216 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
},
1414
{
1515
"cell_type": "code",
16-
"execution_count": 1,
16+
"execution_count": null,
1717
"metadata": {},
1818
"outputs": [],
1919
"source": [
@@ -23,7 +23,16 @@
2323
},
2424
{
2525
"cell_type": "code",
26-
"execution_count": 2,
26+
"execution_count": null,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"OUTPUT_PATH=\"./out\" # YOUR LOCAL OUTPUT PATH HERE"
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": null,
2736
"metadata": {},
2837
"outputs": [],
2938
"source": [
@@ -32,236 +41,29 @@
3241
},
3342
{
3443
"cell_type": "code",
35-
"execution_count": 3,
44+
"execution_count": null,
3645
"metadata": {},
37-
"outputs": [
38-
{
39-
"data": {
40-
"text/html": [
41-
"<div>\n",
42-
"<style scoped>\n",
43-
" .dataframe tbody tr th:only-of-type {\n",
44-
" vertical-align: middle;\n",
45-
" }\n",
46-
"\n",
47-
" .dataframe tbody tr th {\n",
48-
" vertical-align: top;\n",
49-
" }\n",
50-
"\n",
51-
" .dataframe thead th {\n",
52-
" text-align: right;\n",
53-
" }\n",
54-
"</style>\n",
55-
"<table border=\"1\" class=\"dataframe\">\n",
56-
" <thead>\n",
57-
" <tr style=\"text-align: right;\">\n",
58-
" <th></th>\n",
59-
" <th>split</th>\n",
60-
" <th>image_uri</th>\n",
61-
" <th>label</th>\n",
62-
" </tr>\n",
63-
" </thead>\n",
64-
" <tbody>\n",
65-
" <tr>\n",
66-
" <th>0</th>\n",
67-
" <td>TRAIN</td>\n",
68-
" <td>../tfrutil/test_data/images/cat/cat-640x853-1.jpg</td>\n",
69-
" <td>cat</td>\n",
70-
" </tr>\n",
71-
" <tr>\n",
72-
" <th>1</th>\n",
73-
" <td>VALIDATION</td>\n",
74-
" <td>../tfrutil/test_data/images/cat/cat-800x600-2.jpg</td>\n",
75-
" <td>cat</td>\n",
76-
" </tr>\n",
77-
" <tr>\n",
78-
" <th>2</th>\n",
79-
" <td>TEST</td>\n",
80-
" <td>../tfrutil/test_data/images/cat/cat-800x600-3.jpg</td>\n",
81-
" <td>cat</td>\n",
82-
" </tr>\n",
83-
" <tr>\n",
84-
" <th>3</th>\n",
85-
" <td>TRAIN</td>\n",
86-
" <td>../tfrutil/test_data/images/goat/goat-640x640-...</td>\n",
87-
" <td>goat</td>\n",
88-
" </tr>\n",
89-
" <tr>\n",
90-
" <th>4</th>\n",
91-
" <td>VALIDATION</td>\n",
92-
" <td>../tfrutil/test_data/images/goat/goat-320x320-...</td>\n",
93-
" <td>goat</td>\n",
94-
" </tr>\n",
95-
" <tr>\n",
96-
" <th>5</th>\n",
97-
" <td>TEST</td>\n",
98-
" <td>../tfrutil/test_data/images/goat/goat-640x427-...</td>\n",
99-
" <td>goat</td>\n",
100-
" </tr>\n",
101-
" </tbody>\n",
102-
"</table>\n",
103-
"</div>"
104-
],
105-
"text/plain": [
106-
" split image_uri label\n",
107-
"0 TRAIN ../tfrutil/test_data/images/cat/cat-640x853-1.jpg cat\n",
108-
"1 VALIDATION ../tfrutil/test_data/images/cat/cat-800x600-2.jpg cat\n",
109-
"2 TEST ../tfrutil/test_data/images/cat/cat-800x600-3.jpg cat\n",
110-
"3 TRAIN ../tfrutil/test_data/images/goat/goat-640x640-... goat\n",
111-
"4 VALIDATION ../tfrutil/test_data/images/goat/goat-320x320-... goat\n",
112-
"5 TEST ../tfrutil/test_data/images/goat/goat-640x427-... goat"
113-
]
114-
},
115-
"execution_count": 3,
116-
"metadata": {},
117-
"output_type": "execute_result"
118-
}
119-
],
46+
"outputs": [],
12047
"source": [
12148
"df"
12249
]
12350
},
12451
{
12552
"cell_type": "code",
126-
"execution_count": 4,
53+
"execution_count": null,
12754
"metadata": {
12855
"scrolled": true
12956
},
130-
"outputs": [
131-
{
132-
"name": "stdout",
133-
"output_type": "stream",
134-
"text": [
135-
"Starting DataFlow Transform. This may take a while. Please wait.\n"
136-
]
137-
},
138-
{
139-
"data": {
140-
"application/javascript": [
141-
"\n",
142-
" if (typeof window.interactive_beam_jquery == 'undefined') {\n",
143-
" var jqueryScript = document.createElement('script');\n",
144-
" jqueryScript.src = 'https://code.jquery.com/jquery-3.4.1.slim.min.js';\n",
145-
" jqueryScript.type = 'text/javascript';\n",
146-
" jqueryScript.onload = function() {\n",
147-
" var datatableScript = document.createElement('script');\n",
148-
" datatableScript.src = 'https://cdn.datatables.net/1.10.20/js/jquery.dataTables.min.js';\n",
149-
" datatableScript.type = 'text/javascript';\n",
150-
" datatableScript.onload = function() {\n",
151-
" window.interactive_beam_jquery = jQuery.noConflict(true);\n",
152-
" window.interactive_beam_jquery(document).ready(function($){\n",
153-
" \n",
154-
" });\n",
155-
" }\n",
156-
" document.head.appendChild(datatableScript);\n",
157-
" };\n",
158-
" document.head.appendChild(jqueryScript);\n",
159-
" } else {\n",
160-
" window.interactive_beam_jquery(document).ready(function($){\n",
161-
" \n",
162-
" });\n",
163-
" }"
164-
]
165-
},
166-
"metadata": {},
167-
"output_type": "display_data"
168-
},
169-
{
170-
"data": {
171-
"application/javascript": [
172-
"\n",
173-
" var import_html = () => {\n",
174-
" ['https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html'].forEach(href => {\n",
175-
" var link = document.createElement('link');\n",
176-
" link.rel = 'import'\n",
177-
" link.href = href;\n",
178-
" document.head.appendChild(link);\n",
179-
" });\n",
180-
" }\n",
181-
" if ('import' in document.createElement('link')) {\n",
182-
" import_html();\n",
183-
" } else {\n",
184-
" var webcomponentScript = document.createElement('script');\n",
185-
" webcomponentScript.src = 'https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js';\n",
186-
" webcomponentScript.type = 'text/javascript';\n",
187-
" webcomponentScript.onload = function(){\n",
188-
" import_html();\n",
189-
" };\n",
190-
" document.head.appendChild(webcomponentScript);\n",
191-
" }"
192-
]
193-
},
194-
"metadata": {},
195-
"output_type": "display_data"
196-
},
197-
{
198-
"name": "stdout",
199-
"output_type": "stream",
200-
"text": [
201-
"TFRecords created. Output stored in ./out\n"
202-
]
203-
}
204-
],
57+
"outputs": [],
20558
"source": [
206-
"df.tensorflow.to_tfr(output_path=\"./out\")"
59+
"df.tensorflow.to_tfr(output_dir=OUTPUT_PATH)"
20760
]
20861
},
20962
{
21063
"cell_type": "code",
211-
"execution_count": 7,
64+
"execution_count": null,
21265
"metadata": {},
213-
"outputs": [
214-
{
215-
"name": "stdout",
216-
"output_type": "stream",
217-
"text": [
218-
"./out:\r\n",
219-
"tfrutil-20200629-192138-to-tfr\ttfrutil-beam.log\r\n",
220-
"\r\n",
221-
"./out/tfrutil-20200629-192138-to-tfr:\r\n",
222-
"discarded-data-00000-of-00001\t train-00000-of-00001.tfrecord.gz\r\n",
223-
"schema.pbtxt\t\t\t transformed_metadata\r\n",
224-
"test-00000-of-00001.tfrecord.gz transform_fn\r\n",
225-
"tft_tmp\t\t\t\t val-00000-of-00001.tfrecord.gz\r\n",
226-
"\r\n",
227-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp:\r\n",
228-
"tftransform_tmp\r\n",
229-
"\r\n",
230-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp:\r\n",
231-
"5584526e1f6f4d7d9f5d25ad896a1ecf 940f1e45d0fa4c8f93e58ac8fb7cdc5c\r\n",
232-
"8441d43a7b774700bdda1a61797ab274 vocab_compute_and_apply_vocabulary_vocabulary\r\n",
233-
"\r\n",
234-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/5584526e1f6f4d7d9f5d25ad896a1ecf:\r\n",
235-
"saved_model.pb\tvariables\r\n",
236-
"\r\n",
237-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/5584526e1f6f4d7d9f5d25ad896a1ecf/variables:\r\n",
238-
"\r\n",
239-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/8441d43a7b774700bdda1a61797ab274:\r\n",
240-
"assets\tsaved_model.pb\tvariables\r\n",
241-
"\r\n",
242-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/8441d43a7b774700bdda1a61797ab274/assets:\r\n",
243-
"vocab_compute_and_apply_vocabulary_vocabulary\r\n",
244-
"\r\n",
245-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/8441d43a7b774700bdda1a61797ab274/variables:\r\n",
246-
"\r\n",
247-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/940f1e45d0fa4c8f93e58ac8fb7cdc5c:\r\n",
248-
"saved_model.pb\tvariables\r\n",
249-
"\r\n",
250-
"./out/tfrutil-20200629-192138-to-tfr/tft_tmp/tftransform_tmp/940f1e45d0fa4c8f93e58ac8fb7cdc5c/variables:\r\n",
251-
"\r\n",
252-
"./out/tfrutil-20200629-192138-to-tfr/transformed_metadata:\r\n",
253-
"schema.pbtxt\r\n",
254-
"\r\n",
255-
"./out/tfrutil-20200629-192138-to-tfr/transform_fn:\r\n",
256-
"assets\tsaved_model.pb\tvariables\r\n",
257-
"\r\n",
258-
"./out/tfrutil-20200629-192138-to-tfr/transform_fn/assets:\r\n",
259-
"vocab_compute_and_apply_vocabulary_vocabulary\r\n",
260-
"\r\n",
261-
"./out/tfrutil-20200629-192138-to-tfr/transform_fn/variables:\r\n"
262-
]
263-
}
264-
],
66+
"outputs": [],
26567
"source": [
26668
"!ls -R ./out"
26769
]

0 commit comments

Comments
 (0)