55< html xmlns ="http://www.w3.org/1999/xhtml " lang ="en ">
66 < head >
77 < meta http-equiv ="Content-Type " content ="text/html; charset=utf-8 " />
8- < title > tensorflowonspark.TFCluster — TensorFlowOnSpark 1.1 .0 documentation</ title >
8+ < title > tensorflowonspark.TFCluster — TensorFlowOnSpark 1.2 .0 documentation</ title >
99 < link rel ="stylesheet " href ="../../_static/classic.css " type ="text/css " />
1010 < link rel ="stylesheet " href ="../../_static/pygments.css " type ="text/css " />
1111 < script type ="text/javascript ">
1212 var DOCUMENTATION_OPTIONS = {
1313 URL_ROOT : '../../' ,
14- VERSION : '1.1 .0' ,
14+ VERSION : '1.2 .0' ,
1515 COLLAPSE_INDEX : false ,
1616 FILE_SUFFIX : '.html' ,
1717 HAS_SOURCE : true ,
@@ -34,7 +34,7 @@ <h3>Navigation</h3>
3434 < li class ="right " >
3535 < a href ="../../py-modindex.html " title ="Python Module Index "
3636 > modules</ a > |</ li >
37- < li class ="nav-item nav-item-0 "> < a href ="../../index.html "> TensorFlowOnSpark 1.1 .0 documentation</ a > »</ li >
37+ < li class ="nav-item nav-item-0 "> < a href ="../../index.html "> TensorFlowOnSpark 1.2 .0 documentation</ a > »</ li >
3838 < li class ="nav-item nav-item-1 "> < a href ="../index.html " accesskey ="U "> Module code</ a > »</ li >
3939 </ ul >
4040 </ div >
@@ -233,7 +233,8 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
233233 < span class ="n "> tb_url</ span > < span class ="o "> =</ span > < span class ="s2 "> "http://</ span > < span class ="si "> {0}</ span > < span class ="s2 "> :</ span > < span class ="si "> {1}</ span > < span class ="s2 "> "</ span > < span class ="o "> .</ span > < span class ="n "> format</ span > < span class ="p "> (</ span > < span class ="n "> node</ span > < span class ="p "> [</ span > < span class ="s1 "> 'host'</ span > < span class ="p "> ],</ span > < span class ="n "> node</ span > < span class ="p "> [</ span > < span class ="s1 "> 'tb_port'</ span > < span class ="p "> ])</ span >
234234 < span class ="k "> return</ span > < span class ="n "> tb_url</ span > </ div > </ div >
235235
236- < div class ="viewcode-block " id ="run "> < a class ="viewcode-back " href ="../../tensorflowonspark.TFCluster.html#tensorflowonspark.TFCluster.run "> [docs]</ a > < span class ="k "> def</ span > < span class ="nf "> run</ span > < span class ="p "> (</ span > < span class ="n "> sc</ span > < span class ="p "> ,</ span > < span class ="n "> map_fun</ span > < span class ="p "> ,</ span > < span class ="n "> tf_args</ span > < span class ="p "> ,</ span > < span class ="n "> num_executors</ span > < span class ="p "> ,</ span > < span class ="n "> num_ps</ span > < span class ="p "> ,</ span > < span class ="n "> tensorboard</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span > < span class ="n "> input_mode</ span > < span class ="o "> =</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> TENSORFLOW</ span > < span class ="p "> ,</ span > < span class ="n "> log_dir</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ,</ span > < span class ="n "> queues</ span > < span class ="o "> =</ span > < span class ="p "> [</ span > < span class ="s1 "> 'input'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'output'</ span > < span class ="p "> ]):</ span >
236+ < div class ="viewcode-block " id ="run "> < a class ="viewcode-back " href ="../../tensorflowonspark.TFCluster.html#tensorflowonspark.TFCluster.run "> [docs]</ a > < span class ="k "> def</ span > < span class ="nf "> run</ span > < span class ="p "> (</ span > < span class ="n "> sc</ span > < span class ="p "> ,</ span > < span class ="n "> map_fun</ span > < span class ="p "> ,</ span > < span class ="n "> tf_args</ span > < span class ="p "> ,</ span > < span class ="n "> num_executors</ span > < span class ="p "> ,</ span > < span class ="n "> num_ps</ span > < span class ="p "> ,</ span > < span class ="n "> tensorboard</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span > < span class ="n "> input_mode</ span > < span class ="o "> =</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> TENSORFLOW</ span > < span class ="p "> ,</ span >
237+ < span class ="n "> log_dir</ span > < span class ="o "> =</ span > < span class ="kc "> None</ span > < span class ="p "> ,</ span > < span class ="n "> driver_ps_nodes</ span > < span class ="o "> =</ span > < span class ="kc "> False</ span > < span class ="p "> ,</ span > < span class ="n "> queues</ span > < span class ="o "> =</ span > < span class ="p "> [</ span > < span class ="s1 "> 'input'</ span > < span class ="p "> ,</ span > < span class ="s1 "> 'output'</ span > < span class ="p "> ]):</ span >
237238 < span class ="sd "> """Starts the TensorFlowOnSpark cluster and Runs the TensorFlow "main" function on the Spark executors</ span >
238239
239240< span class ="sd "> Args:</ span >
@@ -245,6 +246,7 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
245246< span class ="sd "> :tensorboard: boolean indicating if the chief worker should spawn a Tensorboard server.</ span >
246247< span class ="sd "> :input_mode: TFCluster.InputMode</ span >
247248< span class ="sd "> :log_dir: directory to save tensorboard event logs. If None, defaults to a fixed path on local filesystem.</ span >
249+ < span class ="sd "> :driver_ps_nodes: run the PS nodes on the driver locally instead of on the spark executors; this help maximizing computing resources (esp. GPU). You will need to set cluster_size = num_executors + num_ps</ span >
248250< span class ="sd "> :queues: *INTERNAL_USE*</ span >
249251
250252< span class ="sd "> Returns:</ span >
@@ -253,10 +255,14 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
253255 < span class ="n "> logging</ span > < span class ="o "> .</ span > < span class ="n "> info</ span > < span class ="p "> (</ span > < span class ="s2 "> "Reserving TFSparkNodes </ span > < span class ="si "> {0}</ span > < span class ="s2 "> "</ span > < span class ="o "> .</ span > < span class ="n "> format</ span > < span class ="p "> (</ span > < span class ="s2 "> "w/ TensorBoard"</ span > < span class ="k "> if</ span > < span class ="n "> tensorboard</ span > < span class ="k "> else</ span > < span class ="s2 "> ""</ span > < span class ="p "> ))</ span >
254256 < span class ="k "> assert</ span > < span class ="n "> num_ps</ span > < span class ="o "> <</ span > < span class ="n "> num_executors</ span >
255257
258+ < span class ="k "> if</ span > < span class ="n "> driver_ps_nodes</ span > < span class ="ow "> and</ span > < span class ="n "> input_mode</ span > < span class ="o "> !=</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> TENSORFLOW</ span > < span class ="p "> :</ span >
259+ < span class ="k "> raise</ span > < span class ="ne "> Exception</ span > < span class ="p "> (</ span > < span class ="s1 "> 'running PS nodes on driver locally is only supported in InputMode.TENSORFLOW'</ span > < span class ="p "> )</ span >
260+
256261 < span class ="c1 "> # build a cluster_spec template using worker_nums</ span >
257262 < span class ="n "> cluster_template</ span > < span class ="o "> =</ span > < span class ="p "> {}</ span >
258263 < span class ="n "> cluster_template</ span > < span class ="p "> [</ span > < span class ="s1 "> 'ps'</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="n "> num_ps</ span > < span class ="p "> )</ span >
259264 < span class ="n "> cluster_template</ span > < span class ="p "> [</ span > < span class ="s1 "> 'worker'</ span > < span class ="p "> ]</ span > < span class ="o "> =</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="n "> num_ps</ span > < span class ="p "> ,</ span > < span class ="n "> num_executors</ span > < span class ="p "> )</ span >
265+ < span class ="n "> logging</ span > < span class ="o "> .</ span > < span class ="n "> info</ span > < span class ="p "> (</ span > < span class ="s2 "> "worker node range </ span > < span class ="si "> %s</ span > < span class ="s2 "> , ps node range </ span > < span class ="si "> %s</ span > < span class ="s2 "> "</ span > < span class ="o "> %</ span > < span class ="p "> (</ span > < span class ="n "> cluster_template</ span > < span class ="p "> [</ span > < span class ="s1 "> 'worker'</ span > < span class ="p "> ],</ span > < span class ="n "> cluster_template</ span > < span class ="p "> [</ span > < span class ="s1 "> 'ps'</ span > < span class ="p "> ]))</ span >
260266
261267 < span class ="c1 "> # get default filesystem from spark</ span >
262268 < span class ="n "> defaultFS</ span > < span class ="o "> =</ span > < span class ="n "> sc</ span > < span class ="o "> .</ span > < span class ="n "> _jsc</ span > < span class ="o "> .</ span > < span class ="n "> hadoopConfiguration</ span > < span class ="p "> ()</ span > < span class ="o "> .</ span > < span class ="n "> get</ span > < span class ="p "> (</ span > < span class ="s2 "> "fs.defaultFS"</ span > < span class ="p "> )</ span >
@@ -281,7 +287,25 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
281287 < span class ="s1 "> 'working_dir'</ span > < span class ="p "> :</ span > < span class ="n "> working_dir</ span > < span class ="p "> ,</ span >
282288 < span class ="s1 "> 'server_addr'</ span > < span class ="p "> :</ span > < span class ="n "> server_addr</ span >
283289 < span class ="p "> }</ span >
284- < span class ="n "> nodeRDD</ span > < span class ="o "> =</ span > < span class ="n "> sc</ span > < span class ="o "> .</ span > < span class ="n "> parallelize</ span > < span class ="p "> (</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="n "> num_executors</ span > < span class ="p "> ),</ span > < span class ="n "> num_executors</ span > < span class ="p "> )</ span >
290+ < span class ="k "> if</ span > < span class ="n "> driver_ps_nodes</ span > < span class ="p "> :</ span >
291+ < span class ="n "> nodeRDD</ span > < span class ="o "> =</ span > < span class ="n "> sc</ span > < span class ="o "> .</ span > < span class ="n "> parallelize</ span > < span class ="p "> (</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="n "> num_ps</ span > < span class ="p "> ,</ span > < span class ="n "> num_executors</ span > < span class ="p "> ),</ span > < span class ="n "> num_executors</ span > < span class ="o "> -</ span > < span class ="n "> num_ps</ span > < span class ="p "> )</ span >
292+ < span class ="k "> else</ span > < span class ="p "> :</ span >
293+ < span class ="n "> nodeRDD</ span > < span class ="o "> =</ span > < span class ="n "> sc</ span > < span class ="o "> .</ span > < span class ="n "> parallelize</ span > < span class ="p "> (</ span > < span class ="nb "> range</ span > < span class ="p "> (</ span > < span class ="n "> num_executors</ span > < span class ="p "> ),</ span > < span class ="n "> num_executors</ span > < span class ="p "> )</ span >
294+
295+ < span class ="k "> if</ span > < span class ="n "> driver_ps_nodes</ span > < span class ="p "> :</ span >
296+ < span class ="k "> def</ span > < span class ="nf "> _start_ps</ span > < span class ="p "> (</ span > < span class ="n "> node_index</ span > < span class ="p "> ):</ span >
297+ < span class ="n "> logging</ span > < span class ="o "> .</ span > < span class ="n "> info</ span > < span class ="p "> (</ span > < span class ="s2 "> "starting ps node locally </ span > < span class ="si "> %d</ span > < span class ="s2 "> "</ span > < span class ="o "> %</ span > < span class ="n "> node_index</ span > < span class ="p "> )</ span >
298+ < span class ="n "> TFSparkNode</ span > < span class ="o "> .</ span > < span class ="n "> run</ span > < span class ="p "> (</ span > < span class ="n "> map_fun</ span > < span class ="p "> ,</ span >
299+ < span class ="n "> tf_args</ span > < span class ="p "> ,</ span >
300+ < span class ="n "> cluster_meta</ span > < span class ="p "> ,</ span >
301+ < span class ="n "> tensorboard</ span > < span class ="p "> ,</ span >
302+ < span class ="n "> log_dir</ span > < span class ="p "> ,</ span >
303+ < span class ="n "> queues</ span > < span class ="p "> ,</ span >
304+ < span class ="n "> background</ span > < span class ="o "> =</ span > < span class ="p "> (</ span > < span class ="n "> input_mode</ span > < span class ="o "> ==</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> SPARK</ span > < span class ="p "> ))([</ span > < span class ="n "> node_index</ span > < span class ="p "> ])</ span >
305+ < span class ="k "> for</ span > < span class ="n "> i</ span > < span class ="ow "> in</ span > < span class ="n "> cluster_template</ span > < span class ="p "> [</ span > < span class ="s1 "> 'ps'</ span > < span class ="p "> ]:</ span >
306+ < span class ="n "> ps_thread</ span > < span class ="o "> =</ span > < span class ="n "> threading</ span > < span class ="o "> .</ span > < span class ="n "> Thread</ span > < span class ="p "> (</ span > < span class ="n "> target</ span > < span class ="o "> =</ span > < span class ="k "> lambda</ span > < span class ="p "> :</ span > < span class ="n "> _start_ps</ span > < span class ="p "> (</ span > < span class ="n "> i</ span > < span class ="p "> ))</ span >
307+ < span class ="n "> ps_thread</ span > < span class ="o "> .</ span > < span class ="n "> daemon</ span > < span class ="o "> =</ span > < span class ="kc "> True</ span >
308+ < span class ="n "> ps_thread</ span > < span class ="o "> .</ span > < span class ="n "> start</ span > < span class ="p "> ()</ span >
285309
286310 < span class ="c1 "> # start TF on a background thread (on Spark driver) to allow for feeding job</ span >
287311 < span class ="k "> def</ span > < span class ="nf "> _start</ span > < span class ="p "> ():</ span >
@@ -291,7 +315,7 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
291315 < span class ="n "> tensorboard</ span > < span class ="p "> ,</ span >
292316 < span class ="n "> log_dir</ span > < span class ="p "> ,</ span >
293317 < span class ="n "> queues</ span > < span class ="p "> ,</ span >
294- < span class ="p "> (</ span > < span class ="n "> input_mode</ span > < span class ="o "> ==</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> SPARK</ span > < span class ="p "> )))</ span >
318+ < span class ="n " > background </ span > < span class =" o " > = </ span > < span class =" p "> (</ span > < span class ="n "> input_mode</ span > < span class ="o "> ==</ span > < span class ="n "> InputMode</ span > < span class ="o "> .</ span > < span class ="n "> SPARK</ span > < span class ="p "> )))</ span >
295319 < span class ="n "> t</ span > < span class ="o "> =</ span > < span class ="n "> threading</ span > < span class ="o "> .</ span > < span class ="n "> Thread</ span > < span class ="p "> (</ span > < span class ="n "> target</ span > < span class ="o "> =</ span > < span class ="n "> _start</ span > < span class ="p "> )</ span >
296320 < span class ="n "> t</ span > < span class ="o "> .</ span > < span class ="n "> start</ span > < span class ="p "> ()</ span >
297321
@@ -366,13 +390,13 @@ <h3>Navigation</h3>
366390 < li class ="right " >
367391 < a href ="../../py-modindex.html " title ="Python Module Index "
368392 > modules</ a > |</ li >
369- < li class ="nav-item nav-item-0 "> < a href ="../../index.html "> TensorFlowOnSpark 1.1 .0 documentation</ a > »</ li >
393+ < li class ="nav-item nav-item-0 "> < a href ="../../index.html "> TensorFlowOnSpark 1.2 .0 documentation</ a > »</ li >
370394 < li class ="nav-item nav-item-1 "> < a href ="../index.html " > Module code</ a > »</ li >
371395 </ ul >
372396 </ div >
373397 < div class ="footer " role ="contentinfo ">
374398 © Copyright 2017, Yahoo Inc.
375- Created using < a href ="http://sphinx-doc.org/ "> Sphinx</ a > 1.6.5 .
399+ Created using < a href ="http://sphinx-doc.org/ "> Sphinx</ a > 1.6.7 .
376400 </ div >
377401 </ body >
378402</ html >
0 commit comments