@@ -28,82 +28,87 @@ limitations under the License.
2828
2929using stream_executor::DeviceMemoryBase;
3030
31- namespace tensorflow {
31+ namespace stream_executor {
3232namespace tpu {
3333
3434namespace {
3535using xla::Status;
3636} // namespace
3737
3838TpuExecutor::~TpuExecutor () {
39- tpu::ExecutorApiFn ()->TpuExecutor_FreeFn (executor_);
39+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_FreeFn (executor_);
4040}
4141
4242Status TpuExecutor::Init (int device_ordinal,
4343 ::stream_executor::DeviceOptions device_options) {
4444 StatusHelper status;
4545 SE_DeviceOptions* options =
46- tpu::ExecutorApiFn ()->TpuExecutor_NewDeviceOptionsFn (
46+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_NewDeviceOptionsFn (
4747 device_options.flags ());
48- tpu::ExecutorApiFn ()->TpuExecutor_InitFn (executor_, device_ordinal, options,
49- status.c_status );
50- tpu::ExecutorApiFn ()->TpuExecutor_FreeDeviceOptionsFn (options);
48+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_InitFn (
49+ executor_, device_ordinal, options, status.c_status );
50+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_FreeDeviceOptionsFn (options);
5151 return status.status ();
5252}
5353
5454int TpuExecutor::PlatformDeviceCount () {
55- return tpu::ExecutorApiFn ()->TpuExecutor_PlatformDeviceCountFn (executor_);
55+ return tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_PlatformDeviceCountFn (
56+ executor_);
5657}
5758
5859void TpuExecutor::SyncAndForgetFailedStreams () {
59- tpu::ExecutorApiFn ()->TpuExecutor_SyncAndForgetFailedStreamsFn (executor_);
60+ tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_SyncAndForgetFailedStreamsFn (
61+ executor_);
6062}
6163
6264bool TpuExecutor::SynchronizeAllActivity () {
63- return tpu::ExecutorApiFn ()->TpuExecutor_SynchronizeAllActivityFn (executor_);
65+ return tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_SynchronizeAllActivityFn (
66+ executor_);
6467}
6568
6669Status TpuExecutor::BlockHostUntilDone (Stream* stream) {
6770 StatusHelper status;
68- tpu::ExecutorApiFn ()->TpuExecutor_BlockHostUntilDoneFn (
71+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_BlockHostUntilDoneFn (
6972 executor_, get_stream (stream->implementation ()), status.c_status );
7073 return status.status ();
7174}
7275
7376Status TpuExecutor::BlockUntilDoneOrFailed () {
7477 StatusHelper status;
75- tpu::ExecutorApiFn ()->TpuExecutor_BlockUntilDoneOrFailedFn (executor_,
76- status.c_status );
78+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_BlockUntilDoneOrFailedFn (
79+ executor_, status.c_status );
7780 return status.status ();
7881}
7982
8083Status TpuExecutor::GetStatus (Stream* stream) {
8184 StatusHelper status;
82- tpu::ExecutorApiFn ()->TpuExecutor_GetStatusFn (
85+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_GetStatusFn (
8386 executor_, get_stream (stream->implementation ()), status.c_status );
8487 return status.status ();
8588}
8689
87- tpu::TpuCoreLocationExternal TpuExecutor::GetCoreLocationExternal () const {
88- return tpu::TpuCoreLocationExternal (
89- tpu::ExecutorApiFn ()->TpuExecutor_GetCoreLocationFn (executor_));
90+ tensorflow::tpu::TpuCoreLocationExternal TpuExecutor::GetCoreLocationExternal ()
91+ const {
92+ return tensorflow::tpu::TpuCoreLocationExternal (
93+ tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_GetCoreLocationFn (
94+ executor_));
9095}
9196
9297bool TpuExecutor::AllocateStream (Stream* stream) {
93- return tpu::ExecutorApiFn ()->TpuExecutor_AllocateStreamFn (
98+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_AllocateStreamFn (
9499 executor_, get_stream (stream->implementation ()));
95100}
96101
97102void TpuExecutor::DeallocateStream (Stream* stream) {
98- tpu::ExecutorApiFn ()->TpuExecutor_DeallocateStreamFn (
103+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_DeallocateStreamFn (
99104 executor_, get_stream (stream->implementation ()));
100105 tpu_platform ().mutex ().Lock ();
101106 stream_map ().erase (stream->implementation ());
102107 tpu_platform ().mutex ().Unlock ();
103108}
104109
105110bool TpuExecutor::CreateStreamDependency (Stream* dependent, Stream* other) {
106- return tpu::ExecutorApiFn ()->TpuExecutor_CreateStreamDependencyFn (
111+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_CreateStreamDependencyFn (
107112 executor_, get_stream (dependent->implementation ()),
108113 get_stream (other->implementation ()));
109114}
@@ -121,13 +126,13 @@ bool TpuExecutor::AllocateTimer(Timer* timer) { return true; }
121126void TpuExecutor::DeallocateTimer (Timer* timer) {}
122127
123128bool TpuExecutor::StartTimer (Stream* stream, ::stream_executor::Timer* timer) {
124- return tpu::ExecutorApiFn ()->TpuExecutor_StartTimerFn (
129+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_StartTimerFn (
125130 executor_, get_stream (stream->implementation ()),
126131 timer_map_.at (timer->implementation ()));
127132}
128133
129134bool TpuExecutor::StopTimer (Stream* stream, ::stream_executor::Timer* timer) {
130- return tpu::ExecutorApiFn ()->TpuExecutor_StopTimerFn (
135+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_StopTimerFn (
131136 executor_, get_stream (stream->implementation ()),
132137 timer_map_.at (timer->implementation ()));
133138}
@@ -136,15 +141,15 @@ stream_executor::Event::Status TpuExecutor::PollForEventStatus(
136141 stream_executor::Event* event) {
137142 auto se_event = tpu_platform ().LookupEvent (event->implementation ());
138143 return stream_executor::Event::Status (
139- tpu::ExecutorApiFn ()->TpuExecutor_PollForEventStatusFn (executor_,
140- se_event));
144+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_PollForEventStatusFn (
145+ executor_, se_event));
141146}
142147
143148Status TpuExecutor::RecordEvent (Stream* stream,
144149 ::stream_executor::Event* event) {
145150 StatusHelper status;
146151 auto se_event = tpu_platform ().LookupEvent (event->implementation ());
147- tpu::ExecutorApiFn ()->TpuExecutor_RecordEventFn (
152+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_RecordEventFn (
148153 executor_, get_stream (stream->implementation ()), se_event,
149154 status.c_status );
150155 return status.status ();
@@ -154,7 +159,7 @@ Status TpuExecutor::WaitForEvent(Stream* stream,
154159 ::stream_executor::Event* event) {
155160 StatusHelper status;
156161 auto se_event = tpu_platform ().LookupEvent (event->implementation ());
157- tpu::ExecutorApiFn ()->TpuExecutor_WaitForEventFn (
162+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_WaitForEventFn (
158163 executor_, get_stream (stream->implementation ()), se_event,
159164 status.c_status );
160165 return status.status ();
@@ -169,17 +174,19 @@ Status TpuExecutor::WaitForEvent(Stream* stream,
169174// Called by Timer::Timer
170175std::unique_ptr<::stream_executor::internal::TimerInterface>
171176TpuExecutor::GetTimerImplementation () {
172- SE_Timer* tpu_timer = tpu::ExecutorApiFn ()->TpuTimer_NewFn (executor_);
173- auto ptr = std::make_unique<TpuTimer>(tpu_timer);
177+ SE_Timer* tpu_timer =
178+ tensorflow::tpu::ExecutorApiFn ()->TpuTimer_NewFn (executor_);
179+ auto ptr = std::make_unique<tensorflow::TpuTimer>(tpu_timer);
174180 timer_map_[ptr.get ()] = tpu_timer;
175181 return ptr;
176182}
177183
178184// Called by Stream::Stream
179185std::unique_ptr<::stream_executor::internal::StreamInterface>
180186TpuExecutor::GetStreamImplementation () {
181- SE_Stream* tpu_stream = tpu::ExecutorApiFn ()->TpuStream_NewFn (executor_);
182- auto ptr = std::make_unique<tpu::TpuStream>(tpu_stream);
187+ SE_Stream* tpu_stream =
188+ tensorflow::tpu::ExecutorApiFn ()->TpuStream_NewFn (executor_);
189+ auto ptr = std::make_unique<tensorflow::tpu::TpuStream>(tpu_stream);
183190 tpu_platform ().mutex ().Lock ();
184191 stream_map ()[ptr.get ()] = tpu_stream;
185192 tpu_platform ().mutex ().Unlock ();
@@ -189,33 +196,37 @@ TpuExecutor::GetStreamImplementation() {
189196// Called by Event::Event
190197std::unique_ptr<::stream_executor::internal::EventInterface>
191198TpuExecutor::CreateEventImplementation () {
192- SE_Event* tpu_event = tpu::ExecutorApiFn ()->TpuEvent_NewFn (executor_);
199+ SE_Event* tpu_event =
200+ tensorflow::tpu::ExecutorApiFn ()->TpuEvent_NewFn (executor_);
193201 auto ptr = std::make_unique<stream_executor::tpu::TpuEvent>(tpu_event);
194202 tpu_platform ().InsertEvent (ptr.get (), tpu_event);
195203 return ptr;
196204}
197205
198206DeviceMemoryBase TpuExecutor::Allocate (uint64_t size, int64_t memory_space) {
199- SE_DeviceMemoryBase se_base = tpu::ExecutorApiFn ()->TpuExecutor_AllocateFn (
200- executor_, size, memory_space);
207+ SE_DeviceMemoryBase se_base =
208+ tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_AllocateFn (executor_, size,
209+ memory_space);
201210 return ApiConverter::FromC (se_base);
202211}
203212
204213void TpuExecutor::Deallocate (const DeviceMemoryBase& memory) {
205214 SE_DeviceMemoryBase se_base = ApiConverter::ToC (memory);
206- tpu::ExecutorApiFn ()->TpuExecutor_DeallocateFn (executor_, &se_base);
215+ tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_DeallocateFn (executor_,
216+ &se_base);
207217}
208218
209219void TpuExecutor::Deallocate (DeviceMemoryBase* memory) {
210220 SE_DeviceMemoryBase se_base = ApiConverter::ToC (*memory);
211- tpu::ExecutorApiFn ()->TpuExecutor_DeallocateFn (executor_, &se_base);
221+ tensorflow::tpu::ExecutorApiFn ()->TpuExecutor_DeallocateFn (executor_,
222+ &se_base);
212223}
213224
214225bool TpuExecutor::DeviceMemoryUsage (int64_t * free, int64_t * total) const {
215226 int64_t _free;
216227 int64_t _total;
217- if (tpu::ExecutorApiFn ()->TpuExecutor_DeviceMemoryUsageFn (executor_, &_free,
218- &_total)) {
228+ if (tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_DeviceMemoryUsageFn (
229+ executor_, &_free, &_total)) {
219230 *free = _free;
220231 *total = _total;
221232 return true ;
@@ -226,8 +237,8 @@ bool TpuExecutor::DeviceMemoryUsage(int64_t* free, int64_t* total) const {
226237std::optional<stream_executor::AllocatorStats>
227238TpuExecutor::GetAllocatorStats () {
228239 SE_AllocatorStats c_stats;
229- if (tpu::ExecutorApiFn ()->TpuExecutor_GetAllocatorStatsFn (executor_,
230- &c_stats)) {
240+ if (tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_GetAllocatorStatsFn (
241+ executor_, &c_stats)) {
231242 ::stream_executor::AllocatorStats stats;
232243 stats.num_allocs = c_stats.num_allocs ;
233244 stats.bytes_in_use = c_stats.bytes_in_use ;
@@ -249,14 +260,14 @@ TpuExecutor::GetAllocatorStats() {
249260
250261Status TpuExecutor::WaitForInfeedReady (int32_t infeed_queue_index) {
251262 StatusHelper status;
252- tpu::ExecutorApiFn ()->TpuExecutor_WaitForInfeedReadyFn (
263+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_WaitForInfeedReadyFn (
253264 executor_, infeed_queue_index, status.c_status );
254265 return status.status ();
255266}
256267
257268Status TpuExecutor::WaitForOutfeedReady (int32_t outfeed_queue_index) {
258269 StatusHelper status;
259- tpu::ExecutorApiFn ()->TpuExecutor_WaitForOutfeedReadyFn (
270+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_WaitForOutfeedReadyFn (
260271 executor_, outfeed_queue_index, status.c_status );
261272 return status.status ();
262273}
@@ -265,7 +276,7 @@ void TpuExecutor::DequeueOutfeed(int32_t outfeed_queue_index,
265276 absl::Span<uint8_t > bytes,
266277 StatusCallback done) {
267278 StatusHelper status;
268- tpu::ExecutorApiFn ()->TpuExecutor_DequeueOutfeedFn (
279+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_DequeueOutfeedFn (
269280 executor_, outfeed_queue_index, bytes.data (), bytes.size (),
270281 status.c_status );
271282 done (status.status ());
@@ -274,7 +285,7 @@ void TpuExecutor::DequeueOutfeed(int32_t outfeed_queue_index,
274285Status TpuExecutor::EnqueueInfeed (int32_t infeed_queue_index,
275286 absl::Span<const uint8_t > bytes) {
276287 StatusHelper status;
277- tpu::ExecutorApiFn ()->TpuExecutor_EnqueueInfeedFn (
288+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_EnqueueInfeedFn (
278289 executor_, infeed_queue_index, bytes.data (), bytes.size (),
279290 status.c_status );
280291 return status.status ();
@@ -284,7 +295,7 @@ bool TpuExecutor::Memcpy(Stream* stream, void* host_dst,
284295 const ::stream_executor::DeviceMemoryBase& device_src,
285296 uint64_t size) {
286297 SE_DeviceMemoryBase se_base = ApiConverter::ToC (device_src);
287- return tpu::ExecutorApiFn ()->TpuExecutor_MemcpyToHostFn (
298+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_MemcpyToHostFn (
288299 executor_, get_stream (stream->implementation ()), host_dst, &se_base,
289300 size);
290301}
@@ -293,7 +304,7 @@ bool TpuExecutor::Memcpy(Stream* stream,
293304 ::stream_executor::DeviceMemoryBase* device_dst,
294305 const void * host_src, uint64_t size) {
295306 SE_DeviceMemoryBase se_base = ApiConverter::ToC (*device_dst);
296- return tpu::ExecutorApiFn ()->TpuExecutor_MemcpyFromHostFn (
307+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_MemcpyFromHostFn (
297308 executor_, get_stream (stream->implementation ()), &se_base, host_src,
298309 size);
299310}
@@ -303,7 +314,7 @@ Status TpuExecutor::SynchronousMemcpy(
303314 uint64_t size) {
304315 StatusHelper status;
305316 SE_DeviceMemoryBase se_base = ApiConverter::ToC (*device_dst);
306- tpu::ExecutorApiFn ()->TpuExecutor_SynchronousMemcpyFromHostFn (
317+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_SynchronousMemcpyFromHostFn (
307318 executor_, &se_base, host_src, size, status.c_status );
308319 return status.status ();
309320}
@@ -313,7 +324,7 @@ Status TpuExecutor::SynchronousMemcpy(
313324 uint64_t size) {
314325 StatusHelper status;
315326 SE_DeviceMemoryBase se_base = ApiConverter::ToC (device_src);
316- tpu::ExecutorApiFn ()->TpuExecutor_SynchronousMemcpyToHostFn (
327+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_SynchronousMemcpyToHostFn (
317328 executor_, host_dst, &se_base, size, status.c_status );
318329 return status.status ();
319330}
@@ -333,16 +344,17 @@ bool TpuExecutor::MemcpyDeviceToDevice(
333344
334345Status TpuExecutor::UnloadAllPrograms () {
335346 StatusHelper status;
336- tpu::ExecutorApiFn ()->TpuExecutor_UnloadAllProgramsFn (executor_,
337- status.c_status );
347+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_UnloadAllProgramsFn (
348+ executor_, status.c_status );
338349 return status.status ();
339350}
340351
341352Status TpuExecutor::EnqueueCompactionOnStreamForHbm (Stream* compaction_stream) {
342353 StatusHelper status;
343- tpu::ExecutorApiFn ()->TpuExecutor_EnqueueCompactionOnStreamForHbmFn (
344- executor_, get_stream (compaction_stream->implementation ()),
345- status.c_status );
354+ tensorflow::tpu::ExecutorApiFn ()
355+ ->TpuExecutor_EnqueueCompactionOnStreamForHbmFn (
356+ executor_, get_stream (compaction_stream->implementation ()),
357+ status.c_status );
346358 return status.status ();
347359}
348360
@@ -353,7 +365,7 @@ struct HostCallbackContext {
353365TSL_Status* HostCallbackTrampoline (void * ctx) {
354366 HostCallbackContext* host_ctx = reinterpret_cast <HostCallbackContext*>(ctx);
355367 Status status = host_ctx->callback ();
356- TSL_Status* c_status = tpu::ExecutorApiFn ()->TpuStatus_CreateFn (
368+ TSL_Status* c_status = tensorflow:: tpu::ExecutorApiFn ()->TpuStatus_CreateFn (
357369 status.code (), status.error_message ().c_str ());
358370 delete host_ctx;
359371 return c_status;
@@ -362,7 +374,7 @@ TSL_Status* HostCallbackTrampoline(void* ctx) {
362374bool TpuExecutor::HostCallback (Stream* stream,
363375 std::function<Status()> callback) {
364376 HostCallbackContext* ctx = new HostCallbackContext{callback};
365- return tpu::ExecutorApiFn ()->TpuExecutor_HostCallbackFn (
377+ return tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_HostCallbackFn (
366378 executor_, get_stream (stream->implementation ()), &HostCallbackTrampoline,
367379 ctx);
368380}
@@ -371,11 +383,11 @@ TpuExecutor::StatusOr<std::unique_ptr<::stream_executor::DeviceDescription>>
371383TpuExecutor::CreateDeviceDescription () const {
372384 StatusHelper status;
373385 SE_DeviceDescription* description =
374- tpu::ExecutorApiFn ()->TpuDeviceDescription_NewFn ();
386+ tensorflow:: tpu::ExecutorApiFn ()->TpuDeviceDescription_NewFn ();
375387 absl::Cleanup cleanup = [description]() {
376- tpu::ExecutorApiFn ()->TpuDeviceDescription_FreeFn (description);
388+ tensorflow:: tpu::ExecutorApiFn ()->TpuDeviceDescription_FreeFn (description);
377389 };
378- tpu::ExecutorApiFn ()->TpuExecutor_CreateDeviceDescriptionFn (
390+ tensorflow:: tpu::ExecutorApiFn ()->TpuExecutor_CreateDeviceDescriptionFn (
379391 executor_, description, status.c_status );
380392 if (status.status ().ok ()) {
381393 stream_executor::internal::DeviceDescriptionBuilder builder;
@@ -393,4 +405,4 @@ TpuExecutor::CreateDeviceDescription() const {
393405}
394406
395407} // namespace tpu
396- } // namespace tensorflow
408+ } // namespace stream_executor
0 commit comments