Add binding of GPU-bound tensors (#213)

ryanlai2 · web-flow · commit f2a21529f179 · 2019-04-15T16:38:21.000-07:00
* Add feature to bind GPU tensors

* Add test to check for invalid arg

* Spacing

* Simplify heap properties and resource desc

* Move GPU tensor upload code under the `if (!args.IsGarbageInput())` check
diff --git a/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp b/Testing/WinMLRunnerTest/WinMLRunnerTest.cpp
@@ -284,7 +284,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
         }
-
         TEST_METHOD(GarbageInputCpuWinMLDeviceCpuBoundRGBImage)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -320,7 +319,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
         }
-
         TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundRGBImage)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -344,19 +342,15 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             // We need to expect one more line because of the header
             Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
         }
-
         TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundTensor)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring command =
                 BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",
                                L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });
-            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
-
-            // We need to expect one more line because of the header
-            Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());
+            // Binding GPU Tensor with Session created with CPU device isn't supported.
+            Assert::AreEqual(E_INVALIDARG, RunProc((wchar_t*)command.c_str()));
         }
-
         TEST_METHOD(GarbageInputGpuClientDeviceCpuBoundRGBImage)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
@@ -646,7 +640,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath });
             Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc((wchar_t *)command.c_str()));
         }
-        TEST_METHOD(ProvidedCSVInputGPUSaveTensor)
+        TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensor)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -656,7 +650,17 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
                                                   TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
         }
-        TEST_METHOD(ProvidedCSVInputCPUSaveTensor)
+        TEST_METHOD(ProvidedCSVInputGPUSaveGpuBoundTensor)
+        {
+            const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
+            const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
+            const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,
+                                                        L"-SaveTensorData", L"First", TENSOR_DATA_PATH, L"-GPU", L"-GPUBoundInput" });
+            Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));
+            Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",
+                TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
+        }
+        TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensor)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -666,7 +670,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",
                                                   TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));
         }
-        TEST_METHOD(ProvidedCSVInputGPUSaveTensorFp16)
+        TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensorFp16)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -676,7 +680,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv",
                                                       TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));
         }
-        TEST_METHOD(ProvidedCSVInputCPUSaveTensorFp16)
+        TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensorFp16)
         {
             const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"fish.csv";
@@ -687,7 +691,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
                                                       TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));
         }
 
-        TEST_METHOD(ProvidedCSVInputOnlyGpuSaveTensorImageDenotation)
+        TEST_METHOD(ProvidedCSVInputOnlyGpuSaveCpuBoundTensorImageDenotation)
         {
             const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";
@@ -697,7 +701,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {
             Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_GPU.csv",
                 TENSOR_DATA_PATH + L"\\Plus214_Output_0GpuIteration1.csv"));
         }
-        TEST_METHOD(ProvidedCSVInputOnlyCpuSaveTensorImageDenotation)
+        TEST_METHOD(ProvidedCSVInputOnlyCpuSaveCpuBoundTensorImageDenotation)
         {
             const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";
             const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";
diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj
@@ -31,7 +31,6 @@
     <ClInclude Include="src/CommandLineArgs.h" />
     <ClInclude Include="src/Common.h" />
     <ClInclude Include="src/Filehelper.h" />
-    <ClInclude Include="src/ModelBinding.h" />
     <ClInclude Include="src/OutputHelper.h" />
     <ClInclude Include="src/Run.h" />
     <ClInclude Include="src/TimerHelper.h" />
@@ -234,6 +233,7 @@
       <LanguageStandard>stdcpp17</LanguageStandard>
       <PrecompiledHeaderFile />
       <PrecompiledHeaderOutputFile />
+      <AdditionalIncludeDirectories>..\..\Samples\CustomTensorization\CustomTensorization;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
@@ -254,6 +254,7 @@
       <PrecompiledHeaderFile />
       <PrecompiledHeaderOutputFile />
       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+      <AdditionalIncludeDirectories>..\..\Samples\CustomTensorization\CustomTensorization;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
     </ClCompile>
     <Link>
       <SubSystem>Windows</SubSystem>
diff --git a/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters b/Tools/WinMLRunner/WinMLRunnerStaticLib.vcxproj.filters
@@ -36,9 +36,6 @@
     <ClInclude Include="src/TypeHelper.h">
       <Filter>Header Files</Filter>
     </ClInclude>
-    <ClInclude Include="src/ModelBinding.h">
-      <Filter>Source Files</Filter>
-    </ClInclude>
     <ClInclude Include="src/Run.h">
       <Filter>Header Files</Filter>
     </ClInclude>
diff --git a/Tools/WinMLRunner/src/BindingUtilities.h b/Tools/WinMLRunner/src/BindingUtilities.h
@@ -3,7 +3,7 @@
 #include <time.h>
 #include "Common.h"
 #include "Windows.AI.Machinelearning.Native.h"
-
+#include "d3dx12.h"
 using namespace winrt::Windows::Media;
 using namespace winrt::Windows::Storage;
 using namespace winrt::Windows::AI::MachineLearning;
@@ -327,7 +327,7 @@ namespace BindingUtilities
 
     template <TensorKind T>
     static ITensor CreateTensor(const CommandLineArgs& args, const std::vector<std::string>& tensorStringInput,
-                                const IVectorView<int64_t>& tensorShape)
+                                const IVectorView<int64_t>& tensorShape, const InputBindingType inputBindingType)
     {
         using TensorValue = typename TensorKindToValue<T>::Type;
         using DataType = typename TensorKindToType<T>::Type;
@@ -372,11 +372,106 @@ namespace BindingUtilities
             // Creating Tensors for Input Images haven't been added yet.
             throw hresult_not_implemented(L"Creating Tensors for Input Images haven't been implemented yet!");
         }
-        return tensorValue;  
+
+        if (inputBindingType == InputBindingType::CPU)
+        {
+            return tensorValue;
+        }
+        else // GPU Tensor
+        {
+            com_ptr<ID3D12Resource> pGPUResource = nullptr;
+            try
+            {
+                // create the d3d device.
+                com_ptr<ID3D12Device> pD3D12Device = nullptr;
+                D3D12CreateDevice(nullptr, D3D_FEATURE_LEVEL::D3D_FEATURE_LEVEL_11_0, __uuidof(ID3D12Device),
+                                  reinterpret_cast<void**>(&pD3D12Device));
+                
+                pD3D12Device->CreateCommittedResource(
+                    &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
+                    D3D12_HEAP_FLAG_NONE,
+                    &CD3DX12_RESOURCE_DESC::Buffer(
+                        actualSizeInBytes,
+                        D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
+                    D3D12_RESOURCE_STATE_COMMON, nullptr,
+                    __uuidof(ID3D12Resource), pGPUResource.put_void());
+                if (!args.IsGarbageInput())
+                {
+                    com_ptr<ID3D12Resource> imageUploadHeap;
+                    // Create the GPU upload buffer.
+                    pD3D12Device->CreateCommittedResource(
+                        &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), D3D12_HEAP_FLAG_NONE,
+                        &CD3DX12_RESOURCE_DESC::Buffer(actualSizeInBytes), D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
+                        __uuidof(ID3D12Resource), imageUploadHeap.put_void());
+
+                    // create the command queue.
+                    com_ptr<ID3D12CommandQueue> dxQueue = nullptr;
+                    D3D12_COMMAND_QUEUE_DESC commandQueueDesc = {};
+                    commandQueueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+                    pD3D12Device->CreateCommandQueue(&commandQueueDesc, __uuidof(ID3D12CommandQueue),
+                                                     reinterpret_cast<void**>(&dxQueue));
+                    com_ptr<ILearningModelDeviceFactoryNative> devicefactory =
+                        get_activation_factory<LearningModelDevice, ILearningModelDeviceFactoryNative>();
+                    com_ptr<::IUnknown> spUnk;
+                    devicefactory->CreateFromD3D12CommandQueue(dxQueue.get(), spUnk.put());
+
+                    // Create ID3D12GraphicsCommandList and Allocator
+                    D3D12_COMMAND_LIST_TYPE queuetype = dxQueue->GetDesc().Type;
+                    com_ptr<ID3D12CommandAllocator> alloctor;
+                    com_ptr<ID3D12GraphicsCommandList> cmdList;
+                    pD3D12Device->CreateCommandAllocator(queuetype, winrt::guid_of<ID3D12CommandAllocator>(),
+                                                         alloctor.put_void());
+                    pD3D12Device->CreateCommandList(0, queuetype, alloctor.get(), nullptr,
+                                                    winrt::guid_of<ID3D12CommandList>(), cmdList.put_void());
+
+                    // Copy from Cpu to GPU
+                    D3D12_SUBRESOURCE_DATA CPUData = {};
+                    CPUData.pData = actualData;
+                    CPUData.RowPitch = actualSizeInBytes;
+                    CPUData.SlicePitch = actualSizeInBytes;
+                    UpdateSubresources(cmdList.get(), pGPUResource.get(), imageUploadHeap.get(), 0, 0, 1, &CPUData);
+
+                    // Close the command list and execute it to begin the initial GPU setup.
+                    cmdList->Close();
+                    ID3D12CommandList* ppCommandLists[] = { cmdList.get() };
+                    dxQueue->ExecuteCommandLists(_countof(ppCommandLists), ppCommandLists);
+
+                    // Create Event
+                    HANDLE directEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+
+                    // Create Fence
+                    Microsoft::WRL::ComPtr<ID3D12Fence> spDirectFence = nullptr;
+                    THROW_IF_FAILED(pD3D12Device->CreateFence(0, D3D12_FENCE_FLAG_NONE,
+                                                              IID_PPV_ARGS(spDirectFence.ReleaseAndGetAddressOf())));
+                    // Adds fence to queue
+                    THROW_IF_FAILED(dxQueue->Signal(spDirectFence.Get(), 1));
+                    THROW_IF_FAILED(spDirectFence->SetEventOnCompletion(1, directEvent));
+
+                    // Wait for signal
+                    DWORD retVal = WaitForSingleObject(directEvent, INFINITE);
+                    if (retVal != WAIT_OBJECT_0)
+                    {
+                        THROW_IF_FAILED(E_UNEXPECTED);
+                    }
+                }
+            }
+            catch (...)
+            {
+                std::cout << "Couldn't create and copy CPU tensor resource to GPU resource" << std::endl;
+                throw;
+            }
+            com_ptr<ITensorStaticsNative> tensorfactory = get_activation_factory<TensorValue, ITensorStaticsNative>();
+            com_ptr<::IUnknown> spUnkTensor;
+            tensorfactory->CreateFromD3D12Resource(pGPUResource.get(), vecShape.data(), static_cast<int>(vecShape.size()), spUnkTensor.put());
+            TensorValue returnTensor(nullptr);
+            spUnkTensor.try_as(returnTensor);
+            return returnTensor;
+        }
     }
 
     // Binds tensor floats, ints, doubles from CSV data.
-    ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const CommandLineArgs& args)
+    ITensor CreateBindableTensor(const ILearningModelFeatureDescriptor& description, const CommandLineArgs& args,
+                                 const InputBindingType inputBindingType)
     {
         std::vector<std::string> elementStrings;
         if (!args.CsvPath().empty())
@@ -407,7 +502,7 @@ namespace BindingUtilities
             std::vector<int64_t> shape = { 1, channels, imageFeatureDescriptor.Height(),
                                            imageFeatureDescriptor.Width() };
             IVectorView<int64_t> shapeVectorView = single_threaded_vector(std::move(shape)).GetView();
-            return CreateTensor<TensorKind::Float>(args, elementStrings, shapeVectorView);
+            return CreateTensor<TensorKind::Float>(args, elementStrings, shapeVectorView, inputBindingType);
         }
 
         auto tensorDescriptor = description.try_as<TensorFeatureDescriptor>();
@@ -422,57 +517,68 @@ namespace BindingUtilities
                 }
                 case TensorKind::Float:
                 {
-                    return CreateTensor<TensorKind::Float>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Float>(args, elementStrings, tensorDescriptor.Shape(),
+                                                           inputBindingType);
                 }
                 break;
                 case TensorKind::Float16:
                 {
-                    return CreateTensor<TensorKind::Float16>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Float16>(args, elementStrings, tensorDescriptor.Shape(),
+                                                             inputBindingType);
                 }
                 break;
                 case TensorKind::Double:
                 {
-                    return CreateTensor<TensorKind::Double>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Double>(args, elementStrings, tensorDescriptor.Shape(),
+                                                            inputBindingType);
                 }
                 break;
                 case TensorKind::Int8:
                 {
-                    return CreateTensor<TensorKind::Int8>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Int8>(args, elementStrings, tensorDescriptor.Shape(),
+                                                          inputBindingType);
                 }
                 break;
                 case TensorKind::UInt8:
                 {
-                    return CreateTensor<TensorKind::UInt8>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::UInt8>(args, elementStrings, tensorDescriptor.Shape(),
+                                                           inputBindingType);
                 }
                 break;
                 case TensorKind::Int16:
                 {
-                    return CreateTensor<TensorKind::Int16>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Int16>(args, elementStrings, tensorDescriptor.Shape(),
+                                                           inputBindingType);
                 }
                 break;
                 case TensorKind::UInt16:
                 {
-                    return CreateTensor<TensorKind::UInt16>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::UInt16>(args, elementStrings, tensorDescriptor.Shape(),
+                                                            inputBindingType);
                 }
                 break;
                 case TensorKind::Int32:
                 {
-                    return CreateTensor<TensorKind::Int32>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Int32>(args, elementStrings, tensorDescriptor.Shape(),
+                                                           inputBindingType);
                 }
                 break;
                 case TensorKind::UInt32:
                 {
-                    return CreateTensor<TensorKind::UInt32>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::UInt32>(args, elementStrings, tensorDescriptor.Shape(),
+                                                            inputBindingType);
                 }
                 break;
                 case TensorKind::Int64:
                 {
-                    return CreateTensor<TensorKind::Int64>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::Int64>(args, elementStrings, tensorDescriptor.Shape(),
+                                                           inputBindingType);
                 }
                 break;
                 case TensorKind::UInt64:
                 {
-                    return CreateTensor<TensorKind::UInt64>(args, elementStrings, tensorDescriptor.Shape());
+                    return CreateTensor<TensorKind::UInt64>(args, elementStrings, tensorDescriptor.Shape(),
+                                                            inputBindingType);
                 }
                 break;
             }
diff --git a/Tools/WinMLRunner/src/Run.cpp b/Tools/WinMLRunner/src/Run.cpp

Original file line number	Diff line number	Diff line change
`@@ -284,7 +284,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`284`	`284`	`// We need to expect one more line because of the header`
`285`	`285`	`Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());`
`286`	`286`	`}`
`287`		`-`
`288`	`287`	`TEST_METHOD(GarbageInputCpuWinMLDeviceCpuBoundRGBImage)`
`289`	`288`	`{`
`290`	`289`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`@@ -320,7 +319,6 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`320`	`319`	`// We need to expect one more line because of the header`
`321`	`320`	`Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());`
`322`	`321`	`}`
`323`		`-`
`324`	`322`	`TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundRGBImage)`
`325`	`323`	`{`
`326`	`324`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`@@ -344,19 +342,15 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`344`	`342`	`// We need to expect one more line because of the header`
`345`	`343`	`Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());`
`346`	`344`	`}`
`347`		`-`
`348`	`345`	`TEST_METHOD(GarbageInputCpuWinMLDeviceGpuBoundTensor)`
`349`	`346`	`{`
`350`	`347`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`351`	`348`	`const std::wstring command =`
`352`	`349`	`BuildCommand({ EXE_PATH, L"-model", modelPath, L"-PerfOutput", OUTPUT_PATH, L"-perf", L"-CPU",`
`353`	`350`	`L"-GPUBoundInput", L"-tensor", L"-CreateDeviceInWinML" });`
`354`		`- Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));`
`355`		`-`
`356`		`- // We need to expect one more line because of the header`
`357`		`- Assert::AreEqual(static_cast<size_t>(2), GetOutputCSVLineCount());`
	`351`	`+ // Binding GPU Tensor with Session created with CPU device isn't supported.`
	`352`	`+ Assert::AreEqual(E_INVALIDARG, RunProc((wchar_t*)command.c_str()));`
`358`	`353`	`}`
`359`		`-`
`360`	`354`	`TEST_METHOD(GarbageInputGpuClientDeviceCpuBoundRGBImage)`
`361`	`355`	`{`
`362`	`356`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`@@ -646,7 +640,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`646`	`640`	`const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath });`
`647`	`641`	`Assert::AreEqual(HRESULT_FROM_WIN32(ERROR_INVALID_PARAMETER), RunProc((wchar_t *)command.c_str()));`
`648`	`642`	`}`
`649`		`- TEST_METHOD(ProvidedCSVInputGPUSaveTensor)`
	`643`	`+ TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensor)`
`650`	`644`	`{`
`651`	`645`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`652`	`646`	`const std::wstring inputPath = CURRENT_PATH + L"fish.csv";`
`@@ -656,7 +650,17 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`656`	`650`	`Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",`
`657`	`651`	`TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));`
`658`	`652`	`}`
`659`		`- TEST_METHOD(ProvidedCSVInputCPUSaveTensor)`
	`653`	`+ TEST_METHOD(ProvidedCSVInputGPUSaveGpuBoundTensor)`
	`654`	`+ {`
	`655`	`+ const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
	`656`	`+ const std::wstring inputPath = CURRENT_PATH + L"fish.csv";`
	`657`	`+ const std::wstring command = BuildCommand({ EXE_PATH, L"-model", modelPath, L"-input", inputPath,`
	`658`	`+ L"-SaveTensorData", L"First", TENSOR_DATA_PATH, L"-GPU", L"-GPUBoundInput" });`
	`659`	`+ Assert::AreEqual(S_OK, RunProc((wchar_t*)command.c_str()));`
	`660`	`+ Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_GPU.csv",`
	`661`	`+ TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));`
	`662`	`+ }`
	`663`	`+ TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensor)`
`660`	`664`	`{`
`661`	`665`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet.onnx";`
`662`	`666`	`const std::wstring inputPath = CURRENT_PATH + L"fish.csv";`
`@@ -666,7 +670,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`666`	`670`	`Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Squeezenet_fish_input_CPU.csv",`
`667`	`671`	`TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));`
`668`	`672`	`}`
`669`		`- TEST_METHOD(ProvidedCSVInputGPUSaveTensorFp16)`
	`673`	`+ TEST_METHOD(ProvidedCSVInputGPUSaveCpuBoundTensorFp16)`
`670`	`674`	`{`
`671`	`675`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";`
`672`	`676`	`const std::wstring inputPath = CURRENT_PATH + L"fish.csv";`
`@@ -676,7 +680,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`676`	`680`	`Assert::AreEqual(true, CompareTensorsFP16(L"OutputTensorData\\Squeezenet_fp16_fish_input_GPU.csv",`
`677`	`681`	`TENSOR_DATA_PATH + L"\\softmaxout_1GpuIteration1.csv"));`
`678`	`682`	`}`
`679`		`- TEST_METHOD(ProvidedCSVInputCPUSaveTensorFp16)`
	`683`	`+ TEST_METHOD(ProvidedCSVInputCPUSaveCpuBoundTensorFp16)`
`680`	`684`	`{`
`681`	`685`	`const std::wstring modelPath = CURRENT_PATH + L"SqueezeNet_fp16.onnx";`
`682`	`686`	`const std::wstring inputPath = CURRENT_PATH + L"fish.csv";`
`@@ -687,7 +691,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`687`	`691`	`TENSOR_DATA_PATH + L"\\softmaxout_1CpuIteration1.csv"));`
`688`	`692`	`}`
`689`	`693`
`690`		`- TEST_METHOD(ProvidedCSVInputOnlyGpuSaveTensorImageDenotation)`
	`694`	`+ TEST_METHOD(ProvidedCSVInputOnlyGpuSaveCpuBoundTensorImageDenotation)`
`691`	`695`	`{`
`692`	`696`	`const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";`
`693`	`697`	`const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";`
`@@ -697,7 +701,7 @@ public: TEST_CLASS_INITIALIZE(SetupClass) {`
`697`	`701`	`Assert::AreEqual(true, CompareTensors(L"OutputTensorData\\Mnist_8_input_GPU.csv",`
`698`	`702`	`TENSOR_DATA_PATH + L"\\Plus214_Output_0GpuIteration1.csv"));`
`699`	`703`	`}`
`700`		`- TEST_METHOD(ProvidedCSVInputOnlyCpuSaveTensorImageDenotation)`
	`704`	`+ TEST_METHOD(ProvidedCSVInputOnlyCpuSaveCpuBoundTensorImageDenotation)`
`701`	`705`	`{`
`702`	`706`	`const std::wstring modelPath = CURRENT_PATH + L"mnist.onnx";`
`703`	`707`	`const std::wstring inputPath = CURRENT_PATH + L"mnist_28.csv";`