diff --git a/csharp/README.md b/csharp/README.md new file mode 100644 index 00000000..3c1b9f2a --- /dev/null +++ b/csharp/README.md @@ -0,0 +1,65 @@ +# C\# Arrow Flight Client Application Example + +This simple example C\# client application connects to the Dremio Arrow Flight server endpoint. Developers can use admin or regular user credentials for authentication. Any dataset in Dremio that is accessible to the provided Dremio user can be queried. By default, the hostname is `localhost` and the port is `32010`. Developers can change these default settings by providing the hostname and port as arguments when running the client. + +Note: This example uses Microsoft.Data.Analysis as an example library for working with the data -- it is similar to Python pandas. However, the pandas DataFrame is more mature and supports more data types. There are some basic checks and examples of exceptions to look out for because of this limitation. + +### Prerequisites +- .NET 7 [SDK](https://dotnet.microsoft.com/en-us/download/dotnet/7.0) +- Dremio 21 or later + +NOTE: This code was tested on macOS x64 against Dremio running on localhost in Docker + - `docker run -p 9047:9047 -p 31010:31010 -p 32010:32010 dremio/dremio-oss:latest` + - For quick setup, log in to your local Docker instance at http://localhost:9047 in a browser and add the 'dremio/dremio123' user as ADMIN + +### Build the C\# sample application +- Clone this repository. + - `git clone https://github.com/dremio-hub/arrow-flight-client-examples` +- Navigate to arrow-flight-client-examples/csharp/example. + - `cd arrow-flight-client-examples/csharp/example` +- Build the sample application on the command line with: + - `dotnet build` + +### Instructions on using this C\# sample application +- By default, the hostname is `localhost` and the port is `32010` with user `dremio` and password `dremio123`. 
There is also a default query against the Samples data source. + - `dotnet run` + - NOTE: To use the default query, you will first need to add the Samples data source in Dremio and then "Format" the zips.json file in Dremio. +- Run the dotnet sample application with command line args: + - `dotnet run -query <query> -host <host> -port <port> -user <user> -pass <pass>` + - `dotnet run -host localhost -user dremio -pass dremio123 -port 32010 -query "SELECT job_id, status, query from sys.jobs"` + +### Usage +``` +usage: dotnet run [arguments] + +Arguments: + -port <port> + Dremio flight server port. + Defaults to 32010. Use 443 for Dremio Cloud. + -host <host> + Dremio coordinator hostname. + Defaults to "localhost". Use data.dremio.cloud for Dremio Cloud. + -pass <pass> + Dremio password. + Defaults to "dremio123". + -pat <pat> + Personal access token -- use instead of password when authenticating. This is required for Dremio Cloud. + -protocol <protocol> + The protocol to use when connecting -- use 'https' for production or Dremio Cloud. Defaults to "http". + -query <query> + SQL query to test. + -user <user> + Dremio username. + Defaults to "dremio". +``` + +### Usage with Dremio Cloud + +The following example shows how to run a query against Dremio Cloud. Dremio Cloud differs from the previous examples as follows: + +- Host name is data.dremio.cloud or data.eu.dremio.cloud +- Port is 443 +- Protocol is https +- Only personal access token authentication is supported +- `dotnet run -protocol https -host data.dremio.cloud -port 443 -pat abc123abc123abc123abc123 -query "SELECT job_id, status, query from sys.project.jobs"` + diff --git a/csharp/example/Program.cs b/csharp/example/Program.cs new file mode 100644 index 00000000..abb17527 --- /dev/null +++ b/csharp/example/Program.cs @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2023 Dremio Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
/*
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

using Microsoft.Data.Analysis;
using Grpc.Net.Client;
using Grpc.Core;
using Apache.Arrow.Flight.Client;
using Apache.Arrow.Flight;
using Apache.Arrow;

namespace SimpleFlightClientExample
{
    /// <summary>
    /// Console example that runs a SQL query against an Arrow Flight endpoint and
    /// prints the result schema followed by every returned row.
    /// </summary>
    public class Program
    {
        // Names of the supported command-line arguments (without the leading prefix).
        private const string HOST = "host";
        private const string PORT = "port";
        private const string USER = "user";
        private const string PASS = "pass";
        private const string PAT = "pat";
        private const string QUERY = "query";
        private const string PROTOCOL = "protocol";

        /// <summary>
        /// Default value for every supported argument. The key set of
        /// <see cref="args"/> also defines which command-line arguments are accepted.
        /// </summary>
        public static class DefaultValues
        {
            public static readonly Dictionary<string, string> args = new Dictionary<string, string>
            {
                { HOST, "localhost" },
                { PORT, "32010" },
                { USER, "dremio" },
                { PASS, "dremio123" },
                { QUERY, "SELECT city, loc[0] AS longitude, loc[1] AS latitude, pop, state, _id FROM Samples.\"samples.dremio.com\".\"zips.json\" LIMIT 100" },
                { PROTOCOL, "http" },
                { PAT, "" }
            };
        }

        /// <summary>
        /// Entry point: resolves the arguments, opens the channel, prints the query
        /// schema and then prints each row of each record batch.
        /// </summary>
        /// <param name="args">Raw command-line arguments, as "-name value" pairs.</param>
        public static async Task Main(string[] args)
        {
            var arguments = BuildArgumentsDictionary(args);
            var address = $"{arguments[PROTOCOL]}://{arguments[HOST]}:{arguments[PORT]}";
            var authHeader = BuildAuthorizationHeader(arguments);

            // Dispose the channel when Main returns; BuildGrpcChannel configures the
            // channel to dispose the HttpClient it was given as well.
            using var channel = BuildGrpcChannel(address, authHeader);

            FlightClient client = new FlightClient(channel);

            Console.WriteLine("-----Address-----");
            Console.WriteLine(address);
            Console.WriteLine();

            Console.WriteLine("-----Query-----");
            Console.WriteLine(arguments[QUERY]);
            Console.WriteLine();

            // Pass the query text as the Command Descriptor
            var descriptor = FlightDescriptor.CreateCommandDescriptor(arguments[QUERY]);
            var schema = await client.GetSchema(descriptor).ResponseAsync;

            Console.WriteLine("-----Schema Items-----");
            Console.WriteLine(ConvertSchemaToJsonString(schema));

            // Get Flight Info
            var info = await client.GetInfo(descriptor).ResponseAsync;

            Console.WriteLine("-----DATA-----");
            await foreach (var batch in StreamRecordBatches(info, channel))
            {
                // Microsoft.Data.Analysis library has DataFrame which behaves similar to python pandas,
                // but has limited support for DataTypes at time of writing
                var df = DataFrame.FromArrowRecordBatch(batch);

                for (long index = 0; index < df.Rows.Count; index++)
                {
                    DataFrameRow row = df.Rows[index];
                    Console.WriteLine(row);
                }
            }
        }

        /// <summary>
        /// Parses "flag value" pairs from the command line into a dictionary.
        /// </summary>
        /// <param name="args">Arguments from the Main command line.</param>
        /// <param name="supportedArgs">Accepted argument names; any other flag is reported and ignored.</param>
        /// <param name="argPrefix">Prefix that marks a flag. Defaults to "-", but another marker such as "--" can be used.</param>
        /// <returns>
        /// The supported flags that were supplied, keyed by name without the prefix.
        /// When a flag is repeated, the last value wins.
        /// </returns>
        private static Dictionary<string, string> ParseCommandLineArgs(string[] args, string[] supportedArgs, string argPrefix = "-")
        {
            var dictionaryOfArgs = new Dictionary<string, string>();

            // Arguments are consumed as (flag, value) pairs, so flags sit at even positions.
            for (int i = 0; i + 1 < args.Length; i += 2)
            {
                // A token in a flag position that lacks the prefix is skipped together with its value.
                if (!args[i].StartsWith(argPrefix, StringComparison.Ordinal))
                {
                    continue;
                }

                var key = args[i].Substring(argPrefix.Length);
                var value = args[i + 1];

                if (supportedArgs.Contains(key))
                {
                    // Indexer instead of Add: a repeated flag overrides the earlier value
                    // rather than throwing ArgumentException.
                    dictionaryOfArgs[key] = value;
                }
                else
                {
                    Console.WriteLine($"Value '{key}' is not a supported argument, this is ignored.");
                }
            }

            // An odd argument count leaves a final token with no partner; say so instead of dropping it silently.
            if (args.Length % 2 != 0)
            {
                Console.WriteLine($"Argument '{args[^1]}' has no value, this is ignored.");
            }

            return dictionaryOfArgs;
        }

        /// <summary>
        /// Builds the effective settings: a copy of <see cref="DefaultValues.args"/>
        /// with any values supplied on the command line applied on top.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>A dictionary containing a value for every supported argument.</returns>
        private static Dictionary<string, string> BuildArgumentsDictionary(string[] args)
        {
            // Copy so the shared defaults are never mutated.
            var arguments = new Dictionary<string, string>(DefaultValues.args);

            // Override arguments: Replace default settings with values from command line such as -host
            foreach (var item in ParseCommandLineArgs(args, DefaultValues.args.Keys.ToArray()))
            {
                arguments[item.Key] = item.Value;
            }

            return arguments;
        }

        /// <summary>
        /// Builds the value of the Authorization header.
        /// </summary>
        /// <param name="arguments">Effective settings; reads "pat", "user" and "pass".</param>
        /// <returns>
        /// "Bearer &lt;pat&gt;" when a personal access token is set, otherwise
        /// "Basic &lt;base64(user:pass)&gt;".
        /// </returns>
        private static string BuildAuthorizationHeader(Dictionary<string, string> arguments)
        {
            // If -pat (Personal Access Token) set, use that instead for auth
            if (arguments.TryGetValue(PAT, out var token) && !string.IsNullOrEmpty(token))
            {
                Console.WriteLine("Using personal access token for authorization");
                return "Bearer " + token;
            }

            Console.WriteLine("Using Basic auth with user and pass for authorization");
            var credentials = System.Text.Encoding.GetEncoding("ISO-8859-1").GetBytes(arguments[USER] + ":" + arguments[PASS]);
            return "Basic " + System.Convert.ToBase64String(credentials);
        }

        /// <summary>
        /// Creates a gRPC channel whose HttpClient sends <paramref name="authHeader"/>
        /// as the Authorization header on every request.
        /// </summary>
        /// <param name="address">Endpoint address in the form protocol://host:port.</param>
        /// <param name="authHeader">Complete Authorization header value.</param>
        /// <returns>The channel; disposing it also disposes the underlying HttpClient.</returns>
        private static GrpcChannel BuildGrpcChannel(string address, string authHeader)
        {
            var handler = new HttpClientHandler();

            // For localhost https (TLS) endpoint testing with a self-signed cert, uncomment the following
            // to avoid a cert error. Not for production.
            //handler.ServerCertificateCustomValidationCallback = HttpClientHandler.DangerousAcceptAnyServerCertificateValidator;

            var httpClient = new HttpClient(handler);
            httpClient.DefaultRequestHeaders.Add("Authorization", authHeader);

            return GrpcChannel.ForAddress(address, new GrpcChannelOptions
            {
                HttpClient = httpClient,
                // The channel owns the client created above, so let it clean the client up.
                DisposeHttpClient = true
            });
        }

        /// <summary>
        /// Renders the schema as a JSON object mapping each field name to its data
        /// type name, to keep the console output readable.
        /// </summary>
        /// <param name="schema">Schema returned for the query.</param>
        /// <returns>A JSON object string terminated by a newline.</returns>
        private static string ConvertSchemaToJsonString(Schema schema)
        {
            // ** NOTE **
            // The Microsoft.Data.Analysis library did not work well with item.Value.DataType.Name == "list" || item.Value.DataType.Name == "timestamp"
            // The fix would be to create a VDS that converts this column to a string instead (or a VDS that does not include this column)

            var entries = schema.Fields
                .Select(item => "  \"" + item.Key + "\": \"" + item.Value.DataType.Name + "\"")
                .ToList();

            if (entries.Count == 0)
            {
                return "{\n}\n";
            }

            // Join instead of appending ",\n" after every entry, so the last entry
            // has no trailing comma and the result is valid JSON.
            return "{\n" + string.Join(",\n", entries) + "\n}\n";
        }

        /// <summary>
        /// Streams every record batch of the query result described by <paramref name="info"/>.
        /// </summary>
        /// <param name="info">Flight info returned by GetInfo for the query.</param>
        /// <param name="channel">Channel used to fetch the data.</param>
        /// <returns>The record batches of each endpoint, in endpoint order.</returns>
        public static async IAsyncEnumerable<RecordBatch> StreamRecordBatches(FlightInfo info, GrpcChannel channel)
        {
            var downloadClient = new FlightClient(channel);

            // Read every endpoint rather than only the first, so no part of the result is skipped
            // and an empty endpoint list yields nothing instead of throwing.
            // NOTE(review): every ticket is fetched over the channel passed in; endpoint.Locations
            // is not consulted -- confirm this is acceptable for multi-node deployments.
            foreach (var endpoint in info.Endpoints)
            {
                // Release the streaming call once this endpoint is drained or enumeration stops early.
                using var stream = downloadClient.GetStream(endpoint.Ticket);

                // TODO: Potential RPC Exception? https://groups.google.com/g/grpc-io/c/MS7uCIabkO4
                while (await stream.ResponseStream.MoveNext())
                {
                    yield return stream.ResponseStream.Current;
                }
            }
        }
    }
}