Skip to content

Commit 7bb3dd1

Browse files
authored
Hyperspace Extension (#555)
1 parent 6f835a5 commit 7bb3dd1

File tree

14 files changed

+691
-0
lines changed

14 files changed

+691
-0
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
6+
{
7+
/// <summary>
/// Constants related to the Hyperspace test suite.
/// </summary>
/// <remarks>
/// Declared <c>static</c> because the type only holds compile-time constants and is
/// never meant to be instantiated.
/// </remarks>
internal static class Constants
{
    /// <summary>
    /// Name of the xUnit test collection used by the Hyperspace tests. It is referenced
    /// from the <c>[CollectionDefinition]</c> and <c>[Collection]</c> attributes.
    /// </summary>
    public const string HyperspaceTestContainerName = "Hyperspace Tests";
}
14+
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using Microsoft.Spark.E2ETest;
7+
using Xunit;
8+
9+
namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
10+
{
11+
/// <summary>
/// Fixture for the Hyperspace test collection. It requests the Hyperspace package through
/// the extra spark-submit arguments and then creates the <see cref="SparkFixture"/> that
/// the tests use.
/// </summary>
public class HyperspaceFixture : IDisposable
{
    /// <summary>
    /// Sets the extra spark-submit arguments and creates the Spark fixture.
    /// </summary>
    public HyperspaceFixture()
    {
        // Set before the SparkFixture is constructed; presumably the fixture reads this
        // variable when it launches Spark (verify against SparkFixture).
        Environment.SetEnvironmentVariable(
            SparkFixture.EnvironmentVariableNames.ExtraSparkSubmitArgs,
            "--packages com.microsoft.hyperspace:hyperspace-core_2.11:0.1.0");

        SparkFixture = new SparkFixture();
    }

    /// <summary>
    /// The Spark fixture created by this fixture.
    /// </summary>
    public SparkFixture SparkFixture { get; private set; }

    /// <summary>
    /// Releases the <see cref="SparkFixture"/> created by the constructor. xUnit calls
    /// this once after all tests in the collection have run.
    /// </summary>
    public void Dispose()
    {
        // NOTE(review): assumes SparkFixture implements IDisposable - confirm.
        SparkFixture?.Dispose();
    }
}
24+
25+
/// <summary>
/// Marker type that associates <see cref="HyperspaceFixture"/> with the test collection
/// named by <see cref="Constants.HyperspaceTestContainerName"/>.
/// </summary>
[CollectionDefinition(Constants.HyperspaceTestContainerName)]
public class HyperspaceTestCollection : ICollectionFixture<HyperspaceFixture>
{
    // Intentionally empty and never instantiated. The class exists only as the
    // target for [CollectionDefinition] and the ICollectionFixture<> interfaces.
}
32+
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using Microsoft.Spark.E2ETest.Utils;
7+
using Microsoft.Spark.Extensions.Hyperspace.Index;
8+
using Microsoft.Spark.Sql;
9+
using Microsoft.Spark.UnitTest.TestUtils;
10+
using Xunit;
11+
12+
namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest
13+
{
14+
/// <summary>
/// Test suite for Hyperspace index management APIs.
/// </summary>
[Collection(Constants.HyperspaceTestContainerName)]
public class HyperspaceTests : IDisposable
{
    private readonly SparkSession _spark;
    private readonly TemporaryDirectory _hyperspaceSystemDirectory;
    private readonly Hyperspace _hyperspace;

    // Fields needed for sample DataFrame.
    private readonly DataFrame _sampleDataFrame;
    private readonly string _sampleIndexName;
    private readonly IndexConfig _sampleIndexConfig;

    /// <summary>
    /// Points Hyperspace at a fresh temporary system directory, loads the sample
    /// DataFrame from the people.csv resource and creates the sample index on it.
    /// </summary>
    /// <param name="fixture">Collection fixture that supplies the Spark session.</param>
    public HyperspaceTests(HyperspaceFixture fixture)
    {
        _spark = fixture.SparkFixture.Spark;
        _hyperspaceSystemDirectory = new TemporaryDirectory();

        try
        {
            _spark.Conf().Set("spark.hyperspace.system.path", _hyperspaceSystemDirectory.Path);
            _hyperspace = new Hyperspace(_spark);

            // Build the resource path with the platform's directory separator; a
            // hard-coded backslash only resolves correctly on Windows.
            _sampleDataFrame = _spark.Read()
                .Option("header", true)
                .Option("delimiter", ";")
                .Csv(System.IO.Path.Combine("Resources", "people.csv"));
            _sampleIndexName = "sample_dataframe";
            _sampleIndexConfig = new IndexConfig(_sampleIndexName, new[] { "job" }, new[] { "name" });
            _hyperspace.CreateIndex(_sampleDataFrame, _sampleIndexConfig);
        }
        catch
        {
            // When the constructor throws, no instance is returned to the test runner,
            // so Dispose() would never run. Clean up the temporary directory here.
            _hyperspaceSystemDirectory.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Clean up the Hyperspace system directory in between tests.
    /// </summary>
    public void Dispose()
    {
        _hyperspaceSystemDirectory.Dispose();
    }

    /// <summary>
    /// Test the method signatures for all Hyperspace APIs.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestSignatures()
    {
        // Indexes API.
        Assert.IsType<DataFrame>(_hyperspace.Indexes());

        // Delete and Restore APIs.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.RestoreIndex(_sampleIndexName);

        // Refresh API.
        _hyperspace.RefreshIndex(_sampleIndexName);

        // Cancel API. The test expects this call to throw.
        Assert.Throws<Exception>(() => _hyperspace.Cancel(_sampleIndexName));

        // Explain API, with and without a redirect callback.
        _hyperspace.Explain(_sampleDataFrame, true);
        _hyperspace.Explain(_sampleDataFrame, true, s => Console.WriteLine(s));

        // Delete and Vacuum APIs.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.VacuumIndex(_sampleIndexName);

        // Enable and disable Hyperspace.
        Assert.IsType<SparkSession>(_spark.EnableHyperspace());
        Assert.IsType<SparkSession>(_spark.DisableHyperspace());
        Assert.IsType<bool>(_spark.IsHyperspaceEnabled());
    }

    /// <summary>
    /// Test E2E functionality of index CRUD APIs.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexCreateAndDelete()
    {
        // Should be one active index.
        DataFrame indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(_sampleIndexName, indexes.SelectExpr("name").First()[0]);
        Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]);

        // Delete the index then verify it has been deleted.
        _hyperspace.DeleteIndex(_sampleIndexName);
        indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(States.Deleted, indexes.SelectExpr("state").First()[0]);

        // Restore the index to active state and verify it is back.
        _hyperspace.RestoreIndex(_sampleIndexName);
        indexes = _hyperspace.Indexes();
        Assert.Equal(1, indexes.Count());
        Assert.Equal(States.Active, indexes.SelectExpr("state").First()[0]);

        // Delete and vacuum the index, then verify it is gone.
        _hyperspace.DeleteIndex(_sampleIndexName);
        _hyperspace.VacuumIndex(_sampleIndexName);
        Assert.Equal(0, _hyperspace.Indexes().Count());
    }

    /// <summary>
    /// Test that the explain API hands a non-empty string to the redirect callback.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestExplainAPI()
    {
        // Run a query that hits the index.
        DataFrame queryDataFrame = _sampleDataFrame
            .Where("job == 'Developer'")
            .Select("name");

        string explainString = string.Empty;
        _hyperspace.Explain(queryDataFrame, true, s => explainString = s);
        Assert.False(string.IsNullOrEmpty(explainString));
    }

    /// <summary>
    /// Index states used in testing.
    /// </summary>
    private static class States
    {
        public const string Active = "ACTIVE";
        public const string Deleted = "DELETED";
    }
}
141+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
using System.Linq;
7+
using Microsoft.Spark.E2ETest.Utils;
8+
using Microsoft.Spark.Extensions.Hyperspace.Index;
9+
using Xunit;
10+
11+
namespace Microsoft.Spark.Extensions.Hyperspace.E2ETest.Index
12+
{
13+
/// <summary>
/// Tests for the Hyperspace IndexConfig type and its Builder.
/// </summary>
[Collection(Constants.HyperspaceTestContainerName)]
public class IndexConfigTests
{
    /// <summary>
    /// Receives the collection fixture. The constructor body is empty and the
    /// argument is not used.
    /// </summary>
    /// <param name="fixture">The Hyperspace collection fixture.</param>
    public IndexConfigTests(HyperspaceFixture fixture)
    {
    }

    /// <summary>
    /// Checks the types exposed by the IndexConfig and Builder APIs.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestSignatures()
    {
        // IndexConfig members.
        var config = new IndexConfig("testIndexName", new[] { "Id" }, new string[] { });
        Assert.IsType<string>(config.IndexName);
        Assert.IsType<List<string>>(config.IndexedColumns);
        Assert.IsType<List<string>>(config.IncludedColumns);
        Assert.IsType<Builder>(IndexConfig.Builder());

        bool equalsItself = config.Equals(config);
        Assert.IsType<bool>(equalsItself);

        int hash = config.GetHashCode();
        Assert.IsType<int>(hash);

        string text = config.ToString();
        Assert.IsType<string>(text);

        // Builder members.
        Builder configBuilder = IndexConfig.Builder();
        Assert.IsType<Builder>(configBuilder);
        Assert.IsType<Builder>(configBuilder.IndexName("indexName"));
        Assert.IsType<Builder>(configBuilder.IndexBy("indexed1", "indexed2"));
        Assert.IsType<Builder>(configBuilder.Include("included1"));
        Assert.IsType<IndexConfig>(configBuilder.Create());
    }

    /// <summary>
    /// Builds an IndexConfig through its constructor and checks the stored values.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexConfigConstructor()
    {
        string expectedName = "indexName";
        string[] expectedIndexed = { "idx1" };
        string[] expectedIncluded = { "inc1", "inc2", "inc3" };

        var built = new IndexConfig(expectedName, expectedIndexed, expectedIncluded);

        // The config must expose exactly what was passed in.
        Assert.Equal(expectedName, built.IndexName);
        Assert.Equal(expectedIndexed, built.IndexedColumns);
        Assert.Equal(expectedIncluded, built.IncludedColumns);
    }

    /// <summary>
    /// Builds an IndexConfig through the builder and checks the stored values.
    /// </summary>
    [SkipIfSparkVersionIsLessThan(Versions.V2_4_0)]
    public void TestIndexConfigBuilder()
    {
        string expectedName = "indexName";
        string[] expectedIndexed = { "idx1" };
        string[] expectedIncluded = { "inc1", "inc2", "inc3" };

        Builder configBuilder = IndexConfig.Builder();
        configBuilder.IndexName(expectedName);
        configBuilder.Include(expectedIncluded[0], expectedIncluded[1], expectedIncluded[2]);
        configBuilder.IndexBy(expectedIndexed[0]);

        IndexConfig built = configBuilder.Create();

        // The config must expose exactly what was given to the builder.
        Assert.Equal(expectedName, built.IndexName);
        Assert.Equal(expectedIndexed, built.IndexedColumns);
        Assert.Equal(expectedIncluded, built.IncludedColumns);
    }
}
86+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<!-- MSBuild project for the Hyperspace extension E2E tests. -->
<Project Sdk="Microsoft.NET.Sdk">
2+
<!-- Targets .NET Core 3.1; IsPackable=false keeps this project from being packed. -->
3+
<PropertyGroup>
4+
<TargetFramework>netcoreapp3.1</TargetFramework>
5+
<IsPackable>false</IsPackable>
6+
</PropertyGroup>
7+
<!-- Project references: the Microsoft.Spark E2E test project, the Microsoft.Spark
     library, and the Hyperspace extension project. -->
8+
<ItemGroup>
9+
<ProjectReference Include="..\..\Microsoft.Spark.E2ETest\Microsoft.Spark.E2ETest.csproj" />
10+
<ProjectReference Include="..\..\Microsoft.Spark\Microsoft.Spark.csproj" />
11+
<ProjectReference Include="..\Microsoft.Spark.Extensions.Hyperspace\Microsoft.Spark.Extensions.Hyperspace.csproj" />
12+
</ItemGroup>
13+
</Project>

0 commit comments

Comments
 (0)