Skip to content

Commit d3039e2

Browse files
authored
Support for Hyperspace 0.4.0 (#815)
1 parent b16b4f6 commit d3039e2

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed

src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceFixture.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ public HyperspaceFixture()
1515
Version sparkVersion = SparkSettings.Version;
1616
string hyperspaceVersion = sparkVersion.Major switch
1717
{
18-
2 => "hyperspace-core_2.11:0.2.0",
19-
3 => "hyperspace-core_2.12:0.2.0",
18+
2 => "hyperspace-core_2.11:0.4.0",
19+
3 => "hyperspace-core_2.12:0.4.0",
2020
_ => throw new NotSupportedException($"Spark {sparkVersion} not supported.")
2121
};
2222

src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/HyperspaceTests.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,14 @@ public void TestSignatures()
6666

6767
// Refresh API.
6868
_hyperspace.RefreshIndex(_sampleIndexName);
69+
_hyperspace.RefreshIndex(_sampleIndexName, "incremental");
70+
71+
// Optimize API.
72+
_hyperspace.OptimizeIndex(_sampleIndexName);
73+
_hyperspace.OptimizeIndex(_sampleIndexName, "quick");
74+
75+
// Index metadata API.
76+
Assert.IsType<DataFrame>(_hyperspace.Index(_sampleIndexName));
6977

7078
// Cancel API.
7179
Assert.Throws<Exception>(() => _hyperspace.Cancel(_sampleIndexName));

src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/Hyperspace.cs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,39 @@ public void CreateIndex(DataFrame df, IndexConfig indexConfig) =>
7676
[HyperspaceSince(HyperspaceVersions.V0_0_1)]
7777
public void RefreshIndex(string indexName) => _jvmObject.Invoke("refreshIndex", indexName);
7878

79+
/// <summary>
80+
/// Refreshes the named index for the latest version of the data, using the refresh mode given
81+
/// in <paramref name="mode"/>. Forwards to the JVM-side <c>refreshIndex</c> call.
82+
/// </summary>
83+
/// <param name="indexName">Name of the index to refresh.</param>
84+
/// <param name="mode">Refresh mode. Currently supported modes are <c>incremental</c> and
85+
/// <c>full</c>.</param>
86+
[HyperspaceSince(HyperspaceVersions.V0_0_3)]
87+
public void RefreshIndex(string indexName, string mode) =>
88+
_jvmObject.Invoke("refreshIndex", indexName, mode);
89+
90+
/// <summary>
91+
/// Optimizes an index by changing the underlying index data layout (e.g., compaction).
92+
///
93+
/// Note: This API does NOT refresh (i.e., update) the index if the underlying data changes. It only
94+
/// rearranges the index data into a better layout by compacting small index files. Index files
95+
/// larger than a threshold remain untouched to avoid rewriting large contents.
96+
///
97+
/// The <c>quick</c> mode is used when <paramref name="mode"/> is omitted.
98+
///
99+
/// Available modes:
100+
/// <c>quick</c> mode: Allows for fast optimization. Files smaller than the predefined threshold
101+
/// <c>spark.hyperspace.index.optimize.fileSizeThreshold</c> are picked for compaction.
102+
///
103+
/// <c>full</c> mode: Allows for slow but complete optimization. ALL index files are picked for
104+
/// compaction.
105+
/// </summary>
106+
/// <param name="indexName">Name of the index to optimize.</param>
107+
/// <param name="mode">Optimize mode: <c>quick</c> (the default) or <c>full</c>.</param>
108+
[HyperspaceSince(HyperspaceVersions.V0_0_3)]
109+
public void OptimizeIndex(string indexName, string mode = "quick") =>
110+
_jvmObject.Invoke("optimizeIndex", indexName, mode);
111+
79112
/// <summary>
80113
/// Cancel API to bring an index back from an inconsistent state to the last known stable
81114
/// state.
@@ -120,5 +153,14 @@ public void Explain(DataFrame df, bool verbose, Action<string> redirectFunc)
120153
verbose);
121154
redirectFunc(explainString);
122155
}
156+
157+
/// <summary>
158+
/// Gets metadata and detailed statistics for the given index via the JVM-side <c>index</c> call.
159+
/// </summary>
160+
/// <param name="indexName">Name of the index to get stats for.</param>
161+
/// <returns>Index metadata and statistics as a <see cref="DataFrame"/>.</returns>
162+
[HyperspaceSince(HyperspaceVersions.V0_0_4)]
163+
public DataFrame Index(string indexName) =>
164+
new DataFrame((JvmObjectReference)_jvmObject.Invoke("index", indexName));
123165
}
124166
}

src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace/HyperspaceVersions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ namespace Microsoft.Spark.Extensions.Hyperspace
77
internal static class HyperspaceVersions // Version strings passed to [HyperspaceSince(...)] on the Hyperspace API.
88
{
99
internal const string V0_0_1 = "0.0.1";
10+
internal const string V0_0_3 = "0.0.3"; // Tags RefreshIndex(indexName, mode) and OptimizeIndex.
11+
internal const string V0_0_4 = "0.0.4"; // Tags Index. NOTE(review): the hyperspace-core package used by the E2E fixture is 0.4.0 - confirm "0.0.4" is the intended spelling.
1012
}
1113
}

0 commit comments

Comments
 (0)