Skip to content

Commit 077bcdd

Browse files
authored
fix: compare nodespecs based on ngpu/ncpu/mem first, before price (#49)
1 parent 6799ba1 commit 077bcdd

File tree

6 files changed

+151
-25
lines changed

6 files changed

+151
-25
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
44

5+
## Version v0.1.9 - 2024-03-13
6+
7+
### Fixed
8+
9+
* `JuliaHub.nodespec` now correctly prioritizes the GPU, CPU, and memory counts, rather than the hourly price, when picking a "smallest node for a given spec". (#49)
10+
511
## Version v0.1.8 - 2024-02-21
612

713
### Added

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "JuliaHub"
22
uuid = "bc7fa6ce-b75e-4d60-89ad-56c957190b6e"
33
authors = ["JuliaHub Inc."]
4-
version = "0.1.8"
4+
version = "0.1.9"
55

66
[deps]
77
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"

docs/src/reference/job-submission.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,13 @@ A list of these node specifications can be obtained with the [`nodespecs`](@ref)
4646
```jldoctest
4747
julia> JuliaHub.nodespecs()
4848
9-element Vector{JuliaHub.NodeSpec}:
49-
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.33/hr =#; ncpu=4, memory=16, ngpu=false, exactmatch=true)
50-
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.65/hr =#; ncpu=8, memory=32, ngpu=false, exactmatch=true)
51-
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 2.4/hr =#; ncpu=32, memory=128, ngpu=false, exactmatch=true)
49+
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.17/hr =#; ncpu=2, memory=8, ngpu=false, exactmatch=true)
5250
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 0.22/hr =#; ncpu=2, memory=16, ngpu=false, exactmatch=true)
51+
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.33/hr =#; ncpu=4, memory=16, ngpu=false, exactmatch=true)
5352
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 0.42/hr =#; ncpu=4, memory=32, ngpu=false, exactmatch=true)
54-
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.17/hr =#; ncpu=2, memory=8, ngpu=false, exactmatch=true)
53+
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 0.65/hr =#; ncpu=8, memory=32, ngpu=false, exactmatch=true)
5554
JuliaHub.nodespec(#= r6: 3.5 GHz Intel Xeon Platinum 8375C, 1.3/hr =#; ncpu=8, memory=64, ngpu=false, exactmatch=true)
55+
JuliaHub.nodespec(#= m6: 3.5 GHz Intel Xeon Platinum 8375C, 2.4/hr =#; ncpu=32, memory=128, ngpu=false, exactmatch=true)
5656
JuliaHub.nodespec(#= p2: Intel Xeon E5-2686 v4 (Broadwell), 1.4/hr =#; ncpu=4, memory=61, ngpu=true, exactmatch=true)
5757
JuliaHub.nodespec(#= p3: Intel Xeon E5-2686 v4 (Broadwell), 4.5/hr =#; ncpu=8, memory=61, ngpu=true, exactmatch=true)
5858
```

src/node.jl

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,14 @@ function nodespecs(; auth::Authentication=__auth__())
8383
try
8484
json = JSON.parse(String(r.body))
8585
if json["success"]
86-
return [
86+
nodes = [
8787
NodeSpec(n) for n in json["node_specs"]
8888
]
89+
# We'll sort the list using the same logic that _nodespec_smallest uses, so that
90+
# the result would not depend on backend response ordering. But whether the list
91+
# is sorted, or based on what criteria, is not documented, and is considered to be
92+
# an implementation detail.
93+
return sort(nodes; by=_nodespec_cmp_by)
8994
end
9095
catch err
9196
throw(JuliaHubError("Unexpected answer received."))
@@ -109,9 +114,10 @@ Finds the node matching the specified node parameters. Throws an [`InvalidReques
109114
if it is unable to find a node with the specific parameters. However, if `throw` is set to
110115
`false`, it will return `nothing` instead in that situation.
111116
112-
By default, it searches for the smallest node that has the specified parameters
113-
or more higher. If `exactmatch` is set to `true`, it only returns a node specification
114-
if it can find one that matches the parameters exactly.
117+
By default, it searches for the smallest node that has at least the specified parameters
118+
(prioritizing GPU count, CPU count, and memory in this order when deciding).
119+
If `exactmatch` is set to `true`, it only returns a node specification if it can find one that
120+
matches the parameters exactly.
115121
116122
A list of nodes (e.g. from [`nodespecs`](@ref)) can also be passed, so that the function
117123
does not have to query the server for the list. When this method is used, it is not necessary
@@ -150,7 +156,7 @@ function nodespec(
150156
if exactmatch
151157
_nodespec_exact(nodes; ncpu, memory, gpu=has_gpu, throw)
152158
else
153-
_nodespec_cheapest(nodes; ncpu, memory, gpu=has_gpu, throw)
159+
_nodespec_smallest(nodes; ncpu, memory, gpu=has_gpu, throw)
154160
end
155161
end
156162

@@ -173,10 +179,12 @@ function _nodespec_exact(
173179
return nodes[first(idxs)]
174180
end
175181

176-
function _nodespec_cheapest(
182+
function _nodespec_smallest(
177183
nodes::Vector{NodeSpec}; ncpu::Integer, memory::Integer, gpu::Bool, throw::Bool
178184
)
179-
nodes = sort(nodes; by=n -> (n.priceHr, n.hasGPU, n.vcores, n.mem))
185+
# Note: while JuliaHub.nodespecs() does return a sorted list, we can not assume that
186+
# here, since the user can pass their own list which might not be sorted.
187+
nodes = sort(nodes; by=_nodespec_cmp_by)
180188
idx = findfirst(nodes) do n
181189
# !gpu || n.hasGPU <=> gpu => n.hasGPU
182190
(!gpu || n.hasGPU) && (n.vcores >= ncpu) && (n.mem >= memory)
@@ -190,3 +198,8 @@ function _nodespec_cheapest(
190198
return nodes[idx]
191199
end
192200
end
201+
202+
# This representation of a NodeSpec is used when comparing them to find the "smallest".
203+
# Node's hourly price is just used to disambiguate if there are two nodes that are
204+
# otherwise equal (in terms of GPU, CPU and memory numbers).
205+
function _nodespec_cmp_by(n::NodeSpec)
    # Sort key used to rank nodes from "smallest" to "largest": the GPU flag comes
    # first, then the core count, then the memory. The hourly price is last, so it
    # only breaks ties between nodes whose other three fields are equal.
    return (n.hasGPU, n.vcores, n.mem, n.priceHr)
end

test/jobs.jl

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,12 @@ end
120120

121121
# We'll re-use this further down in job submission tests.
122122
ns_cheapest = Mocking.apply(mocking_patch) do
123+
empty!(MOCK_JULIAHUB_STATE)
123124
JuliaHub.nodespec()
124125
end
125126

126127
@testset "JuliaHub.nodespec/s()" begin
128+
empty!(MOCK_JULIAHUB_STATE)
127129
@testset "Cheapest" begin
128130
@test ns_cheapest.hasGPU === false
129131
@test ns_cheapest.vcores == 2
@@ -184,6 +186,106 @@ end
184186
@test_throws JuliaHub.InvalidRequestError JuliaHub.nodespec(; ngpu=10, throw=true)
185187
@test @test_logs (:warn,) JuliaHub.nodespec(; ngpu=10, throw=false) === nothing
186188
end
189+
190+
# Check that we ignore bad price information, and match node based on the GPU, CPU, and memory (in that order)
191+
MOCK_JULIAHUB_STATE[:nodespecs] = [
192+
#! format: off
193+
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
194+
[ "c1", false, 1.0, 16.0, 3.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
195+
[ "c2", false, 2.0, 8.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
196+
[ "c8", false, 8.0, 4.0, 1.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
197+
#! format: on
198+
]
199+
Mocking.apply(mocking_patch) do
200+
let n = JuliaHub.nodespec()
201+
@test n.nodeClass == "c1"
202+
@test n._id == 2
203+
@test n.vcores == 1
204+
@test n.mem == 16
205+
@test !n.hasGPU
206+
end
207+
let n = JuliaHub.nodespec(; ncpu=2)
208+
@test n.nodeClass == "c2"
209+
@test n._id == 3
210+
@test n.vcores == 2
211+
@test n.mem == 8
212+
@test !n.hasGPU
213+
end
214+
# Test sorting of JuliaHub.nodespecs()
215+
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1", "c2", "c8"]
216+
end
217+
# Cheap GPU node gets de-prioritised:
218+
push!(
219+
MOCK_JULIAHUB_STATE[:nodespecs],
220+
#! format: off
221+
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
222+
[ "c1g1", true, 1.0, 16.0, 0.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 5],
223+
#! format: on
224+
)
225+
Mocking.apply(mocking_patch) do
226+
let n = JuliaHub.nodespec()
227+
@test n.nodeClass == "c1"
228+
@test n._id == 2
229+
@test n.vcores == 1
230+
@test n.mem == 16
231+
@test !n.hasGPU
232+
end
233+
# Test sorting of JuliaHub.nodespecs()
234+
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1", "c2", "c8", "c1g1"]
235+
end
236+
# Low memory gets prioritized:
237+
push!(
238+
MOCK_JULIAHUB_STATE[:nodespecs],
239+
#! format: off
240+
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
241+
[ "c1m1", false, 1.0, 1.0, 99.99, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 6],
242+
#! format: on
243+
)
244+
Mocking.apply(mocking_patch) do
245+
let n = JuliaHub.nodespec()
246+
@test n.nodeClass == "c1m1"
247+
@test n._id == 6
248+
@test n.vcores == 1
249+
@test n.mem == 1
250+
@test !n.hasGPU
251+
end
252+
# But we'll be forced to pick the GPU node here:
253+
let n = JuliaHub.nodespec(; ngpu=1)
254+
@test n.nodeClass == "c1g1"
255+
@test n._id == 5
256+
@test n.vcores == 1
257+
@test n.mem == 16
258+
@test n.hasGPU
259+
end
260+
# Test sorting of JuliaHub.nodespecs()
261+
@test [n.nodeClass for n in JuliaHub.nodespecs()] == ["c1m1", "c1", "c2", "c8", "c1g1"]
262+
end
263+
# However, for identical nodespecs, we disambiguate based on price:
264+
MOCK_JULIAHUB_STATE[:nodespecs] = [
265+
#! format: off
266+
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
267+
[ "a1", false, 1.0, 1.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
268+
[ "a2", false, 1.0, 1.0, 1.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
269+
[ "a3", false, 1.0, 1.0, 2.00, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
270+
#! format: on
271+
]
272+
Mocking.apply(mocking_patch) do
273+
let n = JuliaHub.nodespec()
274+
@test n._id == 3
275+
@test n.nodeClass == "a2"
276+
@test n.vcores == 1
277+
@test n.mem == 1
278+
@test !n.hasGPU
279+
end
280+
# Test sorting of JuliaHub.nodespecs()
281+
let ns = JuliaHub.nodespecs()
282+
@test ns[1].nodeClass == "a2"
283+
# With identical spec and price, order is not guaranteed
284+
@test ns[2].nodeClass ∈ ("a1", "a3")
285+
@test ns[3].nodeClass ∈ ("a1", "a3")
286+
end
287+
end
288+
empty!(MOCK_JULIAHUB_STATE)
187289
end
188290

189291
# This testset uses the show(::IO, ::JuliaHub.ComputeConfig) representation of ComputeConfig,

test/mocking.jl

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -186,21 +186,26 @@ function _restcall_mocked(method, url, headers, payload; query)
186186
apiv = get(MOCK_JULIAHUB_STATE, :api_version, JuliaHub._MISSING_API_VERSION)
187187
# Mocked versions of the different endpoints:
188188
if (method == :GET) && endswith(url, "app/config/nodespecs/info")
189-
Dict(
190-
"message" => "", "success" => true,
191-
"node_specs" => [
189+
nodespecs = get(MOCK_JULIAHUB_STATE, :nodespecs) do
190+
[
192191
#! format: off
193-
["m6", false, 4.0, 16.0, 0.33, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.5, 87.9, 2],
194-
["m6", false, 8.0, 32.0, 0.65, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.1, 92.1, 3],
195-
["m6", false, 32.0, 128.0, 2.4, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.5, 93.9, 4],
196-
["r6", false, 2.0, 16.0, 0.22, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 81.5, 89.8, 5],
197-
["r6", false, 4.0, 32.0, 0.42, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 90.5, 92.1, 6],
198-
["m6", false, 2.0, 8.0, 0.17, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 81.5, 83.25, 7],
199-
["r6", false, 8.0, 64.0, 1.3, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 95.1, 94.25, 9],
200-
["p2", true, 4.0, 61.0, 1.4, "Intel Xeon E5-2686 v4 (Broadwell)", "", "K80", 90.25, 88.09, 8],
201-
["p3", true, 8.0, 61.0, 4.5, "Intel Xeon E5-2686 v4 (Broadwell)", "", "V100", 95.03, 88.09, 1],
192+
# class, gpu, cpu, mem, price, desc, ?, memdisp, ?, ?, id
193+
[ "m6", false, 4.0, 16.0, 0.33, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 90.50, 87.90, 2],
194+
[ "m6", false, 8.0, 32.0, 0.65, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 95.10, 92.10, 3],
195+
[ "m6", false, 32.0, 128.0, 2.40, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 98.50, 93.90, 4],
196+
[ "r6", false, 2.0, 16.0, 0.22, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 81.50, 89.80, 5],
197+
[ "r6", false, 4.0, 32.0, 0.42, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 90.50, 92.10, 6],
198+
[ "m6", false, 2.0, 8.0, 0.17, "3.5 GHz Intel Xeon Platinum 8375C", "", "4", 81.50, 83.25, 7],
199+
[ "r6", false, 8.0, 64.0, 1.30, "3.5 GHz Intel Xeon Platinum 8375C", "", "8", 95.10, 94.25, 9],
200+
[ "p2", true, 4.0, 61.0, 1.40, "Intel Xeon E5-2686 v4 (Broadwell)", "", "K80", 90.25, 88.09, 8],
201+
[ "p3", true, 8.0, 61.0, 4.50, "Intel Xeon E5-2686 v4 (Broadwell)", "", "V100", 95.03, 88.09, 1],
202202
#! format: on
203-
],
203+
]
204+
end
205+
Dict(
206+
"message" => "",
207+
"success" => true,
208+
"node_specs" => nodespecs,
204209
) |> jsonresponse(200)
205210
elseif (method == :GET) && endswith(url, "app/packages/registries")
206211
packages_registries = get(MOCK_JULIAHUB_STATE, :app_packages_registries) do

0 commit comments

Comments
 (0)