11"use strict" ;
22
3- import { BENCHMARK_GROUPS , QUERY_NAME_MAP } from './config.js' ;
3+ import { shared } from './data-shared.js' ;
4+ import { BENCHMARK_GROUPS } from './config.js' ;
45
56// Data processing module
67export const dataProcessor = {
-   parseCommits(commitMetadata) {
-     const commits = [];
-     Object.values(commitMetadata)
-       .sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp))
-       .forEach((commit, index) => {
-         commit.sortedIndex = index;
-         commits.push(commit);
-       });
-     return commits;
-   },
-
-   createMissingCommit(commitId) {
-     return {
-       author: { email: "[email protected]", name: "Dan King" },
-       committer: { email: "[email protected]", name: "GitHub" },
-       id: commitId,
-       message: "!! This commit is missing from commits.json !!",
-       timestamp: "1970-01-01T00:00:00Z",
-       tree_id: null,
-       url: `https://github.com/vortex-data/vortex/commit/${commitId}`,
-     };
-   },
+   parseCommits: shared.parseCommits,

-   determineGroupId(benchmark) {
-     const { name, dataset, storage } = benchmark;
+   createMissingCommit: shared.createMissingCommit,

-     if (dataset?.tpch) {
-       const scaleFactor = dataset.tpch.scale_factor;
-       const isNvme = storage === undefined || storage === "nvme";
-       return this.getTpchGroupId(scaleFactor, isNvme);
-     }
+   determineGroupId: shared.determineGroupId,

-     if (dataset?.tpcds) {
-       const scaleFactor = dataset.tpcds.scale_factor;
-       const isNvme = storage === undefined || storage === "nvme";
-       return this.getTpcdsGroupId(scaleFactor, isNvme);
-     }
-
-     if (dataset?.clickbench) return "Clickbench";
-     if (name.startsWith("random-access/")) return "Random Access";
-     if (name.includes("compress time/")) return "Compression";
-     if (name.startsWith("vortex size/")) return "Compression Size";
-     if (
-       name.startsWith("vortex:raw size/") ||
-       name.startsWith("vortex:parquet-zstd size/")
-     ) {
-       return "Compression Size";
-     }
-     if (name.startsWith("tpch_q")) {
-       const isNvme = storage === undefined || storage === "nvme";
-       return isNvme ? "TPC-H (NVMe) (SF=1)" : "TPC-H (S3) (SF=1)";
-     }
-     if (name.startsWith("tpcds_q")) {
-       const isNvme = storage === undefined || storage === "nvme";
-       return isNvme ? "TPC-DS (NVMe) (SF=1)" : "TPC-DS (S3) (SF=1)";
-     }
-     if (name.startsWith("clickbench")) return "Clickbench";
+   getTpchGroupId: shared.getTpchGroupId,

-     return null;
-   },
+   getTpcdsGroupId: shared.getTpcdsGroupId,

-   getTpchGroupId(scaleFactor, isNvme) {
-     const sf = Number(scaleFactor);
-     const storage = isNvme ? "NVMe" : "S3";
+   normalizeSeriesName: shared.normalizeSeriesName,

-     switch (sf) {
-       case 1:
-         return `TPC-H (${storage}) (SF=1)`;
-       case 10:
-         return `TPC-H (${storage}) (SF=10)`;
-       case 100:
-         return `TPC-H (${storage}) (SF=100)`;
-       case 1000:
-         return `TPC-H (${storage}) (SF=1000)`;
-       default:
-         console.warn("Unknown scale factor:", scaleFactor);
-         return null;
-     }
-   },
+   formatQueryName: shared.formatQueryName,

-   getTpcdsGroupId(scaleFactor, isNvme) {
-     const sf = Number(scaleFactor);
-     const storage = isNvme ? "NVMe" : "S3";
+   convertValue: shared.convertValue,

-     switch (sf) {
-       case 1:
-         return `TPC-DS (${storage}) (SF=1)`;
-       case 10:
-         return `TPC-DS (${storage}) (SF=10)`;
-       case 100:
-         return `TPC-DS (${storage}) (SF=100)`;
-       case 1000:
-         return `TPC-DS (${storage}) (SF=1000)`;
-       default:
-         console.warn("Unknown scale factor:", scaleFactor);
-         return null;
-     }
-   },
-
-   normalizeSeriesName(name, seriesName) {
-     let normalizedName = seriesName;
-     let normalizedQuery = name;
-
-     if (
-       seriesName.endsWith(" throughput") ||
-       seriesName.endsWith("throughput")
-     ) {
-       const suffix = seriesName.endsWith(" throughput")
-         ? " throughput"
-         : "throughput";
-       normalizedName = seriesName.slice(0, seriesName.length - suffix.length);
-       normalizedQuery = name.replace("time", "throughput");
-     }
-
-     return { name: normalizedQuery, seriesName: normalizedName };
-   },
-
-   formatQueryName(query) {
-     let prettyQ = query.replace(/_/g, " ").toUpperCase();
-     prettyQ = QUERY_NAME_MAP[prettyQ] || prettyQ;
-     prettyQ = prettyQ.replace(/^TPCH\s/, "TPC-H ");
-     prettyQ = prettyQ.replace(/^TPCDS\s/, "TPC-DS ");
-     return prettyQ;
-   },
-
-   convertValue(value, unit) {
-     const isNanos = unit === "ns/iter" || unit === "ns";
-     const isBytes = unit === "bytes";
-     const isThroughput = unit === "bytes/ns";
-
-     if (isNanos) return value / 1_000_000;
-     if (isBytes) return value / 1_048_576;
-     if (isThroughput) return (value * 1_000_000_000) / 1_048_576;
-     return value;
-   },
-
-   getUnit(unit) {
-     const isNanos = unit === "ns/iter" || unit === "ns";
-     const isBytes = unit === "bytes";
-     const isThroughput = unit === "bytes/ns";
-
-     if (isNanos) return "ms/iter";
-     if (isBytes) return "MiB";
-     if (isThroughput) return "MiB/s";
-     return unit;
-   },
+   getUnit: shared.getUnit,

  downloadAndGroupData(data, commitMetadata, seriesRenameFn) {
    const commits = this.parseCommits(commitMetadata);
@@ -190,148 +62,13 @@ export const dataProcessor = {
    }));
  },

-   initializeGroups() {
-     const groups = {};
-     BENCHMARK_GROUPS.forEach((name) => {
-       groups[name] = new Map();
-     });
-     return groups;
-   },
-
-   processBenchmark(
-     benchmark,
-     commitMetadata,
-     commits,
-     groups,
-     seriesRenameFn,
-     missingCommits,
-     uncategorizableNames
-   ) {
-     // Ensure commit metadata
-     if (!benchmark.commit) {
-       benchmark.commit = commitMetadata[benchmark.commit_id];
-       if (!benchmark.commit) {
-         missingCommits.add(benchmark.commit_id);
-         benchmark.commit = commitMetadata[benchmark.commit_id] =
-           this.createMissingCommit(benchmark.commit_id);
-       }
-     }
-
-     // Determine group
-     const groupId = this.determineGroupId(benchmark);
-     if (!groupId) {
-       uncategorizableNames.add(benchmark.name);
-       return;
-     }
-
-     const group = groups[groupId];
-     if (!group) {
-       console.warn("Cannot find group element in group:", groupId);
-       return;
-     }
-
-     // Process benchmark data
-     let [query, seriesName] = benchmark.name.split("/");
-     const normalized = this.normalizeSeriesName(query, seriesName);
-     query = normalized.name;
-     seriesName = normalized.seriesName;
-
-     // Apply series renaming
-     seriesName = this.applySeriesRenaming(
-       seriesName,
-       groupId,
-       seriesRenameFn
-     );
-
-     // Format query name
-     const prettyQ = this.formatQueryName(query);
-     if (prettyQ.includes("PARQUET-UNC")) return;
-
-     // Set units
-     let unit = benchmark.unit;
-     if (!unit && benchmark.name.startsWith("vortex size/")) {
-       unit = "bytes";
-     } else if (
-       !unit &&
-       (benchmark.name.startsWith("vortex:raw size/") ||
-         benchmark.name.startsWith("vortex:parquet-zstd size/"))
-     ) {
-       unit = "ratio";
-     }
-
-     // Calculate sort position
-     const sortPosition =
-       query.slice(0, 4) === "tpch" || query.slice(0, 5) === "tpcds"
-         ? parseInt(prettyQ.split(" ")[1].substring(1), 10)
-         : 0;
+   initializeGroups: shared.initializeGroups,

-     // Add to group
-     this.addToGroup(
-       group,
-       prettyQ,
-       seriesName,
-       benchmark,
-       unit,
-       sortPosition,
-       commits
-     );
-   },
+   processBenchmark: shared.processBenchmark,

-   applySeriesRenaming(seriesName, groupId, seriesRenameFn) {
-     if (!seriesRenameFn) return seriesName;
+   applySeriesRenaming: shared.applySeriesRenaming,

-     const renamer = seriesRenameFn.find(([name]) => name === groupId);
-     if (renamer?.[1]?.renamedDatasets) {
-       const renameDict = renamer[1].renamedDatasets;
-       return renameDict[seriesName] || seriesName;
-     }
-     return seriesName;
-   },
+   addToGroup: shared.addToGroup,

-   addToGroup(
-     group,
-     queryName,
-     seriesName,
-     benchmark,
-     unit,
-     sortPosition,
-     commits
-   ) {
-     let arr = group.get(queryName);
-     if (!arr) {
-       group.set(queryName, {
-         sort_position: sortPosition,
-         commits,
-         unit: this.getUnit(unit),
-         series: new Map(),
-       });
-       arr = group.get(queryName);
-     }
-
-     let series = arr.series.get(seriesName);
-     if (!series) {
-       arr.series.set(seriesName, new Array(commits.length).fill(null));
-       series = arr.series.get(seriesName);
-     }
-
-     series[benchmark.commit.sortedIndex] = {
-       range: "this was the range",
-       value: this.convertValue(benchmark.value, unit),
-     };
-   },
-
-   sortGroups(groups) {
-     const sortByPositionThenName = (a, b) => {
-       const positionCompare = a[1].sort_position - b[1].sort_position;
-       return positionCompare !== 0
-         ? positionCompare
-         : a[0].localeCompare(b[0]);
-     };
-
-     Object.entries(groups).forEach(([name, charts]) => {
-       groups[name] = new Map(
-         [...charts.entries()].sort(sortByPositionThenName)
-       );
-     });
-   },
- };
+   sortGroups: shared.sortGroups,
+ };
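This hunk only shows the consumer side of the refactor; the new './data-shared.js' module itself is not part of the diff. Below is a minimal sketch of what it presumably exports, reconstructed from the implementations removed above. The module name comes from the new import line, but its exact contents and its own imports are assumptions.

// data-shared.js (hypothetical reconstruction, not shown in this diff)
// Imports assumed; the removed formatQueryName and initializeGroups would need them.
import { BENCHMARK_GROUPS, QUERY_NAME_MAP } from './config.js';

export const shared = {
  // Sort commits chronologically and record each commit's position
  // (copied from the parseCommits implementation removed above).
  parseCommits(commitMetadata) {
    const commits = [];
    Object.values(commitMetadata)
      .sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp))
      .forEach((commit, index) => {
        commit.sortedIndex = index;
        commits.push(commit);
      });
    return commits;
  },

  // Convert raw benchmark values into display units: ns -> ms, bytes -> MiB,
  // bytes/ns -> MiB/s (copied from the convertValue implementation removed above).
  convertValue(value, unit) {
    const isNanos = unit === "ns/iter" || unit === "ns";
    const isBytes = unit === "bytes";
    const isThroughput = unit === "bytes/ns";

    if (isNanos) return value / 1_000_000;
    if (isBytes) return value / 1_048_576;
    if (isThroughput) return (value * 1_000_000_000) / 1_048_576;
    return value;
  },

  // ...the other removed methods (createMissingCommit, determineGroupId,
  // formatQueryName, processBenchmark, addToGroup, sortGroups, ...) would
  // move here in the same way.
};

Because dataProcessor re-binds each helper as an own property, existing call sites such as this.parseCommits(commitMetadata) in downloadAndGroupData continue to resolve unchanged.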