1+ #-------------------------------------------------------------
2+ #
3+ # Licensed to the Apache Software Foundation (ASF) under one
4+ # or more contributor license agreements. See the NOTICE file
5+ # distributed with this work for additional information
6+ # regarding copyright ownership. The ASF licenses this file
7+ # to you under the Apache License, Version 2.0 (the
8+ # "License"); you may not use this file except in compliance
9+ # with the License. You may obtain a copy of the License at
10+ #
11+ # http://www.apache.org/licenses/LICENSE-2.0
12+ #
13+ # Unless required by applicable law or agreed to in writing,
14+ # software distributed under the License is distributed on an
15+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+ # KIND, either express or implied. See the License for the
17+ # specific language governing permissions and limitations
18+ # under the License.
19+ #
20+ #-------------------------------------------------------------
21+
22+
23+ /*DML-script implementing the ssb query Q1.3 in SystemDS.
24+ SELECT SUM(lo_extendedprice * lo_discount) AS REVENUE
25+ FROM lineorder, dates
26+ WHERE
27+ lo_orderdate = d_datekey
28+ AND d_weeknuminyear = 6
29+ AND d_year = 1994
30+ AND lo_discount BETWEEN 5 AND 7
31+ AND lo_quantity BETWEEN 26 AND 35;
32+
Usage:
./bin/systemds scripts/ssb/queries/q1_3.dml -nvargs input_dir="/path/to/data"
./bin/systemds scripts/ssb/queries/q1_3.dml -nvargs input_dir="./data"
or with explicit -f flag:
./bin/systemds -f scripts/ssb/queries/q1_3.dml -nvargs input_dir="/path/to/data"
38+
39+ Parameters:
40+ input_dir - Path to input directory containing the table files (e.g., ./data)
41+ */
42+
43+
44+ # -- SOURCING THE RA-FUNCTIONS --
45+ source("./scripts/builtin/raSelection.dml") as raSel
46+ source("./scripts/builtin/raJoin.dml") as raJoin
47+
# -- INPUT LOCATION --
# Directory holding the table files; falls back to "./data" when the
# named argument input_dir is not supplied on the command line.
input_dir = ifdef($input_dir, "./data");

# -- LOADING THE TABLES --
# Both tables are read as frames from pipe-separated files without a header row.
lineorder_csv = read(input_dir + "/lineorder.tbl", format="csv", sep="|", header=FALSE, data_type="frame");
date_csv = read(input_dir + "/date.tbl", format="csv", sep="|", header=FALSE, data_type="frame");
55+
# -- PREPARING THE DATE DIMENSION --
# Only three date columns are needed; each is converted to a numeric matrix
# on its own (column positions as used throughout this script):
#   col 1  : D_DATEKEY
#   col 5  : D_YEAR
#   col 12 : D_WEEKNUMINYEAR
date_keys_matrix = as.matrix(date_csv[, 1]);
date_year_matrix = as.matrix(date_csv[, 5]);
date_weeknum_matrix = as.matrix(date_csv[, 12]);

# Vectorized predicate D_YEAR = 1994 AND D_WEEKNUMINYEAR = 6.
# Each comparison yields a 0/1 column vector; their element-wise product is 1
# exactly where both conditions hold. This replaces the former row-by-row
# count-then-copy loops with a single pass over the columns.
date_match_mask = (date_year_matrix == 1994) * (date_weeknum_matrix == 6);

# Keep only the matching rows. The result has the same layout as before:
#   col 1 : D_DATEKEY of the matching row
#   col 2 : constant 1 (marker for "matches the date predicate"), taken
#           directly from the mask, which is 1 on every selected row.
# NOTE(review): when no row matches, removeEmpty is documented to return a
# single all-zero row by default rather than a 0-row matrix; a date key of 0
# is not expected to match any LO_ORDERDATE - confirm against the data.
date_filtered = removeEmpty(target=cbind(date_keys_matrix, date_match_mask), margin="rows", select=date_match_mask);
86+
# -- PROJECTING LINEORDER --
# Only the four columns the query touches are kept, in this order:
#   1 : LO_ORDERDATE (source col 6)    2 : LO_QUANTITY (source col 9)
#   3 : LO_EXTPRICE  (source col 10)   4 : LO_DISCOUNT (source col 12)
lo_projection = as.matrix(cbind(lineorder_csv[, 6], lineorder_csv[, 9], lineorder_csv[, 10], lineorder_csv[, 12]));

# -- SELECTIONS VIA THE RA-SELECTION FUNCTION --
# The date side has already been restricted to D_YEAR = 1994 AND
# D_WEEKNUMINYEAR = 6, so the filtered date matrix is used as-is.
d_year_filt = date_filtered;

# 26 <= LO_QUANTITY <= 35 (projected column 2), applied as two one-sided selections.
lo_qty_lower = raSel::m_raSelection(lo_projection, col=2, op=">=", val=26);
lo_qty_range = raSel::m_raSelection(lo_qty_lower, col=2, op="<=", val=35);

# 5 <= LO_DISCOUNT <= 7 (projected column 4), applied on top of the quantity filter.
lo_disc_lower = raSel::m_raSelection(lo_qty_range, col=4, op=">=", val=5);
lo_quan_disc_filt = raSel::m_raSelection(lo_disc_lower, col=4, op="<=", val=7);
103+
104+
# -- JOIN: LO_ORDERDATE = D_DATEKEY --
# Sort-merge join of the filtered lineorder rows (key in column 1) with the
# filtered date rows (key in column 1), using the RA-join function.
lo_date_joined = raJoin::m_raJoin(A=lo_quan_disc_filt, colA=1, B=d_year_filt, colB=1, method="sort-merge");

# -- AGGREGATION: SUM(LO_EXTPRICE * LO_DISCOUNT) --
# Columns 3 and 4 of the join result are read as LO_EXTPRICE and LO_DISCOUNT;
# their element-wise product is summed over all joined rows.
revenue = sum(lo_date_joined[, 3] * lo_date_joined[, 4]);

# Report the result as an integer.
print("REVENUE: " + as.integer(revenue));
0 commit comments