-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtransform.malloy
More file actions
95 lines (88 loc) · 2.42 KB
/
transform.malloy
File metadata and controls
95 lines (88 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
##! experimental.persistence
source: raw_titles is duckdb.sql("""
SELECT * FROM
read_csv_auto(
-- 'https://datasets.imdbws.com/title.basics.tsv.gz'
'data/title.basics.tsv.gz'
, all_varchar=true, delim='\t', quote='',header=True)
""")
source: raw_principals is duckdb.sql("""
SELECT * FROM
read_csv_auto(
-- 'https://datasets.imdbws.com/title.principals.tsv.gz'
'data/title.principals.tsv.gz'
, all_varchar=true, delim='\t', quote='',header=True)
""")
source: raw_ratings is duckdb.sql("""
SELECT * FROM
read_csv_auto(
-- 'https://datasets.imdbws.com/title.ratings.tsv.gz'
'data/title.ratings.tsv.gz'
, all_varchar=true, delim='\t', quote='',header=True)
""")
source: raw_crew is duckdb.sql("""
SELECT * FROM
read_csv_auto(
-- 'https://datasets.imdbws.com/title.crew.tsv.gz'
'data/title.crew.tsv.gz'
, all_varchar=true, delim='\t', quote='',header=True)
""")
source: raw_names is duckdb.sql("""
SELECT * FROM
read_csv_auto(
-- 'https://datasets.imdbws.com/name.basics.tsv.gz'
'data/name.basics.tsv.gz'
, all_varchar=true, delim='\t', quote='',header=True)
""")
#@ persist name="titles"
source: titles_base is raw_titles -> {
join_one: raw_ratings on tconst = raw_ratings.tconst
join_one: raw_crew on tconst = raw_crew.tconst
where: raw_ratings.numVotes::number > 30000
select:
tconst
isAdult, originalTitle, primaryTitle
startYear is startYear:::number
endYear is endYear:::number
runtimeMinutes is runtimeMinutes:::number
genres is str_split!string[](genres,',')
directors is str_split!(raw_crew.directors,',')
writers is str_split!(raw_crew.writers,',')
averageRating is raw_ratings.averageRating:::number
numVotes is raw_ratings.numVotes:::number
} extend {
view: test is {
select: *
limit: 10
}
}
#@ persist name="principals"
source: principals_base is raw_principals -> {
join_one: titles is raw_titles on tconst = titles.tconst
where: titles.tconst is not null
select:
tconst
ordering
nconst
category
job
characters is str_split!(
regexp_replace!(characters,'[\[\]\"]','','g'),
',')
} extend {
view: test is {
select: *
limit: 10
}
}
#@ persist name="names"
source: names_base is raw_names -> {
join_many: principals is raw_principals on nconst = principals.nconst
where: principals.nconst is not null
group_by:
nconst
primaryName
primaryProfession
birthYear is birthYear:::number
deathYear is deathYear:::number
}