Skip to content

Commit ec6abe5

Browse files
committed
added blocks for regression task
1 parent 18cb1da commit ec6abe5

File tree

6 files changed

+183
-20
lines changed

6 files changed

+183
-20
lines changed

FastTimeSeries/src/FastTimeSeries.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,5 @@ function __init__()
5454
end
5555

5656
export
57-
TimeSeriesRow, TSClassificationSingle, TSPreprocessing
57+
TimeSeriesRow, TSClassificationSingle, TSPreprocessing, _ts2df
5858
end

FastTimeSeries/src/container.jl

Lines changed: 117 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ function _ts2df(
5454

5555
timestamps = false
5656
class_labels = false
57+
target_labels = true
5758

5859
open(full_file_path_and_name, "r") do file
5960
for ln in eachline(file)
@@ -109,6 +110,21 @@ function _ts2df(
109110

110111
series_length = parse(Int, tokens[2])
111112

113+
elseif startswith(ln, "@dimension")
114+
# Check that the associated value is valid
115+
tokens = split(ln, " ")
116+
117+
num_dimensions = parse(Int, tokens[2])
118+
119+
elseif startswith(ln, "@targetlabel")
120+
tokens = split(ln, " ")
121+
122+
if tokens[2] == "true"
123+
target_labels = true
124+
else
125+
target_labels = false
126+
end
127+
112128
elseif startswith(ln, "@classlabel")
113129
# Check that the associated value is valid
114130
tokens = split(ln, " ")
@@ -150,7 +166,106 @@ function _ts2df(
150166
# Check if we are dealing with data that has timestamps
151167

152168
if timestamps
153-
#! Need To Add Code.
169+
170+
has_another_value = false
171+
has_another_dimension = false
172+
173+
timestamps_for_dimension = []
174+
values_for_dimension = []
175+
176+
line_len = length(ln)
177+
char_num = 1
178+
num_this_dimension = 1
179+
arr = Array{Float32, 2}(undef, num_dimensions, series_length)
180+
181+
while char_num <= line_len
182+
183+
# Move through any spaces.
184+
while char_num <= line_len && isspace(ln[char_num])
185+
char_num += 1
186+
end
187+
188+
if char_num <= line_len
189+
190+
# Check if we have reached the target label (the token does not start a "(timestamp,value)" tuple)
191+
if ln[char_num] != '(' && target_labels
192+
193+
class_val = strip(ln[char_num:end], ' ')
194+
195+
push!(class_val_list, parse(Float64, class_val))
196+
push!(instance_list, arr)
197+
198+
char_num = line_len
199+
200+
has_another_value = false
201+
has_another_dimension = false
202+
203+
timestamps_for_dimension = []
204+
values_for_dimension = []
205+
206+
char_num += 1
207+
num_this_dimension = 1
208+
arr = Array{Float32, 2}(undef, num_dimensions, series_length)
209+
210+
else
211+
212+
char_num += 1
213+
tuple_data = ""
214+
215+
while (char_num <= line_len && ln[char_num] != ')')
216+
tuple_data *= ln[char_num]
217+
char_num += 1
218+
end
219+
220+
char_num += 1
221+
222+
while char_num <= line_len && isspace(ln[char_num])
223+
char_num += 1
224+
end
225+
226+
# Check if there is another value or dimension to process after this tuple.
227+
if char_num > line_len
228+
has_another_value = false
229+
has_another_dimension = false
230+
elseif ln[char_num] == ','
231+
has_another_value = true
232+
has_another_dimension = false
233+
elseif ln[char_num] == ':'
234+
has_another_value = false
235+
has_another_dimension = true
236+
end
237+
238+
char_num += 1
239+
240+
last_comma_index = findlast(",", tuple_data)
241+
242+
if !isnothing(last_comma_index)
243+
last_comma_index = last_comma_index[1]
244+
end
245+
246+
value = tuple_data[last_comma_index+1:end]
247+
value = parse(Float64, value)
248+
249+
timestamp = tuple_data[1:last_comma_index-1]
250+
251+
push!(values_for_dimension, value)
252+
253+
if !has_another_value
254+
255+
arr[num_this_dimension, 1:end] = values_for_dimension
256+
257+
values_for_dimension = []
258+
259+
num_this_dimension += 1
260+
end
261+
262+
end
263+
264+
end
265+
266+
end
267+
268+
154269
else
155270
dimensions = split(ln, ':')
156271

@@ -196,13 +311,9 @@ function _ts2df(
196311
data_series = split(dimension, ',')
197312
data_series = [parse(Float32, i) for i in data_series]
198313
arr[dim, 1:end] = data_series
199-
# println(data_series)
200-
# data_series = [parse(Float32, i) for i in data_series]
201-
# push!(instance_list[dim], data_series)
202314
else
203315
tmp = Array{Float32, 1}(undef, 100)
204316
arr[dim, 1:end] = tmp
205-
# push!(instance_list[dim], [])
206317
end
207318
end
208319

@@ -229,7 +340,7 @@ function _ts2df(
229340
end
230341

231342
# Check if we should return any associated class labels separately
232-
if class_labels
343+
if class_labels || target_labels
233344
return data, class_val_list
234345
else
235346
return data

FastTimeSeries/src/models.jl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ function blockmodel(inblock::TimeSeriesRow,
1010
data = rand(Float32, inblock.nfeatures, 32, inblock.obslength)
1111
# data = [rand(Float32, inblock.nfeatures, 32) for _ ∈ 1:inblock.obslength]
1212
output = backbone(data)
13-
outs = size(output)[1]
14-
return Models.RNNModel(backbone, outsize = length(outblock.classes), recout = outs)
13+
return Models.RNNModel(backbone, outsize = length(outblock.classes), recout = size(output, 1))
1514
end
1615

1716
"""

FastTimeSeries/src/models/RNN.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ function RNNModel(recbackbone;
2626
outsize,
2727
recout,
2828
kwargs...)
29-
return RNNModel{}(recbackbone, Dense(recout, outsize))
29+
return RNNModel(recbackbone, Dense(recout, outsize))
3030
end
3131

3232
function (m::RNNModel)(X)

FastTimeSeries/src/recipes.jl

Lines changed: 25 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,18 @@ Recipe for loading a time series dataset stored in a .ts file
77
Base.@kwdef struct TimeSeriesDatasetRecipe <: Datasets.DatasetRecipe
88
train_file
99
test_file = nothing
10+
regression = false
1011
loadfn = Datasets.loadfile
1112
end
1213

13-
Datasets.recipeblocks(::Type{TimeSeriesDatasetRecipe}) = Tuple{TimeSeriesRow, Label}
14+
function Datasets.recipeblocks(recipe::TimeSeriesDatasetRecipe)
15+
if !recipe.regression
16+
return Tuple{TimeSeriesRow, Label}
17+
else
18+
return Tuple{TimeSeriesRow, Continuous}
19+
end
20+
end
21+
# Datasets.recipeblocks(::Type{TimeSeriesDatasetRecipe}) = Tuple{TimeSeriesRow, Label}
1422

1523
# TODO: Add a check for the case where test_file is nothing.
1624
function Datasets.loadrecipe(recipe::TimeSeriesDatasetRecipe, path)
@@ -23,10 +31,18 @@ function Datasets.loadrecipe(recipe::TimeSeriesDatasetRecipe, path)
2331
labels = [labels_train; labels_test]
2432
rows = TimeSeriesDataset(rows)
2533
data = rows, labels
26-
blocks = (
27-
setup(TimeSeriesRow,rows),
28-
Label(unique(eachobs(labels))),
29-
)
34+
blocks = nothing
35+
if !recipe.regression
36+
blocks = (
37+
setup(TimeSeriesRow,rows),
38+
Label(unique(eachobs(labels))),
39+
)
40+
else
41+
blocks = (
42+
setup(TimeSeriesRow,rows),
43+
Continuous(1)
44+
)
45+
end
3046
return data, blocks
3147
end
3248

@@ -41,11 +57,11 @@ const RECIPES = Dict{String,Vector{Datasets.DatasetRecipe}}(
4157
],
4258
"natops" => [
4359
TimeSeriesDatasetRecipe(train_file="NATOPS_TEST.ts", test_file="NATOPS_TRAIN.ts")
44-
]
60+
],
4561
#! TODO.
46-
# "appliances_energy" => [
47-
# TimeSeriesDatasetRecipe(train_file="AppliancesEnergy_TRAIN.ts", test_file="AppliancesEnergy_TEST.ts")
48-
# ]
62+
"appliances_energy" => [
63+
TimeSeriesDatasetRecipe(train_file="AppliancesEnergy_TRAIN.ts", test_file="AppliancesEnergy_TEST.ts", regression = true)
64+
]
4965
)
5066

5167
function _registerrecipes()

src/datasets/fastaidatasets.jl

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,20 @@ struct TSClassificationDataset
1818
size
1919
end
2020

21+
struct MonashRegressionDataset
22+
name
23+
dset_id
24+
extension
25+
description
26+
checksum
27+
datadepname
28+
splits
29+
size
30+
end
31+
2132
const ROOT_URL_FastAI = "https://s3.amazonaws.com/fast-ai-"
2233
const ROOT_URL_TSClassification = "http://www.timeseriesclassification.com/Downloads"
34+
const ROOT_URL_MonashRegression = "https://zenodo.org/record/"
2335

2436
function FastAIDataset(name, subfolder, checksum = "";
2537
extension = "tgz",
@@ -40,13 +52,21 @@ function TSClassificationDataset(
4052
return TSClassificationDataset(name, extension, description, checksum, datadepname, size)
4153
end
4254

55+
function MonashRegressionDataset(
56+
name, dset_id, checksum = "";
57+
extension = "ts", description = "", splits = ["TRAIN", "TEST"],
58+
datadepname="", size="???")
59+
return MonashRegressionDataset(name, dset_id, extension, description, checksum, datadepname, splits, size)
60+
end
61+
4362
const DESCRIPTIONS = Dict(
4463
"imagenette" => "A subset of 10 easily classified classes from Imagenet: tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute",
4564
"imagewoof" => "A subset of 10 harder to classify classes from Imagenet (all dog breeds): Australian terrier, Border terrier, Samoyed, beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, dingo, golden retriever, Old English sheepdog",
4665
"food-101" => "101 food categories, with 101,000 images; 250 test images and 750 training images per class. The training images were not cleaned. All images were rescaled to have a maximum side length of 512 pixels.",
4766
"ECG5000" => "The original dataset for \"ECG5000\" is a 20-hour long ECG downloaded from Physionet. The name is BIDMC Congestive Heart Failure Database(chfdb) and it is record \"chf07\".",
4867
"AtrialFibrillation" => "This is a physionet dataset of two-channel ECG recordings has been created from data used in the Computers in Cardiology Challenge 2004, an open competition with the goal of developing automated methods for predicting spontaneous termination of atrial fibrillation (AF).",
49-
"NATOPS" => "The data is generated by sensors on the hands, elbows, wrists and thumbs. The data are the x,y,z coordinates for each of the eight locations. "
68+
"NATOPS" => "The data is generated by sensors on the hands, elbows, wrists and thumbs. The data are the x,y,z coordinates for each of the eight locations. ",
69+
"AppliancesEnergy" => "The goal of this dataset is to predict total energy usage in kWh of a house.",
5070
)
5171

5272
const DATASETCONFIGS = [
@@ -209,6 +229,9 @@ const DATASETCONFIGS = [
209229
TSClassificationDataset("AtrialFibrillation", "218abad67d58190a6daa1a27f4bd58ace6e18f80fb59fb2c7385f0d2d4b411a2", description = DESCRIPTIONS["AtrialFibrillation"], datadepname = "atrial", size = "226KB"),
210230
TSClassificationDataset("NATOPS", "57a8debeedadad7764bfa9c87b4300bd64a999ef95a98a6ee07a830c41de4aa1", description = DESCRIPTIONS["NATOPS"], datadepname = "natops", size = "5.1MB"),
211231

232+
# Monash regression datasets
233+
MonashRegressionDataset("AppliancesEnergy", 3902637, ["bbc65fcfa5c01655bb0ec7d558335d44b9c81979d7246f485bbc95a9759a5bff", "0e73676156bdce593059cd03785db9fd5616c1620ba87893b0f0903ef80f2248"],
234+
description = DESCRIPTIONS["AppliancesEnergy"], datadepname="appliances_energy", size = "15MB"),
212235
]
213236

214237
const DATASETS = [d.datadepname for d in DATASETCONFIGS]
@@ -258,6 +281,20 @@ function DataDeps.DataDep(d::TSClassificationDataset)
258281
)
259282
end
260283

284+
function DataDeps.DataDep(d::MonashRegressionDataset)
285+
remote_paths = [ "https://zenodo.org/record/$(d.dset_id)/files/$(d.name)_$split.ts" for split in d.splits]
286+
return DataDep(
287+
"fastai-$(d.datadepname)",
288+
"""
289+
"$(d.name)" from the Monash, UEA & UCR Time Series Extrinsic Regression Repository (http://tseregression.org)
290+
$(d.description)
291+
Download size: $(d.size)
292+
""",
293+
remote_paths,
294+
d.checksum
295+
)
296+
end
297+
261298
function initdatadeps()
262299
for d in DATASETCONFIGS
263300
DataDeps.register(DataDep(d))

0 commit comments

Comments
 (0)