Skip to content

Commit 50c3684

Browse files
authored
climate ver. 1.2.9
* fix: meteo_imgw_daily * fix: imgw changes * fix: meteo hourly and monthly * fix: handle status in meteo_imgw * fix: meteo_imgw labels * fix: unit tests and adjust imgw changes * climate rc 1.2.9 * simplify single-station test * fix: unit test
1 parent 0277243 commit 50c3684

14 files changed

+283
-211
lines changed

.github/workflows/test-coverage.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ jobs:
1515
runs-on: ubuntu-latest
1616
env:
1717
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18+
NOT_CRAN: true # to enforce also tests that are skipped on cran and reduce code coverage
1819

1920
steps:
2021
- uses: actions/checkout@v4

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: climate
22
Title: Interface to Download Meteorological (and Hydrological) Datasets
3-
Version: 1.2.8
3+
Version: 1.2.9
44
Authors@R: c(person(given = "Bartosz",
55
family = "Czernecki",
66
role = c("aut", "cre"),

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
# climate 1.2.9
2+
3+
* fixes for corrupted header files in `meteo_imgw_` family of functions due to changes in the IMGW-PIB repository
4+
5+
16
# climate 1.2.8
27

38
* speeding up selective download for given station names in `meteo_imgw_*` and `hydro_imgw_daily()` functions that simultaneously reduce use of memory

R/clean_metadata_meteo.R

Lines changed: 6 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -16,34 +16,12 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
1616
test_url(link = address, output = temp)
1717
a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE,
1818
fileEncoding = "CP1250")$V1
19-
a = gsub(a, pattern = "\\?", replacement = "")
20-
a = stringi::stri_trans_general(a, 'LATIN-ASCII')
2119

22-
# additional workarounds for mac os but not only...
23-
a = gsub(x = a, pattern = "'", replacement = "")
24-
a = gsub(x = a, pattern = "\\^0", replacement = "")
25-
a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE)
26-
length_char = max(nchar(a$V1), na.rm = TRUE)
27-
28-
if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly
29-
if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily
30-
if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly
31-
if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly
32-
33-
field = substr(a$V1, length_char - 3, length_char)
34-
35-
if (rank == "synop" && interval == "monthly") {
36-
length_char = as.numeric(names(sort(table(nchar(a$V1)), decreasing = TRUE)[1])) + 2
37-
field = substr(a$V1, length_char - 3, length_char + 2)
38-
}
39-
40-
a$field1 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[1]))))
41-
a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))
42-
43-
a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))
44-
a$V1 = gsub(x = a$V1, pattern = "* ", "")
45-
46-
a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status
47-
colnames(a)[1] = "parameters"
20+
inds = grepl("^[A-Z]{2}.{5}", a)
21+
22+
code = trimws(substr(a, 1, 7))[inds]
23+
name = trimws(substr(a, 8, nchar(a)))[inds]
24+
a = data.frame(parameters = code, label = name)
25+
a$label = stringi::stri_trans_general(a$label, 'LATIN-ASCII')
4826
return(a)
4927
}

R/meteo_imgw_daily.R

Lines changed: 66 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -148,42 +148,45 @@ meteo_imgw_daily_bp = function(rank,
148148
file1 = paste(temp2, dir(temp2), sep = "/")[1]
149149
data1 = imgw_read(translit, file1)
150150
colnames(data1) = meta[[1]]$parameters
151-
data1$`Nazwa stacji` = trimws(data1$`Nazwa stacji`)
151+
for (labs in seq_along(meta[[1]]$parameters)) {
152+
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
153+
}
154+
data1$POST = trimws(data1$POST)
152155

153156
file2 = paste(temp2, dir(temp2), sep = "/")[2]
154157
if (file.exists(file2)) {
155158
data2 = imgw_read(translit, file2)
156159
colnames(data2) = meta[[2]]$parameters
157-
data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`)
160+
for (labs in seq_along(meta[[2]]$parameters)) {
161+
attr(data2[[labs]], "label") = meta[[2]]$label[[labs]]
162+
}
163+
data2$POST = trimws(data2$POST)
158164
} else {
159165
data2 = head(data1, 0)[, 1:min(5, ncol(data1))]
160-
data2$`Nazwa stacji` = trimws(data2$`Nazwa stacji`)
166+
data2$POST = trimws(data2$POST)
161167
}
162168

163169
unlink(c(temp, temp2))
164170

165-
# remove statuses if not needed:
166-
if (status == FALSE) {
167-
data1[grep("^Status", colnames(data1))] = NULL
168-
data2[grep("^Status", colnames(data2))] = NULL
169-
}
170-
171-
ttt = merge(data1,
171+
data.table::setDT(data1)
172+
data.table::setDT(data2)
173+
174+
ttt = merge(
175+
data1,
172176
data2,
173-
by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
177+
by = c("NSP", "ROK", "MC", "DZ"),
174178
all.x = TRUE
175179
)
176-
177-
ttt = ttt[order(ttt$`Nazwa stacji.x`, ttt$Rok, ttt$Miesiac, ttt$Dzien), ]
180+
181+
data.table::setorder(ttt, POST.x, ROK, MC, DZ)
178182

179183
if (!is.null(station)) {
180-
all_data[[length(all_data) + 1]] = ttt[ttt$`Nazwa stacji.x` %in% station, ]
184+
all_data[[length(all_data) + 1]] = ttt[ttt$POST.x %in% station, ]
181185
} else {
182186
all_data[[length(all_data) + 1]] = ttt
183187
}
184188
} # end of looping for zip archives
185189
} # end of if statement for SYNOP stations
186-
187190
######################
188191
###### KLIMAT: #######
189192
if (rank == "climate") {
@@ -220,7 +223,10 @@ meteo_imgw_daily_bp = function(rank,
220223
if (!is.null(csv_data)) {
221224
csv_data = convert_encoding(csv_data)
222225
colnames(csv_data) = meta[[1]]$parameters
223-
csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`)
226+
for (labs in seq_along(meta[[1]]$parameters)) {
227+
attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]]
228+
}
229+
csv_data$POST = trimws(csv_data$POST)
224230
}
225231
return(csv_data)
226232
}
@@ -229,8 +235,8 @@ meteo_imgw_daily_bp = function(rank,
229235
if (is.data.frame(d)) {
230236
data1 = d
231237
colnames(data1) = meta[[1]]$parameters
232-
if (status == FALSE) {
233-
data1[grep("^Status", colnames(data1))] = NULL
238+
for (labs in seq_along(meta[[1]]$parameters)) {
239+
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
234240
}
235241
}
236242

@@ -239,27 +245,25 @@ meteo_imgw_daily_bp = function(rank,
239245
file1 = paste(temp2, dir(temp2), sep = "/")[1]
240246
data1 = imgw_read(translit, file1)
241247
colnames(data1) = meta[[1]]$parameters
248+
for (labs in seq_along(meta[[1]]$parameters)) {
249+
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
250+
}
242251

243252
file2 = paste(temp2, dir(temp2), sep = "/")[2]
244253
if (file.exists(file2)) {
245254
data2 = imgw_read(translit, file2)
246255
colnames(data2) = meta[[2]]$parameters
247-
}
248-
}
249-
250-
# remove statuses
251-
if (status == FALSE) {
252-
data1[grep("^Status", colnames(data1))] = NULL
253-
if (file.exists(file2)) {
254-
data2[grep("^Status", colnames(data2))] = NULL
256+
for (labs in seq_along(meta[[2]]$parameters)) {
257+
attr(data2[[labs]], "label") = meta[[2]]$label[[labs]]
258+
}
255259
}
256260
}
257261

258262
unlink(c(temp, temp2))
259263
if (file.exists(file2)) {
260264
all_data[[length(all_data) + 1]] = merge(data1,
261265
data2,
262-
by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
266+
by = c("NSP", "ROK", "MC", "DZ"),
263267
all.x = TRUE
264268
)
265269
} else {
@@ -304,16 +308,19 @@ meteo_imgw_daily_bp = function(rank,
304308
csv_data = read.table(data, header = FALSE, stringsAsFactors = FALSE, sep = ",", encoding = "CP1250")
305309
csv_data = convert_encoding(csv_data)
306310
colnames(csv_data) = meta[[1]]$parameters
307-
csv_data$`Nazwa stacji` = trimws(csv_data$`Nazwa stacji`)
311+
for (labs in seq_along(meta[[1]]$parameters)) {
312+
attr(csv_data[[labs]], "label") = meta[[1]]$label[[labs]]
313+
}
314+
csv_data$POST = trimws(csv_data$POST)
308315
return(csv_data)
309316
}
310317
)
311318

312319
if (is.data.frame(d)) {
313320
data1 = d
314321
colnames(data1) = meta[[1]]$parameters
315-
if (status == FALSE) {
316-
data1[grep("^Status", colnames(data1))] = NULL
322+
for (labs in seq_along(meta[[1]]$parameters)) {
323+
attr(data1[[labs]], "label") = meta[[1]]$label[[labs]]
317324
}
318325
}
319326

@@ -322,74 +329,73 @@ meteo_imgw_daily_bp = function(rank,
322329
file1 = paste(temp2, dir(temp2), sep = "/")[1]
323330
data1 = imgw_read(translit, file1)
324331
colnames(data1) = meta[[1]]$parameters
325-
# remove status
326-
if (status == FALSE) {
327-
data1[grep("^Status", colnames(data1))] = NULL
328-
}
329332
} # end of corrupted zips
330333
unlink(c(temp, temp2))
331334
all_data[[length(all_data) + 1]] = data1
332335
} # end of loop for zip files
333336
} # end of if statement for climate stations
334337
} # end of looping over catalogs
335338

336-
all_data = as.data.frame(data.table::rbindlist(all_data, fill = TRUE))
339+
all_data = data.table::rbindlist(all_data, fill = TRUE)
337340

338341
# fix order of columns if needed and entries in stations' names if more than 1 available:
339-
col_inds = grep(pattern = "Nazwa stacji", colnames(all_data), value = TRUE)
342+
col_inds = grep(pattern = "POST", colnames(all_data), value = TRUE)
343+
340344
if (length(col_inds) > 1) {
341-
all_data$`Nazwa stacji` = apply(all_data[, col_inds], 1, function(x) na.omit(unique(x))[1])
342-
all_data$`Nazwa stacji.x` = NULL
343-
all_data$`Nazwa stacji.y` = NULL
344-
if (colnames(all_data)[ncol(all_data)] == "Nazwa stacji") { # re-order columns if needed
345-
all_data = all_data[, c(1, ncol(all_data), 2:(ncol(all_data) - 1))]
345+
all_data$POST = apply(
346+
all_data[, col_inds, with = FALSE],
347+
1,
348+
function(x) na.omit(unique(x))[1]
349+
)
350+
all_data$POST.x = NULL
351+
all_data$POST.y = NULL
352+
if (colnames(all_data)[ncol(all_data)] == "POST") { # re-order columns if needed
353+
data.table::setcolorder(all_data, c(1, ncol(all_data), 2:(ncol(all_data) - 1)))
346354
}
347355
}
348356

349357
if (coords) {
350-
all_data = merge(climate::imgw_meteo_stations[, 1:3],
358+
all_data = merge(setDT(climate::imgw_meteo_stations[, 1:3]),
351359
all_data,
352360
by.x = "id",
353-
by.y = "Kod stacji",
361+
by.y = "NSP",
354362
all.y = TRUE
355363
)
356364
}
357365

358-
# add station rank:
359-
rank_code = switch(rank,
360-
synop = "SYNOPTYCZNA",
361-
climate = "KLIMATYCZNA",
362-
precip = "OPADOWA"
363-
)
364-
all_data = cbind(data.frame(rank_code = rank_code), all_data)
365-
366-
all_data = all_data[all_data$Rok %in% year, ] # clip only to selected years
366+
all_data = all_data[all_data$ROK %in% year, ] # clip only to selected years
367367

368368
# station selection and names cleaning:
369369
if (!is.null(station)) {
370370
if (is.character(station)) {
371-
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$`Nazwa stacji`))))))
371+
inds = unique(as.numeric(unlist(sapply(station, function(x) grep(pattern = x, x = trimws(all_data$POST))))))
372372
if (any(is.na(inds)) || length(inds) == 0) {
373373
env$logs = c(
374374
env$logs,
375375
paste("At least one of selected station(s) is not available in the database. Returning all available stations")
376376
)
377377
} else {
378-
all_data = all_data[inds, ]
378+
all_data = all_data[inds]
379379
}
380380
}
381381
}
382-
all_data$`Nazwa stacji` = trimws(all_data$`Nazwa stacji`)
383-
382+
all_data$POST = trimws(all_data$POST)
383+
384384
# sort output
385-
if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) {
386-
all_data = all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
385+
if (sum(grepl(x = colnames(all_data), pattern = "NSP"))) {
386+
all_data = all_data[order(all_data$NSP, all_data$ROK, all_data$MC, all_data$DZ), ]
387387
} else {
388-
all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
388+
all_data = all_data[order(all_data$id, all_data$ROK, all_data$MC, all_data$DZ), ]
389+
}
390+
391+
# remove status:
392+
if (status == FALSE) {
393+
all_data = remove_status(all_data)
389394
}
390395

391396
# remove duplicates and shorten colnames
392-
all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
397+
# turned off temporarily:
398+
# all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
393399
rownames(all_data) = NULL
394400

395401
# check if there any messages gathered in env$logs and if it is not empty then print them:

0 commit comments

Comments
 (0)