Skip to content

Commit 443c2cf

Browse files
authored
Merge pull request #2 from smartEnergyResearchLab/feature/first_changes
Changed filenames and locations to work with the new (August) data
2 parents a46f5b4 + d6239bf commit 443c2cf

File tree

1 file changed

+13
-21
lines changed

1 file changed

+13
-21
lines changed

scripts/SERL_smart_meter_data_preparation_v2020_08.R

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,40 +12,32 @@ library(data.table)
1212
library(lubridate)
1313
library(stringr)
1414

15-
source("S:/ENERGINST_EaB_Project_17_SMRP/Analysis/July_data_release/functions.R")
16-
1715
# Define Input Variables --------------------------------------------------
1816
save_data <- TRUE
1917

20-
hh_file_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/SM data/Half-hourly Readings Aug2018-May2020/"
21-
daily_file_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/SM data/Daily Readings Aug2018-May2020/"
18+
hh_file_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/Observatory2020_08/Original/"
19+
daily_file_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/Observatory2020_08/Original/"
2220

2321
# Names for saving
24-
daily_file_format_removed <- "SERL_smart_meter_daily_v2020_07"
25-
hh_file_format_removed <- "SERL_smart_meter_hh_v2020_07"
26-
rt_file_format_removed <- "SERL_smart_meter_rt_summary_v2020_07"
27-
pp_summary_file_format_removed <- "SERL_participant_summary_v2020_07"
28-
saving_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/JulyStaticDataset/"
22+
daily_file_format_removed <- "SERL_smart_meter_daily_v2020_08"
23+
hh_file_format_removed <- "SERL_smart_meter_hh_v2020_08"
24+
rt_file_format_removed <- "SERL_smart_meter_rt_summary_v2020_08"
25+
pp_summary_file_format_removed <- "SERL_participant_summary_v2020_08"
26+
saving_location <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/Observatory2020_08/"
2927

3028
## Smart meter data
3129

3230
### Half-hourly
33-
raw_hh <- c("Half-Hourly Readings Aug2018-Feb2019.csv",
34-
"Half-Hourly Readings Mar2019-Jul2019.csv",
35-
"Half-Hourly Readings Aug2019-Dec2019.csv",
36-
"Half-Hourly Readings Jan2020-May2020.csv")
31+
raw_hh <- "Half-Hourly Readings Aug2018-Jul2020.csv"
3732

3833
### Daily
39-
raw_daily <- c("Daily Readings Aug-Dec 2018.csv",
40-
"Daily Readings Jan-Jun 2019.csv",
41-
"Daily Readings Jul-Dec 2019.csv",
42-
"Daily Readings Jan-May 2020.csv")
34+
raw_daily <- "Daily Readings Aug2018-Jul2020.csv"
4335

4436

4537
## Auxiliary files
46-
theoretical_dates_file <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Smart Meter Data Quality Reports/theoretical_table_2020-07-26.csv"
38+
theoretical_dates_file <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/Observatory2020_08/actualStart_20_08_10.csv"
4739

48-
collection_end_date <- ymd("2020-05-31")
40+
collection_end_date <- ymd("2020-07-31")
4941

5042
survey_file <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Pilot_survey/pilot_survey_data_1675.RData"
5143
participant_details_file <- "S:/ENERGINST_EaB_Project_17_SMRP/Data/Researcher data/SERL Participants Data 2020-07-09.csv"
@@ -479,7 +471,7 @@ determine.theoretical.read.dates <- function(theoretical_dates_file,
479471
setkey(consent_end_dates, PUPRN)
480472
theoretical_dates <- decommission_dates[theoretical_dates]
481473
theoretical_dates <- consent_end_dates[theoretical_dates]
482-
theoretical_dates[, theoreticalStart := ymd(actualStartDate)]
474+
theoretical_dates[, theoreticalStart := ymd(start)]
483475
theoretical_dates[, theoreticalEnd := pmin(WoC_CoT_effective_date - 1,
484476
dateDecommissioned,
485477
collection_end_date,
@@ -748,7 +740,7 @@ get.valid.read.dates <- function(hh, daily, readDates, error_codes_hh, error_cod
748740

749741
handle.duplicate.listings <- function(t_d = theoretical_dates) {
750742
# Only keep the earliest start date. setkey() sorts rows in ascending order (earliest to latest)
751-
setkey(t_d, actualStartDate)
743+
setkey(t_d, start)
752744
# unique() keeps the first row per PUPRN/deviceType/readType, which after sorting is the earliest start date
753745
t_d_unique <- unique(t_d, by = c("PUPRN", "deviceType", "readType"))
754746
return(t_d_unique)

0 commit comments

Comments
 (0)