energy-code-release-2020/0_make_dataset/2_construct_regression_ready_data.do at 97d4e5e796754267ea48b5df517d2436e6bfe014 · ClimateImpactLab/energy-code-release-2020 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
/*

Purpose: Master Do File for Analysis Dataset Construction
(Takes the cleaned dataset from IEA_merged_long*.dta to GMFD_*_regsort.dta)

Step 1) Construct reporting regimes and drop data according to selected coded issues
Step 2) Match product specific climate data with product
Step 3) Identify income spline knot location by constructing two income groups for each product
Step 4) Perform Final Cleaning Steps before first differenced interacted variable construction
	* Classify countries within 1 of 13 UN regions
	* Classify countries in income deciles and groups
Step 5) Construct First Differenced Interacted Variables

*/

/////////////// SESSION SET UP ///////////////////////////////////////////////////////////////////

clear all
set more off

// The xtile() egen function used in Step 3 ships with the SSC package egenmore.
// Only contact SSC when that function is not already on the adopath, so the
// script also runs on machines without internet access. If the check fails
// for any reason we fall back to the unconditional install.
capture which _gxtile
if _rc {
	qui ssc inst egenmore
}

macro drop _all
pause off

// user-written command; presumably defines the global REPO used below -- TODO confirm
cilpath

/////////////// SET UP USER SPECIFIC PATHS //////////////////////////////////////////////////////

// path to energy-code-release repo

global root "$REPO/energy-code-release-2020"

/////////////////////////////////////////////////////////////////////////////////////////////////

******Set Script Toggles********************************************************

// What model do you want? TINV_clim or TINV_clim_EX
// The model name is embedded in the names of the files saved by this script.
global model "TINV_clim"
local model $model

*************************************************************************
* Step 1) Construct FE regimes and drop data according to specification
*************************************************************************

// Run the issue-fix script (presumably loads the merged IEA data and applies
// the coded fixes -- confirm against 1_issue_fix_v2.do)
do "$root/0_make_dataset/merged/1_issue_fix_v2.do"

// The flow labelled COMPILE is renamed OTHERIND (OTHERIND = TOTOTHER + TOTIND).
// Afterwards only that flow and the two products used in this specification are retained.
replace flow = "OTHERIND" if flow == "COMPILE"
keep if flow == "OTHERIND" & (product == "other_energy" | product == "electricity")

*************************************************************************
* Step 2) Match Product Specific Climate Data with respective product
*************************************************************************

* See the climate data construction code for the issues that lead to different
* climate data for different products.
* For other_energy rows, every generic climate variable is overwritten with its
* "_other" counterpart; electricity rows keep the generic values.

* temperature polynomial terms (orders 1-4)
forvalues k = 1/4 {
	replace temp`k'_GMFD = temp`k'_other_GMFD if product == "other_energy"
}

* precipitation and polyAbove / polyBelow terms (orders 1-2)
foreach stub in precip polyAbove polyBelow {
	forvalues k = 1/2 {
		replace `stub'`k'_GMFD = `stub'`k'_other_GMFD if product == "other_energy"
	}
}

* time-invariant cooling / heating degree days
foreach dd in cdd20 hdd20 {
	replace `dd'_TINV_GMFD = `dd'_other_TINV_GMFD if product == "other_energy"
}


***********************************************************************************************************************
* Step 3) Identify income spline knot location by constructing two income groups for each product
***********************************************************************************************************************

//Part A) Prepare Dataset for Income group construction by ensuring only data included in regression remains in dataset
// (the income quantiles in Part B are computed on whatever rows survive this part)

	//Keep only observations we actually have data for:
	//rows with a missing outcome (load_pc), income (lgdppc_MA15) or first temperature term are removed
	drop if load_pc == . | lgdppc_MA15 == . | temp1_GMFD == .


	// zero energy consumption for electricity or other energy for TOTOTHER and TOTIND deemed infeasible -> drop observations
	drop if load_pc == 0

	//generate reporting regimes: one id per country x FEtag x flow x product combination
	egen region_i = group(country FEtag flow product)
	sort region_i year
	//declare the data a panel with region_i as the unit and year as the time variable
	tset region_i year

	//Organize variables (key variables first, followed by all GMFD climate variables)
	order country year flow product load_pc lgdppc_MA15 pop FEtag *GMFD*

//Part B) Construct Income Groups

	// Everything between preserve and restore works on a country-year copy of
	// the data; the full panel comes back at -restore-.
	preserve

		// one row per country-year
		duplicates drop country year, force

		// quantile ids: income deciles (gpid), climate terciles (tpid), income terciles (tgpid)
		qui egen gpid=xtile(lgdppc_MA15), nq(10)
		pause
		qui egen tpid=xtile(cdd20_TINV_GMFD), nq(3)
		qui egen tgpid=xtile(lgdppc_MA15), nq(3)

		// flip tpid (1 <-> 3) so that the hottest tercile comes first
		qui replace tpid = 4 - tpid

		// Two large income groups per product; the decile at which the groups
		// split (the knot location) differs by product:
		//   electricity : deciles 1-6 -> 1, deciles 7-10 -> 2
		//   other_energy: deciles 1-2 -> 1, deciles 3-10 -> 2
		qui generate largegpid_electricity = cond(inrange(gpid, 1, 6), 1, cond(inrange(gpid, 7, 10), 2, .))
		qui generate largegpid_other_energy = cond(inrange(gpid, 1, 2), 1, cond(inrange(gpid, 3, 10), 2, .))

		// year centered on 1971
		gen cyear = year - 1971

		// distance in years from 1991 (for the piecewise linear time interaction)
		gen pyear = abs(year - 1991)

		// distance in years from 1980 (for the post-1980 linear time interaction)
		gen p80yr = abs(year - 1980)

		// drop everything that is not needed in the break data file
		keep country *year p80yr lgdppc_MA15 cdd20_TINV_GMFD hdd20_TINV_GMFD gpid tpid tgpid large*

		// cell averages used for plotting
		// mean CDD and HDD within each climate tercile
		foreach v in cdd20_TINV_GMFD hdd20_TINV_GMFD {
			local tag = upper(substr("`v'", 1, 3))
			qui egen avg`tag'_tpid = mean(`v'), by(tpid)
		}
		// mean log income within each income tercile and within each climate tercile
		foreach cell in tgpid tpid {
			qui egen avgInc_`cell' = mean(lgdppc_MA15), by(`cell')
		}

		// maximum log income within each income decile - this is needed for configs
		qui egen maxInc_gpid = max(lgdppc_MA15), by(gpid)

		// maximum log income within each product specific large income group
		foreach prod in other_energy electricity {
			qui egen maxInc_largegpid_`prod' = max(lgdppc_MA15), by(largegpid_`prod')
		}


		// write the country-year break data to disk; Step 4 merges it back in
		local break_data "$root/data/break_data_`model'.dta"
		save "`break_data'", replace

	restore

***********************************************************************************************************************
*Step 4) Perform Final Cleaning Steps
***********************************************************************************************************************

//Merge in income group definitions
// Only matched rows are kept. The file name is quoted (as it is in the save
// call that wrote it) so the merge also works when $root contains spaces.
merge m:1 country year using "`break_data'", nogen keep(3)
sort gpid

//Generate product specific large income groups
// each row takes the grouping that belongs to its own product
gen largegpid = largegpid_electricity if product == "electricity"
replace largegpid = largegpid_other_energy if product == "other_energy"
drop largegpid_electricity largegpid_other_energy

//Generate dummy variable by income decile and group
// ind1, ind2, ... : one dummy per value of gpid
tab gpid, gen(ind)

// Three-group income dummies built from the decile dummies:
// group1 (indepic1): decile 1-6
// group2 (indepic2): decile 7-8
// group3 (indepic3): decile 9-10
gen indepic1 = 0
gen indepic2 = 0
gen indepic3 = 0
replace indepic2 = 1 if (ind7 == 1 | ind8 == 1)
replace indepic3 = 1 if (ind9 == 1 | ind10 == 1)
// group1 is the residual: every row that is in neither group2 nor group3
replace indepic1 = 1 if (indepic2 == 0 & indepic3 == 0)

// largeind1, largeind2, ... : one dummy per value of the product specific large income group
tab largegpid, gen(largeind)

*********************************************************
// Flag countries that are in the high income group in all years
// (used by the coldsidep80highinc regression)

// 1 for electricity rows that fall in large income group 2, 0 for every other row
gen largeind_electricity = (largeind2 == 1 & product == "electricity")

// per country: number of electricity rows, number of high income electricity rows,
// and the first and last year observed
// NOTE(review): first_year / last_year are taken over all rows of a country,
// not only its electricity rows -- confirm this is intended
bysort country: egen nobs_electricity = total(product == "electricity")
bysort country: egen nobs_largeind = total(largeind_electricity == 1)
bysort country: egen first_year = min(year)
bysort country: egen last_year = max(year)

// 1 on electricity rows of countries whose electricity rows are all high income
// and whose data run from 1971 to 2010; 0 everywhere else
gen largeind_allyears = (product == "electricity" & nobs_electricity == nobs_largeind & first_year == 1971 & last_year == 2010)
// complement of the flag above
gen largeind_notallyears = 1 - largeind_allyears

*********************************************************

//Generate sector and fuel dummies

* product dummies and numeric product id (1 = electricity, 2 = other_energy)
tab product, gen(indp)
egen product_i = group(product)

* flow dummies and numeric flow id
* (only 1 sector, so this step exists due to path dependency)
tab flow, gen(indf)
egen flow_i = group(flow)


* time period dummies for interaction
** piecewise linear interaction: 1 from 1991 onwards, 0 before
gen indt = (year >= 1991)

** post 1980 linear interaction: 1 from 1980 onwards, 0 before
gen indp80 = (year >= 1980)

** decades interaction: 0 before 1980, 1 for 1980-1989, 2 for 1990-1999, 3 from 2000 onwards
gen indd = (year >= 1980) + (year >= 1990) + (year >= 2000)


// Classify world into 13 regions based on UN World Regions Classifications (for fixed effect... reference Temperature Response of Energy Consumption Section )

**Clean the region data**
// Build a country -> region lookup from the UNSD methodology file. The panel
// in memory is set aside with preserve and comes back at restore.
preserve
insheet using "$root/data/UNSD — Methodology.csv", comma names clear
generate subregionid=.
// The assignments below are applied in order; keep the order when editing.
replace subregionid=1 if regionname=="Oceania"
replace subregionid=2 if subregionname=="Northern America"
replace subregionid=3 if subregionname=="Northern Europe"
replace subregionid=4 if subregionname=="Southern Europe"
replace subregionid=5 if subregionname=="Western Europe"
replace subregionid=6 if subregionname=="Eastern Europe" | subregionname=="Central Asia"
replace subregionid=7 if subregionname=="Eastern Asia"
replace subregionid=8 if subregionname=="South-eastern Asia"
replace subregionid=9 if intermediateregionname=="Caribbean" | intermediateregionname=="Central America"
replace subregionid=10 if intermediateregionname=="South America"
replace subregionid=11 if subregionname=="Sub-Saharan Africa"
replace subregionid=12 if subregionname=="Northern Africa" | subregionname=="Western Asia"
replace subregionid=13 if subregionname=="Southern Asia"
// rows that received none of the 13 ids are not needed
drop if subregionid==.
keep isoalpha3code subregionid subregionname
// relabel the ids that pool several UN (sub)regions
replace subregionname="Oceania" if subregionid==1
replace subregionname="Caribbean and Central America" if subregionid==9
replace subregionname="South America" if subregionid==10
replace subregionname="Central Asia and Eastern Europe" if subregionid==6
replace subregionname="Western Asia and Northern Africa" if subregionid==12
rename isoalpha3code country
tempfile subregion
// the temporary file name is quoted because the temp directory may contain spaces
save "`subregion'", replace
restore

// Attach the region to every row of the panel. Rows that exist only in the
// lookup are discarded; panel rows without a match are kept.
merge m:1 country using "`subregion'", keep(master match) nogenerate

// Country codes that get their region assigned by hand
replace subregionid = 6 if country=="FSUND"
replace subregionid = 4 if country=="YUGOND"
replace subregionid = 7 if country=="TWN"
replace subregionid = 4 if country=="XKO"
replace subregionname = "Central Asia and Eastern Europe" if country == "FSUND"
replace subregionname = "Southern Europe" if country == "YUGOND"
replace subregionname = "Eastern Asia" if country == "TWN"
replace subregionname = "Southern Europe" if country=="XKO"

***********************************************************************************************************************
* Step 5) Construct First Differenced Interacted Variables
***********************************************************************************************************************
// Run the script that builds the first differenced interacted variables on the data in memory
do "$root/0_make_dataset/merged/2_construct_FD_interacted_variables.do"
// Write the regression-ready dataset; the file name carries the model chosen in the script toggles
save "$root/data/GMFD_`model'_regsort.dta", replace