Skip to content

Commit 9a5e33a

Browse files
authored
Merge pull request #3517 from katherineanne21/develop
Python Irrigation files
2 parents d43f6c7 + e24b994 commit 9a5e33a

10 files changed

+1123
-0
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Thu Mar 6 13:59:07 2025
5+
6+
@author: katherineanne
7+
"""
8+
# %% Import modules
9+
10+
import requests
11+
import numpy as np
12+
from netCDF4 import Dataset, num2date
13+
import matplotlib.pyplot as plt
14+
import pandas as pd
15+
import os
16+
from datetime import datetime, date, timedelta
17+
import pyarrow as pa
18+
import pyarrow.parquet as pq
19+
import pyarrow.dataset as ds
20+
import CCMMF_Irrigation_DataDownload
21+
import CCMMF_Irrigation_CalcVis
22+
import CCMMF_Irrigation_Events
23+
24+
25+
# %% Define multi use variables

# Years that every location should have data for
years = [*range(2016, 2026)]

# Root folder that holds every irrigation input and output
main_folder = '/projectnb/dietzelab/ccmmf/management/irrigation/'

# Folder that receives the per-location water balance csv files
csv_folder = f'{main_folder}WaterBalanceCSV/'

# Root path of the parquet dataset
pq_filename = f'{main_folder}CCMMF_Irrigation_Parquet'
38+
39+
# %% Loading data

# Nothing extra needs downloading unless the CHIRPS check below says otherwise
days_to_download = 0

# Read the whole hive-partitioned parquet dataset into one pandas dataframe
dataset = ds.dataset(pq_filename, format="parquet", partitioning='hive')
table = dataset.to_table()
parquet_df = table.to_pandas()

# One dataframe per location, keyed by the value of the location column
data_dict = dict(iter(parquet_df.groupby("location")))
50+
51+
# %% Check current date with most current downloaded data

# When this year's CHIRPS file does not reach today's date it is deleted, so
# that the new data for the current date is read in instead of the stale file.
cur_year = datetime.now().year
today = datetime.now().date()
chirps_filename = f'{main_folder}chirps-v2.0.{cur_year}.days_p05.nc'

if os.path.exists(chirps_filename):

    # Latest time stamp stored in the file, reduced to a plain calendar date
    with Dataset(chirps_filename, 'r') as nc:
        time_var = nc.variables['time']
        most_recent = max(num2date(time_var[:], units=time_var.units))
        most_recent_date = date(most_recent.year, most_recent.month, most_recent.day)

    # Only act when the file is not up to date
    days_behind = (today - most_recent_date).days
    if days_behind != 0:
        print('Deleted')
        days_to_download = days_behind
        os.remove(chirps_filename)
72+
73+
# %% Define locations

# Read in all lat lons and keep a single copy of any repeated row
df_lat_lon = pd.read_csv(f'{main_folder}design_points.csv').drop_duplicates()
80+
81+
# %% Iterate through locations and download data for each

# The script has always processed the first 35 design points. Keep that cap,
# but do not index past the end of the table when it holds fewer rows.
max_locations = 35

for row_number in range(min(max_locations, len(df_lat_lon))):

    # Load location data
    latitude = df_lat_lon['lat'].iloc[row_number]
    longitude = df_lat_lon['lon'].iloc[row_number]
    location = df_lat_lon['id'].iloc[row_number]

    # Create CSV filename
    csv_filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'

    if location in data_dict:

        df = data_dict[location]

        # If we have not downloaded data for today yet...
        if days_to_download != 0:
            # Download the days between the last saved CHIRPS date and today
            start_date = today - timedelta(days=days_to_download)
            new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
                latitude, longitude,
                [start_date.year, cur_year],
                csv_folder, start_date, today)

            # Concatenate with already saved data
            # NOTE(review): start_date equals the last saved date, so that day
            # may appear twice after the concat - confirm against the downloader.
            df = pd.concat([new_df, df], ignore_index=True)
            df = df.sort_values(by='time')
            data_dict[location] = df

            # Save data
            df.to_csv(csv_filename, index=False)

        # Check that all years have been read in
        df['time'] = pd.to_datetime(df['time'])
        df_years = df['time'].dt.year.unique().tolist()

        # Years we want that are not in the saved data. Extra years in the
        # saved data are ignored, so they must not trigger a download with an
        # empty year list. Sorted to give the downloader a deterministic order.
        not_saved_years = sorted(set(years) - set(df_years))

        if not_saved_years:

            # Download data and calculate for new years
            new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
                latitude, longitude,
                not_saved_years, csv_folder)

            # Concatenate with already saved data
            df = pd.concat([new_df, df], ignore_index=True)
            df = df.sort_values(by='time')
            data_dict[location] = df

            # Save data
            df.to_csv(csv_filename, index=False)

    # The location is not in the saved dictionary
    else:
        # Download and calculate every wanted year from scratch
        df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
            latitude, longitude,
            years, csv_folder)
        data_dict[location] = df

        # Save data
        df.to_csv(csv_filename, index=False)
147+
148+
# %% Create Event Files
149+
150+
# Pass the {location: dataframe} dictionary to the events module, which
# presumably writes one event file per location - confirm in CCMMF_Irrigation_Events.
CCMMF_Irrigation_Events.file_creation(data_dict)
151+
152+
# %% Write to parquet

# Every location is written separately, partitioned by location and year.
for location, loc_df in data_dict.items():
    loc_df['location'] = location
    table = pa.Table.from_pandas(loc_df)
    pq.write_to_dataset(
        table,
        root_path=pq_filename,
        partition_cols=['location', 'year'],
        # The data written here already contains everything that was read from
        # the dataset at the start of the script. The default behaviour adds
        # new files beside the old ones, which duplicates every row on each
        # run; replace the matching partitions instead.
        existing_data_behavior='delete_matching',
    )
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Wed Apr 23 14:46:51 2025
5+
6+
@author: krein21
7+
"""
8+
# %% Import modules
9+
10+
import numpy as np
11+
import matplotlib.pyplot as plt
12+
import pandas as pd
13+
14+
# %% Turn raw data into usable data
15+
16+
def water_balance(df_water_balance, LAT, LON):
    """Run a daily soil water balance and add the results as new columns.

    The balance starts at field capacity on the first row and is stepped
    forward one row at a time:

        W_t = W_t-1 + P_t + Irr_t - ET_t - Q_t

    where Irr_t = max(W_min - (W_t-1 + P_t - ET_t), 0) and
    Q_t = max((W_t-1 + P_t - ET_t) - WHC, 0).

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must hold the columns 'time', 'precip' and 'et', one row per time
        step and already in time order. It is modified in place.
    LAT, LON
        Only printed as a progress message.

    Returns
    -------
    pandas.DataFrame
        The same dataframe with missing 'et'/'precip' filled with 0 and the
        columns 'W_t', 'irr', 'runoff', 'year', 'week' and 'day_of_year'
        added. 'irr' and 'runoff' are NaN on the first row because no
        balance step is computed for it.
    """
    print(f'{LAT} {LON}')

    # Handle NAs
    df_water_balance['et'] = df_water_balance['et'].fillna(0)
    df_water_balance['precip'] = df_water_balance['precip'].fillna(0)

    # Constants
    WHC = 500  # units: mm
    W_min = 0.15 * WHC
    field_capacity = WHC / 2

    # Work on plain arrays: the recursion is positional, so it must not depend
    # on the dataframe's index labels (label-based writes on a non-default
    # index would append rows instead of updating them).
    precip = df_water_balance['precip'].to_numpy(dtype=float)
    et = df_water_balance['et'].to_numpy(dtype=float)
    n_rows = len(df_water_balance)

    soil_water = np.full(n_rows, field_capacity, dtype=float)
    irrigation = np.full(n_rows, np.nan)
    runoff = np.full(n_rows, np.nan)

    for step in range(1, n_rows):
        previous = soil_water[step - 1]

        # Balance before any irrigation or runoff: W_t-1 + P_t - ET_t
        W_t_initial = previous + precip[step] - et[step]

        # Irrigation tops the soil back up to W_min
        irr = max(W_min - W_t_initial, 0.0)

        # Runoff removes whatever exceeds the water holding capacity
        excess = max(W_t_initial - WHC, 0.0)

        soil_water[step] = previous + precip[step] + irr - et[step] - excess
        irrigation[step] = irr
        runoff[step] = excess

    # Assigning whole columns keeps the row count and index untouched, and the
    # columns exist even for empty or single-row input.
    df_water_balance['W_t'] = soil_water
    df_water_balance['irr'] = irrigation
    df_water_balance['runoff'] = runoff

    # Add year, day and week values
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
    df_water_balance['year'] = df_water_balance['time'].dt.year
    df_water_balance['week'] = df_water_balance['time'].dt.isocalendar().week
    df_water_balance['day_of_year'] = df_water_balance['time'].dt.dayofyear

    return df_water_balance
66+
67+
68+
# %% Time Series
69+
70+
def timeseries_graphs_API(df_water_balance, LAT, LON, YEAR):
    """Plot, save and show the water balance time series for one location.

    Cumulative sums of 'et', 'precip' and 'irr' are drawn against 'time',
    together with the 'runoff' column. The figure is saved as a PNG in the
    TimeseriesPNG folder, shown, and then closed.

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must hold the columns 'time', 'et', 'precip', 'irr' and 'runoff'.
        It is copied, so the caller's dataframe is left unchanged.
    LAT, LON
        Used in the plot title and the output file name.
    YEAR
        Used in the output file name.
    """
    # Copy first: the columns added below would otherwise trigger the pandas
    # slicing warning when a slice of a larger dataframe is passed in.
    df_water_balance = df_water_balance.copy()

    # Create cumulative sum columns
    df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
    df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
    df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()

    # Ensure time is dates
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])

    # Plot time series
    fig = plt.figure(figsize=(10, 5))
    plt.plot(df_water_balance['time'], df_water_balance['et_cumsum'], linestyle='dotted', lw=2.5, label='Evapotranspiration')
    plt.plot(df_water_balance['time'], df_water_balance['precip_cumsum'], linestyle='dashed', lw=2.5, label='Precipitation')
    plt.plot(df_water_balance['time'], df_water_balance['irr_cumsum'], linestyle='dashdot', lw=2.5, label='Irrigation')
    # NOTE(review): runoff is plotted as raw values, not as a cumulative sum
    # like the other three series - confirm this is intended.
    plt.plot(df_water_balance['time'], df_water_balance['runoff'], linestyle='solid', lw=2.5, label='Runoff')

    plt.xlabel('Date')
    plt.ylabel('Cumulative Sum of Evapotranspiration, \nPrecipitation, and Irrigation (mm)')
    plt.suptitle('Evapotranspiration and Precipitation Time Series in Central Valley CA')
    plt.title(f'(Lat: {LAT}, Lon: {LON})')
    plt.legend()
    plt.grid()

    # Save plot
    filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
    plt.savefig(filename)

    plt.show()

    # Close the figure so repeated calls (one per location and year) do not
    # keep every figure alive in memory.
    plt.close(fig)
102+
103+
def timeseries_graphs_GEE(df_water_balance, LAT, LON, YEAR):
    """Plot, save and show the water balance time series (GEE variant).

    Cumulative sums of 'et', 'precip' and 'irr' are drawn against 'time',
    together with the 'runoff' column. The figure is saved as a PNG in the
    TimeseriesPNG_GEE folder, shown, and then closed.

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must hold the columns 'time', 'et', 'precip', 'irr' and 'runoff'.
        It is copied, so the caller's dataframe is left unchanged.
    LAT, LON
        Used in the plot title and the output file name.
    YEAR
        Used in the output file name.
    """
    # Copy first: the columns added below would otherwise trigger the pandas
    # slicing warning when a slice of a larger dataframe is passed in.
    df_water_balance = df_water_balance.copy()

    # Create cumulative sum columns
    # NOTE(review): the y label says "Monthly", but this is a plain running
    # sum over the rows - presumably the input is monthly data; confirm.
    df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
    df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
    df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()

    # Ensure time is dates
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])

    # Plot time series
    fig = plt.figure(figsize=(10, 5))
    plt.plot(df_water_balance['time'], df_water_balance['et_cumsum'], linestyle='dotted', lw=2.5, label='Evapotranspiration')
    plt.plot(df_water_balance['time'], df_water_balance['precip_cumsum'], linestyle='dashed', lw=2.5, label='Precipitation')
    plt.plot(df_water_balance['time'], df_water_balance['irr_cumsum'], linestyle='dashdot', lw=2.5, label='Irrigation')
    # NOTE(review): runoff is plotted as raw values, not as a cumulative sum
    # like the other three series - confirm this is intended.
    plt.plot(df_water_balance['time'], df_water_balance['runoff'], linestyle='solid', lw=2.5, label='Runoff')

    plt.xlabel('Date')
    plt.ylabel('Monthly Cumulative Sum of Evapotranspiration, \nPrecipitation, and Irrigation (mm)')
    plt.suptitle('Evapotranspiration and Precipitation Time Series in Central Valley CA')
    plt.title(f'(Lat: {LAT}, Lon: {LON})')
    plt.legend()
    plt.grid()

    # Save plot
    filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG_GEE/CCMMR_GEE_cumsum_{YEAR}_{LAT}_{LON}.png'
    plt.savefig(filename)

    plt.show()

    # Close the figure so repeated calls (one per location and year) do not
    # keep every figure alive in memory.
    plt.close(fig)

0 commit comments

Comments
 (0)