Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions modules/data.remote/inst/Python/CCMMF_Irrigation_API.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 6 13:59:07 2025

@author: katherineanne
"""
# %% Import modules

import requests
import numpy as np
from netCDF4 import Dataset, num2date
import matplotlib.pyplot as plt
import pandas as pd
import os
from datetime import datetime, date, timedelta
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.dataset as ds
import CCMMF_Irrigation_DataDownload
import CCMMF_Irrigation_CalcVis
import CCMMF_Irrigation_Events


# %% Define multi use variables

# Root directory holding every irrigation input and output used by this script
main_folder = '/projectnb/dietzelab/ccmmf/management/irrigation/'

# Calendar years to process: 2016 through 2025 inclusive
years = [*range(2016, 2026)]

# Directory that receives the per-location water balance CSV files
csv_folder = f'{main_folder}WaterBalanceCSV/'

# Root path of the parquet dataset that stores all locations
pq_filename = f'{main_folder}CCMMF_Irrigation_Parquet'

# %% Loading data

# Number of days of new data to fetch; stays 0 unless a later step raises it
days_to_download = 0

# Open the hive-partitioned parquet dataset and pull all of it into memory
# NOTE(review): ds.dataset presumably fails when pq_filename does not exist
# yet, so a first-ever run may need the dataset created beforehand - confirm.
dataset = ds.dataset(pq_filename, format="parquet", partitioning='hive')
table = dataset.to_table()
parquet_df = table.to_pandas()

# One DataFrame per location id, keyed by the value of the "location" column
data_dict = dict(iter(parquet_df.groupby("location")))

# %% Check current date with most current downloaded data

# Delete the current CHIRPS file for this year when it is not up to date,
# so that the new data for the current date is read in afterwards.

# Read the clock once: taking the year and the date from two separate calls
# could give values from different years if the run crosses midnight on
# 31 December.
today = date.today()
cur_year = today.year
chirps_filename = f'{main_folder}chirps-v2.0.{cur_year}.days_p05.nc'

if os.path.exists(chirps_filename):

    # Find the newest time step stored in this year's file
    with Dataset(chirps_filename, 'r') as nc:
        time_var = nc.variables['time']
        dates = num2date(time_var[:], units=time_var.units)
        most_recent = max(dates)
        # Rebuild as a plain datetime.date so it can be compared with `today`
        most_recent_date = date(most_recent.year, most_recent.month, most_recent.day)

    # The netCDF file is closed at this point, so it can be removed
    if most_recent_date != today:
        print('Deleted')
        # Gap between the newest stored day and today, used by the download step
        days_to_download = (today - most_recent_date).days
        os.remove(chirps_filename)

# %% Define locations

# Load the design point table and discard rows that are exact repeats
df_lat_lon = pd.read_csv(f'{main_folder}design_points.csv').drop_duplicates()

# %% Iterate through locations and download data for each

# Upper bound on how many design points are processed in one run
MAX_LOCATIONS = 35

# Stop at the end of the table when it holds fewer rows than the cap, instead
# of raising IndexError on the first missing row.
for row_number in range(min(MAX_LOCATIONS, len(df_lat_lon))):

    # Load location data (positional access: the index is not reset after
    # duplicates were dropped)
    latitude = df_lat_lon['lat'].iloc[row_number]
    longitude = df_lat_lon['lon'].iloc[row_number]
    location = df_lat_lon['id'].iloc[row_number]

    # Create CSV filename
    csv_filename = f'{csv_folder}CCMMR_Water_Balance_{latitude}_{longitude}.csv'

    if location in data_dict:

        df = data_dict[location]

        # If we have not downloaded data for today yet...
        if days_to_download != 0:
            # Download new data, starting at the newest day already on disk
            start_date = today - timedelta(days=days_to_download)
            new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
                latitude, longitude,
                [start_date.year, cur_year],
                csv_folder, start_date, today)

            # Concatenate with already saved data
            df = pd.concat([new_df, df], ignore_index=True)
            df = df.sort_values(by='time')
            data_dict[location] = df

            # Save data
            df.to_csv(csv_filename, index=False)

        # Check that all years have been read in
        df['time'] = pd.to_datetime(df['time'])
        df_years = df['time'].dt.year.unique().tolist()

        # Years we want that are absent from the saved data. Extra years in
        # the saved data are deliberately ignored, so the download is gated on
        # this difference being non-empty rather than on the two sets being
        # unequal (which would call the downloader with an empty list).
        not_saved_years = sorted(set(years) - set(df_years))

        if not_saved_years:

            # Download data and calculate for new years
            new_df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
                latitude, longitude, not_saved_years, csv_folder)

            # Concatenate with already saved data
            df = pd.concat([new_df, df], ignore_index=True)
            df = df.sort_values(by='time')
            data_dict[location] = df

            # Save data
            df.to_csv(csv_filename, index=False)

    # The location is not in the saved dictionary
    else:
        # Download and calculate every requested year from scratch
        df = CCMMF_Irrigation_DataDownload.new_data_entry_API(
            latitude, longitude, years, csv_folder)
        data_dict[location] = df

        # Save data
        df.to_csv(csv_filename, index=False)

# %% Create Event Files

CCMMF_Irrigation_Events.file_creation(data_dict)

# %% Write to parquet

for location, loc_df in data_dict.items():
    # Make sure the partition column is present on every row
    loc_df['location'] = location
    table = pa.Table.from_pandas(loc_df)

    # data_dict already contains everything that was loaded from this dataset,
    # so each touched location/year partition is replaced rather than appended
    # to. With the default behaviour every run adds new, uniquely named files
    # next to the old ones and the stored rows are duplicated on the next load.
    pq.write_to_dataset(
        table,
        root_path=pq_filename,
        partition_cols=['location', 'year'],
        existing_data_behavior='delete_matching',
    )
134 changes: 134 additions & 0 deletions modules/data.remote/inst/Python/CCMMF_Irrigation_CalcVis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 23 14:46:51 2025

@author: krein21
"""
# %% Import modules

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# %% Turn raw data into usable data

def water_balance(df_water_balance, LAT, LON):
    """Run a row-by-row bucket water balance and add the results as columns.

    The frame is modified in place and also returned. Missing values in the
    'et' and 'precip' columns are replaced with 0 before the calculation.

    For every row after the first:
        W_t   = W_t-1 + P_t + Irr_t - ET_t - Q_t
        Irr_t = max(W_min - (W_t-1 + P_t - ET_t), 0)
        Q_t   = max((W_t-1 + P_t - ET_t) - WHC, 0)
    The first row starts at field capacity with zero irrigation and runoff.

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must contain the columns 'et', 'precip' and 'time'. Rows are processed
        in their current order, whatever the index labels are.
    LAT, LON :
        Only printed as a progress message.

    Returns
    -------
    pandas.DataFrame
        The same frame with 'W_t', 'irr', 'runoff', 'year', 'week' and
        'day_of_year' columns added.
    """
    print(f'{LAT} {LON}')

    # Handle NAs
    df_water_balance['et'] = df_water_balance['et'].fillna(0)
    df_water_balance['precip'] = df_water_balance['precip'].fillna(0)

    # Constants
    WHC = 500  # units: mm
    W_min = 0.15 * WHC
    field_capacity = WHC / 2

    # Work on plain arrays: this keeps reads and writes strictly positional
    # (mixing .iloc reads with .loc writes breaks on a non-default index) and
    # avoids one DataFrame write per cell.
    precip = df_water_balance['precip'].to_numpy(dtype=float)
    et = df_water_balance['et'].to_numpy(dtype=float)
    n_rows = len(df_water_balance)

    storage = np.full(n_rows, field_capacity, dtype=float)
    irrigation = np.zeros(n_rows, dtype=float)
    runoff = np.zeros(n_rows, dtype=float)

    for i in range(1, n_rows):
        # Storage before any irrigation or runoff: W_t-1 + P_t - ET_t
        provisional = storage[i - 1] + precip[i] - et[i]

        # Irrigate just enough to get back up to the minimum storage
        irrigation[i] = max(W_min - provisional, 0)

        # Anything above the water holding capacity leaves as runoff
        runoff[i] = max(provisional - WHC, 0)

        # W_t = W_t-1 + P_t + Irr_t - ET_t - Q_t
        storage[i] = storage[i - 1] + precip[i] + irrigation[i] - et[i] - runoff[i]

    # Arrays are assigned by position, so index labels do not matter
    df_water_balance['W_t'] = storage
    df_water_balance['irr'] = irrigation
    df_water_balance['runoff'] = runoff

    # Add year, day and week values
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])
    df_water_balance['year'] = df_water_balance['time'].dt.year
    df_water_balance['week'] = df_water_balance['time'].dt.isocalendar().week
    df_water_balance['day_of_year'] = df_water_balance['time'].dt.dayofyear

    return df_water_balance


# %% Time Series

def timeseries_graphs_API(df_water_balance, LAT, LON, YEAR):
    """Plot the water balance time series for one location and save it as a PNG.

    Evapotranspiration, precipitation and irrigation are drawn as running
    totals; runoff is drawn as stored in the frame. The figure is saved, shown
    and then closed.

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must contain the columns 'time', 'et', 'precip', 'irr' and 'runoff'.
        The caller's frame is not modified.
    LAT, LON :
        Used in the plot title and in the output file name.
    YEAR :
        Used in the output file name.
    """
    # Work on a copy: avoids the pandas slicing warning and keeps the helper
    # columns out of the caller's frame
    df_water_balance = df_water_balance.copy()

    # Create cumulative sum columns
    df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
    df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
    df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()

    # Ensure time is dates
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])

    # (column, line style, legend label) in drawing order
    # NOTE(review): runoff is the only series that is not accumulated although
    # the axis label speaks of cumulative sums - confirm this is intended.
    series_specs = [
        ('et_cumsum', 'dotted', 'Evapotranspiration'),
        ('precip_cumsum', 'dashed', 'Precipitation'),
        ('irr_cumsum', 'dashdot', 'Irrigation'),
        ('runoff', 'solid', 'Runoff'),
    ]

    # Plot time series
    fig = plt.figure(figsize=(10, 5))
    for column, line_style, label in series_specs:
        plt.plot(df_water_balance['time'], df_water_balance[column],
                 linestyle=line_style, lw=2.5, label=label)

    plt.xlabel('Date')
    plt.ylabel('Cumulative Sum of Evapotransipiration, \nPrecipitation, and Irrigation (mm)')
    plt.suptitle('Evapotransipiration and Precipitation Time Series in Central Valley CA')
    plt.title(f'(Lat: {LAT}, Lon: {LON})')
    plt.legend()
    plt.grid()

    # Save plot
    filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG/CCMMR_et_precip_irr_cumsum_{YEAR}_{LAT}_{LON}.png'
    plt.savefig(filename)

    plt.show()

    # Release the figure so repeated calls do not pile up open figures
    plt.close(fig)

def timeseries_graphs_GEE(df_water_balance, LAT, LON, YEAR):
    """Plot the GEE water balance time series for one location and save a PNG.

    Evapotranspiration, precipitation and irrigation are drawn as running
    totals; runoff is drawn as stored in the frame. The figure is saved, shown
    and then closed.

    Parameters
    ----------
    df_water_balance : pandas.DataFrame
        Must contain the columns 'time', 'et', 'precip', 'irr' and 'runoff'.
        The caller's frame is not modified.
    LAT, LON :
        Used in the plot title and in the output file name.
    YEAR :
        Used in the output file name.
    """
    # Work on a copy: avoids the pandas slicing warning and keeps the helper
    # columns out of the caller's frame
    df_water_balance = df_water_balance.copy()

    # Create cumulative sum columns
    df_water_balance['et_cumsum'] = df_water_balance['et'].cumsum()
    df_water_balance['precip_cumsum'] = df_water_balance['precip'].cumsum()
    df_water_balance['irr_cumsum'] = df_water_balance['irr'].cumsum()

    # Ensure time is dates
    df_water_balance['time'] = pd.to_datetime(df_water_balance['time'])

    # (column, line style, legend label) in drawing order
    # NOTE(review): runoff is the only series that is not accumulated although
    # the axis label speaks of cumulative sums - confirm this is intended.
    series_specs = [
        ('et_cumsum', 'dotted', 'Evapotranspiration'),
        ('precip_cumsum', 'dashed', 'Precipitation'),
        ('irr_cumsum', 'dashdot', 'Irrigation'),
        ('runoff', 'solid', 'Runoff'),
    ]

    # Plot time series
    fig = plt.figure(figsize=(10, 5))
    for column, line_style, label in series_specs:
        plt.plot(df_water_balance['time'], df_water_balance[column],
                 linestyle=line_style, lw=2.5, label=label)

    plt.xlabel('Date')
    plt.ylabel('Monthly Cumulative Sum of Evapotransipiration, \nPrecipitation, and Irrigation (mm)')
    plt.suptitle('Evapotransipiration and Precipitation Time Series in Central Valley CA')
    plt.title(f'(Lat: {LAT}, Lon: {LON})')
    plt.legend()
    plt.grid()

    # Save plot
    filename = f'/projectnb/dietzelab/ccmmf/management/irrigation/TimeseriesPNG_GEE/CCMMR_GEE_cumsum_{YEAR}_{LAT}_{LON}.png'
    plt.savefig(filename)

    plt.show()

    # Release the figure so repeated calls do not pile up open figures
    plt.close(fig)
Loading
Loading