Skip to content

Commit 8beb7c4

Browse files
Addressing Monte-Carlo suggestion: INPUT variograms (NREL#197). Variograms are now created. Also addressed an issue where multiple processes were writing to the output file at the same time and corrupting parts of it; writes are now serialized using file locking. The run is slower, but the results are better.
1 parent 2cab2d0 commit 8beb7c4

File tree

2 files changed

+66
-18
lines changed

2 files changed

+66
-18
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ requirements.txt
1111
*~
1212
*.bak
1313
.DS_Store
14+
.lock
1415

1516
# C extensions
1617
*.so

src/geophires_monte_carlo/MC_GeoPHIRES3.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import matplotlib.pyplot as plt
2323
import numpy as np
2424
import pandas as pd
25+
from pylocker import Locker
2526
from rich.console import Console
2627
from rich.table import Table
2728

@@ -291,8 +292,13 @@ def get_output(output):
291292
result_s = result_s.strip(' ').strip(',') # get rid of last space and comma
292293
result_s += '\n'
293294

294-
with open(output_file, 'a') as f:
295-
f.write(result_s)
295+
# write the result to the output file under a file lock so that concurrent writers do not corrupt it
296+
lock_pass = str(uuid.uuid1())
297+
FL = Locker(filePath=output_file, lockPass=lock_pass, timeout=10, mode='a')
298+
with FL as r:
299+
acquired, code, fd = r
300+
if fd is not None:
301+
fd.write(result_s)
296302

297303

298304
def main(command_line_args=None):
@@ -440,19 +446,14 @@ def main(command_line_args=None):
440446
result_count = result_count + 1
441447
if '-9999.0' not in line and len(s) > 1:
442448
line = line.strip()
443-
if len(line) > 3:
449+
if len(line) > 10:
444450
line, sep, tail = line.partition(', (') # strip off the Input Variable Values
445451
line = line.replace('(', '').replace(')', '') # strip off the ()
446452
results.append([float(y) for y in line.split(',')])
447453
else:
448454
logger.warning(f'-9999.0 or space found in line {result_count!s}')
449455

450456
actual_records_count = len(results)
451-
452-
# Load the results into a pandas dataframe
453-
results_pd = pd.read_csv(output_file)
454-
df = pd.DataFrame(results_pd)
455-
456457
if len(results) < 1:
457458
# TODO surface actual exceptions instead of giving this generic message
458459
raise RuntimeError(
@@ -469,25 +470,71 @@ def main(command_line_args=None):
469470
means = np.nanmean(results, 0)
470471
std = np.nanstd(results, 0)
471472

473+
# Load the results into a pandas dataframe
474+
results_pd = pd.read_csv(output_file)
475+
df = pd.DataFrame(results_pd)
476+
477+
# Build a second dataframe to hold the input data. In the df dataframe the inputs are packed into a single encoded string column ('name:value;...'), which is not directly usable
478+
input_df = pd.DataFrame()
479+
480+
# add the columns
481+
input_row = df[df.columns[len(outputs)]].tolist()[0]
482+
input_row = input_row.replace('(', '').replace(')', '')
483+
input_row = input_row.strip().strip(';')
484+
input_columns_data = input_row.split(';')
485+
for input_column_data in input_columns_data:
486+
input_column_name, input_column_value = input_column_data.split(':')
487+
input_df[input_column_name] = []
488+
489+
# add the data
490+
for i in range(actual_records_count):
491+
input_row = str(df[df.columns[len(outputs)]].tolist()[i])
492+
if len(input_row) < 10:
493+
continue
494+
input_row = input_row.replace('(', '').replace(')', '')
495+
input_row = input_row.strip().strip(';')
496+
input_columns_data = input_row.split(';')
497+
data = []
498+
for input_column_data in input_columns_data:
499+
input_column_name, input_column_value = input_column_data.split(':')
500+
data.append(float(input_column_value))
501+
input_df.loc[i] = data
502+
472503
logger.info(f'Calculation Time: {time.time() - tic:10.3f} sec')
473504
logger.info(f'Calculation Time per iteration: {(time.time() - tic) / actual_records_count:10.3f} sec')
474505
if iterations != actual_records_count:
475-
logger.warning(
476-
f'NOTE: {actual_records_count!s} iterations finished successfully and were used to calculate the '
477-
f'statistics.'
478-
)
506+
msg = f'NOTE: {actual_records_count!s} iterations finished successfully and were used to calculate the statistics.'
507+
logger.warning(msg)
479508

480-
# write them out
509+
# write them out and make the graphs
481510
annotations = ''
482511
outputs_result: dict[str, dict] = {}
512+
513+
input = ''
483514
full_names: set = set()
484515
short_names: set = set()
485516
with open(output_file, 'a') as f:
486-
if iterations != actual_records_count:
487-
f.write(
488-
f'\n\n{actual_records_count!s} iterations finished successfully and were used to calculate the '
489-
f'statistics\n\n'
490-
)
517+
518+
# First do the input graphs
519+
for i in range(len(inputs)):
520+
input = inputs[i][0]
521+
plt.figure(figsize=(8, 6))
522+
ax = plt.subplot()
523+
ax.set_title(input)
524+
ax.set_xlabel('Random Values')
525+
ax.set_ylabel('Probability')
526+
527+
plt.figtext(0.11, 0.74, annotations, fontsize=8)
528+
ret = plt.hist(input_df[input_df.columns[i]].tolist(), bins=50, density=True)
529+
fname = input_df.columns[i].strip().replace('/', '-')
530+
save_path = Path(Path(output_file).parent, f'{fname}.png')
531+
if html_path:
532+
save_path = Path(Path(html_path).parent, f'{fname}.png')
533+
plt.savefig(save_path)
534+
full_names.add(save_path)
535+
short_names.add(fname)
536+
537+
# Now do the output graphs
491538
for i in range(len(outputs)):
492539
output = outputs[i]
493540
f.write(f'{output}:\n')

0 commit comments

Comments
 (0)