Skip to content

Commit a53b012

Browse files
authored
Merge pull request #422 from MetOffice/fix-unified-model-oserror-table
Fix unified model oserror table
2 parents 8af680a + ecd5adf commit a53b012

File tree

4 files changed

+91
-7
lines changed

4 files changed

+91
-7
lines changed

forest/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
.. automodule:: forest.services
2929
3030
"""
31-
__version__ = '0.20.6'
31+
__version__ = '0.20.7'
3232

3333
from .config import *
3434
from . import (

forest/db/health.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
"""
2+
S3 object health status
3+
"""
4+
import sqlite3
5+
6+
7+
class HealthDB:
    """Maintain meta-data related to S3 objects

    Files that raised ``OSError`` while being processed are recorded in a
    ``health`` table, which is created on construction if it does not
    already exist. Successfully processed files are read from a ``file``
    table that this class queries but does not create.

    :param connection: open sqlite3 connection shared with the caller
    """
    def __init__(self, connection):
        self.connection = connection
        self.cursor = self.connection.cursor()
        # UNIQUE(name) lets insert_error use INSERT OR IGNORE so that a
        # path is only ever recorded once
        self.cursor.execute("""
            CREATE TABLE
            IF NOT EXISTS health (
                id INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                errno INTEGER,
                strerror TEXT,
                time TEXT,
                UNIQUE(name))
        """)

    @classmethod
    def connect(cls, path_or_memory):
        """Connect to sqlite3 database

        :param path_or_memory: file path or ``":memory:"``
        :returns: instance wrapping a new sqlite3 connection
        """
        return cls(sqlite3.connect(path_or_memory))

    def checked_files(self, pattern):
        """Files that are in the database

        :param pattern: GLOB pattern matched against stored names
        :returns files: sorted names either successfully processed or
                        marked as OSError
        """
        return sorted(set(self.files(pattern)) |
                      set(self.error_files(pattern)))

    def files(self, pattern):
        """Names in the ``file`` table that match a GLOB pattern

        The ``file`` table is not created by this class, so on a database
        that does not have it yet an empty list is returned instead of
        letting ``sqlite3.OperationalError`` propagate.

        :param pattern: GLOB pattern matched against stored names
        :returns: list of matching names
        """
        query = "SELECT name FROM file WHERE name GLOB :pattern;"
        params = {"pattern": pattern}
        try:
            rows = self.cursor.execute(query, params).fetchall()
        except sqlite3.OperationalError as error:
            # Only a missing table means "nothing processed yet", any
            # other operational failure is still reported to the caller
            if "no such table" not in str(error):
                raise
            return []
        return [path for path, in rows]

    def error_files(self, pattern):
        """Names in the ``health`` table that match a GLOB pattern

        :param pattern: GLOB pattern matched against stored names
        :returns: list of matching names
        """
        query = "SELECT name FROM health WHERE name GLOB :pattern;"
        params = {"pattern": pattern}
        return [path for path, in self.cursor.execute(query, params)]

    def insert_error(self, path, error, check_time):
        """Insert OSError into table

        A path that is already recorded is left untouched. The statement
        is executed but not committed here.

        :param path: name of the file that could not be processed
        :param error: OSError raised while processing the file
        :param check_time: datetime at which the error was observed
        """
        query = """
            INSERT OR IGNORE
            INTO health (name, errno, strerror, time)
            VALUES (:path, :errno, :strerror, :time);
        """
        strerror = error.strerror
        if strerror is None:
            # OSError("message") leaves errno and strerror unset, keep
            # the message rather than storing NULL
            strerror = str(error) or None
        params = {
            "path": path,
            "errno": error.errno,
            "strerror": strerror,
            "time": check_time.isoformat()
        }
        self.cursor.execute(query, params)

forest/drivers/unified_model.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import netCDF4
1010
import sqlite3
1111
import forest.db
12+
import forest.db.health
1213
import forest.util
1314
import forest.map_view
1415
from forest import (
@@ -45,12 +46,9 @@ def __call__(self):
4546

4647
# Find names in database
4748
connection = sqlite3.connect(self.database_path)
48-
cursor = connection.cursor()
49-
query = "SELECT name FROM file WHERE name GLOB :pattern;"
50-
sql_names = []
51-
for row in cursor.execute(query, {"pattern": self.pattern}):
52-
path, = row
53-
sql_names.append(os.path.basename(path))
49+
health_db = forest.db.health.HealthDB(connection)
50+
sql_names = [os.path.basename(path)
51+
for path in health_db.checked_files(self.pattern)]
5452
connection.close()
5553

5654
# Find extra files
@@ -61,12 +59,14 @@ def __call__(self):
6159
if len(extra_paths) > 0:
6260
print("connecting to: {}".format(self.database_path))
6361
with forest.db.Database.connect(self.database_path) as database:
62+
health_db = forest.db.health.HealthDB(database.connection)
6463
for path in extra_paths:
6564
print("inserting: '{}'".format(path))
6665
try:
6766
database.insert_netcdf(path)
6867
except OSError as e:
6968
# S3 Glacier objects inaccessible via goofys
69+
health_db.insert_error(path, e, dt.datetime.now())
7070
print(e)
7171
print(f"skip file: {path}")
7272
continue

test/test_db_health.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import sqlite3
2+
import datetime as dt
3+
import forest.db
4+
import forest.db.health
5+
6+
7+
def test_db_health_check():
    """A name added with insert_file_name is listed by checked_files"""
    db = forest.db.Database.connect(":memory:")
    db.insert_file_name("file.nc")
    # HealthDB shares the connection owned by the main database object
    health = forest.db.health.HealthDB(db.connection)
    checked = health.checked_files("*.nc")
    assert checked == ["file.nc"]
14+
15+
16+
def test_db_health_check_mark_oserror():
    """A path recorded with insert_error is listed next to inserted names"""
    db = forest.db.Database.connect(":memory:")
    db.insert_file_name("file-0.nc")
    health = forest.db.health.HealthDB(db.connection)
    failure = OSError("Error message")
    checked_at = dt.datetime(2020, 1, 1)
    health.insert_error("file-1.nc", failure, checked_at)
    expected = ["file-0.nc", "file-1.nc"]
    assert health.checked_files("*.nc") == expected

0 commit comments

Comments
 (0)