Skip to content

Commit 73eb5e2

Browse files
pbailie authored and bmcutler committed
DB backup update (#9)
* db_backup.py Monthly dumpfile retention WIP * db_backup.py Monthly retention WIP Should now work with day > 28 and still preserve at every end of month. * db_backup.py bugfix * Changes to be committed: modified: nightly_db_backup/db_backup.py WIP * readme.md Doc update * readme.md More doc updates
1 parent 80111d3 commit 73eb5e2

File tree

2 files changed

+91
-75
lines changed

2 files changed

+91
-75
lines changed

nightly_db_backup/db_backup.py

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
:file: db_backup.py
66
:language: python3
77
:author: Peter Bailie (Systems Programmer, Dept. of Computer Science, RPI)
8-
:date: August 22 2018
8+
:date: August 28 2018
99
1010
This script will take backup dumps of each individual Submitty course
1111
database. This should be set up by a sysadmin to be run on the Submitty
@@ -30,6 +30,7 @@
3030
"""
3131

3232
import argparse
33+
import calendar
3334
import datetime
3435
import json
3536
import os
@@ -43,14 +44,16 @@
4344
# WHERE DUMP FILES ARE WRITTEN
4445
DUMP_PATH = '/var/local/submitty/submitty-dumps'
4546

46-
def delete_obsolete_dumps(working_path, expiration_stamp):
47+
def delete_obsolete_dumps(working_path, monthly_retention, expiration_date):
4748
"""
4849
Recurse through folders/files and delete any obsolete dump files
4950
50-
:param working_path: path to recurse through
51-
:param expiration_stamp: date to begin purging old dump files
52-
:type working_path: string
53-
:type expiration_stamp: string
51+
:param working_path: path to recurse through
52+
:param monthly_retention: day of month that dump is always preserved (val < 1 when disabled)
53+
:param expiration_date: date to begin purging old dump files
54+
:type working_path: string
55+
:type monthly_retention: integer
56+
:type expiration_date: datetime.date object
5457
"""
5558

5659
# Filter out '.', '..', and any "hidden" files/directories.
@@ -62,24 +65,26 @@ def delete_obsolete_dumps(working_path, expiration_stamp):
6265
for file in files_list:
6366
if os.path.isdir(file):
6467
# If the file is a folder, recurse
65-
delete_obsolete_dumps(file, expiration_stamp)
68+
delete_obsolete_dumps(file, monthly_retention, expiration_date)
6669
else:
67-
# File date was concat'ed into the file's name. Use regex to isolate date from full path.
68-
# e.g. "/var/local/submitty-dumps/s18/cs1000/180424_s18_cs1000.dbdump"
69-
# The date substring can be located with high confidence by looking for:
70-
# - final token of the full path (the actual file name)
71-
# - file name consists of three tokens delimited by '_' chars
72-
# - first token is exactly 6 digits, the date stamp.
73-
# - second token is the semester code, at least one 'word' char
74-
# - third token is the course code, at least one 'word' char
75-
# - filename always ends in ".dbdump"
76-
# - then take substring [0:6] to get "180424".
77-
match = re.search('(\d{6}_\w+_\w+\.dbdump)$', file)
78-
if match is not None:
79-
file_date_stamp = match.group(0)[0:6]
80-
if file_date_stamp <= expiration_stamp:
81-
os.remove(file)
82-
70+
# Determine file's date from its filename
71+
# Note: datetime.date.fromisoformat() doesn't exist in Python 3.6 or earlier.
72+
filename = file.split('/')[-1]
73+
datestamp = filename.split('_')[0]
74+
year, month, day = map(int, datestamp.split('-'))
75+
file_date = datetime.date(year, month, day)
76+
77+
# Conditions to NOT delete old file:
78+
if file_date > expiration_date:
79+
pass
80+
elif file_date.day == monthly_retention:
81+
pass
82+
# A month can be as few as 28 days, but we NEVER skip months even when "-m" is 28, 29, 30, or 31.
83+
elif monthly_retention > 28 and (file_date.day == calendar.monthrange(file_date.year, file_date.month)[1] and file_date.day <= monthly_retention):
84+
pass
85+
else:
86+
# os.remove(file)
87+
print("remove " + file)
8388
def main():
8489
""" Main """
8590

@@ -89,18 +94,19 @@ def main():
8994

9095
# READ COMMAND LINE ARGUMENTS
9196
# Note that -t and -g are different args and mutually exclusive
92-
parser = argparse.ArgumentParser(description='Dump all Submitty databases for a particular academic term.')
93-
parser.add_argument('-e', action='store', nargs='?', type=int, default=0, help='Set number of days expiration of older dumps (default: no expiration).', metavar='days')
97+
parser = argparse.ArgumentParser(description='Dump all Submitty databases for a particular academic term.', prefix_chars='-', add_help=True)
98+
parser.add_argument('-e', action='store', type=int, default=0, help='Set number of days expiration of older dumps (default: no expiration).', metavar='days')
99+
parser.add_argument('-m', action='store', type=int, default=0, choices=range(0,32), help='Day of month to ALWAYS retain a dumpfile (default: no monthly retention).', metavar='day of month')
94100
group = parser.add_mutually_exclusive_group(required=True)
95-
group.add_argument('-t', action='store', nargs='?', type=str, help='Set the term code.', metavar='term code')
101+
group.add_argument('-t', action='store', type=str, help='Set the term code.', metavar='term code')
96102
group.add_argument('-g', action='store_true', help='Guess term code based on calender month and year.')
103+
97104
args = parser.parse_args()
98105

99106
# Get current date -- needed throughout the script, but also used when guessing default term code.
100-
# (today.year % 100) determines the two digit year. e.g. '2017' -> '17'
101107
today = datetime.date.today()
102-
year = str(today.year % 100)
103-
today_stamp = '{:0>2}{:0>2}{:0>2}'.format(year, today.month, today.day)
108+
year = today.strftime("%y")
109+
today_stamp = today.isoformat()
104110

105111
# PARSE COMMAND LINE ARGUMENTS
106112
expiration = args.e
@@ -112,6 +118,9 @@ def main():
112118
else:
113119
semester = args.t
114120

121+
# MONTHLY RETENTION DATE
122+
monthly_retention = args.m
123+
115124
# GET DATABASE CONFIG FROM SUBMITTY
116125
fh = open(DB_CONFIG_PATH, "r")
117126
db_config = json.load(fh)
@@ -170,12 +179,11 @@ def main():
170179
# DETERMINE EXPIRATION DATE (to delete obsolete dump files)
171180
# (do this BEFORE recursion so it is not calculated recursively n times)
172181
if expiration > 0:
173-
expiration_date = datetime.date.fromordinal(today.toordinal() - expiration)
174-
expiration_stamp = '{:0>2}{:0>2}{:0>2}'.format(expiration_date.year % 100, expiration_date.month, expiration_date.day)
182+
expiration_date = datetime.date.fromordinal(today.toordinal() - expiration)
175183
working_path = "{}/{}".format(DUMP_PATH, semester)
176184

177185
# RECURSIVELY CULL OBSOLETE DUMPS
178-
delete_obsolete_dumps(working_path, expiration_stamp)
186+
delete_obsolete_dumps(working_path, monthly_retention, expiration_date)
179187

180188
if __name__ == "__main__":
181189
main()

nightly_db_backup/readme.md

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,27 @@
11
# Nightly Database Backup Python Script
2-
Readme June 26, 2018
2+
Readme August 31, 2018
33

4-
### db_backup.py
4+
## db_backup.py
55

66
This script will read a course list, corresponding to a specific term, from
77
the 'master' Submitty database. With a course list, the script will use
88
Postgresql's "pg_dump" tool to retrieve a SQL dump of the submitty 'master'
99
database and each registered course's Submitty database of a specific semester.
1010
The script also has cleanup functionality to automatically remove older dumps.
1111

12-
*db_backup.py is written in Python 3, and tested with Python 3.4.*
12+
*db_backup.py is written in Python 3, and tested with Python 3.6.*
1313

14-
---
14+
NOTE: Some modification of code may be necessary to work with your school's
15+
information systems.
16+
17+
### FERPA Warning
18+
19+
WARNING: Database dumps can contain student information that is protected by
20+
[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html).
21+
Please consult with your school's IT dept. for advice on data security policies
22+
and practices.
23+
24+
### Term Code
1525

1626
The term code can be specified as a command line argument as option `-t`.
1727

@@ -28,70 +38,68 @@ current month and year of the server's date.
2838

2939
The term code will follow the pattern of TYY, where
3040
- T is the term
31-
- **s** is for Spring (Jan - May)
32-
- **u** is for Summer (Jun - Jul)
33-
- **f** is for Fall (Aug-Dec)
34-
- YY is the two digit year
41+
- `s` is for Spring (Jan - May)
42+
- `u` is for Summer (Jun - Jul)
43+
- `f` is for Fall (Aug-Dec)
44+
- `YY` is the two digit year
3545
- e.g. April 15, 2018 will correspond to "s18" (Spring 2018).
3646

37-
`-t` and `-g` are mutually exclusive.
47+
`-t` and `-g` are mutually exclusive, but one is required.
3848

39-
---
49+
### Date Stamp
4050

41-
Each dump has a date stamp in its name following the format of "YYMMD",
51+
Each dump has a date stamp in its name following the format of `YYYY-MM-DD`,
4252
followed by the semester code, then the course code.
4353

44-
e.g. '180423_s18_cs100.dbdump' is a dump taken on April 23, 2018 of the Spring
45-
2018 semester for course CS-100.
54+
e.g. `2018-04-23_s18_cs100.dbdump` is a dump taken on April 23, 2018 of the
55+
Spring 2018 semester for course CS-100.
56+
57+
### Cleanup Schedule
4658

47-
Older dumps can be automatically purged with the command line option "-e".
59+
Older dumps can be automatically purged with the command line option `-e`.
4860

4961
For example:
5062

5163
`python3 ./db_backup.py -t f17 -e 7`
5264

5365
will purge any dumps with a stamp seven days or older. Only dumps of the
54-
term being processed will be purged, in this example, 'f17'.
66+
term being processed will be purged, in this example, `f17`.
5567

5668
The default expiration value is 0 (no expiration -- no files are purged) should
5769
this argument be omitted.
5870

59-
---
60-
61-
Submitty databases can be restored from a dump using the pg_restore tool.
62-
q.v. [https://www.postgresql.org/docs/9.5/static/app-pgrestore.html](https://www.postgresql.org/docs/9.5/static/app-pgrestore.html)
63-
64-
This is script intended to be run as a cronjob by 'root' on the same server
65-
machine as the Submitty system. *Running this script on another server other
66-
than Submitty has not been tested.*
71+
### Monthly Retention
6772

68-
---
73+
Command line option `-m` will set a monthly retention date. Dumps taken on that
74+
date will not be purged. If the retention day is past the 28th, then in any
75+
month that ends on or before that day, the end-of-month dump is retained instead.
6976

70-
Please configure options near the top of the code.
77+
e.g. `-m 30` will retain any dump on the 30th of the month. In the case of
78+
February, dumps on the 28th, or 29th on a leap year, are also retained. Dumps
79+
on the 31st of another month are not retained (as they were retained on the
80+
30th).
7181

72-
DB_HOST: Hostname of the Submitty databases. You may use 'localhost' if
73-
Postgresql is on the same machine as the Submitty system.
82+
For clarification: `-m 31` will retain dumps taken on February 28/29;
83+
April, June, September, November 30; and January, March, May, July, August,
84+
October, December 31.
7485

75-
DB_USER: The username that interacts with Submitty databases. Typically
76-
'hsdbu'.
86+
No monthly retention occurs if `-m` is omitted or set to zero (`-m 0`).
7787

78-
DB_PASS: The password for Submitty's database account (e.g. account 'hsdbu').
79-
**Do NOT use the placeholder value of 'DB.p4ssw0rd'**
88+
### Restore a Dump
8089

81-
DUMP_PATH: The folder path to store the database dumps. Course folders will
82-
be created from this path, and the dumps stored in their respective course
83-
folders, grouped by semester.
90+
Submitty databases can be restored from a dump using the pg_restore tool.
91+
q.v. [https://www.postgresql.org/docs/10/static/app-pgrestore.html](https://www.postgresql.org/docs/10/static/app-pgrestore.html)
8492

85-
---
93+
### Cron
8694

87-
WARNING: Database dumps can contain student information that is protected by
88-
[FERPA (20 U.S.C. § 1232g)](https://www2.ed.gov/policy/gen/guid/fpco/ferpa/index.html).
89-
Please consult with your school's IT dept. for advice on data security policies
90-
and practices.
95+
This script is intended to be run as a cronjob by 'root' on the same server
96+
machine as the Submitty system. *Running this script on another server other
97+
than Submitty has not been tested.*
9198

92-
---
99+
### Options At The Top Of The Code
93100

94-
db_backup.py is tested to run on Python 3.4 or higher.
101+
`DB_CONFIG_PATH` looks for Submitty's `database.json` file that contains
102+
database authentication information. Leaving this at the default is usually OK.
95103

96-
NOTE: Some modification of code may be necessary to work with your school's
97-
information systems.
104+
`DUMP_PATH` indicates where dump files are stored. Only change this if the
105+
default location is undesirable for your server.

0 commit comments

Comments
 (0)