Skip to content
This repository was archived by the owner on Mar 4, 2025. It is now read-only.

Commit fd8135f

Browse files
committed
analysis: Add --hist option to enable Historical processing
"Historical" processing mode creates accurate disk space usage records for each user, covering every day from the date they joined through to the present day. It only generates data for standard databases, because those keep a byte-accurate list of database sizes in their commit list. This should only ever need to be run once on the production database, to populate the usage data from before the disk space usage tables existed.
1 parent 9b303a7 commit fd8135f

File tree

4 files changed

+151
-51
lines changed

4 files changed

+151
-51
lines changed

common/postgresql.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,16 @@ func AddUser(auth0ID, userName, password, email, displayName, avatarURL string)
104104
}
105105

106106
// AnalysisRecordUserStorage adds a record to the backend database containing the amount of storage space used by a user
107-
func AnalysisRecordUserStorage(userName string, spaceUsedStandard, spaceUsedLive int64) (err error) {
107+
func AnalysisRecordUserStorage(userName string, recordDate time.Time, spaceUsedStandard, spaceUsedLive int64) (err error) {
108108
dbQuery := `
109109
WITH u AS (
110110
SELECT user_id
111111
FROM users
112112
WHERE lower(user_name) = lower($1)
113113
)
114-
INSERT INTO analysis_space_used (user_id, standard_databases_bytes, live_databases_bytes)
115-
VALUES ((SELECT user_id FROM u), $2, $3)`
116-
commandTag, err := pdb.Exec(context.Background(), dbQuery, userName, spaceUsedStandard, spaceUsedLive)
114+
INSERT INTO analysis_space_used (user_id, analysis_date, standard_databases_bytes, live_databases_bytes)
115+
VALUES ((SELECT user_id FROM u), $2, $3, $4)`
116+
commandTag, err := pdb.Exec(context.Background(), dbQuery, userName, recordDate, spaceUsedStandard, spaceUsedLive)
117117
if err != nil {
118118
log.Printf("Adding record of storage space used by '%s' failed: %s", userName, err)
119119
return
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
BEGIN;
2+
DROP INDEX IF EXISTS analysis_space_used_analysis_date_index;
3+
DROP INDEX IF EXISTS analysis_space_used_user_id_index;
4+
COMMIT;
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
BEGIN;
2+
CREATE INDEX IF NOT EXISTS analysis_space_used_analysis_date_index ON public.analysis_space_used (analysis_date);
3+
CREATE INDEX IF NOT EXISTS analysis_space_used_user_id_index ON public.analysis_space_used (user_id);
4+
COMMIT;

standalone/analysis/main.go

Lines changed: 139 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@ package main
55
// run from cron on a periodic basis (ie every few hours)
66

77
import (
8-
"fmt"
98
"log"
9+
"os"
10+
"time"
1011

1112
"github.com/docker/go-units"
1213
com "github.com/sqlitebrowser/dbhub.io/common"
1314
)
1415

1516
var (
1617
Debug = false
18+
19+
// Historical controls whether to calculate historical space usage for each day, or just usage for the current date
20+
Historical = false
1721
)
1822

1923
func main() {
@@ -23,6 +27,12 @@ func main() {
2327
log.Fatalf("Configuration file problem: '%s'", err)
2428
}
2529

30+
// Check if we should operate in Historical mode for this run
31+
if len(os.Args) > 1 && os.Args[1] == "--hist" {
32+
Historical = true
33+
log.Println("Historical mode enabled")
34+
}
35+
2636
// Connect to PostgreSQL server
2737
err = com.ConnectPostgreSQL()
2838
if err != nil {
@@ -42,81 +52,163 @@ func main() {
4252
log.Fatalln(err)
4353
}
4454

55+
if Debug {
56+
log.Printf("# of users: %d", len(userList))
57+
}
58+
4559
type dbSizes struct {
46-
Live int64
60+
Live int64
4761
Standard int64
4862
}
4963
userStorage := make(map[string]dbSizes)
5064

5165
// Loop through the users, calculating the total disk space used by each
52-
for user, numDBs := range userList {
53-
if Debug {
54-
fmt.Printf("User: %s, # databases: %d\n", user, numDBs)
55-
}
56-
57-
// Get the list of standard databases for a user
58-
dbList, err := com.UserDBs(user, com.DB_BOTH)
59-
if err != nil {
60-
log.Fatal(err)
61-
}
66+
now := time.Now()
67+
if !Historical {
68+
for user, numDBs := range userList {
69+
if Debug {
70+
log.Printf("Processing user: %s, # databases: %d", user, numDBs)
71+
}
6272

63-
// For each standard database, count the list of commits and amount of space used
64-
var spaceUsedStandard int64
65-
for _, db := range dbList {
66-
commitList, err := com.GetCommitList(user, db.Database)
73+
// Get the list of standard databases for a user
74+
dbList, err := com.UserDBs(user, com.DB_BOTH)
6775
if err != nil {
68-
log.Println(err)
76+
log.Fatal(err)
6977
}
7078

71-
// Calculate space used by standard databases
72-
for _, commit := range commitList {
73-
tree := commit.Tree.Entries
74-
for _, j := range tree {
75-
spaceUsedStandard += j.Size
79+
// For each standard database, count the list of commits and amount of space used
80+
var spaceUsedStandard int64
81+
for _, db := range dbList {
82+
// Get the commit list for the database
83+
commitList, err := com.GetCommitList(user, db.Database)
84+
if err != nil {
85+
log.Println(err)
86+
}
87+
88+
// Calculate space used by standard databases across all time
89+
for _, commit := range commitList {
90+
tree := commit.Tree.Entries
91+
for _, j := range tree {
92+
spaceUsedStandard += j.Size
93+
}
94+
}
95+
96+
if Debug {
97+
log.Printf("User: %s, Standard database: %s, # Commits: %d, Space used: %s", user, db.Database, len(commitList), units.HumanSize(float64(spaceUsedStandard)))
7698
}
7799
}
78100

79-
if Debug {
80-
fmt.Printf("User: %s, Standard database: %s, # Commits: %d, Space used: %s\n", user, db.Database, len(commitList), units.HumanSize(float64(spaceUsedStandard)))
101+
// Get the list of live databases for a user
102+
liveList, err := com.LiveUserDBs(user, com.DB_BOTH)
103+
if err != nil {
104+
log.Fatal(err)
105+
}
106+
107+
// For each live database, get the amount of space used
108+
var spaceUsedLive int64
109+
for _, db := range liveList {
110+
_, liveNode, err := com.CheckDBLive(user, db.Database)
111+
if err != nil {
112+
log.Fatal(err)
113+
return
114+
}
115+
116+
// Ask our AMQP backend for the database size
117+
z, err := com.LiveSize(liveNode, user, user, db.Database)
118+
if err != nil {
119+
log.Fatal(err)
120+
}
121+
spaceUsedLive += z
122+
123+
if Debug {
124+
log.Printf("User: %s, Live database: %s, Space used: %s", user, db.Database, units.HumanSize(float64(spaceUsedLive)))
125+
}
81126
}
127+
userStorage[user] = dbSizes{Standard: spaceUsedStandard, Live: spaceUsedLive}
82128
}
83129

84-
// Get the list of live databases for a user
85-
liveList, err := com.LiveUserDBs(user, com.DB_BOTH)
86-
if err != nil {
87-
log.Fatal(err)
130+
// Store the information in our PostgreSQL backend
131+
for user, z := range userStorage {
132+
err = com.AnalysisRecordUserStorage(user, now, z.Standard, z.Live)
133+
if err != nil {
134+
log.Fatalln()
135+
}
88136
}
137+
}
89138

90-
// For each live database, get the amount of space used
91-
var spaceUsedLive int64
92-
for _, db := range liveList {
93-
_, liveNode, err := com.CheckDBLive(user, db.Database)
139+
// Do the historical storage analysis if requested by the caller
140+
if Historical {
141+
for user, _ := range userList {
142+
// Get the date the user signed up
143+
details, err := com.User(user)
94144
if err != nil {
95145
log.Fatal(err)
96-
return
146+
}
147+
joinDate := details.DateJoined
148+
149+
if Debug {
150+
log.Printf("Processing user: '%s', Joined on: %s", user, joinDate.Format(time.RFC1123))
97151
}
98152

99-
// Ask our AMQP backend for the database size
100-
z, err := com.LiveSize(liveNode, user, user, db.Database)
153+
// Get the list of standard databases for a user
154+
dbList, err := com.UserDBs(user, com.DB_BOTH)
101155
if err != nil {
102156
log.Fatal(err)
103157
}
104-
spaceUsedLive += z
105158

106-
if Debug {
107-
fmt.Printf("User: %s, Live database: %s, Space used: %s\n", user, db.Database, units.HumanSize(float64(spaceUsedLive)))
159+
type commitList map[string]com.CommitEntry
160+
dbCommits := make(map[string]commitList)
161+
162+
// Loop through the days, calculating the space used each day since they joined until today
163+
pointInTime := joinDate
164+
for pointInTime.Before(now) {
165+
// Calculate the disk space used by all of the users' databases for the given day
166+
var spaceUsed int64
167+
for _, db := range dbList {
168+
// Get the commit list for the database, using a cache to reduce multiple database hits for the same info
169+
commits, ok := dbCommits[db.Database]
170+
if !ok {
171+
commits, err = com.GetCommitList(user, db.Database)
172+
if err != nil {
173+
log.Println(err)
174+
}
175+
dbCommits[db.Database] = commits
176+
}
177+
178+
// Calculate the disk space used by this one database
179+
z, err := SpaceUsedBetweenDates(commits, joinDate, pointInTime)
180+
if err != nil {
181+
log.Fatal(err)
182+
}
183+
spaceUsed += z
184+
}
185+
186+
// Record the storage space used by the database (until this date) to our backend
187+
err = com.AnalysisRecordUserStorage(user, pointInTime, spaceUsed, 0)
188+
if err != nil {
189+
log.Fatalln()
190+
}
191+
192+
// Move the point in time forward by a day
193+
pointInTime = pointInTime.Add(time.Hour * 24)
108194
}
109195
}
110-
userStorage[user] = dbSizes{Standard: spaceUsedStandard, Live: spaceUsedLive}
111196
}
112197

113-
// Store the information in our PostgreSQL backend
114-
for user, z := range userStorage {
115-
err = com.AnalysisRecordUserStorage(user, z.Standard, z.Live)
116-
if err != nil {
117-
log.Fatalln()
198+
log.Printf("%s run complete", com.Conf.Live.Nodename)
199+
}
200+
201+
// SpaceUsedBetweenDates determines the storage space used by a standard database between two different dates
202+
func SpaceUsedBetweenDates(commitList map[string]com.CommitEntry, startDate, endDate time.Time) (spaceUsed int64, err error) {
203+
// Check every commit in the database, adding the ones between the start and end dates to the usage total
204+
for _, commit := range commitList {
205+
if commit.Timestamp.After(startDate) && commit.Timestamp.Before(endDate) {
206+
// This commit is in the requested time range
207+
tree := commit.Tree.Entries
208+
for _, j := range tree {
209+
spaceUsed += j.Size
210+
}
118211
}
119212
}
120-
121-
log.Printf("%s run complete", com.Conf.Live.Nodename)
122-
}
213+
return
214+
}

0 commit comments

Comments
 (0)