Skip to content

Commit c3e2409

Browse files
committed
hub: cleanup data of deleted projects
1 parent a71f678 commit c3e2409

File tree

5 files changed

+107
-5
lines changed

5 files changed

+107
-5
lines changed

src/packages/database/postgres-server-queries.coffee

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ read = require('read')
5151
{site_license_manager_set} = require('./postgres/site-license/manager')
5252
{matching_site_licenses, manager_site_licenses} = require('./postgres/site-license/search')
5353
{project_datastore_set, project_datastore_get, project_datastore_del} = require('./postgres/project-queries')
54-
{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects} = require('./postgres/delete-projects')
54+
{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects, cleanup_old_projects_data} = require('./postgres/delete-projects')
5555
{get_all_public_paths, unlist_all_public_paths} = require('./postgres/public-paths')
5656
{get_personal_user} = require('./postgres/personal')
5757
{set_passport_settings, get_passport_settings, get_all_passport_settings, get_all_passport_settings_cached, create_passport, passport_exists, update_account_and_passport, _passport_key} = require('./postgres/passport')
@@ -2590,6 +2590,10 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
25902590
unlink_old_deleted_projects: () =>
25912591
return await unlink_old_deleted_projects(@)
25922592

2593+
# async function
2594+
cleanup_old_projects_data: () =>
2595+
return await cleanup_old_projects_data(@)
2596+
25932597
# async function
25942598
unlist_all_public_paths: (account_id, is_owner) =>
25952599
return await unlist_all_public_paths(@, account_id, is_owner)

src/packages/database/postgres/delete-projects.ts

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,15 @@
77
Code related to permanently deleting projects.
88
*/
99

10+
import getLogger from "@cocalc/backend/logger";
11+
import getPool from "@cocalc/database/pool";
1012
import { callback2 } from "@cocalc/util/async-utils";
1113
import { PostgreSQL } from "./types";
14+
import { minutes_ago } from "@cocalc/util/misc";
15+
import { getServerSettings } from "@cocalc/database/settings";
16+
import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults";
17+
18+
const log = getLogger("db:delete-projects");
1219

1320
/*
1421
Permanently delete from the database all project records, where the
@@ -20,7 +27,7 @@ later then purges these projects from disk as well as the database.
2027
*/
2128
export async function permanently_unlink_all_deleted_projects_of_user(
2229
db: PostgreSQL,
23-
account_id_or_email_address: string
30+
account_id_or_email_address: string,
2431
): Promise<void> {
2532
// Get the account_id if necessary.
2633
const account_id = await get_account_id(db, account_id_or_email_address);
@@ -36,7 +43,7 @@ export async function permanently_unlink_all_deleted_projects_of_user(
3643

3744
async function get_account_id(
3845
db: PostgreSQL,
39-
account_id_or_email_address: string
46+
account_id_or_email_address: string,
4047
): Promise<string> {
4148
if (account_id_or_email_address.indexOf("@") == -1) {
4249
return account_id_or_email_address;
@@ -57,7 +64,7 @@ Another task has to run to actually get rid of the data, etc.
5764
*/
5865
export async function unlink_old_deleted_projects(
5966
db: PostgreSQL,
60-
age_d = 30
67+
age_d = 30,
6168
): Promise<void> {
6269
await callback2(db._query, {
6370
query: "UPDATE projects",
@@ -69,3 +76,77 @@ export async function unlink_old_deleted_projects(
6976
],
7077
});
7178
}
79+
80+
// All syncstrings belonging to projects that are flagged as deleted, but whose
// data has not been cleaned up yet (i.e. state.state is not "deleted").
//
// - IS DISTINCT FROM is used instead of != because `state ->> 'state'` is NULL
//   when the project has no state (or no "state" key); with != such rows would
//   be silently filtered out.
// - ORDER BY groups the rows by project, which the cleanup loop below relies on
//   to know when all syncstrings of a project have been processed.
//
// NOTE(review): because of the INNER JOIN, deleted projects without any
// syncstring never show up here and hence are never marked as "deleted" by
// cleanup_old_projects_data -- confirm whether that is intended.
const Q_CLEANUP_SYNCSTRINGS = `
SELECT p.project_id, s.string_id
FROM projects as p
  INNER JOIN syncstrings as s
  ON p.project_id = s.project_id
WHERE p.deleted = true
  AND p.state ->> 'state' IS DISTINCT FROM 'deleted'
ORDER BY p.project_id, s.string_id
`;

/*
This is more thorough than the above. It issues actual delete operations on data of projects marked as deleted.
When done with a project, it sets its state.state to "deleted".

The operation involves deleting all syncstrings of that project (and associated with that, patches),
and only for on-prem setups, it is also meant to delete all the data stored in the project on disk
(that part is still a TODO, see below).

This function is called every couple of hours. Hence ensure it does not run longer than the given max_run_m time (minutes).

Parameters:
 - db: the database object providing delete_syncstring and set_project_state
 - delay_ms: pause in milliseconds after each deleted syncstring, to throttle the load on the database
 - max_run_m: time budget in minutes; once exceeded, no further syncstrings are processed

A project is only marked as "deleted" after *all* of its syncstrings returned by the query
have been deleted. If the time budget runs out in the middle of a project, it stays unmarked
and its remaining syncstrings are picked up by the next run.
*/
export async function cleanup_old_projects_data(
  db: PostgreSQL,
  delay_ms = 50,
  max_run_m = 60,
) {
  const settings = await getServerSettings();
  const on_prem = settings.kucalc === KUCALC_ON_PREMISES;

  log.debug("cleanup_old_projects_data", { delay_ms, max_run_m, on_prem });
  const start_ts = new Date();

  const pool = getPool();
  const { rows } = await pool.query(Q_CLEANUP_SYNCSTRINGS);

  // Called once per project, after all of its syncstrings have been deleted.
  const finalize_project = async (project_id: string): Promise<void> => {
    if (on_prem) {
      log.debug(
        `cleanup_old_projects_data: deleting project data in ${project_id}`,
      );
      // TODO: this only works on-prem, and requires the project files to be mounted

      log.debug(`deleting all shared files in project ${project_id}`);
      // TODO: do it directly like above, and also get rid of all those shares in the database
    }

    // now, that we're done with that project, mark it as state.state ->> 'deleted'
    await callback2(db.set_project_state, {
      project_id,
      state: "deleted",
    });
  };

  let num = 0;
  // project whose syncstrings are currently being deleted ("" = none yet)
  let pid = "";
  let timed_out = false;

  for (const { project_id, string_id } of rows) {
    // Rows are ordered by project_id, so a change means that the previous
    // project is completely processed and can be marked as deleted.
    if (pid !== project_id) {
      if (pid !== "") {
        await finalize_project(pid);
      }
      pid = project_id;
    }

    // start_ts is older than "max_run_m minutes ago" <=> time budget exceeded
    if (start_ts < minutes_ago(max_run_m)) {
      log.debug(
        `cleanup_old_projects_data: too much time elapsed, breaking after ${num} syncstrings`,
      );
      timed_out = true;
      break;
    }

    log.debug(
      `cleanup_old_projects_data: deleting syncstring ${project_id}/${string_id}`,
    );
    num += 1;
    await callback2(db.delete_syncstring, { string_id });

    // wait for the given amount of delay_ms milliseconds
    await new Promise((done) => setTimeout(done, delay_ms));
  }

  // The last project is only complete if we did not stop early; otherwise it
  // may still have syncstrings left and must stay visible to the next run.
  if (!timed_out && pid !== "") {
    await finalize_project(pid);
  }
}

src/packages/database/postgres/types.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { EventEmitter } from "events";
77
import { Client } from "pg";
88

99
import { PassportStrategyDB } from "@cocalc/database/settings/auth-sso-types";
10+
import { ProjectState } from "@cocalc/util/db-schema/projects";
1011
import {
1112
CB,
1213
CBDB,
@@ -305,6 +306,8 @@ export interface PostgreSQL extends EventEmitter {
305306
cb: CB;
306307
});
307308

309+
delete_syncstring(opts: { string_id: string; cb: CB });
310+
308311
projects_that_need_to_be_started(): Promise<string[]>;
309312

310313
is_connected(): boolean;
@@ -316,4 +319,6 @@ export interface PostgreSQL extends EventEmitter {
316319
email_address: string;
317320
}>;
318321
}): Promise<void>;
322+
323+
set_project_state(opts: { project_id: string; state: ProjectState["state"] });
319324
}

src/packages/hub/run/delete-projects.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ async function update() {
1616
console.log("unlinking old deleted projects...");
1717
try {
1818
await db.unlink_old_deleted_projects();
19+
await db.cleanup_old_projects_data();
1920
} catch (err) {
2021
if (err !== null) {
2122
throw Error(`failed to unlink projects -- ${err}`);

src/packages/util/compute-states.ts

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ export type State =
2828
| "running"
2929
| "starting"
3030
| "stopping"
31-
| "unarchiving";
31+
| "unarchiving"
32+
| "deleted";
3233

3334
// @hsy: completely unclear what this is for.
3435
type Operation =
@@ -218,4 +219,14 @@ export const COMPUTE_STATES: ComputeState = {
218219
"migrate_live",
219220
],
220221
},
222+
223+
// projects are deleted in hub -> postgres.delete-projects and this is a one-way operation
224+
deleted: {
225+
desc: "Project is deleted",
226+
icon: "trash",
227+
display: "Deleted",
228+
stable: true,
229+
to: {},
230+
commands: [],
231+
},
221232
} as const;

0 commit comments

Comments
 (0)