diff --git a/src/packages/backend/files/path-to-files.ts b/src/packages/backend/files/path-to-files.ts new file mode 100644 index 00000000000..fdb344b0f3e --- /dev/null +++ b/src/packages/backend/files/path-to-files.ts @@ -0,0 +1,17 @@ +/* + * This file is part of CoCalc: Copyright © 2020 Sagemath, Inc. + * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details + */ + +// This is used to find files on the share server (public_paths) in "next" +// and also in the hub, for deleting shared files of projects + +import { join } from "node:path"; + +import { projects } from "@cocalc/backend/data"; + +// Given a project_id/path, return the directory on the file system where +// that path should be located. +export function pathToFiles(project_id: string, path: string): string { + return join(projects.replace("[project_id]", project_id), path); +} diff --git a/src/packages/backend/logger.ts b/src/packages/backend/logger.ts index 6d1dcea6a3b..824c9ba1d86 100644 --- a/src/packages/backend/logger.ts +++ b/src/packages/backend/logger.ts @@ -12,9 +12,10 @@ process.env.DEBUG_HIDE_DATE = "yes"; // since we supply it ourselves // otherwise, maybe stuff like this works: (debug as any).inspectOpts["hideDate"] = true; import debug, { Debugger } from "debug"; -import { mkdirSync, createWriteStream, statSync, ftruncate } from "fs"; -import { format, inspect } from "util"; +import { createWriteStream, ftruncate, mkdirSync, statSync } from "fs"; import { dirname, join } from "path"; +import { format, inspect } from "util"; + import { logs } from "./data"; const MAX_FILE_SIZE_BYTES = 20 * 1024 * 1024; // 20MB @@ -128,7 +129,9 @@ function initTransports() { // Similar as in debug source code, except I stuck a timestamp // at the beginning, which I like... except also aware of // non-printf formatting. 
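     // For illustration, a hypothetical formatted message could look like:
     //   "2024-01-01T12:00:00.000Z (1234):deleted 1024 | dt=0.05 | wait=0.45 | limit=2048"
     // (the debug library additionally tags it with its namespace, e.g. cocalc:debug:db:bulk-delete)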
-    const line = `${new Date().toISOString()} (${process.pid}):${myFormat(...args)}\n`;
+    const line = `${new Date().toISOString()} (${process.pid}):${myFormat(
+      ...args,
+    )}\n`;
     if (transports.console) {
       // the console transport:
@@ -196,13 +199,13 @@ class Logger {
 }
 
 export interface WinstonLogger {
-  error: Function;
-  warn: Function;
-  info: Function;
-  http: Function;
-  verbose: Function;
-  debug: Function;
-  silly: Function;
+  error: Debugger;
+  warn: Debugger;
+  info: Debugger;
+  http: Debugger;
+  verbose: Debugger;
+  debug: Debugger;
+  silly: Debugger;
   extend: (name: string) => WinstonLogger;
   isEnabled: (level: Level) => boolean;
 }
diff --git a/src/packages/backend/metrics.ts b/src/packages/backend/metrics.ts
index 14551ae9cbb..6500653db02 100644
--- a/src/packages/backend/metrics.ts
+++ b/src/packages/backend/metrics.ts
@@ -1,6 +1,6 @@
 import { Counter, Gauge, Histogram } from "prom-client";
 
-type Aspect = "db" | "server" | "llm";
+type Aspect = "db" | "database" | "server" | "llm";
 
 function withPrefix(aspect: Aspect, name: string): string {
   return `cocalc_${aspect}_${name}`;
@@ -13,7 +13,7 @@ export function newCounter(
   name: string,
   help: string,
   labelNames: string[] = [],
-) {
+): Counter {
   name = withPrefix(aspect, name);
   const key = `counter-${name}`;
   if (cache[key] != null) {
diff --git a/src/packages/backend/misc.ts b/src/packages/backend/misc.ts
index c52b14a34fd..80314d93406 100644
--- a/src/packages/backend/misc.ts
+++ b/src/packages/backend/misc.ts
@@ -1,4 +1,7 @@
 import { createHash } from "crypto";
+import { join } from "node:path";
+
+import { projects } from "@cocalc/backend/data";
 import { is_valid_uuid_string } from "@cocalc/util/misc";
 
 /*
@@ -70,6 +73,17 @@ export function envForSpawn() {
   return env;
 }
 
+// Return the absolute home directory of the project with the given project_id on disk.
+export function homePath(project_id: string): string {
+  // $MOUNTED_PROJECTS_ROOT is for OnPrem; the "projects" location is only for dev/single-user.
+  const projects_root = process.env.MOUNTED_PROJECTS_ROOT;
+  if (projects_root) {
+    return join(projects_root, project_id);
+  } else {
+    return projects.replace("[project_id]", project_id);
+  }
+}
+
 import { callback } from "awaiting";
 import { randomBytes } from "crypto";
 
diff --git a/src/packages/database/package.json b/src/packages/database/package.json
index 15b29d94fe3..00f623cf740 100644
--- a/src/packages/database/package.json
+++ b/src/packages/database/package.json
@@ -28,7 +28,7 @@
     "immutable": "^4.3.0",
     "lodash": "^4.17.21",
     "lru-cache": "^7.18.3",
-    "pg": "^8.7.1",
+    "pg": "^8.16.3",
     "random-key": "^0.3.2",
     "read": "^1.0.7",
     "sql-string-escape": "^1.1.6",
@@ -37,7 +37,7 @@
   "devDependencies": {
     "@types/lodash": "^4.14.202",
     "@types/node": "^18.16.14",
-    "@types/pg": "^8.6.1",
+    "@types/pg": "^8.15.4",
     "coffeescript": "^2.5.1"
   },
   "scripts": {
diff --git a/src/packages/database/postgres-server-queries.coffee b/src/packages/database/postgres-server-queries.coffee
index 423fedbc844..25859409214 100644
--- a/src/packages/database/postgres-server-queries.coffee
+++ b/src/packages/database/postgres-server-queries.coffee
@@ -50,7 +50,7 @@ read = require('read')
 {site_license_manager_set} = require('./postgres/site-license/manager')
 {matching_site_licenses, manager_site_licenses} = require('./postgres/site-license/search')
 {project_datastore_set, project_datastore_get, project_datastore_del} = require('./postgres/project-queries')
-{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects} = require('./postgres/delete-projects')
+{permanently_unlink_all_deleted_projects_of_user, unlink_old_deleted_projects, cleanup_old_projects_data} = require('./postgres/delete-projects')
 {get_all_public_paths, unlist_all_public_paths} = require('./postgres/public-paths')
 {get_personal_user} = require('./postgres/personal')
 {set_passport_settings, get_passport_settings, get_all_passport_settings, get_all_passport_settings_cached, create_passport, passport_exists, update_account_and_passport, _passport_key} = require('./postgres/passport')
@@ -2453,6 +2453,10 @@ exports.extend_PostgreSQL = (ext) -> class PostgreSQL extends ext
     unlink_old_deleted_projects: () =>
         return await unlink_old_deleted_projects(@)
 
+    # async function
+    cleanup_old_projects_data: (max_run_m) =>
+        return await cleanup_old_projects_data(@, max_run_m)
+
     # async function
     unlist_all_public_paths: (account_id, is_owner) =>
         return await unlist_all_public_paths(@, account_id, is_owner)
diff --git a/src/packages/database/postgres/bulk-delete.test.ts b/src/packages/database/postgres/bulk-delete.test.ts
new file mode 100644
index 00000000000..e06f3b4a2f3
--- /dev/null
+++ b/src/packages/database/postgres/bulk-delete.test.ts
@@ -0,0 +1,70 @@
+/*
+ * This file is part of CoCalc: Copyright © 2024 Sagemath, Inc.
+ * License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
+ */
+
+import getPool, { initEphemeralDatabase } from "@cocalc/database/pool";
+import { uuid } from "@cocalc/util/misc";
+import { bulkDelete } from "./bulk-delete";
+
+beforeAll(async () => {
+  await initEphemeralDatabase({});
+}, 15000);
+
+afterAll(async () => {
+  await getPool().end();
+});
+
+describe("bulk delete", () => {
+  test("deleting projects", async () => {
+    const p = getPool();
+    const project_id = uuid();
+    const N = 100000;
+
+    // extra entry, which has to remain
+    const other = uuid();
+    await p.query(
+      "INSERT INTO project_log (id, project_id, time) VALUES($1::UUID, $2::UUID, $3::TIMESTAMP)",
+      [other, uuid(), new Date()],
+    );
+
+    await p.query(
+      `INSERT INTO project_log (id, project_id, time)
+       SELECT gen_random_uuid(), $1::UUID, NOW() - interval '1 second' * g.n
+       FROM generate_series(1, $2) AS g(n)`,
+      [project_id, N],
+    );
+
+    const num1 = await p.query(
+      "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
+      [project_id],
+    );
+    expect(num1.rows[0].num).toEqual(N);
+
+    const res = await bulkDelete({
+      table: "project_log",
+      field: "project_id",
+      value: project_id,
+    });
+
+    // if this ever fails, the "ret.rowCount" value is inaccurate.
+    // It must then be replaced by "RETURNING 1" in the query plus a "SELECT COUNT(*) ..." and so on
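+    // A sketch of that more robust counting variant (illustrative only, not used here):
+    //   WITH del AS (DELETE FROM project_log WHERE id IN (SELECT id FROM project_log WHERE project_id = $1 LIMIT $2) RETURNING 1)
+    //   SELECT COUNT(*) AS num FROM del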
+    // (and not only here, but everywhere in the code base)
+    expect(res.rowsDeleted).toEqual(N);
+    expect(res.durationS).toBeGreaterThan(0.1);
+    expect(res.totalPgTimeS).toBeGreaterThan(0.1);
+    expect(res.totalWaitS).toBeGreaterThan(0.1);
+    expect((res.totalPgTimeS * 10) / res.totalWaitS).toBeGreaterThan(0.5);
+
+    const num2 = await p.query(
+      "SELECT COUNT(*)::INT as num FROM project_log WHERE project_id = $1",
+      [project_id],
+    );
+    expect(num2.rows[0].num).toEqual(0);
+
+    const otherRes = await p.query("SELECT * FROM project_log WHERE id = $1", [
+      other,
+    ]);
+    expect(otherRes.rows[0].id).toEqual(other);
+  }, 10000);
+});
diff --git a/src/packages/database/postgres/bulk-delete.ts b/src/packages/database/postgres/bulk-delete.ts
new file mode 100644
index 00000000000..04519e5b2f0
--- /dev/null
+++ b/src/packages/database/postgres/bulk-delete.ts
@@ -0,0 +1,98 @@
+import { escapeIdentifier } from "pg";
+
+import getLogger from "@cocalc/backend/logger";
+import { envToInt } from "@cocalc/backend/misc/env-to-number";
+import getPool from "@cocalc/database/pool";
+import { SCHEMA } from "@cocalc/util/schema";
+
+const log = getLogger("db:bulk-delete");
+const D = log.debug;
+
+type Field =
+  | "project_id"
+  | "account_id"
+  | "target_project_id"
+  | "source_project_id";
+
+const MAX_UTIL_PCT = envToInt("COCALC_DB_BULK_DELETE_MAX_UTIL_PCT", 10);
+// adjust the time limits: by default, we aim to keep each delete operation between 0.05 and 0.1 secs
+const MAX_TIME_TARGET_MS = envToInt(
+  "COCALC_DB_BULK_DELETE_MAX_TIME_TARGET_MS",
+  100,
+);
+const MAX_TARGET_S = MAX_TIME_TARGET_MS / 1000;
+const MIN_TARGET_S = MAX_TARGET_S / 2;
+const DEFAULT_LIMIT = envToInt("COCALC_DB_BULK_DELETE_DEFAULT_LIMIT", 16);
+const MAX_LIMIT = envToInt("COCALC_DB_BULK_DELETE_MAX_LIMIT", 32768);
+
+interface Opts {
+  table: string; // e.g. project_log, etc.
+  field: Field; // for now, we only support a few
+  id?: string; // default "id", the ID field in the table, which identifies each row uniquely
+  value: string; // a UUID
+  limit?: number; // default DEFAULT_LIMIT (16)
+  maxUtilPct?: number; // 0-100, percent
+}
+
+type Ret = Promise<{
+  rowsDeleted: number;
+  durationS: number;
+  totalWaitS: number;
+  totalPgTimeS: number;
+}>;
+
+function deleteQuery(table: string, field: string, id: string) {
+  const T = escapeIdentifier(table);
+  const F = escapeIdentifier(field);
+  const ID = escapeIdentifier(id);
+
+  return `
+DELETE FROM ${T}
+WHERE ${ID} IN (
+  SELECT ${ID} FROM ${T} WHERE ${F} = $1 LIMIT $2
+)`;
+}
+
+export async function bulkDelete(opts: Opts): Ret {
+  const { table, field, value, id = "id", maxUtilPct = MAX_UTIL_PCT } = opts;
+  let { limit = DEFAULT_LIMIT } = opts;
+  // assert table name is a key in SCHEMA
+  if (!(table in SCHEMA)) {
+    throw new Error(`table ${table} does not exist`);
+  }
+
+  if (maxUtilPct < 1 || maxUtilPct > 99) {
+    throw new Error(`maxUtilPct must be between 1 and 99`);
+  }
+
+  const q = deleteQuery(table, field, id);
+  const pool = getPool();
+  const start_ts = Date.now();
+
+  let rowsDeleted = 0;
+  let totalWaitS = 0;
+  let totalPgTimeS = 0;
+  while (true) {
+    const t0 = Date.now();
+    const ret = await pool.query(q, [value, limit]);
+    const dt = (Date.now() - t0) / 1000;
+    rowsDeleted += ret.rowCount ?? 0;
+    totalPgTimeS += dt;
+
+    // grow or shrink the batch size so that each delete lands between MIN_TARGET_S and MAX_TARGET_S
+    const next =
+      dt > MAX_TARGET_S ? limit / 2 : dt < MIN_TARGET_S ? limit * 2 : limit;
+    limit = Math.max(1, Math.min(MAX_LIMIT, Math.round(next)));
+
+    // wait for a bit, but not more than 1 second: by waiting about (100 - maxUtilPct) / maxUtilPct
+    // times as long as the delete took, we keep database utilization near maxUtilPct (default 10%)
+    const waitS = Math.min(1, dt * ((100 - maxUtilPct) / maxUtilPct));
+    await new Promise((done) => setTimeout(done, 1000 * waitS));
+    totalWaitS += waitS;
+
+    D(`deleted ${ret.rowCount} | dt=${dt} | wait=${waitS} | limit=${limit}`);
+
+    if (ret.rowCount === 0) break;
+  }
+
+  const durationS = (Date.now() - start_ts) / 1000;
+  return { durationS, rowsDeleted, totalWaitS, totalPgTimeS };
+}
diff --git a/src/packages/database/postgres/delete-projects.ts b/src/packages/database/postgres/delete-projects.ts
index fa125fb1432..e97bfb45809 100644
--- a/src/packages/database/postgres/delete-projects.ts
+++ b/src/packages/database/postgres/delete-projects.ts
@@ -7,9 +7,31 @@
 Code related to permanently deleting projects.
 */
 
+import { promises as fs } from "node:fs";
+
+import { pathToFiles } from "@cocalc/backend/files/path-to-files";
+import getLogger, { WinstonLogger } from "@cocalc/backend/logger";
+import { newCounter } from "@cocalc/backend/metrics";
+import { homePath } from "@cocalc/backend/misc";
+import getPool from "@cocalc/database/pool";
+import { getServerSettings } from "@cocalc/database/settings";
 import { callback2 } from "@cocalc/util/async-utils";
+import { KUCALC_ON_PREMISES } from "@cocalc/util/db-schema/site-defaults";
+import { minutes_ago } from "@cocalc/util/misc";
+import { bulkDelete } from "./bulk-delete";
 import { PostgreSQL } from "./types";
 
+const { F_OK, R_OK, W_OK } = fs.constants;
+
+const log = getLogger("db:delete-projects");
+
+const delete_projects_prom = newCounter(
+  "database",
+  "delete_projects_total",
+  "Deleting projects and associated data operations counter.",
+  ["op"],
+);
+
 /*
 Permanently delete from the database all project records, where the
 project is explicitly deleted already (so the deleted field is true).
@@ -17,10 +39,12 @@ Call this function to setup projects for permanent deletion.  This blanks
 the user field so the user no longer can access the project, and we don't
 know that the user had anything to do with the project.  A separate phase
 later then purges these projects from disk as well as the database.
+
+TODO: it's referenced from postgres-server-queries.coffee, but is it actually used anywhere?
 */
 export async function permanently_unlink_all_deleted_projects_of_user(
   db: PostgreSQL,
-  account_id_or_email_address: string
+  account_id_or_email_address: string,
 ): Promise<void> {
   // Get the account_id if necessary.
   const account_id = await get_account_id(db, account_id_or_email_address);
@@ -36,7 +60,7 @@ export async function permanently_unlink_all_deleted_projects_of_user(
 
 async function get_account_id(
   db: PostgreSQL,
-  account_id_or_email_address: string
+  account_id_or_email_address: string,
 ): Promise<string> {
   if (account_id_or_email_address.indexOf("@") == -1) {
     return account_id_or_email_address;
@@ -52,20 +76,252 @@ async function get_account_id(
 }
 
 /*
-This deletes all projects older than the given number of days, from the perspective of a user.
-Another task has to run to actually get rid of the data, etc.
+This removes all users from all projects older than the given number of days and marked as deleted.
+In particular, users are no longer able to access those projects.
+The "cleanup_old_projects_data" function has to run to actually get rid of the data, etc.
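+
+Illustrative sequence (a sketch only; `db` is a connected PostgreSQL instance):
+
+    await unlink_old_deleted_projects(db, 30); // blank the users of projects deleted >30 days ago
+    await cleanup_old_projects_data(db, 60);   // then purge their data, running for at most 60 minutes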
 */
 export async function unlink_old_deleted_projects(
   db: PostgreSQL,
-  age_d = 30
+  age_d = 30,
 ): Promise<void> {
-  await callback2(db._query, {
+  const L = log.extend("unlink_old_deleted_projects").debug;
+  const { rowCount } = await callback2(db._query, {
     query: "UPDATE projects",
     set: { users: null },
     where: [
-      "deleted = true",
+      "deleted = true",
+      "users IS NOT NULL",
       `last_edited <= NOW() - '${age_d} days'::INTERVAL`,
     ],
   });
+  L("unlinked projects:", rowCount);
+}
+
+const Q_CLEANUP_SYNCSTRINGS = `
+SELECT s.string_id, p.project_id
+FROM projects as p INNER JOIN syncstrings as s
+  ON p.project_id = s.project_id
+WHERE p.deleted = true
+  AND p.users IS NULL
+ORDER BY
+  p.project_id, s.string_id
+LIMIT 1000
+`;
+
+const Q_CLEANUP_PROJECTS = `
+SELECT project_id
+FROM projects
+WHERE deleted = true
+  AND users IS NULL
+  AND coalesce(state ->> 'state', '') != 'deleted'
+ORDER BY created ASC
+LIMIT 1000
+`;
+
+/*
+ This more thorough delete procedure comes after the above.
+ It issues actual delete operations on the data of projects marked as deleted.
+ When done, it sets the state.state to "deleted".
+
+ The operation involves deleting all syncstrings of that project (and, associated with that, patches),
+ and, for on-prem setups only, it also deletes all the data stored in the project on disk and in various tables.
+
+ This function is called every couple of hours. Hence it checks that it does not run longer than the
+ given max_run_m time (minutes).
+*/
+export async function cleanup_old_projects_data(
+  db: PostgreSQL,
+  max_run_m = 60,
+) {
+  const settings = await getServerSettings();
+  const on_prem = settings.kucalc === KUCALC_ON_PREMISES;
+  const delete_data = settings.delete_project_data;
+  const L0 = log.extend("cleanup_old_projects_data");
+  const L = L0.debug;
+
+  L("args", { max_run_m, on_prem, delete_data });
+
+  if (!delete_data) {
+    L(`deleting project data is disabled ('delete_project_data' setting).`);
+    return;
+  }
+
+  const start_ts = new Date();
+  const pool = getPool();
+
+  let numSyncStr = 0;
+  let numProj = 0;
+
+  while (true) {
+    if (start_ts < minutes_ago(max_run_m)) {
+      L(`too much time elapsed, breaking after ${numSyncStr} syncstrings`);
+      return;
+    }
+
+    const { rows: syncstrings } = await pool.query(Q_CLEANUP_SYNCSTRINGS);
+    L(`deleting ${syncstrings.length} syncstrings`);
+    for (const { project_id, string_id } of syncstrings) {
+      L(`deleting syncstring ${project_id}/${string_id}`);
+      numSyncStr += 1;
+      const t0 = Date.now();
+      await callback2(db.delete_syncstring, { string_id });
+      const elapsed_ms = Date.now() - t0;
+      delete_projects_prom.labels("syncstring").inc();
+      // wait a bit after deleting syncstrings, e.g. to let the standby db catch up:
+      // waiting 9x as long as the delete took (but at most 1 second) caps the
+      // database utilization of this loop at roughly 10%
+      await new Promise((done) =>
+        setTimeout(done, Math.min(1000, elapsed_ms * 9)),
+      );
+    }
+
+    const { rows: projects } = await pool.query(Q_CLEANUP_PROJECTS);
+    L(`deleting the data of ${projects.length} projects`);
+    for (const { project_id } of projects) {
+      const L2 = L0.extend(project_id).debug;
+      delete_projects_prom.labels("project").inc();
+      numProj += 1;
+      let delRows = 0;
+
+      // Clean up data *on* a given project: remove site license, status, last_active, and run_quota.
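+      // Note: the projects row itself is kept (and marked as state.state = 'deleted' below),
+      // presumably so the fact and time of the deletion remain traceable in the database.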
+      await pool.query(
+        `UPDATE projects
+            SET site_license = NULL, status = NULL, last_active = NULL, run_quota = NULL
+            WHERE project_id = $1`,
+        [project_id],
+      );
+
+      if (on_prem) {
+        // we don't delete the central_log, since it has its own expiration and
+        // such an entry is good to have for reconstructing what really happened
+        db.log({
+          event: "delete_project",
+          value: { deleting: "files", project_id },
+        });
+
+        L2(`deleting all project files`);
+        await deleteProjectFiles(L2, project_id);
+
+        try {
+          // this is something like /shared/projects/${project_id}
+          const shared_path = pathToFiles(project_id, "");
+          L2(`deleting all shared files in ${shared_path}`);
+          await fs.rm(shared_path, { recursive: true, force: true });
+        } catch (err) {
+          L2(`Unable to delete shared files: ${err}`);
+        }
+      }
+
+      // This gets rid of all sorts of data in tables specific to the given project.
+      delRows += await delete_associated_project_data(L2, project_id);
+      db.log({
+        event: "delete_project",
+        value: { deleting: "database", project_id },
+      });
+
+      // Now that we're done with that project, mark it as state.state ->> 'deleted',
+      // in addition to the flag "deleted = true". This also updates the state.time timestamp.
+      await callback2(db.set_project_state, { project_id, state: "deleted" });
+      L2(
+        `finished deleting project data | deleted ${delRows} entries | state.state="deleted"`,
+      );
+    }
+
+    if (projects.length === 0 && syncstrings.length === 0) {
+      L(`all data of deleted projects and associated syncstrings are deleted.`);
+      L(
+        `In total, ${numSyncStr} syncstrings and ${numProj} projects were processed.`,
+      );
+      return;
+    }
+  }
+}
+
+async function delete_associated_project_data(
+  L2: WinstonLogger["debug"],
+  project_id: string,
+): Promise<number> {
+  // TODO: two tables reference a project, but their entries become useless after deletion.
+  // There should be a fallback strategy to move these objects to another project or surface them as being orphaned.
+  // tables: cloud_filesystems, compute_servers
+
+  let total = 0;
+  // collecting tables, where the primary key is the default (i.e. "id") and
+  // the field to check is always called "project_id"
+  const tables = [
+    //"blobs", // TODO: this is a bit tricky, because data could be used elsewhere. In the future, there will be an associated account_id!
+    "file_access_log",
+    "file_use",
+    "jupyter_api_log",
+    "mentions",
+    "openai_chatgpt_log",
+    "project_log",
+    "public_paths",
+    "shopping_cart_items",
+  ] as const;
+
+  for (const table of tables) {
+    const { rowsDeleted } = await bulkDelete({
+      table,
+      field: "project_id",
+      value: project_id,
+    });
+    total += rowsDeleted;
+    L2(`deleted in ${table}: ${rowsDeleted} entries`);
+  }
+
+  // these tables are different: either the ID column has another name, or the
+  // field to check the project_id value against is called differently
+
+  for (const field of ["target_project_id", "source_project_id"] as const) {
+    const { rowsDeleted } = await bulkDelete({
+      table: "copy_paths",
+      field,
+      value: project_id,
+    });
+    total += rowsDeleted;
+    L2(`deleted copy_paths/${field}: ${rowsDeleted} entries`);
+  }
+
+  {
+    const { rowsDeleted } = await bulkDelete({
+      table: "listings",
+      field: "project_id",
+      id: "project_id", // TODO: listings has a more complex primary key, which means this gets rid of everything in one go. Should be fine, though.
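+      // (with id set to the same column as field, the IN-subquery selects the constant
+      // project_id, so the outer DELETE removes every row of this project in a single
+      // batch, regardless of LIMIT)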
+ value: project_id, + }); + total += rowsDeleted; + L2(`deleted in listings: ${rowsDeleted} entries`); + } + + { + const { rowsDeleted } = await bulkDelete({ + table: "project_invite_tokens", + field: "project_id", + value: project_id, + id: "token", + }); + total += rowsDeleted; + L2(`deleted in project_invite_tokens: ${rowsDeleted} entries`); + } + + return total; +} + +async function deleteProjectFiles( + L2: WinstonLogger["debug"], + project_id: string, +) { + const project_dir = homePath(project_id); + try { + await fs.access(project_dir, F_OK | R_OK | W_OK); + const stats = await fs.lstat(project_dir); + if (stats.isDirectory()) { + L2(`deleting all files in ${project_dir}`); + await fs.rm(project_dir, { recursive: true, force: true }); + } else { + L2(`is not a directory: ${project_dir}`); + } + } catch (err) { + L2( + `not deleting project files: either '${project_dir}' does not exist or is not accessible`, + ); + } } diff --git a/src/packages/database/postgres/site-license/hook.test.ts b/src/packages/database/postgres/site-license/hook.test.ts index f7bfbddbd69..4cb7ee5a451 100644 --- a/src/packages/database/postgres/site-license/hook.test.ts +++ b/src/packages/database/postgres/site-license/hook.test.ts @@ -17,8 +17,6 @@ pnpm test hook.test.ts * The quota function uses a deep copy operation on all its arguments to avoid this. */ -// see packages/database/pool/pool.ts for where this name is also hard coded: -process.env.PGDATABASE = "smc_ephemeral_testing_database"; import { isEqual } from "lodash"; diff --git a/src/packages/database/postgres/types.ts b/src/packages/database/postgres/types.ts index 939a4b81cff..09943972a52 100644 --- a/src/packages/database/postgres/types.ts +++ b/src/packages/database/postgres/types.ts @@ -310,6 +310,8 @@ export interface PostgreSQL extends EventEmitter { cb?: CB; }); + delete_syncstring(opts: { string_id: string; cb: CB }); + projects_that_need_to_be_started(): Promise; is_connected(): boolean; @@ -322,6 +324,8 @@ export interface PostgreSQL extends EventEmitter { }>; }): Promise; + set_project_state(opts: { project_id: string; state: ProjectState["state"] }); + user_query_cancel_changefeed(opts: { id: any; cb?: CB }): void; save_blob(opts: { diff --git a/src/packages/database/test/setup.js b/src/packages/database/test/setup.js index 97701164bd7..ee2e6cce0d5 100644 --- a/src/packages/database/test/setup.js +++ b/src/packages/database/test/setup.js @@ -2,3 +2,6 @@ // see packages/database/pool/pool.ts for where this name is also hard coded: process.env.PGDATABASE = "smc_ephemeral_testing_database"; + +// checked for in some code to behave differently while running unit tests. +process.env.COCALC_TEST_MODE = true; diff --git a/src/packages/frontend/i18n/README.md b/src/packages/frontend/i18n/README.md index 2bd6a0ae431..431bcedc688 100644 --- a/src/packages/frontend/i18n/README.md +++ b/src/packages/frontend/i18n/README.md @@ -41,15 +41,17 @@ After introducing new messages, these are the steps to get all translations into 1. `pnpm i18n:download` - Will grab the updated files like `zh_CN.json` and save them in the `i18n` folder. + Will grab the updated files containing the translated strings (e.g. `zh_CN.json`) and save them in the `./i18n/trans/` folder. + The source of truth for these files is always the remotely stored data – hence do not ever edit these files directly. 1. `pnpm i18n:compile` - This transforms the `[locale].json` files to `[locale].compiles.json`. 
+   This transforms the `[locale].json` translation files from the step before to `[locale].compiled.json`.
    This could also reveal problems, when conditional ICU messages aren't properly formatted.
-   E.g. `"Sí, cerrar sesión{en todas partes, seleccionar, verdadero { en todas partes} otro {}}" with ID "account.sign-out.button.ok" in file "./i18n/es_ES.json"`: In the brackets, it has to start according to the syntax: `{everywhere, select, true {..} other {}}`.
+   E.g. `"Sí, cerrar sesión{en todas partes, seleccionar, verdadero { en todas partes} otro {}}" with ID "account.sign-out.button.ok" in file "./i18n/es_ES.json"`:
+   In the brackets, it has to start according to the syntax: `{everywhere, select, true {..} other {}}`, i.e. the variable `everywhere` must stay in English.
 
-1. Reload the `frontend` after a compile, such that `await import...` will load the updated translation file for the set locale.
+1. Reload the `frontend` after a compile, such that `await import...` will load the updated compiled translation file for the configured locale.
 
 Note: if just a translation has been updated, you only need to do the `i18n:download` & `i18n:compile` steps.
diff --git a/src/packages/frontend/project/warnings/deleted.tsx b/src/packages/frontend/project/warnings/deleted.tsx
index 1a7a9ecf04e..a7d119518f7 100644
--- a/src/packages/frontend/project/warnings/deleted.tsx
+++ b/src/packages/frontend/project/warnings/deleted.tsx
@@ -3,21 +3,25 @@
  * License: MS-RSL – see LICENSE.md for details
  */
 
-import { Alert } from "../../antd-bootstrap";
-import { Icon } from "../../components";
+import { FormattedMessage } from "react-intl";
+
+import { Alert } from "@cocalc/frontend/antd-bootstrap";
+import { Icon } from "@cocalc/frontend/components";
 
 // A warning to put on pages when the project is deleted
 export const DeletedProjectWarning: React.FC = () => {
   return (
     <Alert bsStyle="danger" style={{ marginTop: "10px" }}>
-      <h4>
-        <Icon name="exclamation-triangle" /> Warning: this project is{" "}
-        <strong>deleted!</strong>
-      </h4>
-      <p>
-        If you intend to use this project, you should{" "}
-        <strong>undelete it</strong> in project settings.
-      </p>
+      <FormattedMessage
+        id="project.warnings.deleted.message"
+        defaultMessage={`<h4>{icon} Warning: this project is <strong>deleted!</strong></h4>
+        If you intend to use this project, you should <strong>undelete it</strong> in project settings.`}
+        values={{
+          icon: <Icon name="exclamation-triangle" />,
+          strong: (c) => <strong>{c}</strong>,
+          h4: (c) => (
+            <h4>
+              {c}
+            </h4>
+          ),
+        }}
+      />
     </Alert>
   );
 };
diff --git a/src/packages/hub/run/delete-projects.js b/src/packages/hub/run/delete-projects.js
index 50f0b59d761..df7071b2ce6 100755
--- a/src/packages/hub/run/delete-projects.js
+++ b/src/packages/hub/run/delete-projects.js
@@ -1,9 +1,14 @@
 #!/usr/bin/env node
+
 /*
 Periodically delete projects.
 
-TODO: For now, this just calls the unlink function. Later on it
-should do more (actually delete data, etc.).
+STATUS:
+For now, this just calls the unlink function and deletes all associated syncstrings and data.
+In "onprem" mode, this also deletes entries in various tables, which contain data specific to the deleted projects.
+
+TESTING: to run this in development and see logging, call it like this:
+./src/packages/hub$ env DEBUG_CONSOLE=yes DEBUG=cocalc:debug:db:* pnpm cocalc-hub-delete-projects
 */
 
 import * as postgres from "@cocalc/database";
@@ -16,6 +21,9 @@ async function update() {
   console.log("unlinking old deleted projects...");
   try {
     await db.unlink_old_deleted_projects();
+    // limit the max runtime to half the interval time
+    const max_run_m = (INTERVAL_MS / 2) / (1000 * 60);
+    await db.cleanup_old_projects_data(max_run_m);
   } catch (err) {
     if (err !== null) {
       throw Error(`failed to unlink projects -- ${err}`);
diff --git a/src/packages/next/lib/share/get-contents.ts b/src/packages/next/lib/share/get-contents.ts
index 002a09b10a2..47470f047d8 100644
--- a/src/packages/next/lib/share/get-contents.ts
+++ b/src/packages/next/lib/share/get-contents.ts
@@ -3,10 +3,11 @@
  * License: MS-RSL – see LICENSE.md for details
  */
 
-import pathToFiles from "./path-to-files";
 import { promises as fs } from "fs";
-import { join } from "path";
 import { sortBy } from "lodash";
+import { join } from "path";
+
+import { pathToFiles } from "@cocalc/backend/files/path-to-files";
 import { hasSpecialViewer } from "@cocalc/frontend/file-extensions";
 import { getExtension } from "./util";
 
@@ -22,7 +23,7 @@ const LISTED_LIMITS = {
   html: 3 * MB,
   // no special viewer
   generic: 2 * MB,
-};
+} as const;
 
 const UNLISTED_LIMITS = {
   ...LISTED_LIMITS,
diff --git a/src/packages/next/lib/share/path-to-files.ts b/src/packages/next/lib/share/path-to-files.ts
index 943d5390384..83c2d6c5dc6 100644
--- a/src/packages/next/lib/share/path-to-files.ts
+++ b/src/packages/next/lib/share/path-to-files.ts
@@ -3,24 +3,17 @@
  * License: MS-RSL – see LICENSE.md for details
  */
 
-import { join } from "path";
+import { pathToFiles } from "@cocalc/backend/files/path-to-files";
 import getPool from "@cocalc/database/pool";
-import { projects } from "@cocalc/backend/data";
-
-// Given a project_id/path, return the directory on the file system where
-// that path should be located.
-export default function pathToFiles(project_id: string, path: string): string { - return join(projects.replace("[project_id]", project_id), path); -} export async function pathFromID( - id: string + id: string, ): Promise<{ projectPath: string; fsPath: string }> { // 'infinite' since actually result can't change since id determines the path (it's a reverse sha1 hash computation) const pool = getPool("infinite"); const { rows } = await pool.query( "SELECT project_id, path FROM public_paths WHERE id=$1 AND disabled IS NOT TRUE", - [id] + [id], ); if (rows.length == 0) { throw Error(`no such public path: ${id}`); diff --git a/src/packages/next/lib/share/virtual-hosts.ts b/src/packages/next/lib/share/virtual-hosts.ts index f5d6f01fe9b..bc19cf137e4 100644 --- a/src/packages/next/lib/share/virtual-hosts.ts +++ b/src/packages/next/lib/share/virtual-hosts.ts @@ -10,11 +10,11 @@ Support for virtual hosts. import type { Request, Response } from "express"; import basePath from "@cocalc/backend/base-path"; +import { pathToFiles } from "@cocalc/backend/files/path-to-files"; import { getLogger } from "@cocalc/backend/logger"; import isAuthenticated from "./authenticate"; import getVirtualHostInfo from "./get-vhost-info"; import { staticHandler } from "./handle-raw"; -import pathToFiles from "./path-to-files"; const logger = getLogger("virtual-hosts"); diff --git a/src/packages/next/package.json b/src/packages/next/package.json index 53e18c0b60d..ff9de1e7c19 100644 --- a/src/packages/next/package.json +++ b/src/packages/next/package.json @@ -65,6 +65,7 @@ "@cocalc/frontend": "workspace:*", "@cocalc/server": "workspace:*", "@cocalc/util": "workspace:*", + "@types/pg": "^8.11.10", "@openapitools/openapi-generator-cli": "^2.19.1", "@vscode/vscode-languagedetection": "^1.0.22", "antd": "^5.24.7", @@ -82,7 +83,7 @@ "next-rest-framework": "6.0.0-beta.4", "next-translate": "^2.6.2", "password-hash": "^1.2.2", - "pg": "^8.7.1", + "pg": "^8.16.3", "react": "^19.1.0", "react-dom": "^19.1.0", "react-google-recaptcha-v3": "^1.9.7", diff --git a/src/packages/project/project-status/server.ts b/src/packages/project/project-status/server.ts index c0b6e98832a..092b8f0fa50 100644 --- a/src/packages/project/project-status/server.ts +++ b/src/packages/project/project-status/server.ts @@ -15,14 +15,10 @@ status updates. Hence in particular, information like cpu, memory and disk are smoothed out and throttled. 
*/ -import { getLogger } from "@cocalc/project/logger"; -import { how_long_ago_m, round1 } from "@cocalc/util/misc"; -import { version as smcVersion } from "@cocalc/util/smc-version"; import { delay } from "awaiting"; import { EventEmitter } from "events"; import { isEqual } from "lodash"; -import { get_ProjectInfoServer, ProjectInfoServer } from "../project-info"; -import { ProjectInfo } from "@cocalc/util/types/project-info/types"; + import { ALERT_DISK_FREE, ALERT_HIGH_PCT /* ALERT_MEDIUM_PCT */, @@ -38,6 +34,11 @@ import { import { cgroup_stats } from "@cocalc/comm/project-status/utils"; import { createPublisher } from "@cocalc/conat/project/project-status"; import { compute_server_id, project_id } from "@cocalc/project/data"; +import { getLogger } from "@cocalc/project/logger"; +import { how_long_ago_m, round1 } from "@cocalc/util/misc"; +import { version as smcVersion } from "@cocalc/util/smc-version"; +import { ProjectInfo } from "@cocalc/util/types/project-info/types"; +import { get_ProjectInfoServer, ProjectInfoServer } from "../project-info"; // TODO: only return the "next" value, if it is significantly different from "prev" //function threshold(prev?: number, next?: number): number | undefined { @@ -85,7 +86,7 @@ export class ProjectStatusServer extends EventEmitter { constructor(testing = false) { super(); this.testing = testing; - this.dbg = (...msg) => logger.debug(...msg); + this.dbg = (...msg) => logger.debug(msg[0], ...msg.slice(1)); this.project_info = get_ProjectInfoServer(); } diff --git a/src/packages/project/usage-info.ts b/src/packages/project/usage-info.ts index 4f99ff34ba6..fa5e067c79b 100644 --- a/src/packages/project/usage-info.ts +++ b/src/packages/project/usage-info.ts @@ -14,6 +14,7 @@ It is made available via a service in @cocalc/conat/project/usage-info. */ import { EventEmitter } from "node:events"; + import { getLogger } from "@cocalc/project/logger"; import { ProjectInfoServer, diff --git a/src/packages/server/projects/control/multi-user.ts b/src/packages/server/projects/control/multi-user.ts index 78b6592eeb8..35084a403b8 100644 --- a/src/packages/server/projects/control/multi-user.ts +++ b/src/packages/server/projects/control/multi-user.ts @@ -16,6 +16,19 @@ This code is very similar to single-user.ts, except with some small modifications due to having to create and delete Linux users. 
*/ +import getLogger from "@cocalc/backend/logger"; +import { getUid, homePath } from "@cocalc/backend/misc"; +import { + BaseProject, + CopyOptions, + getProject, + ProjectState, + ProjectStatus, +} from "./base"; +import { + deleteProjectSecretToken, + getProjectSecretToken, +} from "./secret-token"; import { chown, copyPath, @@ -25,7 +38,6 @@ import { getEnvironment, getState, getStatus, - homePath, isProjectRunning, launchProjectDaemon, mkdir, @@ -33,19 +45,6 @@ import { stopProjectProcesses, writeSecretToken, } from "./util"; -import { - BaseProject, - CopyOptions, - getProject, - ProjectStatus, - ProjectState, -} from "./base"; -import getLogger from "@cocalc/backend/logger"; -import { getUid } from "@cocalc/backend/misc"; -import { - deleteProjectSecretToken, - getProjectSecretToken, -} from "./secret-token"; const winston = getLogger("project-control:multi-user"); diff --git a/src/packages/server/projects/control/single-user.ts b/src/packages/server/projects/control/single-user.ts index 64103e9e909..b53fd22c0c9 100644 --- a/src/packages/server/projects/control/single-user.ts +++ b/src/packages/server/projects/control/single-user.ts @@ -29,6 +29,7 @@ Type ".help" for more information. import { kill } from "node:process"; import getLogger from "@cocalc/backend/logger"; +import { homePath } from "@cocalc/backend/misc"; import { BaseProject, CopyOptions, @@ -43,7 +44,6 @@ import { getProjectPID, getState, getStatus, - homePath, isProjectRunning, launchProjectDaemon, mkdir, diff --git a/src/packages/server/projects/control/util.ts b/src/packages/server/projects/control/util.ts index 80f4fb0ec4f..020301fe1ce 100644 --- a/src/packages/server/projects/control/util.ts +++ b/src/packages/server/projects/control/util.ts @@ -1,22 +1,22 @@ -import { promisify } from "util"; -import { dirname, join, resolve } from "path"; -import { exec as exec0, spawn } from "child_process"; import spawnAsync from "await-spawn"; -import * as fs from "fs"; import { writeFile } from "fs/promises"; -import { projects, root } from "@cocalc/backend/data"; -import { is_valid_uuid_string } from "@cocalc/util/misc"; -import { callback2 } from "@cocalc/util/async-utils"; -import getLogger from "@cocalc/backend/logger"; -import { CopyOptions, ProjectState, ProjectStatus } from "./base"; -import { getUid } from "@cocalc/backend/misc"; +import { exec as exec0, spawn } from "node:child_process"; +import * as fs from "node:fs"; +import { dirname, join, resolve } from "node:path"; +import { promisify } from "node:util"; + import base_path from "@cocalc/backend/base-path"; -import { db } from "@cocalc/database"; -import { getProject } from "."; -import { conatServer } from "@cocalc/backend/data"; -import { pidFilename } from "@cocalc/util/project-info"; +import { conatServer, root } from "@cocalc/backend/data"; import { executeCode } from "@cocalc/backend/execute-code"; +import getLogger from "@cocalc/backend/logger"; +import { getUid, homePath } from "@cocalc/backend/misc"; import ensureContainingDirectoryExists from "@cocalc/backend/misc/ensure-containing-directory-exists"; +import { db } from "@cocalc/database"; +import { callback2 } from "@cocalc/util/async-utils"; +import { is_valid_uuid_string } from "@cocalc/util/misc"; +import { pidFilename } from "@cocalc/util/project-info"; +import { getProject } from "."; +import { CopyOptions, ProjectState, ProjectStatus } from "./base"; const logger = getLogger("project-control:util"); @@ -34,10 +34,6 @@ export function dataPath(HOME: string): string { return join(HOME, ".smc"); } 
-export function homePath(project_id: string): string { - return projects.replace("[project_id]", project_id); -} - export function getUsername(project_id: string): string { return project_id.split("-").join(""); } @@ -55,7 +51,7 @@ function pidFile(HOME: string): string { return join(dataPath(HOME), pidFilename); } -function parseDarwinTime(output:string) : number { +function parseDarwinTime(output: string): number { // output = '{ sec = 1747866131, usec = 180679 } Wed May 21 15:22:11 2025'; const match = output.match(/sec\s*=\s*(\d+)/); @@ -72,7 +68,10 @@ export async function bootTime(): Promise { if (!_bootTime) { if (process.platform === "darwin") { // uptime isn't available on macos. - const { stdout } = await executeCode({ command: "sysctl", args: ['-n', 'kern.boottime']}); + const { stdout } = await executeCode({ + command: "sysctl", + args: ["-n", "kern.boottime"], + }); _bootTime = parseDarwinTime(stdout); } else { const { stdout } = await executeCode({ command: "uptime", args: ["-s"] }); diff --git a/src/packages/server/shopping/cart/add.ts b/src/packages/server/shopping/cart/add.ts index eb4718ada3c..e5720b0e96d 100644 --- a/src/packages/server/shopping/cart/add.ts +++ b/src/packages/server/shopping/cart/add.ts @@ -15,12 +15,12 @@ any value to a spammer so it's very unlikely to be exploited maliciously. I did add throttling to the api handler. */ -import { isValidUUID } from "@cocalc/util/misc"; import getPool from "@cocalc/database/pool"; import { - ProductType, ProductDescription, + ProductType, } from "@cocalc/util/db-schema/shopping-cart-items"; +import { isValidUUID } from "@cocalc/util/misc"; import { getItem } from "./get"; import dayjs from "dayjs"; //import { getLogger } from "@cocalc/backend/logger"; diff --git a/src/packages/server/shopping/cart/recent-purchases.ts b/src/packages/server/shopping/cart/recent-purchases.ts index f89994d65d2..d0b3a7705f7 100644 --- a/src/packages/server/shopping/cart/recent-purchases.ts +++ b/src/packages/server/shopping/cart/recent-purchases.ts @@ -29,7 +29,7 @@ export default async function getRecentPurchases({ const pool = getPool(); const { rows } = await pool.query( `SELECT * FROM shopping_cart_items WHERE account_id=$1 AND purchased IS NOT NULL AND (purchased#>>'{time}')::timestamptz >= NOW() - $2::interval AND purchased#>>'{voucher_id}' IS NULL`, - [account_id, recent ?? "1 week"] + [account_id, recent ?? "1 week"], ); rows.sort((a, b) => -cmp(a.purchased?.time, b.purchased?.time)); return rows; diff --git a/src/packages/server/shopping/cart/remove.ts b/src/packages/server/shopping/cart/remove.ts index 231a423309a..d36c89f5f28 100644 --- a/src/packages/server/shopping/cart/remove.ts +++ b/src/packages/server/shopping/cart/remove.ts @@ -15,7 +15,7 @@ import getPool from "@cocalc/database/pool"; // You can't remove an item more than once from a cart. export default async function removeFromCart( account_id: string, - id: number + id: number, ): Promise { if (!isValidUUID(account_id)) { throw Error("account_id is invalid"); @@ -23,8 +23,7 @@ export default async function removeFromCart( const pool = getPool(); const { rowCount } = await pool.query( "UPDATE shopping_cart_items SET removed=NOW() WHERE account_id=$1 AND id=$2 AND removed IS NULL AND purchased IS NULL", - [account_id, id] + [account_id, id], ); return rowCount ?? 
0; } - diff --git a/src/packages/server/software-envs.ts b/src/packages/server/software-envs.ts index f0f0e1a6fb5..ee0a759d2bc 100644 --- a/src/packages/server/software-envs.ts +++ b/src/packages/server/software-envs.ts @@ -65,8 +65,7 @@ async function readConfig(purpose: Purpose): Promise { // parse the content of softwareFn as json try { const software = JSON.parse((await readFile(softwareFn)).toString()); - const dbg = (...msg) => L(...msg); - const sanitized = sanitizeSoftwareEnv({ software, registry, purpose }, dbg); + const sanitized = sanitizeSoftwareEnv({ software, registry, purpose }, L); return sanitized; } catch (err) { W(`WARNING: ${softwareFn} is not a valid JSON file -- ${err}`); diff --git a/src/packages/util/compute-states.ts b/src/packages/util/compute-states.ts index 947d00d9aa4..628ea2fb1a0 100644 --- a/src/packages/util/compute-states.ts +++ b/src/packages/util/compute-states.ts @@ -32,7 +32,8 @@ export type State = | "running" | "starting" | "stopping" - | "unarchiving"; + | "unarchiving" + | "deleted"; // @hsy: completely unclear what this is for. type Operation = @@ -300,4 +301,20 @@ export const COMPUTE_STATES: ComputeStates = { "migrate_live", ], }, + + // projects are deleted in hub -> postgres.delete-projects and this is a one-way operation + deleted: { + desc: defineMessage({ + id: "util.compute-states.deleted.desc", + defaultMessage: "Project is deleted", + }), + icon: "trash", + display: defineMessage({ + id: "util.compute-states.deleted.display", + defaultMessage: "Deleted", + }), + stable: true, + to: {}, + commands: [], + }, } as const; diff --git a/src/packages/util/db-schema/site-defaults.ts b/src/packages/util/db-schema/site-defaults.ts index 93ae9926e57..d16a3387f23 100644 --- a/src/packages/util/db-schema/site-defaults.ts +++ b/src/packages/util/db-schema/site-defaults.ts @@ -90,6 +90,7 @@ export type SiteSettingsKeys = | "unlicensed_project_timetravel_limit" | "google_analytics" | "kucalc" + | "delete_project_data" | "i18n" | "dns" | "datastore" @@ -733,6 +734,13 @@ export const site_settings_conf: SiteSettings = { to_val: split_iframe_comm_hosts, to_display: num_dns_hosts, }, + delete_project_data: { + name: "Delete Project Data", + desc: "When a project has been marked as deleted, also actually delete associated data from the database and – for OnPrem and single-user dev mode only – also its files.", + default: "no", + valid: only_booleans, + to_val: to_bool, + }, email_enabled: { name: "Email sending enabled", desc: "Controls visibility of UI elements and if any emails are sent. This is independent of any particular email configuration!",
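A minimal end-to-end sketch of how the pieces introduced in this diff compose (illustrative only; `PostgreSQL`, `unlink_old_deleted_projects`, `cleanup_old_projects_data` and `bulkDelete` are the items added above, and the periodic-driver shape mirrors the hub's delete-projects script):

    import { PostgreSQL } from "@cocalc/database/postgres/types";
    import {
      unlink_old_deleted_projects,
      cleanup_old_projects_data,
    } from "@cocalc/database/postgres/delete-projects";
    import { bulkDelete } from "@cocalc/database/postgres/bulk-delete";

    // Periodic maintenance pass over deleted projects:
    async function purgeDeletedProjects(db: PostgreSQL) {
      // phase 1: detach users from projects marked deleted more than 30 days ago
      await unlink_old_deleted_projects(db, 30);
      // phase 2: purge syncstrings, per-project table entries and (on-prem) files;
      // this is a no-op unless the "delete_project_data" site setting is enabled
      await cleanup_old_projects_data(db, 30);
    }

    // bulkDelete can also be used on its own, e.g. to drop one project's log entries
    // in adaptively sized batches while keeping database utilization low:
    //   const { rowsDeleted } = await bulkDelete({
    //     table: "project_log",
    //     field: "project_id",
    //     value: project_id,
    //   });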