diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 607501ba0..7c0aaec13 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,28 +55,20 @@ These are the steps @Ethan-Arrowood has been following to synchronize the reposi > > # Only fetch `main` branch > git config remote.old.fetch '+refs/heads/main:refs/remotes/old/main' -> -> git fetch old > ``` -1. Ensure we have the latest commits from the old repository with `git fetch old` -2. Make sure local `main` branch is up to date `git pull origin main` -3. Create a new branch for the synchronization `git checkout -b sync-mmddyyyy` -4. Create list of commits using `git rev-list --reverse --first-parent $(start-commit)..old/main > commits-to-pick.txt` - - The start-commit will be excluded so always include the last commit from `old/main` that was synchronized previously. - - The hash for that commit is recorded at the end of this section and should be updated after each synchronization -5. Go through commits one-by-one, from the top to the bottom of the `commits-to-pick.txt` file, following these steps: - 1. If `git rev-parse $(commit)^2 &>/dev/null 2>&1` exits with 0 then it's a merge commit - 1. Execute `git cherry-pick -m 1 $(commit)` - 2. Else it's a regular commit - 1. Execute `git cherry-pick $(commit)` - 3. If either cherry-pick command results in a non-zero exit code that means there is a merge conflict - 1. If the conflict is a content, resolve it manually and `git add` the file - - Example: `CONFLICT (content): Merge conflict in package.json` - 2. Else if the conflict is a modify/delete then likely `git rm` the file - - Example: `CONFLICT (modify/delete): unitTests/bin/copyDB-test.js deleted in HEAD and modified in f75d9170b` - 3. 
Then check `git status`, if there is nothing you can `git cherry-pick --skip` - - Note: in this circumstance, running `git cherry-pick --continue` results in a non-zero exit code with the message `The previous cherry-pick is now empty, possibly due to conflict resolution.` Maybe we use this to then run `--skip`? Or maybe there is a way to parse the output of previous `git status` step? +1. Make sure local `main` branch is checked out and clean: `git checkout main && git status`. +2. Copy the [latest previously-synced commit hash from this file](#last-synchronized-commit). +3. Run the sync-commits helper script: `dev/sync-commits.js <start-commit>` +4. For each commit the script lists, run the `git cherry-pick ...` command it suggests. + - NB: Some of these may have `-m 1` params to handle merge commits correctly. +5. If a cherry-pick command results in a non-zero exit code that means there is a merge conflict. + 1. If the conflict is a content conflict, resolve it manually and `git add` the file + - Example: `CONFLICT (content): Merge conflict in package.json` + 2. Else if the conflict is a modify/delete then likely `git rm` the file + - Example: `CONFLICT (modify/delete): unitTests/bin/copyDB-test.js deleted in HEAD and modified in f75d9170b` + 3. Then check `git status`, if there is nothing you can `git cherry-pick --skip` + - Note: in this circumstance, running `git cherry-pick --continue` results in a non-zero exit code with the message `The previous cherry-pick is now empty, possibly due to conflict resolution.` Maybe we use this to then run `--skip`? Or maybe there is a way to parse the output of previous `git status` step? 6. After all commits have been picked, manually check that everything brought over was supposed to be. 
Look out for any source code we do not want open-sourced or things like unit tests which we are actively migrating separately (and will eventually include as part of the synchronization process) - The GitHub PR UI is useful for this step; but make sure to leave the PR as a draft until all synchronization steps are complete 7. Once everything looks good, run `npm run format:write` to ensure formatting is correct @@ -87,7 +79,9 @@ These are the steps @Ethan-Arrowood has been following to synchronize the reposi 12. Push all changes and open the PR for review 13. Merge using a Merge Commit so that all relative history is retained and things like the formatting change hash stays the same as recorded. -Last Synchronized Commit: `e1ea920d74e919140ae89d5ca4d75614c10c2925` +### Last Synchronized Commit + +`e1ea920d74e919140ae89d5ca4d75614c10c2925` ## Code of Conduct diff --git a/dev/sync-commits.js b/dev/sync-commits.js new file mode 100755 index 000000000..472645a40 --- /dev/null +++ b/dev/sync-commits.js @@ -0,0 +1,174 @@ +#!/usr/bin/env node + +const { execSync, exec } = require('node:child_process'); +const fs = require('node:fs'); + +/* This script should be deleted someday. It is for syncing commits from the + * old HarperDB closed-source repository while the Harper devs were + * transitioning the platform to open source. See CONTRIBUTING.md for more + * details. 
/* - WSM 2026-01-20 */

/**
 * Restore the `main` branch (optionally deleting a throwaway sync branch) and
 * terminate the process.
 *
 * Cleanup is best-effort: if a git command fails we still exit with the
 * requested code instead of crashing with an unrelated stack trace.
 *
 * @param {number} exitCode - Process exit status. Anything that is not an
 *   integer (e.g. `undefined`, `null`, or a string error code) becomes 1 so a
 *   failure is never reported as success.
 * @param {string|null} [syncBranch] - Branch to force-delete after switching
 *   back to `main`. Only pass a branch that this run created.
 */
function letsBail(exitCode, syncBranch = null) {
	try {
		execSync('git checkout main', { stdio: 'ignore' });
		if (syncBranch) {
			execSync(`git branch -D ${syncBranch}`, { stdio: 'ignore' });
		}
	} catch (cleanupError) {
		console.error(`warning: cleanup failed (${cleanupError.message})`);
	}
	process.exit(Number.isInteger(exitCode) ? exitCode : 1);
}

/**
 * Parse `git remote -v` into a lookup table.
 *
 * @returns {Object<string, Object<string, string>>} Map of remote name to
 *   `{ fetch: url, push: url }` (keys are whatever types git reports).
 */
function gitRemotes() {
	const remotes = {};
	const lines = execSync('git remote -v').toString().split('\n');
	for (const line of lines) {
		// Each line looks like `<name>\t<url> (<type>)`.
		const [name, urlAndType] = line.split('\t');
		if (!name || !urlAndType) {
			// Blank or malformed line (e.g. a repository with no remotes at all).
			continue;
		}
		const [url, rawType = ''] = urlAndType.split(' ');
		const type = rawType.replace('(', '').replace(')', '');
		remotes[name] ??= {};
		remotes[name][type] = url;
	}
	return remotes;
}

/**
 * Check that a remote exists and that both its fetch and push URLs match.
 *
 * @param {string} remoteName - Remote to look up (e.g. `origin`).
 * @param {string} remoteUrl - Exact URL expected for fetch and push.
 * @returns {boolean} True only when both URLs are present and equal `remoteUrl`.
 */
function verifyRemote(remoteName, remoteUrl) {
	const remotes = gitRemotes();
	if (!Object.hasOwn(remotes, remoteName)) {
		return false;
	}
	const { fetch, push } = remotes[remoteName];
	return fetch === remoteUrl && push === remoteUrl;
}

/** @returns {boolean} Whether the `old` remote points at the closed-source repository. */
function isOldRemoteConfigured() {
	return verifyRemote('old', 'git@github.com:HarperFast/harperdb.git');
}

/** @returns {boolean} Whether the `origin` remote points at the open-source repository. */
function isOriginRemoteConfigured() {
	return verifyRemote('origin', 'git@github.com:HarperFast/harper.git');
}

/**
 * @param {string} branchName - Branch expected to be checked out.
 * @returns {boolean} Whether `branchName` is the current branch.
 */
function isBranchCheckedOut(branchName) {
	const branch = execSync('git branch --show-current').toString().trim();
	return branch === branchName;
}

/**
 * Fetch a remote, blocking until git has finished.
 *
 * Runs synchronously so that later steps (merge, rev-list) see the fetched
 * commits. Exits the process via `letsBail` on failure.
 *
 * @param {string} remoteName - Remote to fetch.
 */
function fetchCommits(remoteName) {
	try {
		// Note that git outputs all kinds of non-errors on stderr, so only the
		// exit status decides success. `stdio: 'pipe'` keeps that chatter off
		// the terminal; it is still included in the error message on failure.
		execSync(`git fetch ${remoteName}`, { stdio: 'pipe' });
	} catch (error) {
		console.error(`git exited with error '${error.message}' fetching ${remoteName} commits`);
		letsBail(error.status);
	}
}

/**
 * Fetch a remote and merge one of its branches into the current branch.
 * Exits the process via `letsBail` on failure.
 *
 * @param {string} remoteName - Remote to fetch from.
 * @param {string} branchName - Branch on that remote to merge.
 */
function pullRemoteBranch(remoteName, branchName) {
	fetchCommits(remoteName);
	try {
		execSync(`git merge ${remoteName}/${branchName}`, { stdio: 'pipe' });
	} catch (error) {
		console.error(`git exited with error '${error.message}' merging ${remoteName}/${branchName}`);
		letsBail(error.status);
	}
}

/**
 * Create and switch to a new branch. Exits the process via `letsBail` on
 * failure.
 *
 * @param {string} branchName - Name of the branch to create.
 */
function checkoutNewBranch(branchName) {
	try {
		execSync(`git checkout -b ${branchName}`, { stdio: 'pipe' });
	} catch (error) {
		console.error(`git exited with error '${error.message}' creating branch ${branchName}`);
		// Deliberately do not ask letsBail to delete the branch: if creation
		// failed, the usual cause is that the branch already exists (e.g. a
		// second run on the same day) and may hold in-progress work.
		letsBail(error.status);
	}
}

/**
 * Verify the `old` and `origin` remotes and that `main` is checked out.
 * Prints a fix-it hint and exits with a distinct status (2, 3 or 4) on the
 * first check that fails.
 */
function ensureValidConfig() {
	process.stdout.write('Verifying git config... ');
	if (!isOldRemoteConfigured()) {
		console.log('❌');
		console.error('old remote not configured correctly.');
		console.error(
			'Run `git remote add old git@github.com:HarperFast/harperdb.git` to configure it (you may have to remove the old remote first with `git remote rm old`).'
		);
		process.exit(2);
	}
	if (!isOriginRemoteConfigured()) {
		console.log('❌');
		console.error('origin remote not configured correctly.');
		console.error(
			'Run `git remote add origin git@github.com:HarperFast/harper.git` to configure it (you may have to remove the origin remote first with `git remote rm origin`).'
		);
		process.exit(3);
	}
	if (!isBranchCheckedOut('main')) {
		console.log('❌');
		console.error('main branch not checked out. Run `git checkout main` to check it out.');
		process.exit(4);
	}
	console.log('✅');
}

/**
 * List the first-parent commits on `old/main` after `startCommit`, oldest
 * first, and also write them to `commits-to-pick.txt`.
 *
 * @param {string} startCommit - Last commit already synchronized (excluded
 *   from the result).
 * @returns {string[]} Commit hashes; empty when there is nothing to sync.
 * @throws {Error} If `git rev-list` fails or the file cannot be written.
 */
function generateCommitsToPick(startCommit) {
	const commits = execSync(`git rev-list --reverse --first-parent ${startCommit}..old/main`)
		.toString()
		.split('\n')
		.map((line) => line.trim())
		// An empty range produces empty output; don't report '' as a commit.
		.filter((line) => line !== '');
	// write to file in case a human needs to take over
	fs.writeFileSync('commits-to-pick.txt', commits.map((commit) => `${commit}\n`).join(''));
	return commits;
}

/**
 * @param {string} commit - Commit hash.
 * @returns {boolean} True when the commit has a second parent.
 */
function isMergeCommit(commit) {
	try {
		// `<commit>^2` only resolves when a second parent exists.
		execSync(`git rev-parse ${commit}^2`, { stdio: 'ignore' });
	} catch {
		return false;
	}
	return true;
}

/**
 * Bring local state up to date, create today's `sync-mmddyyyy` branch and
 * print the cherry-pick command to run for each commit to synchronize.
 *
 * @param {string} startCommit - Last commit already synchronized.
 */
function doItRockapella(startCommit) {
	process.stdout.write('Finding commits to sync... ');
	fetchCommits('old');
	pullRemoteBranch('origin', 'main');

	const syncDate = new Date();
	const month = String(syncDate.getMonth() + 1).padStart(2, '0');
	const day = String(syncDate.getDate()).padStart(2, '0');
	const syncBranch = `sync-${month}${day}${syncDate.getFullYear()}`;
	checkoutNewBranch(syncBranch);

	let commits;
	try {
		commits = generateCommitsToPick(startCommit);
	} catch (error) {
		console.log('❌');
		console.error(`could not list commits after '${startCommit}': ${error.message}`);
		// The branch was created moments ago by this run and holds no work,
		// so it is safe to remove it again.
		letsBail(error.status, syncBranch);
		return;
	}

	console.log('✅');
	console.log(`\n${commits.length} commits found:`);
	for (const commit of commits) {
		if (isMergeCommit(commit)) {
			console.log(`${commit} (merge): git cherry-pick -m 1 ${commit}`);
		} else {
			console.log(`${commit}: git cherry-pick ${commit}`);
		}
	}
}

/**
 * Script entry point.
 *
 * @param {string|undefined} startCommit - Commit hash or tag given on the
 *   command line.
 */
function run(startCommit) {
	if (!startCommit) {
		console.error('No start commit specified. Specify a commit hash or tag: sync-commits.js <start-commit>');
		// Plain usage error: exit without touching the repository.
		process.exit(1);
	}
	ensureValidConfig();
	doItRockapella(startCommit);
}

run(process.argv[2]);