From 534cea67f66a3aeb884ec325976630930e8e0613 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 21 Mar 2025 10:53:48 +0000 Subject: [PATCH 01/26] clar: pass a string for a `%s` format placeholder When `clar_parse_args()` encounters an unhandled command-line option, it wants to print out an error message that says so, and show the option that it encountered. However, the format claims that it wants to show a string, whereas using `argument[1]` provided only the first character. This was side-stepped in upstream `clar` in 7eaea8b2da (clar: argument parsing improvements, 2025-01-16) by not even showing the option anymore. Let's just fix this in Git until the next time Git synchronizes with an upstream `clar` version again. Signed-off-by: Johannes Schindelin --- t/unit-tests/clar/clar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/unit-tests/clar/clar.c b/t/unit-tests/clar/clar.c index d54e4553674908..ef2a22a7a2b40f 100644 --- a/t/unit-tests/clar/clar.c +++ b/t/unit-tests/clar/clar.c @@ -558,7 +558,7 @@ clar_parse_args(int argc, char **argv) default: clar_abort("Unexpected commandline argument '%s'.\n", - argument[1]); + argument); } } } From 91b24037b10bb3f32d48aac7ff9639a97e5851d1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 21 Mar 2025 11:03:08 +0000 Subject: [PATCH 02/26] clar(clar__assert_equal): do in-bounds check before accessing element When accessing an array element (or, in these instances, characters in a string), the check whether the array index is within bounds should always come before accessing said element. Signed-off-by: Johannes Schindelin --- t/unit-tests/clar/clar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/unit-tests/clar/clar.c b/t/unit-tests/clar/clar.c index ef2a22a7a2b40f..e7d142da53b2cd 100644 --- a/t/unit-tests/clar/clar.c +++ b/t/unit-tests/clar/clar.c @@ -767,7 +767,7 @@ void clar__assert_equal( if (!is_equal) { if (s1 && s2) { int pos; - for (pos = 0; s1[pos] == s2[pos] && pos < len; ++pos) + for (pos = 0; pos < len && s1[pos] == s2[pos]; ++pos) /* find differing byte offset */; p_snprintf(buf, sizeof(buf), "'%.*s' != '%.*s' (at byte %d)", len, s1, len, s2, pos); @@ -803,7 +803,7 @@ void clar__assert_equal( if (!is_equal) { if (wcs1 && wcs2) { int pos; - for (pos = 0; wcs1[pos] == wcs2[pos] && pos < len; ++pos) + for (pos = 0; pos < len && wcs1[pos] == wcs2[pos]; ++pos) /* find differing byte offset */; p_snprintf(buf, sizeof(buf), "'%.*ls' != '%.*ls' (at byte %d)", len, wcs1, len, wcs2, pos); From b85d18c8973eb90e9ab7749580474410b4ee0519 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 27 Oct 2022 21:46:13 +0200 Subject: [PATCH 03/26] cat_one_file(): make it easy to see that the `size` variable is initialized The large `switch` statement makes it a bit impractical to reason about the code. One of the code paths can technically lead to using `size` without being initialized: if the `t` case is taken and the type name is set to the empty string, we would actually leave `size` unintialized right until we use it. Practically, this cannot happen because the `do_oid_object_info_extended()` function is expected to always populate the `type_name` if asked for. However, it is quite unnecessary to leave the code as unwieldy to reason about: Just initialize the variable to 0 and be done with it. Signed-off-by: Johannes Schindelin --- builtin/cat-file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 67a5ff2b9ebd29..9aecdfe5e37c1f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -106,7 +106,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) struct object_id oid; enum object_type type; char *buf; - unsigned long size; + unsigned long size = 0; struct object_context obj_context = {0}; struct object_info oi = OBJECT_INFO_INIT; unsigned flags = OBJECT_INFO_LOOKUP_REPLACE; From a827560742a10972c84bd50b0cf23412cf425edc Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 22:07:45 +0100 Subject: [PATCH 04/26] fsck: avoid using an uninitialized variable In `fsck_commit()`, after counting the authors of a commit, we set the `err` variable either when there was no author, or when there were more than two authors recorded. Then we access the `err` variable to figure out whether we should return early. But if there was exactly one author, that variable is still uninitialized. Let's just initialize the variable. This issue was pointed out by CodeQL. Signed-off-by: Johannes Schindelin --- fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fsck.c b/fsck.c index 8dc8472ceb3781..868cbc6b0b3286 100644 --- a/fsck.c +++ b/fsck.c @@ -925,7 +925,7 @@ static int fsck_commit(const struct object_id *oid, { struct object_id tree_oid, parent_oid; unsigned author_count; - int err; + int err = 0; const char *buffer_begin = buffer; const char *buffer_end = buffer + size; const char *p; From c004e4e700237a9239c34cd2aa3465c8a2c85fb9 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 22:56:26 +0100 Subject: [PATCH 05/26] load_revindex_from_disk(): avoid accessing uninitialized data The `revindex_size` value is uninitialized in case the function is erroring out, but we want to assign its value. Let's just initialize it. Signed-off-by: Johannes Schindelin --- pack-revindex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pack-revindex.c b/pack-revindex.c index ffcde48870d8f7..d9f48f14c14d63 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -211,7 +211,7 @@ static int load_revindex_from_disk(const struct git_hash_algo *algo, int fd, ret = 0; struct stat st; void *data = NULL; - size_t revindex_size; + size_t revindex_size = 0; struct revindex_header *hdr; if (git_env_bool(GIT_TEST_REV_INDEX_DIE_ON_DISK, 0)) From c94ab2b10ff72f9e27fdc3e6feeba5774bc273af Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 22:58:37 +0100 Subject: [PATCH 06/26] load_pack_mtimes_file(): avoid accessing uninitialized data The `mtimes_size` variable is uninitialzed when the function errors out, yet its value is assigned to another variable. Let's just initialize it. Signed-off-by: Johannes Schindelin --- pack-mtimes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pack-mtimes.c b/pack-mtimes.c index 20900ca88d377a..77589698f1230e 100644 --- a/pack-mtimes.c +++ b/pack-mtimes.c @@ -28,7 +28,7 @@ static int load_pack_mtimes_file(char *mtimes_file, int fd, ret = 0; struct stat st; uint32_t *data = NULL; - size_t mtimes_size, expected_size; + size_t mtimes_size = 0, expected_size; struct mtimes_header header; fd = git_open(mtimes_file); From d1df00a3a491d6c9bd4297f8d63f1f0af1404984 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:27:05 +0100 Subject: [PATCH 07/26] revision: defensive programming On the off chance that `lookup_decoration()` cannot find anything, let `leave_one_treesame_to_parent()` return gracefully instead of crashing. Pointed out by CodeQL. Signed-off-by: Johannes Schindelin --- revision.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/revision.c b/revision.c index 2c36a9c179efb5..00d6fe5432cfeb 100644 --- a/revision.c +++ b/revision.c @@ -3353,6 +3353,9 @@ static int leave_one_treesame_to_parent(struct rev_info *revs, struct commit *co struct commit_list *p; unsigned n; + if (!ts) + return 0; + for (p = commit->parents, n = 0; p; p = p->next, n++) { if (ts->treesame[n]) { if (p->item->object.flags & TMP_MARK) { From 94c4074b7c3635ad31cd28d211be3a1c6920a694 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:35:08 +0100 Subject: [PATCH 08/26] get_parent(): defensive programming CodeQL points out that `lookup_commit_reference()` can return NULL values. Signed-off-by: Johannes Schindelin --- object-name.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/object-name.c b/object-name.c index 9288b2dd2453a0..f4350481ac8231 100644 --- a/object-name.c +++ b/object-name.c @@ -1108,7 +1108,7 @@ static enum get_oid_result get_parent(struct repository *r, if (ret) return ret; commit = lookup_commit_reference(r, &oid); - if (repo_parse_commit(r, commit)) + if (!commit || repo_parse_commit(r, commit)) return MISSING_OBJECT; if (!idx) { oidcpy(result, &commit->object.oid); From 6d7c86f085be9c386f9b133dd5fe15cb837dfca7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:38:02 +0100 Subject: [PATCH 09/26] fetch-pack: defensive programming CodeQL points out that `parse_object()` can return NULL values. Signed-off-by: Johannes Schindelin --- fetch-pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-pack.c b/fetch-pack.c index fa4231fee74c9f..4c6d346700ae25 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -155,7 +155,7 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid, struct tag *tag = (struct tag *) parse_object(the_repository, oid); - if (!tag->tagged) + if (!tag || !tag->tagged) return NULL; if (mark_tags_complete_and_check_obj_db) tag->object.flags |= COMPLETE; From 216c78cd292a710a37de4758e0af84162a4ed9c8 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:41:12 +0100 Subject: [PATCH 10/26] unparse_commit(): defensive programming CodeQL points out that `lookup_commit()` can return NULL values. Signed-off-by: Johannes Schindelin --- commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit.c b/commit.c index e915b2b9a1295c..722a83dbebfc7c 100644 --- a/commit.c +++ b/commit.c @@ -188,7 +188,7 @@ void unparse_commit(struct repository *r, const struct object_id *oid) { struct commit *c = lookup_commit(r, oid); - if (!c->object.parsed) + if (!c || !c->object.parsed) return; free_commit_list(c->parents); c->parents = NULL; From 499173e78a9275f462a8ebfd57f359b24c0f7008 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:44:08 +0100 Subject: [PATCH 11/26] verify_commit_graph(): defensive programming CodeQL points out that `lookup_commit()` can return NULL values. Signed-off-by: Johannes Schindelin --- commit-graph.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index ad3943b6906520..ffeb14b5ed368a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2796,6 +2796,11 @@ static int verify_one_commit_graph(struct repository *r, the_repository->hash_algo); graph_commit = lookup_commit(r, &cur_oid); + if (!graph_commit) { + graph_report(_("failed to look up commit %s for commit-graph"), + oid_to_hex(&cur_oid)); + continue; + } odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); if (repo_parse_commit_internal(r, odb_commit, 0, 0)) { graph_report(_("failed to parse commit %s from object database for commit-graph"), From d9a4c85b2e440121b466dcffab4a694da619f937 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:49:59 +0100 Subject: [PATCH 12/26] stash: defensive programming CodeQL points out that `parse_tree_indirect()` can return NULL values. Signed-off-by: Johannes Schindelin --- builtin/stash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/stash.c b/builtin/stash.c index cfbd92852a6557..bbe06881fd5ac1 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -284,7 +284,7 @@ static int reset_tree(struct object_id *i_tree, int update, int reset) memset(&opts, 0, sizeof(opts)); tree = parse_tree_indirect(i_tree); - if (parse_tree(tree)) + if (!tree || parse_tree(tree)) return -1; init_tree_desc(t, &tree->object.oid, tree->buffer, tree->size); From e8f6e0100d01a49eef39188a2bea80f330da0d98 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:51:59 +0100 Subject: [PATCH 13/26] stash: defensive programming CodeQL points out that `lookup_commit()` can return NULL values. Signed-off-by: Johannes Schindelin --- builtin/stash.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/stash.c b/builtin/stash.c index bbe06881fd5ac1..c2df5b939653ad 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -1395,6 +1395,11 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b goto done; } else { head_commit = lookup_commit(the_repository, &info->b_commit); + if (!head_commit) { + ret = error(_("could not look up commit '%s'"), + oid_to_hex (&info->b_commit)); + goto done; + } } if (!check_changes(ps, include_untracked, &untracked_files)) { From 984e44b7614a4fbc80c5d531c77d5ab172369aa4 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:53:40 +0100 Subject: [PATCH 14/26] push: defensive programming CodeQL points out that `branch_get()` can return NULL values. Signed-off-by: Johannes Schindelin --- builtin/push.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/push.c b/builtin/push.c index 92d530e5c4dff0..db698c1034243f 100644 --- a/builtin/push.c +++ b/builtin/push.c @@ -90,7 +90,7 @@ static void refspec_append_mapped(struct refspec *refspec, const char *ref, if (push_default == PUSH_DEFAULT_UPSTREAM && skip_prefix(matched->name, "refs/heads/", &branch_name)) { struct branch *branch = branch_get(branch_name); - if (branch->merge_nr == 1 && branch->merge[0]->src) { + if (branch && branch->merge_nr == 1 && branch->merge[0]->src) { refspec_appendf(refspec, "%s:%s", ref, branch->merge[0]->src); return; From e94d1ae21c926ba1eb7c7f3a54c5e7e55cf4d6ee Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:55:55 +0100 Subject: [PATCH 15/26] fetch: defensive programming CodeQL points out that `branch_get()` can return NULL values. Signed-off-by: Johannes Schindelin --- builtin/fetch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 40a0e8d24434f2..1683572f1b0642 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -552,7 +552,7 @@ static struct ref *get_ref_map(struct remote *remote, if (remote && (remote->fetch.nr || /* Note: has_merge implies non-NULL branch->remote_name */ - (has_merge && !strcmp(branch->remote_name, remote->name)))) { + (has_merge && branch && !strcmp(branch->remote_name, remote->name)))) { for (i = 0; i < remote->fetch.nr; i++) { get_fetch_map(remote_refs, &remote->fetch.items[i], &tail, 0); if (remote->fetch.items[i].dst && @@ -570,6 +570,7 @@ static struct ref *get_ref_map(struct remote *remote, * Note: has_merge implies non-NULL branch->remote_name */ if (has_merge && + branch && !strcmp(branch->remote_name, remote->name)) add_merge_config(&ref_map, remote_refs, branch, &tail); } else if (!prefetch) { From b81a679af754b3acea0c01fec93d39ebff9f0061 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:57:16 +0100 Subject: [PATCH 16/26] describe: defensive programming CodeQL points out that `lookup_commit_reference()` can return NULL values. Signed-off-by: Johannes Schindelin --- builtin/describe.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/builtin/describe.c b/builtin/describe.c index 2d50883b729671..001f582151e9aa 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -324,6 +324,8 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) unsigned int unannotated_cnt = 0; cmit = lookup_commit_reference(the_repository, oid); + if (!cmit) + die(_("could not look up commit '%s'"), oid_to_hex(oid)); n = find_commit_name(&cmit->object.oid); if (n && (tags || all || n->prio == 2)) { From 838a96c0ddcbd241b3d1ba7e74fa3b2d36057e83 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Dec 2022 00:02:33 +0100 Subject: [PATCH 17/26] inherit_tracking(): defensive programming CodeQL points out that `branch_get()` can return NULL values. Signed-off-by: Johannes Schindelin --- branch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/branch.c b/branch.c index 6d01d7d6bdb2e4..4629c25759572b 100644 --- a/branch.c +++ b/branch.c @@ -224,6 +224,8 @@ static int inherit_tracking(struct tracking *tracking, const char *orig_ref) skip_prefix(orig_ref, "refs/heads/", &bare_ref); branch = branch_get(bare_ref); + if (!branch) + BUG("could not get branch for '%s", bare_ref); if (!branch->remote_name) { warning(_("asked to inherit tracking from '%s', but no remote is set"), bare_ref); From 186fe7811a4a2f3525c4b03565327962e0eaa7be Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 9 Aug 2022 12:22:00 +0200 Subject: [PATCH 18/26] codeql: run static analysis as part of CI builds CodeQL is GitHub's native offering of a static code analyzer, and hence integrates with GitHub Actions better than any other static code analyzer. By default, it comes with a large range of "queries" that test for common code patterns that should be avoided. For now, we only target source code written in C, via the `language: cpp` directive. Just in case that other languages should be targeted, too, this GitHub workflow job is set up as a matrix job to make that easier in the future. For full documentation, see https://docs.github.com/en/code-security/code-scanning/introduction-to-code-scanning/about-code-scanning-with-codeql Co-authored-by: Pierre Tempel Co-authored-by: Arthur Baars Signed-off-by: Johannes Schindelin --- .github/workflows/codeql.yml | 46 ++++++++++++++++++++++++++++++++++++ .gitignore | 2 ++ ci/install-dependencies.sh | 2 +- 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000000000..b59adceb39d88f --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,46 @@ +name: "CodeQL" + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: ["cpp"] + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies + run: ci/install-dependencies.sh + if: matrix.language == 'cpp' + env: + jobname: codeql + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + queries: security-extended + + - name: Build + if: matrix.language == 'cpp' + run: | + cat /proc/cpuinfo + make -j$(nproc) + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 diff --git a/.gitignore b/.gitignore index 04c444404e4ba8..1351e4b5bd98cf 100644 --- a/.gitignore +++ b/.gitignore @@ -254,3 +254,5 @@ Release/ /contrib/buildsystems/out /contrib/libgit-rs/target /contrib/libgit-sys/target +/.github/codeql/.cache/ +/.github/codeql/codeql-pack.lock.yml diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index d061a4729339e0..8a85ff749df187 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -119,7 +119,7 @@ ClangFormat) sudo apt-get -q update sudo apt-get -q -y install clang-format ;; -StaticAnalysis) +StaticAnalysis|codeql) sudo apt-get -q update sudo apt-get -q -y install coccinelle libcurl4-openssl-dev libssl-dev \ libexpat-dev gettext make From a295cfa79bc8ccb4f34c9c2c8a07c9cabbf27d61 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 22 Mar 2023 22:27:56 +0100 Subject: [PATCH 19/26] codeql: publish the sarif file as build artifact In some instances, CodeQL's web UI on github.com leaves questions unanswered. For example, in some alerts it is really necessary to follow the entire "taint flow" to understand why something might be an issue. The alerts for the `cpp/uncontrolled-allocation-size` rule, for example, are all false positives, and only when inspecting the exact flow does it become obvious that one alert wants to point out that the size of a binary patch hunk, which is specified in the patch, is then used to determine how much memory to allocate, which may potentially run out of memory (and is hence just Git doing what it is asked to, and does not need to be changed). To help with those issues, publish the `.sarif` file as part of every workflow run; This allows downloading that file and inspecting it e.g. with the SARIF viewer extension in VS Code (for details, see https://marketplace.visualstudio.com/items?itemName=MS-SarifVSCode.sarif-viewer). Signed-off-by: Johannes Schindelin --- .github/workflows/codeql.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index b59adceb39d88f..9aa446899a8e53 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -44,3 +44,21 @@ jobs: - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 + with: + upload: False + output: sarif-results + + - name: debug + shell: bash + run: ls -la sarif-results + + - name: publish sarif for debugging + uses: actions/upload-artifact@v4 + with: + name: sarif-results + path: sarif-results + + - name: Upload SARIF + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: sarif-results/cpp.sarif From 86f49737560c241f2749638b36b079d394a780f7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 14 Dec 2022 22:04:59 +0100 Subject: [PATCH 20/26] fetch: silence a CodeQL alert about a local variable's address' use after release As pointed out by CodeQL, it is a potentially dangerous practice to store local variables' addresses in non-local structs. My original intention was to make sure to clear it out after it was used, and before the function returns (which is when the address would go stale). However, I faced too much resistance in the Git project against such patches, there seemed to always be the overwhelming sentiment that the code isn't broken (even if it requires a complex and demanding analysis to wrap one's head around _that_). Therefore, I will be pragmatic and simply ask CodeQL to hold its peace about this issue forever. Signed-off-by: Johannes Schindelin --- builtin/fetch.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/builtin/fetch.c b/builtin/fetch.c index 40a0e8d24434f2..181b63756822cc 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -2550,6 +2550,11 @@ int cmd_fetch(int argc, die(_("must supply remote when using --negotiate-only")); gtransport = prepare_transport(remote, 1); if (gtransport->smart_options) { + /* + * Intentionally assign the address of a local variable + * to a non-local struct's field. + * codeql[cpp/stack-address-escape] + */ gtransport->smart_options->acked_commits = &acked_commits; } else { warning(_("protocol does not support --negotiate-only, exiting")); From 24e8e35d402049a6e3c86003829f07f1fd1053bd Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:20:54 +0100 Subject: [PATCH 21/26] submodule: check return value of `submodule_from_path()` As pointed out by CodeQL, it could be NULL and we usually check for that. Signed-off-by: Johannes Schindelin --- builtin/submodule--helper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 53da2116ddf576..b4b81e7f02a01c 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1934,6 +1934,9 @@ static int determine_submodule_update_strategy(struct repository *r, const char *val; int ret; + if (!sub) + return error(_("could not retrieve submodule information for path '%s'"), path); + key = xstrfmt("submodule.%s.update", sub->name); if (update) { From 548b20a6d85599afa9937986e2bb2bce367529a2 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:22:50 +0100 Subject: [PATCH 22/26] test-tool repository: check return value of `lookup_commit()` On the off-chance that it's NULL... Signed-off-by: Johannes Schindelin --- t/helper/test-repository.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d22f1..05417d8f43dcab 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -27,6 +27,8 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, repo_set_hash_algo(the_repository, hash_algo_by_ptr(r.hash_algo)); c = lookup_commit(&r, commit_oid); + if (!c) + die("Could not look up %s", oid_to_hex(commit_oid)); if (!parse_commit_in_graph(&r, c)) die("Couldn't parse commit"); From 3d91638f6102a43372629d519cf10343335688ad Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 Dec 2022 23:25:01 +0100 Subject: [PATCH 23/26] shallow: handle missing shallow commits gracefully As pointed out by CodeQL, `lookup_commit()` can return NULL. Signed-off-by: Johannes Schindelin --- shallow.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/shallow.c b/shallow.c index faeeeb45f986e1..01b62621e8fbf8 100644 --- a/shallow.c +++ b/shallow.c @@ -705,7 +705,8 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, for (i = 0; i < nr_shallow; i++) { struct commit *c = lookup_commit(the_repository, &oid[shallow[i]]); - c->object.flags |= BOTTOM; + if (c) + c->object.flags |= BOTTOM; } for (i = 0; i < ref->nr; i++) From a1bef6cdc11b1bf1d30c53fe99de0407bfb98b10 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 17 Dec 2022 22:21:22 +0100 Subject: [PATCH 24/26] commit-graph: suppress warning about using a stale stack addresses The code is a bit too hard to reason about for CodeQL to figure out whether the `fill_commit_graph_info()` function is at all called after `write_commit_graph()` returns (and hence whether `topo_levels` goes out of context before it is used again). The Git project insists that this is correct (and does not want to make the code more obviously correct), so let's silence CodeQL's complaints in this instance. Signed-off-by: Johannes Schindelin --- commit-graph.c | 1 + 1 file changed, 1 insertion(+) diff --git a/commit-graph.c b/commit-graph.c index ad3943b6906520..00fa464e7f192f 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2560,6 +2560,7 @@ int write_commit_graph(struct object_directory *odb, struct commit_graph *g = ctx.r->objects->commit_graph; while (g) { + /* Intentional: codeql[cpp/stack-address-escape] */ g->topo_levels = &topo_levels; g = g->base_graph; } From e33df9a0b110cc911f74f070a642a97360d3aa0e Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 21 Mar 2025 10:25:59 +0000 Subject: [PATCH 25/26] codeql: disable a couple of non-critical queries for now A couple of CodeQL's queries are opinionated in a way that is obviously not shared by Git's source code's state, and apparently intentionally so. For example, the "For loop variable changed in body" query as well as the "No trivial switch statements" one result in too many results that are apparently intentional in Git's source code. Let's not worry about those, then. Also, Git has plenty of instances where variables shadow other variables. Other valid yet not quite critical issues identified by CodeQL include complex conditionals and nested switch statements spanning several pages. We probably want to address these issues at some stage, but they are not as critical as other problems pointed out by CodeQL, so let's silence those queries for now and take care of them at a later stage. Signed-off-by: Johannes Schindelin --- .github/codeql/codeql-config.yml | 131 +++++++++++++++++++++++++++++++ .github/workflows/codeql.yml | 2 +- 2 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 .github/codeql/codeql-config.yml diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml new file mode 100644 index 00000000000000..d2f84f8b98aad5 --- /dev/null +++ b/.github/codeql/codeql-config.yml @@ -0,0 +1,131 @@ +name: "CodeQL config" + +queries: + - uses: security-extended + +query-filters: + - exclude: + # yes, this extra indentation is intentional + # too common in Git's source code + id: cpp/trivial-switch + - exclude: + id: cpp/loop-variable-changed + - exclude: + # we override this locally with a modified version + id: cpp/non-constant-format + - exclude: + # Git does not consider this a problem + id: cpp/irregular-enum-init + - exclude: + # Git has many long functions, this alert would match too many + id: cpp/poorly-documented-function + - exclude: + # In Git, there is a lot of commented-out code + id: cpp/commented-out-code + - exclude: + # While it is true that long switch cases are hard to read and + # validate, Git has way too many for us to allow this query to + # churn out alerts left and right + id: cpp/long-switch + - exclude: + # CodeQL does not expect Git to heed the umask(), but it does + id: cpp/world-writable-file-creation + - exclude: + # Git uses the construct `if () ; else ...` often, to + # avoid an extra indentation level. CodeQL does not like that. + id: cpp/empty-block + - exclude: + # This rule unfortunately triggers some false positives, e.g. + # where Git tries to redact URLs or where Git specifically + # asks for a password upon GIT_SSL_CERT_PASSWORD_PROTECTED. + id: cpp/user-controlled-bypass + - exclude: + # This rule fails to recognize that xmallocz() _specifically_ + # makes room for a trailing NUL, and instead assumes that this + # function behaves like malloc(), which does not. + id: cpp/invalid-pointer-deref + - exclude: + # CodeQL fails to recognize that xmallocz() accounts for the NUL, + # instead assuming malloc() semantics. + id: cpp/no-space-for-terminator + - exclude: + # Git does exchange plain-text passwords via stdin/stdout e.g. + # with helpers in the credential protocol, or in credential-cache. + # This rule, though, assumes that writing to _any_ file descriptor + # is unsafe. + id: cpp/cleartext-storage-file + - exclude: + # When storing the value of the environment variable `PWD` as the + # current directory in absolute_pathdup(), or when allocating memory + # for a binary patch where the size is specified in the patch itself, + # CodeQL assumes that this can lead to a denial of service because + # of an unbounded size, but Git's code works as designed here. + id: cpp/uncontrolled-allocation-size + - exclude: + # lock_repo_for_gc() has admittedly obtuse logic to parse the + # process ID out of the `gc.pid` file, which is correct, but + # due to its construction throws a false positive here. + id: cpp/missing-check-scanf + - exclude: + # discard_cache_entry() overwrites the name in a FLEX_ARRAY struct + # if GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES is set, which CodeQL fails + # to recognize as valid. + id: cpp/overrun-write + - exclude: + # Since `time_t` can be signed or unsigned, there is unfortunately + # no way to avoid letting this rule report a potential + id: cpp/integer-multiplication-cast-to-long + - exclude: + # There are many, many legitimate code paths in Git where a path is + # constructed from an environment variable, e.g. GIT_DIR. Let's suppress + # this slightly overzealous query. + id: cpp/path-injection + - exclude: + # Git has 99 instances of this at the time of writing :-( + id: cpp/declaration-hides-variable + - exclude: + id: cpp/declaration-hides-parameter + - exclude: + id: cpp/local-variable-hides-global-variable + - exclude: + id: cpp/complex-condition + - exclude: + # Nested, long-winded switch statements are hard to read and hard + # to reason about. Looking at you, `format_commit_one()`. + id: cpp/complex-block + - exclude: + # There are four instances of this at time of writing, all intentional. + # However, it is very easy to introduce unintentional re-use of loop + # variable names, therefore we will most likely want to either change these + # instances or add suppressions. + id: cpp/nested-loops-with-same-variable + - exclude: + # zOMG so many FIXMEs + id: cpp/fixme-comment + - exclude: + # Git assumes quite a bit about the user's control of the current worktree + # Therefore, it kind of assumes that TOCTOU issues are not a thing when + # it comes to files. + id: cpp/toctou-race-condition + - exclude: + # Too many results in Git where the code was, however, intentionally written + # the way it is. + id: cpp/stack-address-escape + - exclude: + id: cpp/inconsistent-null-check + - exclude: + # This would trigger alerts in the functions in `help.c` that want to open + # external programs to show manual pages. + id: cpp/uncontrolled-process-operation + - exclude: + # The code in t/unit-tests/u-ctype.c implicitly exercises the `sane_istest()` + # macro extensively, and CodeQL seems to miss the cast to `(unsigned char)`, + # thereby mistaking the accesses for being past the end of the array (which + # is incorrect). + # + # Ideally, we would exclude test programs from CodeQL anyways, but + # unfortunately there is no Makefile rule in Git's code base to build only + # the production code, and CodeQL's `paths-ignore` directive described at + # https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#specifying-directories-to-scan + # unfortunately is _ignored_ for compiled languages. + id: cpp/overflow-buffer diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 9aa446899a8e53..957102887aa571 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -34,7 +34,7 @@ jobs: uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} - queries: security-extended + config-file: ./.github/codeql/codeql-config.yml - name: Build if: matrix.language == 'cpp' From adfc13093a89f3a69f5efd1e7f5be80f0806cc1a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 21 Mar 2025 11:05:22 +0000 Subject: [PATCH 26/26] clar(clar_summary_testsuite): avoid thread-unsafe `localtime()` The `localtime()` function is inherently thread-unsafe and should not be used anymore. Let's use `localtime_r()` instead. Signed-off-by: Johannes Schindelin --- t/unit-tests/clar/clar/summary.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/t/unit-tests/clar/clar/summary.h b/t/unit-tests/clar/clar/summary.h index 0d0b646fe7514b..e972831938cd44 100644 --- a/t/unit-tests/clar/clar/summary.h +++ b/t/unit-tests/clar/clar/summary.h @@ -19,14 +19,24 @@ static int clar_summary_testsuites(struct clar_summary *summary) return fprintf(summary->fp, "\n"); } +#ifdef _WIN32 +static struct tm *localtime_r(const time_t *timep, struct tm *result) +{ + if (localtime_s(result, timep) == 0) + return result; + return NULL; +} +#endif + static int clar_summary_testsuite(struct clar_summary *summary, int idn, const char *name, time_t timestamp, int test_count, int fail_count, int error_count) { - struct tm *tm = localtime(×tamp); + struct tm tm; char iso_dt[20]; - if (strftime(iso_dt, sizeof(iso_dt), "%Y-%m-%dT%H:%M:%S", tm) == 0) + localtime_r(×tamp, &tm); + if (strftime(iso_dt, sizeof(iso_dt), "%Y-%m-%dT%H:%M:%S", &tm) == 0) return -1; return fprintf(summary->fp, "\t