diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml new file mode 100644 index 00000000000000..d2f84f8b98aad5 --- /dev/null +++ b/.github/codeql/codeql-config.yml @@ -0,0 +1,131 @@ +name: "CodeQL config" + +queries: + - uses: security-extended + +query-filters: + - exclude: + # yes, this extra indentation is intentional + # too common in Git's source code + id: cpp/trivial-switch + - exclude: + id: cpp/loop-variable-changed + - exclude: + # we override this locally with a modified version + id: cpp/non-constant-format + - exclude: + # Git does not consider this a problem + id: cpp/irregular-enum-init + - exclude: + # Git has many long functions, this alert would match too many + id: cpp/poorly-documented-function + - exclude: + # In Git, there is a lot of commented-out code + id: cpp/commented-out-code + - exclude: + # While it is true that long switch cases are hard to read and + # validate, Git has way too many for us to allow this query to + # churn out alerts left and right + id: cpp/long-switch + - exclude: + # CodeQL does not expect Git to heed the umask(), but it does + id: cpp/world-writable-file-creation + - exclude: + # Git uses the construct `if () ; else ...` often, to + # avoid an extra indentation level. CodeQL does not like that. + id: cpp/empty-block + - exclude: + # This rule unfortunately triggers some false positives, e.g. + # where Git tries to redact URLs or where Git specifically + # asks for a password upon GIT_SSL_CERT_PASSWORD_PROTECTED. + id: cpp/user-controlled-bypass + - exclude: + # This rule fails to recognize that xmallocz() _specifically_ + # makes room for a trailing NUL, and instead assumes that this + # function behaves like malloc(), which does not. + id: cpp/invalid-pointer-deref + - exclude: + # CodeQL fails to recognize that xmallocz() accounts for the NUL, + # instead assuming malloc() semantics. + id: cpp/no-space-for-terminator + - exclude: + # Git does exchange plain-text passwords via stdin/stdout e.g. + # with helpers in the credential protocol, or in credential-cache. + # This rule, though, assumes that writing to _any_ file descriptor + # is unsafe. + id: cpp/cleartext-storage-file + - exclude: + # When storing the value of the environment variable `PWD` as the + # current directory in absolute_pathdup(), or when allocating memory + # for a binary patch where the size is specified in the patch itself, + # CodeQL assumes that this can lead to a denial of service because + # of an unbounded size, but Git's code works as designed here. + id: cpp/uncontrolled-allocation-size + - exclude: + # lock_repo_for_gc() has admittedly obtuse logic to parse the + # process ID out of the `gc.pid` file, which is correct, but + # due to its construction throws a false positive here. + id: cpp/missing-check-scanf + - exclude: + # discard_cache_entry() overwrites the name in a FLEX_ARRAY struct + # if GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES is set, which CodeQL fails + # to recognize as valid. + id: cpp/overrun-write + - exclude: + # Since `time_t` can be signed or unsigned, there is unfortunately + # no way to avoid letting this rule report a potential + id: cpp/integer-multiplication-cast-to-long + - exclude: + # There are many, many legitimate code paths in Git where a path is + # constructed from an environment variable, e.g. GIT_DIR. Let's suppress + # this slightly overzealous query. + id: cpp/path-injection + - exclude: + # Git has 99 instances of this at the time of writing :-( + id: cpp/declaration-hides-variable + - exclude: + id: cpp/declaration-hides-parameter + - exclude: + id: cpp/local-variable-hides-global-variable + - exclude: + id: cpp/complex-condition + - exclude: + # Nested, long-winded switch statements are hard to read and hard + # to reason about. Looking at you, `format_commit_one()`. + id: cpp/complex-block + - exclude: + # There are four instances of this at time of writing, all intentional. + # However, it is very easy to introduce unintentional re-use of loop + # variable names, therefore we will most likely want to either change these + # instances or add suppressions. + id: cpp/nested-loops-with-same-variable + - exclude: + # zOMG so many FIXMEs + id: cpp/fixme-comment + - exclude: + # Git assumes quite a bit about the user's control of the current worktree + # Therefore, it kind of assumes that TOCTOU issues are not a thing when + # it comes to files. + id: cpp/toctou-race-condition + - exclude: + # Too many results in Git where the code was, however, intentionally written + # the way it is. + id: cpp/stack-address-escape + - exclude: + id: cpp/inconsistent-null-check + - exclude: + # This would trigger alerts in the functions in `help.c` that want to open + # external programs to show manual pages. + id: cpp/uncontrolled-process-operation + - exclude: + # The code in t/unit-tests/u-ctype.c implicitly exercises the `sane_istest()` + # macro extensively, and CodeQL seems to miss the cast to `(unsigned char)`, + # thereby mistaking the accesses for being past the end of the array (which + # is incorrect). + # + # Ideally, we would exclude test programs from CodeQL anyways, but + # unfortunately there is no Makefile rule in Git's code base to build only + # the production code, and CodeQL's `paths-ignore` directive described at + # https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#specifying-directories-to-scan + # unfortunately is _ignored_ for compiled languages. + id: cpp/overflow-buffer diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000000000..957102887aa571 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,64 @@ +name: "CodeQL" + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: ["cpp"] + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install dependencies + run: ci/install-dependencies.sh + if: matrix.language == 'cpp' + env: + jobname: codeql + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + config-file: ./.github/codeql/codeql-config.yml + + - name: Build + if: matrix.language == 'cpp' + run: | + cat /proc/cpuinfo + make -j$(nproc) + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + upload: False + output: sarif-results + + - name: debug + shell: bash + run: ls -la sarif-results + + - name: publish sarif for debugging + uses: actions/upload-artifact@v4 + with: + name: sarif-results + path: sarif-results + + - name: Upload SARIF + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: sarif-results/cpp.sarif diff --git a/.gitignore b/.gitignore index a0ac2ba0c6b3ca..ef3972eec46f8a 100644 --- a/.gitignore +++ b/.gitignore @@ -258,3 +258,5 @@ Release/ CMakeSettings.json /contrib/libgit-rs/target /contrib/libgit-sys/target +/.github/codeql/.cache/ +/.github/codeql/codeql-pack.lock.yml diff --git a/branch.c b/branch.c index 6d01d7d6bdb2e4..4629c25759572b 100644 --- a/branch.c +++ b/branch.c @@ -224,6 +224,8 @@ static int inherit_tracking(struct tracking *tracking, const char *orig_ref) skip_prefix(orig_ref, "refs/heads/", &bare_ref); branch = branch_get(bare_ref); + if (!branch) + BUG("could not get branch for '%s", bare_ref); if (!branch->remote_name) { warning(_("asked to inherit tracking from '%s', but no remote is set"), bare_ref); diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 67a5ff2b9ebd29..9aecdfe5e37c1f 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -106,7 +106,7 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) struct object_id oid; enum object_type type; char *buf; - unsigned long size; + unsigned long size = 0; struct object_context obj_context = {0}; struct object_info oi = OBJECT_INFO_INIT; unsigned flags = OBJECT_INFO_LOOKUP_REPLACE; diff --git a/builtin/describe.c b/builtin/describe.c index 2d50883b729671..001f582151e9aa 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -324,6 +324,8 @@ static void describe_commit(struct object_id *oid, struct strbuf *dst) unsigned int unannotated_cnt = 0; cmit = lookup_commit_reference(the_repository, oid); + if (!cmit) + die(_("could not look up commit '%s'"), oid_to_hex(oid)); n = find_commit_name(&cmit->object.oid); if (n && (tags || all || n->prio == 2)) { diff --git a/builtin/fetch.c b/builtin/fetch.c index bd81ef1cfba62f..0e4e8925f87d73 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -555,7 +555,7 @@ static struct ref *get_ref_map(struct remote *remote, if (remote && (remote->fetch.nr || /* Note: has_merge implies non-NULL branch->remote_name */ - (has_merge && !strcmp(branch->remote_name, remote->name)))) { + (has_merge && branch && !strcmp(branch->remote_name, remote->name)))) { for (i = 0; i < remote->fetch.nr; i++) { get_fetch_map(remote_refs, &remote->fetch.items[i], &tail, 0); if (remote->fetch.items[i].dst && @@ -573,6 +573,7 @@ static struct ref *get_ref_map(struct remote *remote, * Note: has_merge implies non-NULL branch->remote_name */ if (has_merge && + branch && !strcmp(branch->remote_name, remote->name)) add_merge_config(&ref_map, remote_refs, branch, &tail); } else if (!prefetch) { @@ -2563,6 +2564,11 @@ int cmd_fetch(int argc, die(_("must supply remote when using --negotiate-only")); gtransport = prepare_transport(remote, 1); if (gtransport->smart_options) { + /* + * Intentionally assign the address of a local variable + * to a non-local struct's field. + * codeql[cpp/stack-address-escape] + */ gtransport->smart_options->acked_commits = &acked_commits; } else { warning(_("protocol does not support --negotiate-only, exiting")); diff --git a/builtin/push.c b/builtin/push.c index b1581a9c091502..e4a974f20f3af6 100644 --- a/builtin/push.c +++ b/builtin/push.c @@ -90,7 +90,7 @@ static void refspec_append_mapped(struct refspec *refspec, const char *ref, if (push_default == PUSH_DEFAULT_UPSTREAM && skip_prefix(matched->name, "refs/heads/", &branch_name)) { struct branch *branch = branch_get(branch_name); - if (branch->merge_nr == 1 && branch->merge[0]->src) { + if (branch && branch->merge_nr == 1 && branch->merge[0]->src) { refspec_appendf(refspec, "%s:%s", ref, branch->merge[0]->src); return; diff --git a/builtin/stash.c b/builtin/stash.c index 29acc8baa8e272..f5f98ded5cc8eb 100644 --- a/builtin/stash.c +++ b/builtin/stash.c @@ -284,7 +284,7 @@ static int reset_tree(struct object_id *i_tree, int update, int reset) memset(&opts, 0, sizeof(opts)); tree = parse_tree_indirect(i_tree); - if (parse_tree(tree)) + if (!tree || parse_tree(tree)) return -1; init_tree_desc(t, &tree->object.oid, tree->buffer, tree->size); @@ -1395,6 +1395,11 @@ static int do_create_stash(const struct pathspec *ps, struct strbuf *stash_msg_b goto done; } else { head_commit = lookup_commit(the_repository, &info->b_commit); + if (!head_commit) { + ret = error(_("could not look up commit '%s'"), + oid_to_hex (&info->b_commit)); + goto done; + } } if (!check_changes(ps, include_untracked, &untracked_files)) { diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fac542a5136e22..e47d77dd1b6e48 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1934,6 +1934,9 @@ static int determine_submodule_update_strategy(struct repository *r, const char *val; int ret; + if (!sub) + return error(_("could not retrieve submodule information for path '%s'"), path); + key = xstrfmt("submodule.%s.update", sub->name); if (update) { diff --git a/ci/install-dependencies.sh b/ci/install-dependencies.sh index d061a4729339e0..8a85ff749df187 100755 --- a/ci/install-dependencies.sh +++ b/ci/install-dependencies.sh @@ -119,7 +119,7 @@ ClangFormat) sudo apt-get -q update sudo apt-get -q -y install clang-format ;; -StaticAnalysis) +StaticAnalysis|codeql) sudo apt-get -q update sudo apt-get -q -y install coccinelle libcurl4-openssl-dev libssl-dev \ libexpat-dev gettext make diff --git a/commit-graph.c b/commit-graph.c index ad3943b6906520..a64cb532fe8db0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2560,6 +2560,7 @@ int write_commit_graph(struct object_directory *odb, struct commit_graph *g = ctx.r->objects->commit_graph; while (g) { + /* Intentional: codeql[cpp/stack-address-escape] */ g->topo_levels = &topo_levels; g = g->base_graph; } @@ -2796,6 +2797,11 @@ static int verify_one_commit_graph(struct repository *r, the_repository->hash_algo); graph_commit = lookup_commit(r, &cur_oid); + if (!graph_commit) { + graph_report(_("failed to look up commit %s for commit-graph"), + oid_to_hex(&cur_oid)); + continue; + } odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); if (repo_parse_commit_internal(r, odb_commit, 0, 0)) { graph_report(_("failed to parse commit %s from object database for commit-graph"), diff --git a/commit.c b/commit.c index 64d0268838207c..8209d5303d427e 100644 --- a/commit.c +++ b/commit.c @@ -189,7 +189,7 @@ void unparse_commit(struct repository *r, const struct object_id *oid) { struct commit *c = lookup_commit(r, oid); - if (!c->object.parsed) + if (!c || !c->object.parsed) return; free_commit_list(c->parents); c->parents = NULL; diff --git a/fetch-pack.c b/fetch-pack.c index bd124080dc3639..c11fca2b928dae 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -155,7 +155,7 @@ static struct commit *deref_without_lazy_fetch(const struct object_id *oid, struct tag *tag = (struct tag *) parse_object(the_repository, oid); - if (!tag->tagged) + if (!tag || !tag->tagged) return NULL; if (mark_tags_complete_and_check_obj_db) tag->object.flags |= COMPLETE; diff --git a/fsck.c b/fsck.c index 8dc8472ceb3781..868cbc6b0b3286 100644 --- a/fsck.c +++ b/fsck.c @@ -925,7 +925,7 @@ static int fsck_commit(const struct object_id *oid, { struct object_id tree_oid, parent_oid; unsigned author_count; - int err; + int err = 0; const char *buffer_begin = buffer; const char *buffer_end = buffer + size; const char *p; diff --git a/object-name.c b/object-name.c index 9288b2dd2453a0..f4350481ac8231 100644 --- a/object-name.c +++ b/object-name.c @@ -1108,7 +1108,7 @@ static enum get_oid_result get_parent(struct repository *r, if (ret) return ret; commit = lookup_commit_reference(r, &oid); - if (repo_parse_commit(r, commit)) + if (!commit || repo_parse_commit(r, commit)) return MISSING_OBJECT; if (!idx) { oidcpy(result, &commit->object.oid); diff --git a/pack-mtimes.c b/pack-mtimes.c index 20900ca88d377a..77589698f1230e 100644 --- a/pack-mtimes.c +++ b/pack-mtimes.c @@ -28,7 +28,7 @@ static int load_pack_mtimes_file(char *mtimes_file, int fd, ret = 0; struct stat st; uint32_t *data = NULL; - size_t mtimes_size, expected_size; + size_t mtimes_size = 0, expected_size; struct mtimes_header header; fd = git_open(mtimes_file); diff --git a/pack-revindex.c b/pack-revindex.c index ffcde48870d8f7..d9f48f14c14d63 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -211,7 +211,7 @@ static int load_revindex_from_disk(const struct git_hash_algo *algo, int fd, ret = 0; struct stat st; void *data = NULL; - size_t revindex_size; + size_t revindex_size = 0; struct revindex_header *hdr; if (git_env_bool(GIT_TEST_REV_INDEX_DIE_ON_DISK, 0)) diff --git a/revision.c b/revision.c index 5b4a5d4810d3a3..fdb02b6601aae6 100644 --- a/revision.c +++ b/revision.c @@ -3368,6 +3368,9 @@ static int leave_one_treesame_to_parent(struct rev_info *revs, struct commit *co struct commit_list *p; unsigned n; + if (!ts) + return 0; + for (p = commit->parents, n = 0; p; p = p->next, n++) { if (ts->treesame[n]) { if (p->item->object.flags & TMP_MARK) { diff --git a/shallow.c b/shallow.c index faeeeb45f986e1..01b62621e8fbf8 100644 --- a/shallow.c +++ b/shallow.c @@ -705,7 +705,8 @@ void assign_shallow_commits_to_refs(struct shallow_info *info, for (i = 0; i < nr_shallow; i++) { struct commit *c = lookup_commit(the_repository, &oid[shallow[i]]); - c->object.flags |= BOTTOM; + if (c) + c->object.flags |= BOTTOM; } for (i = 0; i < ref->nr; i++) diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d22f1..05417d8f43dcab 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -27,6 +27,8 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, repo_set_hash_algo(the_repository, hash_algo_by_ptr(r.hash_algo)); c = lookup_commit(&r, commit_oid); + if (!c) + die("Could not look up %s", oid_to_hex(commit_oid)); if (!parse_commit_in_graph(&r, c)) die("Couldn't parse commit"); diff --git a/t/unit-tests/clar/clar.c b/t/unit-tests/clar/clar.c index d54e4553674908..e7d142da53b2cd 100644 --- a/t/unit-tests/clar/clar.c +++ b/t/unit-tests/clar/clar.c @@ -558,7 +558,7 @@ clar_parse_args(int argc, char **argv) default: clar_abort("Unexpected commandline argument '%s'.\n", - argument[1]); + argument); } } } @@ -767,7 +767,7 @@ void clar__assert_equal( if (!is_equal) { if (s1 && s2) { int pos; - for (pos = 0; s1[pos] == s2[pos] && pos < len; ++pos) + for (pos = 0; pos < len && s1[pos] == s2[pos]; ++pos) /* find differing byte offset */; p_snprintf(buf, sizeof(buf), "'%.*s' != '%.*s' (at byte %d)", len, s1, len, s2, pos); @@ -803,7 +803,7 @@ void clar__assert_equal( if (!is_equal) { if (wcs1 && wcs2) { int pos; - for (pos = 0; wcs1[pos] == wcs2[pos] && pos < len; ++pos) + for (pos = 0; pos < len && wcs1[pos] == wcs2[pos]; ++pos) /* find differing byte offset */; p_snprintf(buf, sizeof(buf), "'%.*ls' != '%.*ls' (at byte %d)", len, wcs1, len, wcs2, pos); diff --git a/t/unit-tests/clar/clar/summary.h b/t/unit-tests/clar/clar/summary.h index 0d0b646fe7514b..e972831938cd44 100644 --- a/t/unit-tests/clar/clar/summary.h +++ b/t/unit-tests/clar/clar/summary.h @@ -19,14 +19,24 @@ static int clar_summary_testsuites(struct clar_summary *summary) return fprintf(summary->fp, "\n"); } +#ifdef _WIN32 +static struct tm *localtime_r(const time_t *timep, struct tm *result) +{ + if (localtime_s(result, timep) == 0) + return result; + return NULL; +} +#endif + static int clar_summary_testsuite(struct clar_summary *summary, int idn, const char *name, time_t timestamp, int test_count, int fail_count, int error_count) { - struct tm *tm = localtime(×tamp); + struct tm tm; char iso_dt[20]; - if (strftime(iso_dt, sizeof(iso_dt), "%Y-%m-%dT%H:%M:%S", tm) == 0) + localtime_r(×tamp, &tm); + if (strftime(iso_dt, sizeof(iso_dt), "%Y-%m-%dT%H:%M:%S", &tm) == 0) return -1; return fprintf(summary->fp, "\t