diff --git a/lib/hex/application.ex b/lib/hex/application.ex
index 72dd23a6..0d3748d4 100644
--- a/lib/hex/application.ex
+++ b/lib/hex/application.ex
@@ -49,6 +49,7 @@ defmodule Hex.Application do
     defp children do
       [
         Hex.Netrc.Cache,
+        Hex.RepoIdentifier,
         Hex.State,
         Hex.Server,
         {Hex.Parallel, [:hex_fetcher]}
@@ -58,6 +59,7 @@ defmodule Hex.Application do
     defp children do
       [
         Hex.Netrc.Cache,
+        Hex.RepoIdentifier,
         Hex.State,
         Hex.Server,
         {Hex.Parallel, [:hex_fetcher]},
diff --git a/lib/hex/http.ex b/lib/hex/http.ex
index a68201bb..25f91d30 100644
--- a/lib/hex/http.ex
+++ b/lib/hex/http.ex
@@ -41,6 +41,7 @@ defmodule Hex.HTTP do
     headers =
       headers
       |> add_basic_auth_via_netrc(url)
+      |> add_repo_identifier_header()
 
     timeout =
       adapter_config[:timeout] ||
@@ -315,6 +316,13 @@ defmodule Hex.HTTP do
     host
   end
 
+  defp add_repo_identifier_header(headers) do
+    case Hex.RepoIdentifier.fetch() do
+      nil -> headers
+      identifier -> Map.put(headers, "x-hex-repo-id", identifier)
+    end
+  end
+
   def handle_hex_message(nil) do
     :ok
   end
diff --git a/lib/hex/repo_identifier.ex b/lib/hex/repo_identifier.ex
new file mode 100644
index 00000000..06a62ce2
--- /dev/null
+++ b/lib/hex/repo_identifier.ex
@@ -0,0 +1,98 @@
+defmodule Hex.RepoIdentifier do
+  @moduledoc """
+  Gets an anonymized identifier for the current git repository.
+
+  This module caches the SHA of the first commit in the repository and hashes it once more for
+  anonymization.
+
+  Returns `nil` when:
+
+  - The `HEX_NO_REPO_IDENTIFIER` environment variable is set to `1` or `true`
+  - The `git` executable isn't available
+  - The current directory isn't within a git repository
+  """
+
+  use Agent
+
+  @doc """
+  Starts the agent that caches the identifier, registered under the module name.
+
+  The cache starts out empty (`nil`); the identifier is computed on the first `fetch/0`.
+  """
+  def start_link(_args) do
+    Agent.start_link(fn -> nil end, name: __MODULE__)
+  end
+
+  @doc """
+  Returns the cached identifier, computing and caching it with `get/0` on a cache miss.
+
+  A `nil` result is indistinguishable from an empty cache, so it is recomputed on every call.
+  """
+  def fetch do
+    Agent.get_and_update(__MODULE__, fn
+      nil ->
+        value = get()
+
+        {value, value}
+
+      cached ->
+        {cached, cached}
+    end)
+  end
+
+  @doc """
+  Overwrites the cached identifier with `value`.
+  """
+  def put(value) do
+    Agent.update(__MODULE__, fn _value -> value end)
+  end
+
+  @doc """
+  Computes the identifier without touching the cache.
+
+  Returns the lowercase hex-encoded SHA-256 of the trimmed `git rev-list` output, or `nil` when
+  the identifier is disabled or the initial commit can't be determined.
+  """
+  def get do
+    cond do
+      Hex.State.get(:no_repo_identifier) ->
+        nil
+
+      output = initial_commit_sha() ->
+        digest = :crypto.hash(:sha256, String.trim(output))
+
+        Base.encode16(digest, case: :lower)
+
+      true ->
+        nil
+    end
+  end
+
+  @doc """
+  Empties the cache so the next `fetch/0` recomputes the identifier.
+  """
+  def clear do
+    Agent.update(__MODULE__, fn _value -> nil end)
+  end
+
+  # Returns the raw output of `git rev-list --max-parents=0 HEAD`, or `nil` when git is missing
+  # or the command exits with a non-zero status.
+  defp initial_commit_sha do
+    cmd_args = ~w(rev-list --max-parents=0 HEAD)
+
+    with path when is_binary(path) <- System.find_executable("git") do
+      case System.cmd(path, cmd_args, stderr_to_stdout: true) do
+        {output, 0} ->
+          output
+
+        {output, exit_status} ->
+          Hex.Shell.debug(
+            " Unable to extract git identifier: (Exit #{exit_status}) \n\n" <> output
+          )
+
+          # Return `nil` explicitly so the result never depends on what `Hex.Shell.debug/1`
+          # returns; a truthy value here would be passed to `String.trim/1` by `get/0`.
+          nil
+      end
+    end
+  end
+end
diff --git a/lib/hex/state.ex b/lib/hex/state.ex
index 9546a8e7..7507294f 100644
--- a/lib/hex/state.ex
+++ b/lib/hex/state.ex
@@ -112,6 +112,12 @@ defmodule Hex.State do
       default: nil,
       config: [:cacerts_path]
     },
+    no_repo_identifier: %{
+      env: ["HEX_NO_REPO_IDENTIFIER"],
+      default: false,
+      config: [:no_repo_identifier],
+      fun: {__MODULE__, :to_boolean}
+    },
     no_short_urls: %{
       env: ["HEX_NO_SHORT_URLS"],
       config: [:no_short_urls],
diff --git a/test/hex/http_test.exs b/test/hex/http_test.exs
index 9d7833ba..7c8f5a0c 100644
--- a/test/hex/http_test.exs
+++ b/test/hex/http_test.exs
@@ -115,4 +115,25 @@ defmodule Hex.HTTPTest do
       )
     end)
   end
+
+  test "request includes identifier header when available", %{bypass: bypass} do
+    in_tmp(fn ->
+      # Initialize a git repository with a commit
+      System.cmd("git", ["init", "--initial-branch=main"])
+      System.cmd("git", ["config", "user.email", "test@example.com"])
+      System.cmd("git", ["config", "user.name", "Test User"])
+      File.write!("test.txt", "test content")
+      System.cmd("git", ["add", "test.txt"])
+      System.cmd("git", ["commit", "-m", "Initial commit"])
+
+      Bypass.expect(bypass, fn conn ->
+        assert [client_id] = Plug.Conn.get_req_header(conn, "x-hex-repo-id")
+        assert client_id =~ ~r/^[a-f0-9]{64}$/
+
+        Plug.Conn.resp(conn, 200, "")
+      end)
+
+      Hex.HTTP.request(:get, "http://localhost:#{bypass.port}", %{}, nil)
+    end)
+  end
 end
diff --git a/test/hex/repo_identifier_test.exs b/test/hex/repo_identifier_test.exs
new file mode 100644
index 00000000..dc749ba9
--- /dev/null
+++ b/test/hex/repo_identifier_test.exs
@@ -0,0 +1,55 @@
+defmodule Hex.RepoIdentifierTest do
+  use ExUnit.Case
+
+  alias Hex.RepoIdentifier
+
+  setup do
+    RepoIdentifier.clear()
+
+    :ok
+  end
+
+  describe "get/0" do
+    test "an identifier is included within a repository" do
+      assert RepoIdentifier.get() =~ ~r/^[a-f0-9]{64}$/
+    end
+
+    test "identifier is nil outside of a repository" do
+      # The tmp_dir resolves at hex/test/tmp, which allows git to traverse up to the repository
+      # root and find a commit. We're creating a temporary directory to simulate being outside of
+      # a repository instead.
+      dir =
+        "../../.."
+        |> Path.expand(__DIR__)
+        |> Path.join("empty-directory")
+
+      try do
+        File.mkdir!(dir)
+
+        File.cd!(dir, fn -> refute RepoIdentifier.get() end)
+      after
+        File.rmdir(dir)
+      end
+    end
+
+    test "identifier is nil when disabled by an environment variable" do
+      System.put_env("HEX_NO_REPO_IDENTIFIER", "1")
+      Hex.State.refresh()
+
+      refute RepoIdentifier.get()
+    after
+      System.delete_env("HEX_NO_REPO_IDENTIFIER")
+      Hex.State.refresh()
+    end
+  end
+
+  describe "fetch/0" do
+    test "the identifier is cached accross calls" do
+      value = "cached-identifier"
+
+      RepoIdentifier.put(value)
+
+      assert value == RepoIdentifier.fetch()
+    end
+  end
+end