Skip to content
11 changes: 10 additions & 1 deletion lib/hex/http.ex
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ defmodule Hex.HTTP do
# Builds the header map sent with a request.
#
# Starts from the default headers (currently just "user-agent"), lets
# add_repo_identifier_header/1 extend them, and merges the caller-supplied
# `headers` last, so a caller-supplied key wins over a default with the same key.
defp build_headers(headers) do
default_headers = %{"user-agent" => user_agent()}

# NOTE(review): the bare Map.merge/2 call below appears to be the pre-change line
# of this diff hunk; its result is not used by the pipeline that follows — confirm
# against the actual file.
Map.merge(default_headers, headers)
default_headers
|> add_repo_identifier_header()
|> Map.merge(headers)
end

defp build_http_opts(url, timeout) do
Expand Down Expand Up @@ -271,6 +273,13 @@ defmodule Hex.HTTP do
host
end

# Adds the "x-hex-repo-id" header when Hex.Utils.repo_identifier/0 yields a value;
# returns `headers` untouched when it yields nil.
defp add_repo_identifier_header(headers) do
  put_repo_identifier(headers, Hex.Utils.repo_identifier())
end

# No identifier available: leave the headers as they are.
defp put_repo_identifier(headers, nil), do: headers

# Identifier available: attach it under the repo-id header name.
defp put_repo_identifier(headers, repo_id), do: Map.put(headers, "x-hex-repo-id", repo_id)

defp user_agent do
ci = if Hex.State.fetch!(:ci), do: " (CI)", else: ""
"Hex/#{Hex.version()} (Elixir/#{System.version()}) (OTP/#{Hex.Utils.otp_version()})#{ci}"
Expand Down
21 changes: 21 additions & 0 deletions lib/hex/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -324,4 +324,25 @@ defmodule Hex.Utils do
{app, req, opts}
end)
end

@doc """
Gets an anonymized identifier for the current git repository.

Runs `git rev-list --max-parents=0 HEAD` in the current working directory to find the
parentless (first) commit of the repository and hashes its SHA once more with SHA-256
for anonymization. The result is a 64-character lowercase hex string.

The lookup can be turned off through the `HEX_REPO_IDENTIFIER` environment variable: any
value other than `"1"` or `"true"` disables it. When the variable is unset it defaults
to `"1"` (enabled).

Returns `nil` if the lookup is disabled, git is not available, the directory is not a git
repository, or git reported no commit SHA.
"""
def repo_identifier do
  with flag when flag in ["1", "true"] <- System.get_env("HEX_REPO_IDENTIFIER", "1"),
       path when is_binary(path) <- System.find_executable("git"),
       # Capture stderr rather than letting git write to the user's terminal; otherwise
       # "fatal: not a git repository" leaks out on every call made outside a repository.
       {output, 0} <-
         System.cmd("git", ["rev-list", "--max-parents=0", "HEAD"], stderr_to_stdout: true),
       [_ | _] = shas <- root_commit_shas(output) do
    # Joining with "\n" reproduces git's trimmed output, so the hash is the same as
    # hashing the raw trimmed stdout when git printed nothing but SHAs.
    shas
    |> Enum.join("\n")
    |> then(&:crypto.hash(:sha256, &1))
    |> Base.encode16(case: :lower)
  else
    _ -> nil
  end
end

# Extracts the lines of `output` that look like full commit SHAs (40 to 64 lowercase hex
# characters), dropping anything else such as diagnostics captured from stderr.
defp root_commit_shas(output) do
  output
  |> String.split("\n", trim: true)
  |> Enum.map(&String.trim/1)
  |> Enum.filter(&(&1 =~ ~r/\A[0-9a-f]{40,64}\z/))
end
end
21 changes: 21 additions & 0 deletions test/hex/http_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,25 @@ defmodule Hex.HTTPTest do
)
end)
end

test "request includes identifier header when available", %{bypass: bypass} do
  in_tmp(fn ->
    File.write!("test.txt", "test content")

    # Initialize a git repository with a single commit so there is a root commit to
    # identify. Each step must exit with 0: if `git init` failed silently, the following
    # commands would act on whatever repository encloses the current directory instead
    # of a throwaway one. `--initial-branch` is omitted because the branch name is
    # irrelevant here and the flag is rejected by older git releases.
    for args <- [
          ["init"],
          ["config", "user.email", "test@example.com"],
          ["config", "user.name", "Test User"],
          ["add", "test.txt"],
          ["commit", "-m", "Initial commit"]
        ] do
      assert {_output, 0} = System.cmd("git", args, stderr_to_stdout: true)
    end

    Bypass.expect(bypass, fn conn ->
      assert [client_id] = Plug.Conn.get_req_header(conn, "x-hex-repo-id")
      assert client_id =~ ~r/^[a-f0-9]{64}$/

      Plug.Conn.resp(conn, 200, "")
    end)

    Hex.HTTP.request(:get, "http://localhost:#{bypass.port}", %{}, nil)
  end)
end
end
37 changes: 37 additions & 0 deletions test/hex/utils_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
defmodule Hex.UtilsTest do
  # Not async: these tests change the working directory and the OS environment.
  use ExUnit.Case

  describe "repo_identifier/0" do
    test "an identifier is included within a repository" do
      identifier = Hex.Utils.repo_identifier()

      # Check for a binary first so a nil result fails as an assertion rather than
      # raising from the regex match.
      assert is_binary(identifier)
      assert identifier =~ ~r/^[a-f0-9]{64}$/
    end

    test "identifier is nil outside of a repository" do
      # The project tmp dir resolves at hex/test/tmp, which allows git to traverse up to the
      # repository root and find a commit. Use a fresh directory under the system temp
      # directory instead, so nothing is written next to the checkout.
      # NOTE(review): assumes the system temp directory is not itself inside a git
      # repository — confirm for unusual TMPDIR setups.
      dir =
        Path.join(
          System.tmp_dir!(),
          "hex-empty-directory-#{System.unique_integer([:positive])}"
        )

      File.mkdir_p!(dir)

      try do
        File.cd!(dir, fn -> refute Hex.Utils.repo_identifier() end)
      after
        File.rm_rf(dir)
      end
    end

    test "identifier is nil when disabled by an environment variable" do
      # Remember the caller's setting so it can be restored rather than discarded.
      previous = System.get_env("HEX_REPO_IDENTIFIER")

      try do
        System.put_env("HEX_REPO_IDENTIFIER", "0")
        refute Hex.Utils.repo_identifier()

        System.put_env("HEX_REPO_IDENTIFIER", "false")
        refute Hex.Utils.repo_identifier()
      after
        if previous do
          System.put_env("HEX_REPO_IDENTIFIER", previous)
        else
          System.delete_env("HEX_REPO_IDENTIFIER")
        end
      end
    end
  end
end
Loading