Skip to content
2 changes: 2 additions & 0 deletions lib/hex/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ defmodule Hex.Application do
defp children do
[
Hex.Netrc.Cache,
Hex.RepoIdentifier,
Hex.State,
Hex.Server,
{Hex.Parallel, [:hex_fetcher]}
Expand All @@ -58,6 +59,7 @@ defmodule Hex.Application do
defp children do
[
Hex.Netrc.Cache,
Hex.RepoIdentifier,
Hex.State,
Hex.Server,
{Hex.Parallel, [:hex_fetcher]},
Expand Down
8 changes: 8 additions & 0 deletions lib/hex/http.ex
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ defmodule Hex.HTTP do
headers =
headers
|> add_basic_auth_via_netrc(url)
|> add_repo_identifier_header()

timeout =
adapter_config[:timeout] ||
Expand Down Expand Up @@ -315,6 +316,13 @@ defmodule Hex.HTTP do
host
end

# Adds the anonymized repository identifier to the request headers under
# "x-hex-repo-id". The headers are returned untouched when
# Hex.RepoIdentifier.fetch/0 yields nil.
defp add_repo_identifier_header(headers) do
  repo_id = Hex.RepoIdentifier.fetch()

  if is_nil(repo_id) do
    headers
  else
    Map.put(headers, "x-hex-repo-id", repo_id)
  end
end

# Nothing to handle when no message is given.
def handle_hex_message(nil), do: :ok
Expand Down
72 changes: 72 additions & 0 deletions lib/hex/repo_identifier.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
defmodule Hex.RepoIdentifier do
@moduledoc """
Gets an anonymized identifier for the current git repository.

This module caches the SHA of the first commit in the repository and hashes it once more for
anonymization.

Returns `nil` when:

- The `HEX_NO_REPO_IDENTIFIER` environment variable is set to `1` or `true`
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it should be “HEX_NO_REPO_IDENTIFIER”?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either version is fine by me (HEX_REPO_IDENTIFIER=false or HEX_NO_REPO_IDENTIFIER=true)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean, the doc here says: HEX_REPO_IDENTIFIER

But the code is actually using NO_HEX_REPO_IDENTIFIER:

https://github.com/sorentwo/hex/blob/5e329783583aa4d5929953ce95a73d68590406d2/lib/hex/state.ex#L115-L116

So maybe the docs should be changed to NO_HEX_REPO_IDENTIFIER.

Or, I'm missing something. 😅

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My mistake! This PR drifted a while and I completely forgot about the name change. Glad @ericmj took care of it =)

- The `git` executable isn't available
- The current directory isn't within a git repository
"""

use Agent

@doc """
Starts the cache process, registered under this module's name.

The initial state is `nil`, meaning no identifier has been cached yet.
"""
def start_link(_args) do
  empty_cache = fn -> nil end

  Agent.start_link(empty_cache, name: __MODULE__)
end

@doc """
Returns the cached identifier, computing it with `get/0` when nothing is cached.

The computed value is stored as the new cache state. Because an empty cache is
represented by `nil`, a `nil` result is not remembered and `get/0` runs again
on the next call.
"""
def fetch do
  Agent.get_and_update(__MODULE__, fn state ->
    identifier =
      case state do
        nil -> get()
        _ -> state
      end

    {identifier, identifier}
  end)
end

@doc """
Replaces the cached identifier with `value`, discarding the previous state.
"""
def put(value) do
  overwrite = fn _previous -> value end

  Agent.update(__MODULE__, overwrite)
end

@doc """
Computes the anonymized identifier without consulting the cache.

Returns `nil` when the `:no_repo_identifier` setting is truthy or when no
initial commit SHA could be obtained. Otherwise the trimmed SHA output is
hashed with SHA-256 and returned as a lowercase hex string.
"""
def get do
  opted_out? = Hex.State.get(:no_repo_identifier)

  # Skip the git lookup entirely when the identifier is disabled.
  sha = if opted_out?, do: nil, else: initial_commit_sha()

  if sha do
    digest = :crypto.hash(:sha256, String.trim(sha))

    Base.encode16(digest, case: :lower)
  end
end

@doc """
Resets the cache to `nil` so the next `fetch/0` recomputes the identifier.
"""
def clear do
  Agent.update(__MODULE__, fn _cached -> nil end)
end

# Returns the raw output of `git rev-list --max-parents=0 HEAD` run in the
# current working directory, or nil when the `git` executable cannot be found
# or the command exits with a non-zero status.
#
# The failure branch returns nil explicitly instead of whatever
# Hex.Shell.debug/1 returns: the caller treats any truthy result as command
# output and passes it to String.trim/1.
defp initial_commit_sha do
  cmd_args = ~w(rev-list --max-parents=0 HEAD)

  with path when is_binary(path) <- System.find_executable("git") do
    # Run the executable that was just resolved rather than looking it up again.
    case System.cmd(path, cmd_args, stderr_to_stdout: true) do
      {output, 0} ->
        output

      {output, exit_status} ->
        Hex.Shell.debug(
          " Unable to extract git identifier: (Exit #{exit_status}) \n\n" <> output
        )

        nil
    end
  end
end
end
6 changes: 6 additions & 0 deletions lib/hex/state.ex
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ defmodule Hex.State do
default: nil,
config: [:cacerts_path]
},
no_repo_identifier: %{
env: ["HEX_NO_REPO_IDENTIFIER"],
default: false,
config: [:no_repo_identifier],
fun: {__MODULE__, :to_boolean}
},
no_short_urls: %{
env: ["HEX_NO_SHORT_URLS"],
config: [:no_short_urls],
Expand Down
21 changes: 21 additions & 0 deletions test/hex/http_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,25 @@ defmodule Hex.HTTPTest do
)
end)
end

test "request includes identifier header when available", %{bypass: bypass} do
  # Start from an empty cache so the header is derived from the repository
  # created below rather than from a value left behind by an earlier test.
  Hex.RepoIdentifier.clear()

  in_tmp(fn ->
    # Every git step must succeed. If `git init` failed silently, the
    # following commands would act on whatever repository encloses the
    # temporary directory (presumably the Hex checkout itself).
    git = fn args ->
      assert {_output, 0} = System.cmd("git", args, stderr_to_stdout: true)
    end

    # Initialize a git repository with a commit
    git.(["init", "--initial-branch=main"])
    git.(["config", "user.email", "test@example.com"])
    git.(["config", "user.name", "Test User"])
    File.write!("test.txt", "test content")
    git.(["add", "test.txt"])
    git.(["commit", "-m", "Initial commit"])

    Bypass.expect(bypass, fn conn ->
      assert [client_id] = Plug.Conn.get_req_header(conn, "x-hex-repo-id")
      assert client_id =~ ~r/^[a-f0-9]{64}$/

      Plug.Conn.resp(conn, 200, "")
    end)

    Hex.HTTP.request(:get, "http://localhost:#{bypass.port}", %{}, nil)
  end)
after
  # Don't leak the temporary repository's identifier into later tests.
  Hex.RepoIdentifier.clear()
end
end
55 changes: 55 additions & 0 deletions test/hex/repo_identifier_test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
defmodule Hex.RepoIdentifierTest do
  # Not async: these tests mutate the shared identifier cache, the process
  # environment and the current working directory.
  use ExUnit.Case

  alias Hex.RepoIdentifier

  setup do
    RepoIdentifier.clear()

    # The cache is a single named process shared by the whole test suite, so
    # also reset it after each test. Otherwise a value stored with put/1
    # would still be served by fetch/0 in test modules that run later.
    on_exit(&RepoIdentifier.clear/0)

    :ok
  end

  describe "get/0" do
    test "an identifier is included within a repository" do
      assert RepoIdentifier.get() =~ ~r/^[a-f0-9]{64}$/
    end

    test "identifier is nil outside of a repository" do
      # The tmp_dir resolves at hex/test/tmp, which allows git to traverse up to the repository
      # root and find a commit. We're creating a temporary directory to simulate being outside of
      # a repository instead.
      dir =
        "../../.."
        |> Path.expand(__DIR__)
        |> Path.join("empty-directory")

      try do
        # mkdir_p! tolerates a directory left behind by an interrupted run.
        File.mkdir_p!(dir)

        File.cd!(dir, fn -> refute RepoIdentifier.get() end)
      after
        File.rmdir(dir)
      end
    end

    test "identifier is nil when disabled by an environment variable" do
      System.put_env("HEX_NO_REPO_IDENTIFIER", "1")
      Hex.State.refresh()

      refute RepoIdentifier.get()
    after
      System.delete_env("HEX_NO_REPO_IDENTIFIER")
      Hex.State.refresh()
    end
  end

  describe "fetch/0" do
    test "the identifier is cached accross calls" do
      value = "cached-identifier"

      RepoIdentifier.put(value)

      assert value == RepoIdentifier.fetch()
    end
  end
end
Loading