Skip to content

Commit 1264476

Browse files
sorentwo and ericmj authored
Add client identifier header to HTTP requests (#1075)
* Add client identifier header to HTTP requests

  Creates an anonymized repository identifier based on the SHA of the first commit, hashed with SHA256 for additional privacy. The identifier is sent as an `x-hex-client-id` header when available.

* Rename identifier header to x-hex-repo-id

* Respect HEX_REPO_IDENTIFIER environment variable

  The new variable allows users to opt out of identification by setting the value to anything other than `1` or `true`. It remains enabled by default.

* Add local caching to repo identifier lookup

  Prevents fetching the same identifier on every client call by caching the value in the current process dictionary. While the git command isn't particularly slow, this should avoid spawning during repeated http calls.

* Use Hex.State agent for identifier config

  Switch from a raw `System.get_env` to using the normalized state agent. Also prevent STDERR from leaking when called outside of a git repository.

* Use Hex.State to store cached repo identifier

* Print error message on non-zero identifier message

* Synchronize cached repo identifier access

  The repo identifier value is now cached with an agent, which also serializes access to fetch the state. This also removes the value from `Hex.State`, which was previously used for caching but is ultimately unnecessary.

---------

Co-authored-by: Eric Meadows-Jönsson <eric.meadows.jonsson@gmail.com>
1 parent 843974a commit 1264476

File tree

6 files changed

+164
-0
lines changed

6 files changed

+164
-0
lines changed

lib/hex/application.ex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ defmodule Hex.Application do
4949
defp children do
5050
[
5151
Hex.Netrc.Cache,
52+
Hex.RepoIdentifier,
5253
Hex.State,
5354
Hex.Server,
5455
{Hex.Parallel, [:hex_fetcher]}
@@ -58,6 +59,7 @@ defmodule Hex.Application do
5859
defp children do
5960
[
6061
Hex.Netrc.Cache,
62+
Hex.RepoIdentifier,
6163
Hex.State,
6264
Hex.Server,
6365
{Hex.Parallel, [:hex_fetcher]},

lib/hex/http.ex

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ defmodule Hex.HTTP do
4141
headers =
4242
headers
4343
|> add_basic_auth_via_netrc(url)
44+
|> add_repo_identifier_header()
4445

4546
timeout =
4647
adapter_config[:timeout] ||
@@ -315,6 +316,13 @@ defmodule Hex.HTTP do
315316
host
316317
end
317318

319+
# Adds the `x-hex-repo-id` header when `Hex.RepoIdentifier.fetch/0` yields an identifier;
# otherwise the headers are returned untouched.
defp add_repo_identifier_header(headers) do
  identifier = Hex.RepoIdentifier.fetch()

  if is_nil(identifier) do
    headers
  else
    Map.put(headers, "x-hex-repo-id", identifier)
  end
end
325+
318326
def handle_hex_message(nil) do
319327
:ok
320328
end

lib/hex/repo_identifier.ex

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
defmodule Hex.RepoIdentifier do
  @moduledoc """
  Gets an anonymized identifier for the current git repository.

  The identifier is derived from the output of `git rev-list --max-parents=0 HEAD` (the SHA of
  the first commit), hashed once more with SHA-256 for anonymization and encoded as lowercase
  hex. The computed value is cached in an agent so repeated lookups don't spawn `git` again.

  Returns `nil` when:

  - The identifier is disabled through the `HEX_NO_REPO_IDENTIFIER` environment variable (for
    example by setting it to `1`) or the `:no_repo_identifier` config
  - The `git` executable isn't available
  - The current directory isn't within a git repository (the `git` command exits with a
    non-zero status)
  """

  use Agent

  @doc """
  Starts the agent holding the cached identifier. The cache starts out empty (`nil`).
  """
  def start_link(_args) do
    Agent.start_link(fn -> nil end, name: __MODULE__)
  end

  @doc """
  Returns the cached identifier, computing it with `get/0` first when nothing is cached.

  The lookup runs inside the agent, so concurrent callers are serialized. A `nil` result is
  indistinguishable from an empty cache, so it is looked up again on the next call.
  """
  def fetch do
    Agent.get_and_update(__MODULE__, fn
      nil ->
        value = get()

        {value, value}

      cached ->
        {cached, cached}
    end)
  end

  @doc """
  Replaces the cached identifier with `value`.
  """
  def put(value) do
    Agent.update(__MODULE__, fn _value -> value end)
  end

  @doc """
  Computes the identifier without consulting or updating the cache.

  Returns a 64 character lowercase hex string, or `nil` when the identifier is disabled or
  can't be determined.
  """
  def get do
    cond do
      Hex.State.get(:no_repo_identifier) ->
        nil

      output = initial_commit_sha() ->
        output
        |> String.trim()
        |> then(&:crypto.hash(:sha256, &1))
        |> Base.encode16(case: :lower)

      true ->
        nil
    end
  end

  @doc """
  Empties the cache so the next `fetch/0` computes the identifier again.
  """
  def clear do
    Agent.update(__MODULE__, fn _value -> nil end)
  end

  # Returns the raw output of `git rev-list --max-parents=0 HEAD`, or `nil` when the `git`
  # executable can't be found or the command exits with a non-zero status.
  defp initial_commit_sha do
    cmd_args = ~w(rev-list --max-parents=0 HEAD)

    with path when is_binary(path) <- System.find_executable("git") do
      # Run the executable that was just resolved instead of looking it up a second time.
      # stderr is merged into stdout so git's error output doesn't leak to the terminal.
      case System.cmd(path, cmd_args, stderr_to_stdout: true) do
        {output, 0} ->
          output

        {output, exit_status} ->
          Hex.Shell.debug(
            " Unable to extract git identifier: (Exit #{exit_status}) \n\n" <> output
          )

          # Return nil explicitly: the return value of the debug call must never be mistaken
          # for command output by `get/0`, which would try to trim and hash it.
          nil
      end
    end
  end
end

lib/hex/state.ex

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,12 @@ defmodule Hex.State do
112112
default: nil,
113113
config: [:cacerts_path]
114114
},
115+
no_repo_identifier: %{
116+
env: ["HEX_NO_REPO_IDENTIFIER"],
117+
default: false,
118+
config: [:no_repo_identifier],
119+
fun: {__MODULE__, :to_boolean}
120+
},
115121
no_short_urls: %{
116122
env: ["HEX_NO_SHORT_URLS"],
117123
config: [:no_short_urls],

test/hex/http_test.exs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,4 +115,25 @@ defmodule Hex.HTTPTest do
115115
)
116116
end)
117117
end
118+
119+
test "request includes identifier header when available", %{bypass: bypass} do
  in_tmp(fn ->
    # Set up a git repository containing a single commit.
    git = fn args -> System.cmd("git", args) end

    git.(["init", "--initial-branch=main"])
    git.(["config", "user.email", "test@example.com"])
    git.(["config", "user.name", "Test User"])
    File.write!("test.txt", "test content")
    git.(["add", "test.txt"])
    git.(["commit", "-m", "Initial commit"])

    # The header must appear exactly once and consist of 64 lowercase hex characters.
    Bypass.expect(bypass, fn conn ->
      assert [repo_id] = Plug.Conn.get_req_header(conn, "x-hex-repo-id")
      assert repo_id =~ ~r/^[a-f0-9]{64}$/

      Plug.Conn.resp(conn, 200, "")
    end)

    Hex.HTTP.request(:get, "http://localhost:#{bypass.port}", %{}, nil)
  end)
end
118139
end

test/hex/repo_identifier_test.exs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
defmodule Hex.RepoIdentifierTest do
  # Not async: these tests touch global state (the identifier cache, the current working
  # directory and environment variables).
  use ExUnit.Case

  alias Hex.RepoIdentifier

  setup do
    # Start every test with an empty identifier cache.
    RepoIdentifier.clear()

    :ok
  end

  describe "get/0" do
    test "an identifier is included within a repository" do
      assert RepoIdentifier.get() =~ ~r/^[a-f0-9]{64}$/
    end

    test "identifier is nil outside of a repository" do
      # The tmp_dir resolves at hex/test/tmp, which allows git to traverse up to the repository
      # root and find a commit. We're creating a temporary directory to simulate being outside of
      # a repository instead.
      dir =
        "../../.."
        |> Path.expand(__DIR__)
        |> Path.join("empty-directory")

      try do
        # mkdir_p! tolerates a directory left behind by an interrupted earlier run, where
        # mkdir! would raise with :eexist.
        File.mkdir_p!(dir)

        File.cd!(dir, fn -> refute RepoIdentifier.get() end)
      after
        File.rmdir(dir)
      end
    end

    test "identifier is nil when disabled by an environment variable" do
      System.put_env("HEX_NO_REPO_IDENTIFIER", "1")
      Hex.State.refresh()

      refute RepoIdentifier.get()
    after
      # Restore the environment so later tests see the identifier enabled again.
      System.delete_env("HEX_NO_REPO_IDENTIFIER")
      Hex.State.refresh()
    end
  end

  describe "fetch/0" do
    test "the identifier is cached across calls" do
      value = "cached-identifier"

      RepoIdentifier.put(value)

      assert value == RepoIdentifier.fetch()
    end
  end
end

0 commit comments

Comments
 (0)