Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,20 @@ jobs:
os: 'ubuntu-latest'
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: julia-actions/setup-julia@v2
with:
version: ${{ matrix.version }}
- uses: julia-actions/julia-buildpkg@latest
- name: Generate a token that has read access to https://github.com/JuliaRegistries/user-blocklist-mock-for-testing
id: generate-token
uses: actions/create-github-app-token@v2
with:
app-id: ${{ vars.APP_ID_TEST_APP_USER_BLOCKLIST_MOCK }}
private-key: ${{ secrets.APP_PRIVATE_KEY_TEST_APP_USER_BLOCKLIST_MOCK }}
owner: JuliaRegistries
repositories: user-blocklist-mock-for-testing
- name: Run the package tests
run: |
import Pkg
Expand All @@ -47,7 +57,7 @@ jobs:
Pkg.test(; allow_reresolve, coverage)
end
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ steps.generate-token.outputs.token }}
shell: julia --color=yes --project {0}
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v5
Expand All @@ -59,6 +69,8 @@ jobs:
contents: write # needed to be able to push to the gh-pages branch
steps:
- uses: actions/checkout@v4
with:
persist-credentials: false
- uses: julia-actions/setup-julia@v2
with:
version: '1'
Expand Down
15 changes: 15 additions & 0 deletions run/config.commentbot.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ check_private_membership = false # If set to true Registrator will allow priv
# Additional registries to look into for `[deps]` and `[compat]`
registry_deps = ["https://github.com/JuliaRegistries/General"]

# -- Blocklist configuration (optional) --
# Block specific users from using Registrator. The blocklist is fetched from a
# GitHub repo (can be private) containing a TOML file with blocked user entries.
# Unless blocklist_token is set, the github token below (commentbot.github.token)
# is used to authenticate when fetching the blocklist, so it must have read
# access to the blocklist repo.
# Use scripts/lookup_user_id.sh to find a user's immutable platform ID.
#
# blocklist_repo = "JuliaRegistries/user-blocklist" # owner/repo on GitHub
# blocklist_file = "blocklist.toml" # path within the repo
# blocklist_cache_ttl = 300 # seconds between refreshes
# blocklist_token = "" # optional: a GitHub PAT with read access to the
# blocklist repo. If not set, commentbot.github.token
# is used instead (it must then have read access to
# the blocklist repo).

[commentbot.github]
user = "" # A GitHub user ID, can be same as that set for [regservice]
email = "" # The email associated with the above user, this is
Expand Down
15 changes: 15 additions & 0 deletions run/config.web.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@ stop_file = "stopwebui"
# allow_private = false
# enable_logging = true

# -- Blocklist configuration (optional) --
# Block specific users from using Registrator. The blocklist is fetched from a
# GitHub repo (can be private) containing a TOML file with blocked user entries.
# Unless blocklist_token is set, the github token below (web.github.token) is
# used to authenticate when fetching the blocklist, so it must have read access
# to the blocklist repo.
# Use scripts/lookup_user_id.sh to find a user's immutable platform ID.
#
# blocklist_repo = "JuliaRegistries/user-blocklist" # owner/repo on GitHub
# blocklist_file = "blocklist.toml" # path within the repo
# blocklist_cache_ttl = 300 # seconds between refreshes
# blocklist_token = "" # optional: a GitHub PAT with read access to the
# blocklist repo. If not set, web.github.token is
# used instead (it must then have read access to
# the blocklist repo).

[web.github]
token = ""
client_id = ""
Expand Down
115 changes: 115 additions & 0 deletions scripts/lookup_user_id.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
#
# Look up an account's immutable platform ID for use in the Registrator
# blocklist. On GitHub this covers both users and organizations; the GitLab
# and Bitbucket lookups query those providers' user endpoints only.
#
# Usage:
# ./lookup_user_id.sh <name> # defaults to github
# ./lookup_user_id.sh <name> github
# ./lookup_user_id.sh <name> gitlab
# ./lookup_user_id.sh <name> bitbucket
#
# Requires: curl, grep, sed
#
# For private GitHub repos or to avoid rate limits, set GITHUB_TOKEN:
# GITHUB_TOKEN=ghp_... ./lookup_user_id.sh <name>
#
# Note: On GitHub, the same endpoint works for both users and organizations,
# since they share the same ID namespace.

set -euo pipefail

# Print a one-line synopsis of the command line to stdout, then terminate
# the script with exit status 1.
usage() {
    printf 'Usage: %s <username> [github|gitlab|bitbucket]\n' "$0"
    exit 1
}

[ $# -lt 1 ] && usage

USERNAME="$1"
PROVIDER="${2:-github}"

# Report a failed lookup on stderr (message plus the raw API response, which
# usually carries the provider's own error text) and exit with status 1.
#   $1 - provider display name, e.g. "GitHub"
fail_not_found() {
    echo "Error: Could not find $1 user '$USERNAME'" >&2
    echo "$RESPONSE" >&2
    exit 1
}

# Print the lookup summary followed by a ready-to-paste blocklist entry.
#   $1 - provider display name (e.g. "GitHub")
#   $2 - provider key written to the entry (e.g. "github")
#   $3 - label for the identifier in the summary ("ID" or "UUID")
#   $4 - identifier as shown in the summary
#   $5 - identifier as a TOML value (bare integer, or a quoted string)
print_blocklist_entry() {
    echo "Provider: $1"
    echo "Username: $USERNAME"
    echo "$3: $4"
    echo ""
    echo "Blocklist entry:"
    echo ""
    echo "[[blocked]]"
    echo "provider = \"$2\""
    echo "id = $5"
    echo "username = \"$USERNAME\""
    echo "reason = \"\""
}

# NOTE on the extraction pipelines below: the script runs under
# `set -euo pipefail`, so a grep that finds no match would make the whole
# assignment fail and abort the script *before* the friendly error message
# is printed. The trailing `|| true` keeps the script alive so the empty
# result can be detected and reported explicitly.
case "$PROVIDER" in
    github)
        # -sS: no progress meter, but still print transport errors.
        CURL_ARGS=(-sS)
        # Send the token only when one is supplied via the environment.
        if [ -n "${GITHUB_TOKEN:-}" ]; then
            CURL_ARGS+=(-H "Authorization: token $GITHUB_TOKEN")
        fi

        RESPONSE=$(curl "${CURL_ARGS[@]}" "https://api.github.com/users/${USERNAME}")

        # First line containing "id": with everything but the digits removed.
        ID=$(echo "$RESPONSE" | grep '"id":' | head -1 | sed 's/[^0-9]//g' || true)

        if [ -z "$ID" ]; then
            fail_not_found "GitHub"
        fi

        print_blocklist_entry "GitHub" "github" "ID" "$ID" "$ID"
        ;;

    gitlab)
        RESPONSE=$(curl -sS "https://gitlab.com/api/v4/users?username=${USERNAME}")

        # First "id":<digits> occurrence in the response.
        ID=$(echo "$RESPONSE" | grep -o '"id":[0-9]*' | head -1 | sed 's/"id"://' || true)

        if [ -z "$ID" ]; then
            fail_not_found "GitLab"
        fi

        print_blocklist_entry "GitLab" "gitlab" "ID" "$ID" "$ID"
        ;;

    bitbucket)
        RESPONSE=$(curl -sS "https://api.bitbucket.org/2.0/users/${USERNAME}")

        # First "uuid": "<value>" occurrence, stripped down to the value.
        UUID=$(echo "$RESPONSE" | grep -o '"uuid": *"[^"]*"' | head -1 | sed 's/"uuid": *"//;s/"//' || true)

        if [ -z "$UUID" ]; then
            fail_not_found "Bitbucket"
        fi

        # The UUID is not numeric, so it is emitted as a quoted TOML string.
        print_blocklist_entry "Bitbucket" "bitbucket" "UUID" "$UUID" "\"$UUID\""
        ;;

    *)
        echo "Error: Unknown provider '$PROVIDER'. Use github, gitlab, or bitbucket." >&2
        exit 1
        ;;
esac
1 change: 1 addition & 0 deletions src/Registrator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ RegistryTools.register(regp::RegisterParams) = RegistryTools.register(regp.packa

include("pull_request.jl")
include("Messaging.jl")
include("blocklist.jl")
include("RegService.jl")
include("commentbot/CommentBot.jl")
include("webui/WebUI.jl")
Expand Down
143 changes: 143 additions & 0 deletions src/blocklist.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
module Blocklist

using HTTP
using JSON
using Base64
using Dates
using Logging
import Pkg: TOML

export is_blocked, load_blocklist!

# Maps provider name (e.g. "github") to Set of blocked ID strings.
const BLOCKED_IDS = Dict{String, Set{String}}()
const BLOCKLIST_LOCK = ReentrantLock()
const LAST_FETCH = Ref{DateTime}(DateTime(0))

# Upper bound, in seconds, on how long a single read from the GitHub API may stall
# before the blocklist fetch is abandoned.
const BLOCKLIST_READ_TIMEOUT = 30

# Parse the TOML text of a blocklist file into a `provider => Set of ID strings` map.
# Entries lacking either `id` or `provider` are skipped. Provider names are lowercased
# and IDs are stringified, so integer IDs and string UUIDs are stored uniformly.
function _parse_blocklist(content::AbstractString)
    parsed = Dict{String, Set{String}}()
    for entry in get(TOML.parse(content), "blocked", [])
        id = get(entry, "id", nothing)
        provider = get(entry, "provider", nothing)
        (id === nothing || provider === nothing) && continue
        push!(get!(Set{String}, parsed, lowercase(string(provider))), string(id))
    end
    return parsed
end

"""
    load_blocklist!(config::Dict)

Fetch the blocklist from the configured GitHub repo and replace the in-memory cache.

The blocklist file is expected to be TOML-formatted with a `[[blocked]]` array of tables,
each having an `id` field (the immutable platform ID) and a `provider` field. Entries
missing either field are ignored.

Entries can block individual users OR organizations/repo owners. GitHub users and orgs
share the same ID namespace, so an org ID works the same way as a user ID. Registrator
checks both the requesting user's ID and the repository owner's ID against the blocklist.

# Configuration keys read from `config`
- `blocklist_repo`: `owner/repo` on GitHub. If unset or empty, nothing is fetched.
- `blocklist_file`: path of the TOML file within the repo (default `"blocklist.toml"`).
- `blocklist_token`: token used for the request. If unset or empty,
  `config["github"]["token"]` is used; if that is empty too, nothing is fetched.

Example blocklist.toml:

    # To find a GitHub user or org ID:   curl https://api.github.com/users/NAME
    # To find a GitLab user's ID:        curl https://gitlab.com/api/v4/users?username=NAME
    # To find a GitLab group's ID:       curl https://gitlab.com/api/v4/groups/NAME
    # To find a Bitbucket user's UUID:   curl https://api.bitbucket.org/2.0/users/NAME
    #
    # Or use: scripts/lookup_user_id.sh NAME [github|gitlab|bitbucket]

    # Block a user
    [[blocked]]
    provider = "github"
    id = 12345678
    username = "spammer1"  # for human reference only
    reason = "AI-generated spam"

    # Block an organization (prevents registration of any repo owned by this org)
    [[blocked]]
    provider = "github"
    id = 87654321
    username = "spam-org"
    reason = "organization used for spam packages"

Fail-open: if the repo is unreachable, the response status is not 200, or the file is
malformed, a warning is logged and the cache is left exactly as it was (empty if no
fetch has succeeded yet). Always returns `nothing`.
"""
function load_blocklist!(config::Dict)
    repo = get(config, "blocklist_repo", "")
    isempty(repo) && return
    file = get(config, "blocklist_file", "blocklist.toml")
    # Use a dedicated blocklist token if provided, otherwise fall back to the
    # main GitHub token. The token must have read access to the blocklist repo.
    token = get(config, "blocklist_token", "")
    if isempty(token)
        token = get(get(config, "github", Dict()), "token", "")
    end
    isempty(token) && return

    # Fetch outside the lock so that slow network requests don't block all
    # is_blocked() callers. Only hold the lock for the in-memory swap.
    new_blocked = try
        headers = [
            "Authorization" => "Bearer $token",
            "Accept" => "application/vnd.github.v3+json",
            "User-Agent" => "Registrator.jl",
        ]
        url = "https://api.github.com/repos/$repo/contents/$file"
        # The read timeout keeps a stalled connection from hanging the caller that
        # triggered the refresh; a timeout surfaces as an exception handled below.
        resp = HTTP.get(
            url;
            headers=headers,
            status_exception=false,
            readtimeout=BLOCKLIST_READ_TIMEOUT,
        )
        if resp.status != 200
            @warn "Failed to fetch blocklist" status=resp.status repo=repo file=file
            return
        end
        data = JSON.parse(String(resp.body))
        # The contents API returns the file base64-encoded with embedded newlines.
        content = String(base64decode(replace(get(data, "content", ""), "\n" => "")))
        _parse_blocklist(content)
    catch ex
        # Log only the exception type and message, not the full exception object,
        # because HTTP errors may include request headers containing the auth token.
        @warn "Failed to load blocklist, allowing all users" error=sprint(showerror, ex)
        return
    end

    lock(BLOCKLIST_LOCK) do
        empty!(BLOCKED_IDS)
        merge!(BLOCKED_IDS, new_blocked)
        LAST_FETCH[] = now(UTC)
    end
    total = sum(length, values(new_blocked); init=0)
    @info "Blocklist loaded" count=total
    return
end

"""
    maybe_refresh!(config::Dict)

Reload the blocklist via `load_blocklist!` when the cached copy is older than
`blocklist_cache_ttl` seconds (default 300). Does nothing when `blocklist_repo`
is unset or empty.
"""
function maybe_refresh!(config::Dict)
    isempty(get(config, "blocklist_repo", "")) && return
    max_age = get(config, "blocklist_cache_ttl", 300)
    # The difference of two DateTimes is in milliseconds; convert to seconds.
    age_seconds = (now(UTC) - LAST_FETCH[]).value / 1000
    age_seconds > max_age || return
    load_blocklist!(config)
end

"""
    is_blocked(provider::AbstractString, user_id, config::Dict) -> Bool

Report whether `user_id` on `provider` (`"github"`, `"gitlab"`, or `"bitbucket"`)
appears in the cached blocklist.

`provider` is matched case-insensitively and `user_id` may be of any type (integer,
string, UUID); it is compared by its `string` representation. Before the lookup the
cache is refreshed through `maybe_refresh!` if its TTL has expired. Returns `false`
immediately when `blocklist_repo` is not configured. A positive match is logged.
"""
function is_blocked(provider::AbstractString, user_id, config::Dict)
    # No blocklist configured: nobody is blocked.
    if isempty(get(config, "blocklist_repo", ""))
        return false
    end

    maybe_refresh!(config)

    key = lowercase(provider)
    needle = string(user_id)
    # Hold the lock only for the in-memory membership test.
    hit = lock(BLOCKLIST_LOCK) do
        haskey(BLOCKED_IDS, key) && needle in BLOCKED_IDS[key]
    end
    hit && @info "Blocked user attempted registration" provider=key user_id=needle
    return hit
end

end # module
3 changes: 3 additions & 0 deletions src/commentbot/CommentBot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ import RegistryTools: RegBranch, Project
import Base: string
using ..Messaging
import ..RegisterParams
using ..Blocklist: is_blocked, load_blocklist!
import ..Blocklist

include("trigger_types.jl")
include("parse_comment.jl")
Expand Down Expand Up @@ -274,6 +276,7 @@ function main(config::AbstractString=isempty(ARGS) ? "config.toml" : first(ARGS)
end
zsock = RequestSocket(get(CONFIG, "backend_port", 5555))

Blocklist.load_blocklist!(CONFIG)
@info("Starting server...")
t1 = @async request_processor(zsock)
t2 = @async status_monitor(CONFIG["stop_file"], event_queue, httpsock)
Expand Down
14 changes: 14 additions & 0 deletions src/commentbot/github_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,20 @@ function get_user_login(payload::Dict{<:AbstractString})
end
end

# Return the `id` of the user attached to a webhook payload. The payload is probed
# for a "comment", then an "issue", then a "pull_request" entry; the first one
# present supplies the user. Raises an error when none of them is present.
function get_user_id(payload::Dict{<:AbstractString})
    for section in ("comment", "issue", "pull_request")
        haskey(payload, section) && return payload[section]["user"]["id"]
    end
    error("Don't know how to get user id")
end

# Return the `id` of the owner of the repository named in a webhook payload.
function get_repo_owner_id(payload::Dict{<:AbstractString})
    owner = payload["repository"]["owner"]
    return owner["id"]
end

function get_body(payload::Dict{<:AbstractString})
if haskey(payload, "comment")
return payload["comment"]["body"]
Expand Down
Loading
Loading