Skip to content

Normalize labels before addition or removal #2128

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 54 additions & 64 deletions src/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ use std::{
};
use tracing as log;

pub mod labels;

pub type UserId = u64;
pub type PullRequestNumber = u64;

Expand Down Expand Up @@ -565,38 +567,15 @@ impl IssueRepository {
format!("{}/{}", self.organization, self.repository)
}

/// Checks whether `label` exists on this repository.
///
/// Sends a GET request for `{repository url}/labels/{label}`.
///
/// # Returns
/// * `Ok(true)` when the request succeeds.
/// * `Ok(false)` when the request fails with a `reqwest::Error` whose status
///   is `404 Not Found`.
///
/// # Errors
/// Any other failure reported by `client.send_req` is returned unchanged.
async fn has_label(&self, client: &GithubClient, label: &str) -> anyhow::Result<bool> {
    let url = format!("{}/labels/{}", self.url(client), label);
    match client.send_req(client.get(&url)).await {
        Ok(_) => Ok(true),
        // A 404 is the expected answer for a missing label, not a failure.
        Err(e)
            if e.downcast_ref::<reqwest::Error>()
                .is_some_and(|e| e.status() == Some(StatusCode::NOT_FOUND)) =>
        {
            Ok(false)
        }
        Err(e) => Err(e),
    }
}
}

#[derive(Debug)]
pub(crate) struct UnknownLabels {
labels: Vec<String>,
}

// NOTE: This is used to post the Github comment; make sure it's valid markdown.
impl fmt::Display for UnknownLabels {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Unknown labels: {}", &self.labels.join(", "))
/// Fetches every label defined on this repository.
///
/// The GitHub "list labels for a repository" endpoint is paginated (30 items
/// per page by default, at most 100), so a single request silently truncates
/// the list on repositories with many labels. Pages of `PER_PAGE` items are
/// requested until a page comes back that is not full.
///
/// # Errors
/// Returns an error with the context `failed to get labels` as soon as any
/// page request made through `client.json` fails.
async fn labels(&self, client: &GithubClient) -> anyhow::Result<Vec<Label>> {
    // Largest page size accepted by the GitHub REST API.
    const PER_PAGE: usize = 100;

    let base_url = format!("{}/labels", self.url(client));
    let mut all_labels = Vec::new();
    let mut page = 1u32;
    loop {
        let url = format!("{base_url}?per_page={PER_PAGE}&page={page}");
        let batch: Vec<Label> = client
            .json(client.get(&url))
            .await
            .context("failed to get labels")?;
        let batch_len = batch.len();
        all_labels.extend(batch);
        // A page that is not full is the last one.
        if batch_len < PER_PAGE {
            return Ok(all_labels);
        }
        page += 1;
    }
}
}

impl std::error::Error for UnknownLabels {}

impl Issue {
pub fn to_zulip_github_reference(&self) -> ZulipGitHubReference {
ZulipGitHubReference {
Expand Down Expand Up @@ -730,8 +709,39 @@ impl Issue {
Ok(())
}

/// Resolves `requested_labels` against the labels defined on this issue's
/// repository.
///
/// The repository's labels are fetched first; their names are then handed,
/// together with `requested_labels`, to `labels::normalize_and_match_labels`,
/// whose result is returned as-is.
///
/// # Errors
/// Fails with the context `unable to retrieve the repository labels` when the
/// label list cannot be fetched, or with whatever error the matching step
/// reports.
async fn normalize_and_match_labels(
    &self,
    client: &GithubClient,
    requested_labels: &[&str],
) -> anyhow::Result<Vec<String>> {
    let repo_labels = self
        .repository()
        .labels(client)
        .await
        .context("unable to retrieve the repository labels")?;

    // The matcher works on plain names, so borrow them out of the `Label`s.
    let available_names: Vec<&str> = repo_labels
        .iter()
        .map(|repo_label| repo_label.name.as_str())
        .collect();

    labels::normalize_and_match_labels(&available_names, requested_labels)
}

pub async fn remove_label(&self, client: &GithubClient, label: &str) -> anyhow::Result<()> {
log::info!("remove_label from {}: {:?}", self.global_id(), label);

let normalized_labels = self.normalize_and_match_labels(client, &[label]).await?;
let label = normalized_labels
.first()
.context("failed to find label on repository")?;
log::info!(
"remove_label from {}: matched label to {:?}",
self.global_id(),
label
);

// DELETE /repos/:owner/:repo/issues/:number/labels/{name}
let url = format!(
"{repo_url}/issues/{number}/labels/{name}",
Expand Down Expand Up @@ -767,6 +777,19 @@ impl Issue {
labels: Vec<Label>,
) -> anyhow::Result<()> {
log::info!("add_labels: {} +{:?}", self.global_id(), labels);

let labels = self
.normalize_and_match_labels(
client,
&labels.iter().map(|l| l.name.as_str()).collect::<Vec<_>>(),
)
.await?;
log::info!(
"add_labels: {} matched requested labels to +{:?}",
self.global_id(),
labels
);

// POST /repos/:owner/:repo/issues/:number/labels
// repo_url = https://api.github.com/repos/Codertocat/Hello-World
let url = format!(
Expand All @@ -778,8 +801,7 @@ impl Issue {
// Don't try to add labels already present on this issue.
let labels = labels
.into_iter()
.filter(|l| !self.labels().contains(&l))
.map(|l| l.name)
.filter(|l| !self.labels().iter().any(|existing| existing.name == *l))
.collect::<Vec<_>>();

log::info!("add_labels: {} filtered to {:?}", self.global_id(), labels);
Expand All @@ -788,32 +810,13 @@ impl Issue {
return Ok(());
}

let mut unknown_labels = vec![];
let mut known_labels = vec![];
for label in labels {
if !self.repository().has_label(client, &label).await? {
unknown_labels.push(label);
} else {
known_labels.push(label);
}
}

if !unknown_labels.is_empty() {
return Err(UnknownLabels {
labels: unknown_labels,
}
.into());
}

#[derive(serde::Serialize)]
struct LabelsReq {
labels: Vec<String>,
}

client
.send_req(client.post(&url).json(&LabelsReq {
labels: known_labels,
}))
.send_req(client.post(&url).json(&LabelsReq { labels }))
.await
.context("failed to add labels")?;

Expand Down Expand Up @@ -3217,16 +3220,3 @@ impl Submodule {
client.repository(fullname).await
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `UnknownLabels` renders its labels comma-separated after a fixed prefix.
    #[test]
    fn display_labels() {
        let error = UnknownLabels {
            labels: vec![String::from("A-bootstrap"), String::from("xxx")],
        };
        let rendered = error.to_string();
        assert_eq!(rendered, "Unknown labels: A-bootstrap, xxx");
    }
}
162 changes: 162 additions & 0 deletions src/github/labels.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
use std::{fmt, sync::LazyLock};

use itertools::Itertools;
use regex::Regex;

/// Matches the characters that are stripped from a label name before it is
/// compared against another label name.
///
/// * `\p{Emoji}` also contains the ASCII keycap bases `0`-`9`, `#` and `*`.
///   They are subtracted so that ordinary label text keeps its digits;
///   otherwise names such as `P-1` and `P-2` would collapse to the same key.
/// * U+FE0F (variation selector-16) and U+200D (zero width joiner) are not
///   part of `\p{Emoji}` but accompany many emoji (`⚠️`, ZWJ sequences). They
///   are not whitespace, so they must be removed here for the remainder of the
///   name to compare equal.
static EMOJI_REGEX: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(r"[[\p{Emoji}\p{Emoji_Presentation}\u{FE0F}\u{200D}]--[0-9#*]]")
        .expect("the emoji character class is a valid regex")
});

/// Maps every requested label name to the exact name of an available label.
///
/// For each entry of `requested_labels`, in order:
/// 1. an available label with the identical name wins;
/// 2. otherwise both sides are normalized (characters matched by
///    `EMOJI_REGEX` removed, surrounding whitespace trimmed, lowercased) and
///    compared again.
///
/// # Returns
/// The matched available label names, in the order of `requested_labels`.
///
/// # Errors
/// * `AmbiguousLabelMatch` — returned immediately when a requested label
///   matches more than one available label after normalization.
/// * `UnknownLabels` — returned after all requested labels were examined if at
///   least one of them matched nothing.
pub(crate) fn normalize_and_match_labels(
    available_labels: &[&str],
    requested_labels: &[&str],
) -> anyhow::Result<Vec<String>> {
    // Reduces a label name to the key used for the fuzzy comparison.
    fn canonical(name: &str) -> String {
        EMOJI_REGEX.replace_all(name, "").trim().to_lowercase()
    }

    let mut matched: Vec<String> = Vec::with_capacity(requested_labels.len());
    let mut unmatched: Vec<String> = Vec::new();

    for &wanted in requested_labels {
        // An identical name always takes precedence over a normalized match.
        if let Some(&exact) = available_labels.iter().find(|&&name| name == wanted) {
            matched.push(exact.to_owned());
            continue;
        }

        let wanted_key = canonical(wanted);
        let candidates: Vec<&str> = available_labels
            .iter()
            .copied()
            .filter(|name| canonical(name) == wanted_key)
            .collect();

        match candidates.as_slice() {
            [] => unmatched.push(wanted.to_owned()),
            [only] => matched.push((*only).to_owned()),
            _ => {
                // Several labels share the normalized key: refuse to guess.
                return Err(AmbiguousLabelMatch {
                    requested_label: wanted.to_owned(),
                    labels: candidates.iter().map(|name| (*name).to_owned()).collect(),
                }
                .into());
            }
        }
    }

    if unmatched.is_empty() {
        Ok(matched)
    } else {
        Err(UnknownLabels { labels: unmatched }.into())
    }
}

/// Error reported when requested labels match none of the available labels.
#[derive(Debug)]
pub(crate) struct UnknownLabels {
    /// The requested label names for which no match was found.
    labels: Vec<String>,
}

// NOTE: The rendered text is posted as a GitHub comment, so it has to stay
// valid markdown.
impl fmt::Display for UnknownLabels {
    /// Writes `Unknown labels: ` followed by the label names joined with `, `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let joined = self.labels.join(", ");
        write!(f, "Unknown labels: {}", joined)
    }
}

impl std::error::Error for UnknownLabels {}

/// Error reported when a requested label matches more than one available
/// label.
#[derive(Debug)]
pub(crate) struct AmbiguousLabelMatch {
    /// The label name exactly as it was requested.
    pub requested_label: String,
    /// The available label names that the request could refer to.
    pub labels: Vec<String>,
}

// NOTE: The rendered text is posted as a GitHub comment, so it has to stay
// valid markdown.
impl fmt::Display for AmbiguousLabelMatch {
    /// Writes the requested label and every candidate, each wrapped in
    /// backticks, with the candidates separated by `, `.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let candidates = self
            .labels
            .iter()
            .map(|candidate| format!("`{}`", candidate))
            .join(", ");
        write!(
            f,
            "Unsure which label to use for `{}` - could be one of: {}",
            self.requested_label, candidates
        )
    }
}

impl std::error::Error for AmbiguousLabelMatch {}

#[cfg(test)]
mod tests {
    use super::*;

    /// `UnknownLabels` renders its labels comma-separated after a fixed prefix.
    #[test]
    fn display_unknown_labels_error() {
        let error = UnknownLabels {
            labels: vec![String::from("A-bootstrap"), String::from("xxx")],
        };
        assert_eq!(error.to_string(), "Unknown labels: A-bootstrap, xxx");
    }

    /// `AmbiguousLabelMatch` quotes the request and every candidate in backticks.
    #[test]
    fn display_ambiguous_label_error() {
        let error = AmbiguousLabelMatch {
            requested_label: String::from("A-bootstrap"),
            labels: vec![String::from("A-bootstrap"), String::from("A-bootstrap-2")],
        };
        let expected =
            "Unsure which label to use for `A-bootstrap` - could be one of: `A-bootstrap`, `A-bootstrap-2`";
        assert_eq!(error.to_string(), expected);
    }

    /// Emoji-decorated labels are found, and an exact name beats a fuzzy one.
    #[test]
    fn normalize_and_match_labels_happy_path() {
        let available = ["A-bootstrap 😺", "B-foo 👾", "C-bar", "C-bar 😦"];
        let requested = ["A-bootstrap", "B-foo", "C-bar"];

        let matched = normalize_and_match_labels(&available, &requested)
            .expect("every requested label has exactly one match");

        assert_eq!(matched.len(), 3);
        assert_eq!(matched, ["A-bootstrap 😺", "B-foo 👾", "C-bar"]);
    }

    /// Only the labels without any match are listed in the error.
    #[test]
    fn normalize_and_match_labels_no_match() {
        let available = ["A-bootstrap", "B-foo"];
        let requested = ["A-bootstrap", "C-bar"];

        let error = normalize_and_match_labels(&available, &requested)
            .expect_err("`C-bar` is not an available label");

        let unknown = error
            .downcast::<UnknownLabels>()
            .expect("the error should be `UnknownLabels`");
        assert_eq!(unknown.labels, ["C-bar"]);
    }

    /// Two labels that differ only by emoji make the request ambiguous.
    #[test]
    fn normalize_and_match_labels_ambiguous_match() {
        let available = ["A-bootstrap 😺", "A-bootstrap 👾"];
        let requested = ["A-bootstrap"];

        let error = normalize_and_match_labels(&available, &requested)
            .expect_err("both available labels normalize to the request");

        let ambiguous = error
            .downcast::<AmbiguousLabelMatch>()
            .expect("the error should be `AmbiguousLabelMatch`");
        assert_eq!(ambiguous.requested_label, "A-bootstrap");
        assert_eq!(ambiguous.labels, ["A-bootstrap 😺", "A-bootstrap 👾"]);
    }
}
4 changes: 2 additions & 2 deletions src/handlers/assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

use crate::db::issue_data::IssueData;
use crate::db::review_prefs::{RotationMode, get_review_prefs_batch};
use crate::github::UserId;
use crate::github::{UserId, labels};
use crate::handlers::pr_tracking::ReviewerWorkqueue;
use crate::{
config::AssignConfig,
Expand Down Expand Up @@ -563,7 +563,7 @@ pub(super) async fn handle_command(
.add_labels(&ctx.github, vec![github::Label { name: t_label }])
.await
{
if let Some(github::UnknownLabels { .. }) = err.downcast_ref() {
if let Some(labels::UnknownLabels { .. }) = err.downcast_ref() {
log::warn!("Error assigning label: {}", err);
} else {
return Err(err);
Expand Down
3 changes: 1 addition & 2 deletions src/handlers/autolabel.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
config::AutolabelConfig,
github::{IssuesAction, IssuesEvent, Label},
github::{IssuesAction, IssuesEvent, Label, labels::UnknownLabels},
handlers::Context,
};
use anyhow::Context as _;
Expand Down Expand Up @@ -209,7 +209,6 @@ pub(super) async fn handle_input(
match event.issue.add_labels(&ctx.github, input.add).await {
Ok(()) => {}
Err(e) => {
use crate::github::UnknownLabels;
if let Some(err @ UnknownLabels { .. }) = e.downcast_ref() {
event
.issue
Expand Down
3 changes: 1 addition & 2 deletions src/handlers/relabel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
use crate::team_data::TeamClient;
use crate::{
config::RelabelConfig,
github::UnknownLabels,
github::{self, Event},
github::{self, Event, labels::UnknownLabels},
handlers::Context,
interactions::ErrorComment,
};
Expand Down