Skip to content

Commit 3c553d5

Browse files
committed
Deduplicate scores based on e-mail
1 parent ad1eb06 commit 3c553d5

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

src/site.rs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{AuthorMap, VersionTag};
22
use handlebars::Handlebars;
3-
use std::collections::BTreeMap;
3+
use std::collections::{BTreeMap, HashMap};
44
use std::fs;
55
use std::path::Path;
66
use unicase::UniCase;
@@ -135,17 +135,18 @@ fn about() -> Result<(), Box<dyn std::error::Error>> {
135135
Ok(())
136136
}
137137

138-
#[derive(serde::Serialize)]
138+
/// One row of the contributor score table.
///
/// The derived `Ord`/`PartialOrd` compare fields in declaration order
/// (`rank`, `author`, `email`, `commits`). `deduplicate_scores` calls `sort()`
/// on entries and therefore depends on this ordering, so reordering the fields
/// changes which duplicate is chosen on a tie.
#[derive(serde::Serialize, Ord, PartialOrd, Eq, PartialEq)]
139139
struct Entry {
140140
/// Rank of this entry; `deduplicate_scores` resets it to 0.
/// NOTE(review): presumably filled in by the ranking loop in
/// `author_map_to_scores` after sorting — confirm against the full function.
rank: u32,
141141
/// Name shown for the contributor (built from `author.name`).
author: String,
142+
/// E-mail address taken from `author.email`; used as the deduplication key.
email: String,
142143
/// Number of commits; summed when duplicate entries are merged.
commits: usize,
143144
}
144145

145146
fn author_map_to_scores(map: &AuthorMap) -> Vec<Entry> {
146147
let debug_emails = std::env::var("DEBUG_EMAILS").is_ok_and(|value| value == "1");
147148

148-
let mut scores = map
149+
let scores = map
149150
.iter()
150151
.map(|(author, commits)| {
151152
let name = UniCase::into_inner(author.name.clone());
@@ -157,11 +158,14 @@ fn author_map_to_scores(map: &AuthorMap) -> Vec<Entry> {
157158
} else {
158159
name
159160
},
161+
email: UniCase::into_inner(author.email.clone()),
160162
commits,
161163
}
162164
})
163165
.collect::<Vec<_>>();
166+
let mut scores = deduplicate_scores(scores);
164167
scores.sort_by_key(|e| (std::cmp::Reverse(e.commits), e.author.clone()));
168+
165169
let mut last_rank = 1;
166170
let mut ranked_at_current = 0;
167171
let mut last_commits = usize::max_value();
@@ -178,6 +182,30 @@ fn author_map_to_scores(map: &AuthorMap) -> Vec<Entry> {
178182
scores
179183
}
180184

185+
/// Deduplicate scores based on the assumption that an e-mail uniquely identifies a given
186+
/// person. If there are multiple entries with the same email, their commit counts will be
187+
/// merged into a single entry, with the canonical name being chosen based on the entry with
188+
/// the most commits.
189+
fn deduplicate_scores(entries: Vec<Entry>) -> Vec<Entry> {
190+
let mut entry_map: HashMap<String, Vec<Entry>> = HashMap::with_capacity(entries.len());
191+
for entry in entries {
192+
entry_map.entry(entry.email.clone()).or_default().push(entry);
193+
}
194+
195+
entry_map.into_values().map(|mut entry| {
196+
// If there are multiple entries with the same maximum commit count, ensure that
197+
// the ordering is stable, by sorting based on the whole entry.
198+
entry.sort();
199+
let canonical_entry = entry.iter().max_by_key(|entry| entry.commits).unwrap();
200+
Entry {
201+
rank: 0,
202+
author: canonical_entry.author.clone(),
203+
email: canonical_entry.email.clone(),
204+
commits: entry.iter().map(|e| e.commits).sum(),
205+
}
206+
}).collect()
207+
}
208+
181209
fn releases(
182210
by_version: &BTreeMap<VersionTag, AuthorMap>,
183211
all_time: &AuthorMap,

0 commit comments

Comments
 (0)