Skip to content

Commit 4a6b3d8

Browse files
committed
autocomplete: Match user results by email match, and rank by match quality
Fixes zulip#236.
1 parent 19c1992 commit 4a6b3d8

File tree

2 files changed

+208
-11
lines changed

2 files changed

+208
-11
lines changed

lib/model/autocomplete.dart

Lines changed: 135 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,25 @@ enum NameMatchQuality {
744744
wordPrefixes,
745745
}
746746

747+
/// The match quality of a [UserStore.userDisplayEmail]
748+
/// to a mention autocomplete query.
749+
///
750+
/// All matches are case-insensitive.
751+
enum EmailMatchQuality {
752+
/// The query starts with the email's "local part", then "@",
753+
/// then a nonempty prefix of the "domain".
754+
///
755+
/// For example, the query "email@e" matches "[email protected]" this way,
756+
/// but "email" and "email@" do not.
757+
localPartExactAndDomainPrefix,
758+
759+
/// The query is the email's "local part" optionally followed by "@".
760+
localPartExact,
761+
762+
/// The email starts with the query.
763+
prefix,
764+
}
765+
747766
/// Any autocomplete query in the compose box's content input.
748767
abstract class ComposeAutocompleteQuery extends AutocompleteQuery {
749768
ComposeAutocompleteQuery(super.raw);
@@ -789,15 +808,19 @@ class MentionAutocompleteQuery extends ComposeAutocompleteQuery {
789808
if (store.isUserMuted(user.userId)) return null;
790809

791810
final cache = store.autocompleteViewManager.autocompleteDataCache;
792-
// TODO(#236) test email too, not just name
793811
final nameMatchQuality = _matchName(
794812
lowercaseName: cache.lowercaseNameForUser(user),
795813
lowercaseNameWords: cache.lowercaseNameWordsForUser(user));
796-
if (nameMatchQuality == null) return null;
814+
EmailMatchQuality? emailMatchQuality;
815+
if (nameMatchQuality == null) {
816+
emailMatchQuality = _matchEmail(user, cache);
817+
if (emailMatchQuality == null) return null;
818+
}
797819

798820
return UserMentionAutocompleteResult(
799821
userId: user.userId,
800-
rank: _rankUserResult(user, nameMatchQuality: nameMatchQuality));
822+
rank: _rankUserResult(user,
823+
nameMatchQuality: nameMatchQuality, emailMatchQuality: emailMatchQuality));
801824
}
802825

803826
NameMatchQuality? _matchName({
@@ -825,6 +848,53 @@ class MentionAutocompleteQuery extends ComposeAutocompleteQuery {
825848
return null;
826849
}
827850

851+
EmailMatchQuality? _matchEmail(User user, AutocompleteDataCache cache) {
852+
final lowercaseEmail = cache.lowercaseEmailForUser(user);
853+
if (lowercaseEmail == null) return null; // Email not known
854+
855+
final userEmailParts = cache.lowercaseEmailPartsForUser(user);
856+
if (userEmailParts == null) { // TODO(log)?
857+
// Email has unexpected shape; just do a simple match
858+
return lowercaseEmail.startsWith(_lowercase)
859+
? EmailMatchQuality.prefix
860+
: null;
861+
}
862+
863+
final (localPart, domain) = userEmailParts;
864+
865+
int localPartMatchLength = 0;
866+
while (
867+
localPartMatchLength < localPart.length && localPartMatchLength < _lowercase.length
868+
&& localPart[localPartMatchLength] == _lowercase[localPartMatchLength]
869+
) {
870+
localPartMatchLength++;
871+
}
872+
873+
final reachedLocalPartEnd = localPartMatchLength == localPart.length;
874+
bool reachedQueryEnd = localPartMatchLength == _lowercase.length;
875+
switch ((reachedLocalPartEnd, reachedQueryEnd)) {
876+
case (false, false): return null;
877+
case (false, true ): return EmailMatchQuality.prefix;
878+
case (true, true ): return EmailMatchQuality.localPartExact;
879+
case (true, false):
880+
assert(_lowercase.length >= localPart.length);
881+
if (_lowercase[localPart.length] != '@') return null;
882+
if (_lowercase.length == localPart.length + 1) {
883+
// // Query matches the local part plus just '@'.
884+
return EmailMatchQuality.localPartExact;
885+
}
886+
// Proceed to check the domain
887+
}
888+
889+
int i = 0; // index in domain
890+
for (int j = localPart.length + 1; j < _lowercase.length; j++) {
891+
if (i >= domain.length) return null;
892+
if (_lowercase[j] != domain[i]) return null;
893+
i++;
894+
}
895+
return EmailMatchQuality.localPartExactAndDomainPrefix;
896+
}
897+
828898
/// A measure of a wildcard result's quality in the context of the query,
829899
/// from 0 (best) to one less than [_numResultRanks].
830900
///
@@ -835,17 +905,28 @@ class MentionAutocompleteQuery extends ComposeAutocompleteQuery {
835905
/// from 0 (best) to one less than [_numResultRanks].
836906
///
837907
/// See also [_rankWildcardResult].
838-
static int _rankUserResult(User user, {required NameMatchQuality nameMatchQuality}) {
839-
return switch (nameMatchQuality) {
840-
NameMatchQuality.exact => 1,
841-
NameMatchQuality.totalPrefix => 2,
842-
NameMatchQuality.wordPrefixes => 3,
908+
static int _rankUserResult(User user, {
909+
required NameMatchQuality? nameMatchQuality,
910+
required EmailMatchQuality? emailMatchQuality,
911+
}) {
912+
if (nameMatchQuality != null) {
913+
return switch (nameMatchQuality) {
914+
NameMatchQuality.exact => 1,
915+
NameMatchQuality.totalPrefix => 2,
916+
NameMatchQuality.wordPrefixes => 3,
917+
};
918+
}
919+
assert(emailMatchQuality != null);
920+
return switch (emailMatchQuality!) {
921+
EmailMatchQuality.localPartExactAndDomainPrefix => 4,
922+
EmailMatchQuality.localPartExact => 5,
923+
EmailMatchQuality.prefix => 6,
843924
};
844925
}
845926

846927
/// The number of possible values returned by
847928
/// [_rankWildcardResult] and [_rankUserResult].
848-
static const _numResultRanks = 4;
929+
static const _numResultRanks = 7;
849930

850931
@override
851932
String toString() {
@@ -892,9 +973,45 @@ class AutocompleteDataCache {
892973
return _lowercaseNameWordsByUser[user.userId] ??= lowercaseNameForUser(user).split(' ');
893974
}
894975

976+
final Map<int, String?> _lowercaseEmailsByUser = {};
977+
978+
/// The lowercase `deliveryEmail` of [user], or null if that's null.
979+
String? lowercaseEmailForUser(User user) {
980+
return _lowercaseEmailsByUser[user.userId] ??= user.deliveryEmail?.toLowerCase();
981+
}
982+
983+
final Map<int, (String localPart, String domain)?> _lowercaseEmailPartsByUser = {};
984+
985+
/// The [lowercaseEmailForUser] split by the single "@" if it has that shape,
986+
/// else null.
987+
(String localPart, String domain)? lowercaseEmailPartsForUser(User user) {
988+
// Optimization: use containsKey to check the cache, instead of operator [],
989+
// because `null` is a valid value that can take some string-computation
990+
// to compute (on unexpected email strings with multiple "@"s).
991+
return _lowercaseEmailPartsByUser.containsKey(user.userId)
992+
? _lowercaseEmailPartsByUser[user.userId]
993+
: (_lowercaseEmailPartsByUser[user.userId] = _computeLowercaseEmailPartsForUser(user));
994+
}
995+
996+
/// Whether to throw an [AssertionError] on [lowercaseEmailPartsForUser]'s
997+
/// cache-misses. Useful for testing.
998+
bool debugErrorOnComputeEmailParts = false;
999+
1000+
(String localPart, String domain)? _computeLowercaseEmailPartsForUser(User user) {
1001+
assert(!debugErrorOnComputeEmailParts);
1002+
1003+
final lowercaseEmail = lowercaseEmailForUser(user);
1004+
if (lowercaseEmail == null) return null;
1005+
final split = lowercaseEmail.split('@');
1006+
if (split.length != 2) return null;
1007+
return (split[0], split[1]);
1008+
}
1009+
8951010
void invalidateUser(int userId) {
8961011
_lowercaseNamesByUser.remove(userId);
8971012
_lowercaseNameWordsByUser.remove(userId);
1013+
_lowercaseEmailsByUser.remove(userId);
1014+
_lowercaseEmailPartsByUser.remove(userId);
8981015
}
8991016
}
9001017

@@ -947,6 +1064,9 @@ sealed class MentionAutocompleteResult extends ComposeAutocompleteResult {
9471064
// Behavior we have that web doesn't and might like to follow:
9481065
// - A "word-prefixes" match quality on user names:
9491066
// see [NameMatchQuality.wordPrefixes], which we rank on.
1067+
// - Two email match qualities when the query matches the "local part";
1068+
// see [EmailMatchQuality.localPartExactAndDomainPrefix] and
1069+
// [EmailMatchQuality.localPartExact], which we rank on.
9501070
//
9511071
// Behavior web has that seems undesired, which we don't plan to follow:
9521072
// - Ranking humans above bots, even when the bots have higher relevance
@@ -957,6 +1077,12 @@ sealed class MentionAutocompleteResult extends ComposeAutocompleteResult {
9571077
// special rank when the whole query appears contiguously
9581078
// right after a word-boundary character.
9591079
// Our [NameMatchQuality.wordPrefixes] seems smarter.
1080+
// - An "exact" match quality on emails: probably not worth its complexity.
1081+
// Emails are much more uniform in their endings than users' names are,
1082+
// so [EmailMatchQuality.localPartExactAndDomainPrefix] should be adequate.
1083+
// (If I've typed "[email protected]", that'll probably be the only result.
1084+
// There might be an "[email protected]", and an "exact" match would
1085+
// downrank that, but still that's just two items to scan throug.)
9601086
// - A "word-boundary" match quality on user emails:
9611087
// "words" is a wrong abstraction when matching on emails.
9621088
// - Ranking some case-sensitive matches differently from case-insensitive

test/model/autocomplete_test.dart

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,45 @@ void main() {
263263
doTest('。~:^', emoji('')); doTest('。~:a^', emoji('a'));
264264
});
265265

266+
group('AutocompleteDataCache', () {
267+
// TODO test user full-name data
268+
269+
test('lowercaseEmailPartsForUser.debugErrorOnComputeEmailParts works', () {
270+
final cache = AutocompleteDataCache()
271+
..debugErrorOnComputeEmailParts = true;
272+
273+
check(() => cache.lowercaseEmailPartsForUser(eg.user())).throws<AssertionError>();
274+
});
275+
276+
test('lowercaseEmailPartsForUser: non-null deliveryEmail', () {
277+
final cache = AutocompleteDataCache();
278+
279+
final user = eg.user(deliveryEmail: '[email protected]');
280+
final result = cache.lowercaseEmailPartsForUser(user);
281+
cache.debugErrorOnComputeEmailParts = true; // (this flag is tested above)
282+
final cachedResult = cache.lowercaseEmailPartsForUser(user);
283+
check(cachedResult)..identicalTo(result)..equals(('email', 'example.com'));
284+
});
285+
286+
test('lowercaseEmailPartsForUser: null deliveryEmail', () {
287+
final cache = AutocompleteDataCache();
288+
289+
final user = eg.user(deliveryEmail: null);
290+
check(cache.lowercaseEmailPartsForUser(user)).isNull();
291+
cache.debugErrorOnComputeEmailParts = true; // (this flag is tested above)
292+
check(cache.lowercaseEmailPartsForUser(user)).isNull();
293+
});
294+
295+
test('lowercaseEmailPartsForUser: non-null deliveryEmail but with multiple "@"s', () {
296+
final cache = AutocompleteDataCache();
297+
298+
final user = eg.user(deliveryEmail: 'Email@example@com');
299+
check(cache.lowercaseEmailPartsForUser(user)).isNull();
300+
cache.debugErrorOnComputeEmailParts = true; // (this flag is tested above)
301+
check(cache.lowercaseEmailPartsForUser(user)).isNull();
302+
});
303+
});
304+
266305
test('MentionAutocompleteView misc', () async {
267306
const narrow = ChannelNarrow(1);
268307
final store = eg.store();
@@ -1037,14 +1076,46 @@ void main() {
10371076
checkPrecedes('so m', user1, user2);
10381077
});
10391078

1079+
test('email matched case-insensitively', () {
1080+
final user1 = eg.user(deliveryEmail: '[email protected]');
1081+
final user2 = eg.user(deliveryEmail: '[email protected]');
1082+
1083+
checkSameRank('[email protected]', user1, user2); // exact
1084+
checkSameRank('email@e', user1, user2); // local-part-exact-and-domain-prefix
1085+
checkSameRank('email@', user1, user2); // local-part-exact
1086+
checkSameRank('email', user1, user2); // local-part-exact
1087+
checkSameRank('ema', user1, user2); // prefix
1088+
});
1089+
1090+
test('email matching: when email has unexpected shape, no crash; just do a prefix match', () {
1091+
final user1 = eg.user(deliveryEmail: 'something weird');
1092+
final user2 = eg.user(deliveryEmail: '[email protected]');
1093+
checkSameRank('some', user1, user2);
1094+
});
1095+
1096+
test('email match: local-part-exact-and-domain-prefix over local-part-exact', () {
1097+
final user = eg.user(deliveryEmail: '[email protected]');
1098+
check(rankOf('email@z', user)!).isLessThan(rankOf('email', user)!);
1099+
});
1100+
1101+
test('email match: local-part-exact over prefix', () {
1102+
final user1 = eg.user(deliveryEmail: '[email protected]');
1103+
final user2 = eg.user(deliveryEmail: '[email protected]');
1104+
1105+
checkPrecedes('email', user1, user2);
1106+
});
1107+
10401108
test('full list of ranks', () {
1041-
final user1 = eg.user(fullName: 'some user');
1109+
final user1 = eg.user(fullName: 'some user', deliveryEmail: '[email protected]');
10421110
check([
10431111
rankOf('', WildcardMentionOption.all), // wildcard
10441112
rankOf('some user', user1), // user, exact name match
10451113
rankOf('some us', user1), // user, total-prefix name match
10461114
rankOf('so us', user1), // user, word-prefixes name match
1047-
]).deepEquals([0, 1, 2, 3]);
1115+
rankOf('email@e', user1), // user, local-part-exact-domain-prefix email match
1116+
rankOf('email', user1), // user, local-part-exact email match
1117+
rankOf('emai', user1), // user, prefix email match
1118+
]).deepEquals([0, 1, 2, 3, 4, 5, 6]);
10481119
});
10491120
});
10501121

0 commit comments

Comments
 (0)