Skip to content

Commit 0aa9a53

Browse files
authored
feat(lookup): update follow logic (#1307)
1 parent c00e375 commit 0aa9a53

File tree

17 files changed

+326
-209
lines changed

17 files changed

+326
-209
lines changed

.vscode/settings.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
11
{
22
"java.compile.nullAnalysis.mode": "automatic",
3-
"rust-analyzer.procMacro.ignored": { "napi-derive": ["napi"] }
3+
"rust-analyzer.procMacro.ignored": {
4+
"napi-derive": [
5+
"napi"
6+
]
7+
},
8+
"debug.javascript.defaultRuntimeExecutable": {
9+
"pwa-node": "/Users/tjnickerson/.local/share/mise/shims/node"
10+
},
11+
"python.defaultInterpreterPath": "${workspaceFolder}/.venv"
412
}

cli/src/lookup.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,9 @@ pub struct LookupArgs {
2828
#[arg(
2929
short = 'F',
3030
long,
31-
default_value_t = 0,
32-
help = "Number of redirects to follow via \"see also\" attributes. Use a high number like 999999 for infinite following (old behavior)."
31+
help = "Follow see_also redirects until finding an entry with etymologies"
3332
)]
34-
follow: u32,
33+
follow: bool,
3534

3635
#[arg(
3736
short,

cli/src/serve/lookup.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::get_lookup_entries;
1616
#[derive(Debug, Deserialize)]
1717
pub struct LookupRequest {
1818
queries: String,
19-
follow: Option<u32>,
19+
follow: Option<bool>,
2020
split: Option<usize>,
2121
}
2222

@@ -87,7 +87,7 @@ async fn handle_lookup(
8787
name: dictionary_name.to_string(),
8888
})?;
8989

90-
let mut opts = LookupOptions::default().follow(follow.unwrap_or(0));
90+
let mut opts = LookupOptions::default().follow(follow.unwrap_or(false));
9191

9292
if split.is_some() {
9393
opts = opts.strategy(LookupStrategy::Split(split.unwrap()));

cli/src/serve/tokenize.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use serde::Deserialize;
1111
#[derive(Debug, Deserialize)]
1212
pub struct TokenizeRequest {
1313
text: String,
14-
follow: Option<u32>,
14+
follow: Option<bool>,
1515
}
1616

1717
#[derive(Debug, Display, Error)]
@@ -72,7 +72,7 @@ async fn handle_tokenize(
7272
name: dictionary_name.to_string(),
7373
})?;
7474

75-
let opts = TokenizeOptions::default().follow(follow.unwrap_or(0));
75+
let opts = TokenizeOptions::default().follow(follow.unwrap_or(false));
7676

7777
let tokens = dictionary
7878
.tokenize(&text, opts)

cli/src/tokenize.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ pub struct TokenizeArgs {
2525
#[arg(
2626
short = 'F',
2727
long,
28-
default_value_t = 0,
29-
help = "Number of redirects to follow via \"see also\" attributes. Use a high number like 999999 for infinite following (old behavior)."
28+
help = "Follow see_also redirects until finding an entry with etymologies"
3029
)]
31-
follow: u32,
30+
follow: bool,
3231

3332
#[arg(
3433
short = 'i',

lib/src/core/lookup.rs

Lines changed: 91 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ pub enum LookupStrategy {
1212

1313
#[derive(Debug, Clone)]
1414
pub struct LookupOptions {
15-
/// Maximum number of redirects to follow via see_also links.
16-
/// None means no following, Some(u32::MAX) provides infinite following (old behavior).
17-
pub follow: Option<u32>,
15+
/// Whether to follow see_also links until finding an entry with etymologies.
16+
/// `true` follows redirects until an entry with etymologies is found; `false` disables following.
17+
pub follow: bool,
1818
pub strategy: LookupStrategy,
1919
pub insensitive: bool,
2020
}
@@ -28,14 +28,14 @@ impl AsRef<LookupOptions> for LookupOptions {
2828
impl LookupOptions {
2929
pub fn default() -> Self {
3030
Self {
31-
follow: None,
31+
follow: false,
3232
strategy: LookupStrategy::Exact,
3333
insensitive: false,
3434
}
3535
}
3636

37-
pub fn follow(mut self, follow: u32) -> Self {
38-
self.follow = Some(follow);
37+
pub fn follow(mut self, follow: bool) -> Self {
38+
self.follow = follow;
3939
self
4040
}
4141

@@ -65,39 +65,51 @@ macro_rules! lookup {
6565
impl $tys {
6666
fn find_entry<'a>(
6767
&'a self,
68-
follow: &Option<u32>,
68+
follow: &bool,
6969
insensitive: &bool,
7070
query: &str,
7171
directed_from: Option<&'a $ret>,
72-
) -> $opt<LookupResult<&'a $ret>> {
72+
path: &mut Vec<String>,
73+
) -> crate::Result<$opt<LookupResult<&'a $ret>>> {
74+
// Check for redirect loop
75+
if path.contains(&query.to_string()) {
76+
// Build the loop chain from the path in order
77+
let mut chain = path.clone();
78+
chain.push(query.to_string());
79+
return Err(crate::Error::RedirectLoop(chain.join(" -> ")));
80+
}
81+
82+
// Add current query to path
83+
path.push(query.to_string());
84+
7385
// Try exact match first
7486
if let Some(entry) = self.entries.get(query) {
75-
// Follow an alias if it exists and we have redirects remaining
76-
if let Some(max_redirects) = follow {
77-
if *max_redirects > 0 {
78-
if let Option::Some(also) = &entry.see_also.as_ref() {
79-
if also.len() > 0 {
80-
// Decrement redirect count for recursive call
81-
let remaining_redirects = if *max_redirects == u32::MAX {
82-
Some(u32::MAX) // Keep infinite
83-
} else {
84-
Some(max_redirects - 1)
85-
};
86-
return self.find_entry(
87-
&remaining_redirects,
88-
insensitive,
89-
also,
90-
directed_from.or(Some(entry)),
91-
);
92-
}
87+
// Follow an alias if follow is true, entry has no etymologies, and see_also exists
88+
if *follow && entry.etymologies.is_empty() {
89+
if let Option::Some(also) = &entry.see_also.as_ref() {
90+
if also.len() > 0 {
91+
// Recursively follow the redirect
92+
let result = self.find_entry(
93+
follow,
94+
insensitive,
95+
also,
96+
directed_from.or(Some(entry)),
97+
path,
98+
);
99+
100+
path.pop();
101+
102+
return result;
93103
}
94104
}
95105
}
96106

97-
return $opt::Some(LookupResult {
107+
path.pop();
108+
109+
return Ok($opt::Some(LookupResult {
98110
entry,
99111
directed_from,
100-
});
112+
}));
101113
}
102114

103115
// If insensitive flag is true and exact match failed, try with lowercase
@@ -106,16 +118,20 @@ macro_rules! lookup {
106118

107119
// Only try lowercase if it's different from the original query
108120
if query_lower != query {
109-
// Try direct lookup with lowercase (reuse all the same logic)
110-
if let $opt::Some(result) =
111-
self.find_entry(follow, &false, &query_lower, directed_from)
121+
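// Try direct lookup with lowercase (keep insensitive flag for redirect following). NOTE(review): the `if let Ok(..)` below discards an Err (e.g. RedirectLoop) from this nested lookup instead of propagating it.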
122+
if let Ok($opt::Some(result)) =
123+
self.find_entry(follow, insensitive, &query_lower, directed_from, path)
112124
{
113-
return $opt::Some(result);
125+
path.pop();
126+
return Ok($opt::Some(result));
114127
}
115128
}
116129
}
117130

118-
$opt::None
131+
// Nothing matched this query: pop it off the path (balancing the earlier push) before returning Ok(None)
132+
path.pop();
133+
134+
Ok($opt::None)
119135
}
120136

121137
fn perform_lookup<'a, Options>(
@@ -132,38 +148,53 @@ macro_rules! lookup {
132148
insensitive,
133149
} = options.as_ref();
134150

135-
if let $opt::Some(result) = self.find_entry(follow, insensitive, query, None) {
136-
return Ok(vec![result]);
137-
}
138-
139-
let mut results: Vec<LookupResult<&$ret>> = Vec::new();
140-
141-
if let LookupStrategy::Split(min_length) = strategy {
142-
let chars: Vec<_> = query.chars().collect();
143-
let mut start = 0;
144-
let mut end = chars.len();
145-
146-
while start < end {
147-
let substr: String = chars[start..end].iter().collect();
148-
let maybe_entry =
149-
self.find_entry(follow, insensitive, substr.as_str(), None);
150-
151-
if maybe_entry.is_some() || substr.len() <= *min_length {
152-
start = end;
153-
end = chars.len();
151+
let mut path = Vec::new();
152+
153+
return match self.find_entry(follow, insensitive, query, None, &mut path)? {
154+
$opt::Some(result) => Ok(vec![result]),
155+
$opt::None => {
156+
let mut results: Vec<LookupResult<&$ret>> = Vec::new();
157+
158+
if let LookupStrategy::Split(min_length) = strategy {
159+
let chars: Vec<_> = query.chars().collect();
160+
let mut start = 0;
161+
let mut end = chars.len();
162+
163+
while start < end {
164+
let substr: String = chars[start..end].iter().collect();
165+
let mut substr_path = Vec::new();
166+
let maybe_entry = self.find_entry(
167+
follow,
168+
insensitive,
169+
substr.as_str(),
170+
None,
171+
&mut substr_path,
172+
);
173+
174+
match maybe_entry {
175+
Ok($opt::Some(result)) => {
176+
results.push(result);
177+
start = end;
178+
end = chars.len();
179+
continue;
180+
}
181+
Ok($opt::None) => {
182+
if substr.len() <= *min_length {
183+
start = end;
184+
end = chars.len();
185+
continue;
186+
}
187+
}
188+
Err(e) => return Err(e),
189+
}
154190

155-
if let $opt::Some(result) = maybe_entry {
156-
results.push(result);
191+
end -= 1;
157192
}
158-
159-
continue;
160193
}
161194

162-
end -= 1;
195+
Ok(results)
163196
}
164-
}
165-
166-
Ok(results)
197+
};
167198
}
168199

169200
pub fn lookup<'a, 'b, Query, Options>(

lib/src/error.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ pub enum Error {
5555
#[error("The input does not have a valid ODict file signature")]
5656
InvalidSignature,
5757

58+
#[error("Redirect loop detected: {0}")]
59+
RedirectLoop(String),
60+
5861
/* -------------------------------------------------------------------------- */
5962
/* Formatting */
6063
/* -------------------------------------------------------------------------- */

lib/src/tokenize.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ pub struct Token<T> {
2525

2626
#[derive(Default)]
2727
pub struct TokenizeOptions {
28-
/// Maximum number of redirects to follow via see_also links.
29-
/// 0 means no following, u32::MAX provides infinite following (old behavior).
30-
pub follow: u32,
28+
/// Whether to follow see_also links until finding an entry with etymologies.
29+
/// `true` follows redirects until an entry with etymologies is found; `false` disables following.
30+
pub follow: bool,
3131
// The list of languages to be considered during tokenization. Defaults to all languages supported by whatlang.
3232
pub allow_list: Option<Vec<Language>>,
3333
pub insensitive: bool,
@@ -40,7 +40,7 @@ impl AsRef<TokenizeOptions> for TokenizeOptions {
4040
}
4141

4242
impl TokenizeOptions {
43-
pub fn follow(mut self, follow: u32) -> Self {
43+
pub fn follow(mut self, follow: bool) -> Self {
4444
self.follow = follow;
4545
self
4646
}

0 commit comments

Comments
 (0)