Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
b02fbde
pr: migrate fetchers to Search.g4 ANTLR parser.
turhantolgaunal Aug 13, 2025
9892bec
pr: migrate fetchers to Search.g4 ANTLR parser.
turhantolgaunal Aug 14, 2025
d802521
pr: migrate fetchers to Search.g4 ANTLR parser.
turhantolgaunal Aug 14, 2025
a645b38
Added a new interface for search nodes in order to parse operators co…
turhantolgaunal Aug 17, 2025
44853c3
Added 2 new node types and changed SearchQueryNode to implement the n…
turhantolgaunal Aug 17, 2025
a643d22
Created a new visitor class for parsing the search syntax and modifie…
turhantolgaunal Aug 17, 2025
c890e92
Updated AbstractQueryTransformer to be more in line with the older co…
turhantolgaunal Aug 17, 2025
954e1a4
Updated ACMPortalFetcher to use the new parser and node logic
turhantolgaunal Aug 17, 2025
3977901
Updated ArxivPortalFetcher to use the new parser and node logic
turhantolgaunal Aug 17, 2025
60d2034
Updated BvbFetcher and BvbFetcherTest to use the new parser and node …
turhantolgaunal Aug 17, 2025
2f9ea97
Updated DBLPFetcher and DBLPQueryTransformerTest to use the new parse…
turhantolgaunal Aug 17, 2025
f3d40af
Updated DOABFetcher and DOABFetcherTest to use the new parser and nod…
turhantolgaunal Aug 17, 2025
184a721
Updated DOAJFetcher to use the new parser and node logic
turhantolgaunal Aug 18, 2025
3ce8865
Updated GoogleScholar and SearchBasedFetcherCapabilityTest to use the…
turhantolgaunal Aug 18, 2025
b28df9c
Updated GvkFetcher GvkFetcherTest and GVKQueryTransformerTest to use …
turhantolgaunal Aug 18, 2025
9c978dc
Updated IEEE and IEEEQueryTransformerTest to use Search.g4 based parser
turhantolgaunal Aug 18, 2025
60b6db9
Updated InfixTransformerTest to use Search.g4 based parser.
turhantolgaunal Aug 18, 2025
98aa35f
Updated INSPIREFetcher to use Search.g4 based parser.
turhantolgaunal Aug 18, 2025
efcd08f
Updated ISIDOREFetcher and ISIDOREFetcherTest to use Search.g4 based …
turhantolgaunal Aug 18, 2025
45245b8
Updated JstorFetcher to use Search.g4 based fetcher
turhantolgaunal Aug 18, 2025
ee311be
Updated LOBIDFetcher to use Search.g4 based fetcher
turhantolgaunal Aug 18, 2025
59175ac
Updated MathSciNet to use Search.g4 based fetcher
turhantolgaunal Aug 18, 2025
bf0be13
Changed general exceptions to more specific ones according to feedback
turhantolgaunal Aug 19, 2025
492456f
Removed null pointer exception
turhantolgaunal Aug 19, 2025
f561bbd
Removed null pointer exception
turhantolgaunal Aug 19, 2025
d7d4a04
Updated ScholarArchiveFetcher to use the ANTLR parser
turhantolgaunal Aug 19, 2025
9d8b24d
Updated SpringerFetcher to use the ANTLR parser
turhantolgaunal Aug 19, 2025
879e5c1
Updated rest of the classes needed for compiling
turhantolgaunal Aug 19, 2025
179b3a0
Updated ANTLR parser grammar and updated the transformer testers to b…
turhantolgaunal Aug 23, 2025
b78d8cc
Updated ZbMathQueryTransformerTest to throw the appropriate exception
turhantolgaunal Aug 23, 2025
1bf6d99
IEEEQueryTransformerTest wont check for the case where the string is …
turhantolgaunal Aug 23, 2025
a56066a
SearchBasedFetcher performSearch function now throws fetcher exceptio…
turhantolgaunal Aug 23, 2025
617703f
Merge branch 'main' into fetchertosearchg4
turhantolgaunal Aug 23, 2025
70988c1
Changed description for function getURLForQuery
turhantolgaunal Aug 23, 2025
e2f2ae2
Merge remote-tracking branch 'origin/fetchertosearchg4' into fetchert…
turhantolgaunal Aug 23, 2025
138746f
Changed how "not comparison expressions" are handled
turhantolgaunal Aug 23, 2025
e83c1fe
Added the change to CHANGELOG.md
turhantolgaunal Aug 24, 2025
a95b3d0
Merge branch 'main' into fetchertosearchg4
turhantolgaunal Aug 24, 2025
b8c190f
Update CHANGELOG.md
turhantolgaunal Aug 24, 2025
9f4953d
Reword
calixtus Aug 25, 2025
668a1d4
Merge branch 'main' into fetchertosearchg4
calixtus Aug 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,20 @@
import java.util.ArrayList;
import java.util.List;

import org.jabref.logic.search.query.SearchQueryExtractorVisitor;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.jabref.model.search.query.SearchQuery;
import org.jabref.model.search.query.SearchQueryNode;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param luceneQuery the root node of the lucene query
* @param queryList the list that contains the parsed nodes
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException;
Page<BibEntry> performSearchPaged(List<SearchQueryNode> queryList, int pageNumber) throws FetcherException;

/**
* @param searchQuery query string that can be parsed into a search.g4 query
Expand All @@ -29,11 +27,11 @@ default Page<BibEntry> performSearchPaged(String searchQuery, int pageNumber) th
if (searchQuery.isBlank()) {
return new Page<>(searchQuery, pageNumber, List.of());
}
SyntaxParser parser = new StandardSyntaxParser();
final String NO_EXPLICIT_FIELD = "default";
SearchQuery searchQueryObject = new SearchQuery(searchQuery);
SearchQueryExtractorVisitor visitor = new SearchQueryExtractorVisitor(searchQueryObject.getSearchFlags());
try {
return this.performSearchPaged(parser.parse(searchQuery, NO_EXPLICIT_FIELD), pageNumber);
} catch (QueryNodeParseException e) {
return this.performSearchPaged(visitor.visitStart(searchQueryObject.getContext()), pageNumber);
} catch (Exception e) {
throw new FetcherException("An error occurred during parsing of the query.");
}
}
Expand All @@ -48,11 +46,11 @@ default int getPageSize() {
/**
* This method is used to send complex queries using fielded search.
*
* @param luceneQuery the root node of the lucene query
* @param queryList the list that contains the parsed nodes
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
@Override
default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
return new ArrayList<>(performSearchPaged(luceneQuery, 0).getContent());
default List<BibEntry> performSearch(List<SearchQueryNode> queryList) throws FetcherException {
return new ArrayList<>(performSearchPaged(queryList, 0).getContent());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,20 @@

import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.jabref.model.search.query.SearchQueryNode;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher, ParserFetcher {

@Override
default Page<BibEntry> performSearchPaged(QueryNode luceneQuery, int pageNumber) throws FetcherException {
default Page<BibEntry> performSearchPaged(List<SearchQueryNode> queryList, int pageNumber) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(luceneQuery, pageNumber);
urlForQuery = getURLForQuery(queryList, pageNumber);
} catch (URISyntaxException | MalformedURLException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
return new Page<>(luceneQuery.toString(), pageNumber, getBibEntries(urlForQuery));
return new Page<>(queryList.toString(), pageNumber, getBibEntries(urlForQuery));
}

private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
Expand All @@ -41,18 +40,18 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
/**
* Constructs a URL based on the query, size and page number.
*
* @param luceneQuery the search query
* @param queryList the list that contains the parsed nodes
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(QueryNode luceneQuery, int pageNumber) throws URISyntaxException, MalformedURLException;
URL getURLForQuery(List<SearchQueryNode> queryList, int pageNumber) throws URISyntaxException, MalformedURLException;

@Override
default URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException {
return getURLForQuery(luceneQuery, 0);
default URL getURLForQuery(List<SearchQueryNode> queryList) throws URISyntaxException, MalformedURLException {
return getURLForQuery(queryList, 0);
}

@Override
default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
return SearchBasedParserFetcher.super.performSearch(luceneQuery);
default List<BibEntry> performSearch(List<SearchQueryNode> queryList) throws FetcherException {
return SearchBasedParserFetcher.super.performSearch(queryList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@

import java.util.List;

import org.jabref.logic.search.query.SearchQueryExtractorVisitor;
import org.jabref.model.entry.BibEntry;

import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;

import static org.jabref.logic.importer.fetcher.transformers.AbstractQueryTransformer.NO_EXPLICIT_FIELD;
import org.jabref.model.search.query.SearchQuery;
import org.jabref.model.search.query.SearchQueryNode;

/**
* Searches web resources for bibliographic information based on a free-text query.
Expand All @@ -23,30 +19,31 @@ public interface SearchBasedFetcher extends WebFetcher {
/**
* This method is used to send complex queries using fielded search.
*
* @param luceneQuery the root node of the lucene query
* @param queryList the list that contains the parsed nodes
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException;
List<BibEntry> performSearch(List<SearchQueryNode> queryList) throws FetcherException;

/**
 * Looks for hits which are matched by the given free-text query.
 * <p>
 * A blank query short-circuits to an empty result. Otherwise the query string is wrapped in a
 * {@link SearchQuery}, its parse context is walked by a {@link SearchQueryExtractorVisitor}, and the
 * resulting node list is handed to {@link #performSearch(List)}.
 *
 * @param searchQuery query string that can be parsed into a search.g4 query
 * @return a list of {@link BibEntry}, which are matched by the query (may be empty)
 * @throws FetcherException if the query cannot be parsed (the underlying failure is attached as the cause)
 *                          or if the delegated search fails
 */
default List<BibEntry> performSearch(String searchQuery) throws FetcherException {
    if (searchQuery.isBlank()) {
        return List.of();
    }

    SearchQuery searchQueryObject = new SearchQuery(searchQuery);
    SearchQueryExtractorVisitor visitor = new SearchQueryExtractorVisitor(searchQueryObject.getSearchFlags());
    List<SearchQueryNode> queryList;
    try {
        queryList = visitor.visitStart(searchQueryObject.getContext());
    } catch (Exception e) {
        // Pass the caught exception along as the cause so the real parse failure is not lost.
        throw new FetcherException("An error occurred when parsing the query", e);
    }

    return this.performSearch(queryList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
import java.util.List;

import org.jabref.model.entry.BibEntry;

import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.jabref.model.search.query.SearchQueryNode;

/**
* Provides a convenient interface for search-based fetcher, which follows the usual three-step procedure:
Expand Down Expand Up @@ -40,14 +39,14 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher, ParserFetc
* This method is necessary as the performSearch method does not support certain URL parameters that are used for
* fielded search, such as a title, author, or year parameter.
*
* @param luceneQuery the root node of the lucene query
* @param queryList the list that contains the parsed nodes
*/
@Override
default List<BibEntry> performSearch(QueryNode luceneQuery) throws FetcherException {
default List<BibEntry> performSearch(List<SearchQueryNode> queryList) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(luceneQuery);
urlForQuery = getURLForQuery(queryList);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
Expand Down Expand Up @@ -76,7 +75,7 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
/**
* Constructs a URL based on the parsed search query.
*
* @param luceneQuery the root node of the lucene query
* @param queryList the list that contains the parsed nodes
*/
URL getURLForQuery(QueryNode luceneQuery) throws URISyntaxException, MalformedURLException, FetcherException;
URL getURLForQuery(List<SearchQueryNode> queryList) throws URISyntaxException, MalformedURLException, FetcherException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fetcher.transformers.DefaultQueryTransformer;
import org.jabref.logic.importer.fileformat.ACMPortalParser;
import org.jabref.model.search.query.SearchQueryNode;

import org.apache.hc.core5.net.URIBuilder;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;

public class ACMPortalFetcher implements SearchBasedParserFetcher {

Expand All @@ -37,20 +37,25 @@ public Optional<HelpFile> getHelpPage() {
return Optional.of(HelpFile.FETCHER_ACM);
}

private static String createQueryString(QueryNode query) {
return new DefaultQueryTransformer().transformLuceneQuery(query).orElse("");
/**
 * Builds the free-text query sent to ACM from the parsed search nodes.
 * <p>
 * Only {@code term()} of each node is used; the terms are concatenated in list order,
 * each followed by a single space, and the result is trimmed.
 *
 * @param queryList list that contains the parsed nodes
 * @return the space-separated terms, or an empty string for an empty list
 */
private static String createQueryString(List<SearchQueryNode> queryList) {
    StringBuilder joinedTerms = new StringBuilder();
    // Every term is followed by a space delimiter; the final trim() drops the trailing one.
    queryList.forEach(node -> joinedTerms.append(node.term()).append(" "));
    return joinedTerms.toString().trim();
}

/**
* Constructs the URL for the search page.
*
* @param query query node
* @param queryList list that contains the parsed nodes
* @return query URL
*/
@Override
public URL getURLForQuery(QueryNode query) throws URISyntaxException, MalformedURLException {
public URL getURLForQuery(List<SearchQueryNode> queryList) throws URISyntaxException, MalformedURLException {
URIBuilder uriBuilder = new URIBuilder(SEARCH_URL);
uriBuilder.addParameter("AllField", createQueryString(query));
uriBuilder.addParameter("AllField", createQueryString(queryList));
return uriBuilder.build().toURL();
}

Expand Down
Loading