diff --git a/WEB-INF/struts-config.xml b/WEB-INF/struts-config.xml index 1e95fe48..e434cd27 100755 --- a/WEB-INF/struts-config.xml +++ b/WEB-INF/struts-config.xml @@ -222,6 +222,8 @@ + + @@ -610,6 +612,15 @@ We would like to remove getChildImages. Not needed now in the new UI + + + + rows = parseStream(inputStream, charSet); + + Set matchedTaxonNames = new HashSet<>(); + + for (ParsedRow row : rows) { + if (row.hasError) { + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, "", ValidateSpeciesResultItem.Status.FORMAT_ERROR, row.errorMsg, "")); + continue; + } + + // Normalization + String subfamily = normalize(row.subfamily); + String genus = normalizeCapitalized(row.genus); + String species = normalize(row.species); + String subspecies = normalize(row.subspecies); + + if (genus == null || species == null) { + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, "", ValidateSpeciesResultItem.Status.FORMAT_ERROR, "Genus and species are required.", "")); + continue; + } + + // Human-readable display name: "Genus species [subspecies]" + String displayName = genus + " " + species; + if (subspecies != null) displayName += " " + subspecies; + + // Resolve the internal DB taxon_name key. + // DB taxon_name is entirely lowercase (e.g. "dorylinaeaenictus clavatus atripennis"). + // When subfamily is known: key = subfamily + genus(lower) + " " + species [+ " " + subspecies] + // When subfamily is unknown: query the DB by genus+species columns to get the key. + String taxonName = null; + String genusLower = genus.toLowerCase(); + if (subfamily != null) { + taxonName = subfamily + genusLower + " " + species; + if (subspecies != null) taxonName += " " + subspecies; + } else { + taxonName = lookupTaxonName(genusLower, species, subspecies); + } + + // Lean DB lookup — only status + current_valid_name (avoids expensive image/bioregion queries) + TaxonLookupResult result = taxonName != null ? 
lookupTaxon(taxonName) : null; + + if (result != null) { + if ("valid".equals(result.status)) { + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, displayName, ValidateSpeciesResultItem.Status.EXACT_MATCH, "Exact match.", "")); + matchedTaxonNames.add(taxonName); + } else if ("fossil".equals(result.status)) { + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, displayName, ValidateSpeciesResultItem.Status.NOT_FOUND, "Fossil taxon — not an extant valid name.", "")); + } else if ("synonym".equals(result.status) || "homonym".equals(result.status)) { + // current_valid_name is an internal DB key (all lowercase, subfamily-prefixed). + // Resolve it to a human-readable display name. + String suggestion = resolveDisplayName(result.currentValidName); + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, displayName, ValidateSpeciesResultItem.Status.AMBIGUOUS, + "Matched a " + result.status + ".", suggestion != null ? suggestion : "")); + } else { + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, displayName, ValidateSpeciesResultItem.Status.AMBIGUOUS, + "Status is '" + result.status + "'. Expected 'valid'.", "")); + } + } else { + // Fuzzy match using display names for accurate distance calculation + String suggestion = getBestFuzzyMatch(displayName, genus); + report.addResult(new ValidateSpeciesResultItem( + row.rowNum, row.rawLine, displayName, ValidateSpeciesResultItem.Status.NOT_FOUND, "Taxon not found.", suggestion)); + } + } + + if (showUnmatched) { + populateUnmatched(report, matchedTaxonNames); + } + + return report; + } + + /** + * Minimal status-only DB lookup. Avoids TaxonDb.getTaxon() which fires + * expensive image / bioregion queries for every row in production. 
+ */ + private TaxonLookupResult lookupTaxon(String taxonName) throws SQLException { + String q = "SELECT status, current_valid_name FROM taxon WHERE taxon_name = ?"; + try (PreparedStatement stmt = connection.prepareStatement(q)) { + stmt.setString(1, taxonName); + try (ResultSet rs = stmt.executeQuery()) { + if (rs.next()) { + return new TaxonLookupResult(rs.getString("status"), rs.getString("current_valid_name")); + } + } + } + return null; + } + + /** + * When no subfamily is provided (Format B without subfamily column), + * look up the taxon_name by querying genus + species + subspecies columns. + */ + private String lookupTaxonName(String genus, String species, String subspecies) throws SQLException { + String subspClause = subspecies != null ? " AND subspecies = ?" : " AND (subspecies IS NULL OR subspecies = '')"; + String q = "SELECT taxon_name FROM taxon WHERE genus = ? AND species = ?" + subspClause + + " AND status != 'synonym' AND taxarank IN ('species','subspecies') LIMIT 1"; + try (PreparedStatement stmt = connection.prepareStatement(q)) { + stmt.setString(1, genus.toLowerCase()); + stmt.setString(2, species); + if (subspecies != null) stmt.setString(3, subspecies); + try (ResultSet rs = stmt.executeQuery()) { + if (rs.next()) return rs.getString("taxon_name"); + } + } catch (SQLException e) { + s_log.warn("lookupTaxonName() genus:" + genus + " e:" + e.getMessage()); + } + return null; + } + + /** + * Resolves an internal DB taxon_name key to a human-readable display name + * by querying the DB for its genus/species/subspecies components. + * Returns "Genus species [subspecies]" or null if not found. + */ + private String resolveDisplayName(String internalTaxonName) { + if (internalTaxonName == null || internalTaxonName.isEmpty()) return null; + String q = "SELECT genus, species, subspecies FROM taxon WHERE taxon_name = ? 
LIMIT 1"; + try (PreparedStatement stmt = connection.prepareStatement(q)) { + stmt.setString(1, internalTaxonName); + try (ResultSet rs = stmt.executeQuery()) { + if (rs.next()) { + return buildDisplayName(rs.getString("genus"), rs.getString("species"), rs.getString("subspecies")); + } + } + } catch (SQLException e) { + s_log.warn("resolveDisplayName() failed for: " + internalTaxonName + " e:" + e.getMessage()); + } + return internalTaxonName; // fallback: return raw key + } + + /** + * Builds a human-readable display name from DB components. + * Capitalizes genus, returns "Genus species [subspecies]". + */ + private String buildDisplayName(String genus, String species, String subspecies) { + if (genus == null || genus.isEmpty()) return null; + String g = genus.substring(0, 1).toUpperCase() + genus.substring(1); + StringBuilder sb = new StringBuilder(g); + if (species != null && !species.isEmpty()) { + sb.append(" ").append(species); + } + if (subspecies != null && !subspecies.isEmpty()) { + sb.append(" ").append(subspecies); + } + return sb.toString(); + } + + private static class TaxonLookupResult { + final String status; + final String currentValidName; + TaxonLookupResult(String status, String currentValidName) { + this.status = status; + this.currentValidName = currentValidName; + } + } + + private String normalize(String s) { + if (s == null || s.trim().isEmpty()) return null; + return s.trim().toLowerCase(); + } + + private String normalizeCapitalized(String s) { + if (s == null || s.trim().isEmpty()) return null; + s = s.trim().toLowerCase(); + return s.substring(0, 1).toUpperCase() + s.substring(1); + } + + // Limits fuzzy matching to species sharing the same genus for performance. + // Queries genus/species/subspecies columns to build proper display names for comparison. 
+ private String getBestFuzzyMatch(String displayName, String genus) { + if (genus == null) return "Check spelling."; + + List candidates = new ArrayList<>(); + // Query individual columns so we can build proper display names + String query = "SELECT genus, species, subspecies FROM taxon WHERE genus = ? AND status = 'valid'"; + + try (PreparedStatement stmt = connection.prepareStatement(query)) { + stmt.setString(1, genus.toLowerCase()); + try (ResultSet rs = stmt.executeQuery()) { + while (rs.next()) { + String candidate = buildDisplayName(rs.getString("genus"), rs.getString("species"), rs.getString("subspecies")); + if (candidate != null) candidates.add(candidate); + } + } + } catch (SQLException e) { + s_log.warn("Fuzzy match query failed: " + e.getMessage()); + return "Check spelling. (DB error)"; + } + + if (candidates.isEmpty()) return "No valid species found for genus " + genus + "."; + + int bestDistance = Integer.MAX_VALUE; + String bestMatch = null; + LevenshteinDistance ld = new LevenshteinDistance(); + + for (String candidate : candidates) { + int dist = ld.apply(displayName, candidate); + if (dist < bestDistance) { + bestDistance = dist; + bestMatch = candidate; + } + } + + if (bestDistance <= 4 && bestMatch != null) { + return bestMatch; + } + + return "Check spelling."; + } + + private void populateUnmatched(ValidateSpeciesReport report, Set matchedTaxa) { + String q = "select taxon_name from taxon where status = 'valid' and taxarank in ('species', 'subspecies') order by taxon_name"; + try (PreparedStatement stmt = connection.prepareStatement(q); + ResultSet rs = stmt.executeQuery()) { + while(rs.next()) { + String n = rs.getString(1); + if (!matchedTaxa.contains(n)) { + report.addUnmatchedValidTaxon(n); + } + } + } catch (SQLException e) { + s_log.warn("Failed retrieving unmatched: " + e.getMessage()); + } + } + + private List parseStream(InputStream is, String charset) throws ValidationParseException, IOException { + List results = new 
ArrayList<>(); + BufferedReader reader = new BufferedReader(new InputStreamReader(is, charset)); + + String headerLine = reader.readLine(); + if (headerLine == null) throw new ValidationParseException("File is empty."); + + // Handle BOM + if (headerLine.startsWith("\uFEFF")) { + headerLine = headerLine.substring(1); + } + + String[] headers = headerLine.toLowerCase().replace("\r", "").split("\t"); + boolean isOptionA = false; + boolean isOptionB = false; + + int subfamIdx = -1, genusIdx = -1, speciesIdx = -1, subspIdx = -1, taxonNameIdx = -1; + + for (int i=0; i 50000) { + throw new ValidationParseException("Maximum row limit of 50,000 exceeded. Stopping parse."); + } + + ParsedRow pr = new ParsedRow(); + pr.rowNum = rowNum; + pr.rawLine = line; + + String[] tokens = line.split("\t", -1); + pr.subfamily = safeGet(tokens, subfamIdx); + + if (isOptionA) { + pr.genus = safeGet(tokens, genusIdx); + pr.species = safeGet(tokens, speciesIdx); + pr.subspecies = safeGet(tokens, subspIdx); + + if (pr.genus == null || pr.species == null) { + pr.hasError = true; + pr.errorMsg = "Missing genus or species token."; + } + } else { + String taxNameRaw = safeGet(tokens, taxonNameIdx); + if (taxNameRaw == null || taxNameRaw.trim().isEmpty()) { + pr.hasError = true; pr.errorMsg = "Empty taxon_name."; + } else { + String[] nameParts = taxNameRaw.trim().split("\\s+"); + if (nameParts.length < 2 || nameParts.length > 3) { + pr.hasError = true; + pr.errorMsg = "taxon_name value '" + taxNameRaw + "' has " + nameParts.length + " tokens. Expected 'Genus species' or 'Genus species subspecies'."; + } else { + pr.genus = nameParts[0]; + pr.species = nameParts[1]; + if (nameParts.length == 3) { + pr.subspecies = nameParts[2]; + } + } + } + } + + results.add(pr); + } + + return results; + } + + private String safeGet(String[] t, int idx) { + if (idx == -1 || idx >= t.length) return null; + String val = t[idx].trim(); + return val.isEmpty() ? 
null : val; + } + + private static class ParsedRow { + int rowNum; + String rawLine; + String subfamily; + String genus; + String species; + String subspecies; + boolean hasError = false; + String errorMsg; + } +} diff --git a/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListAction.java b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListAction.java new file mode 100644 index 00000000..1bac0a7e --- /dev/null +++ b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListAction.java @@ -0,0 +1,113 @@ +package org.calacademy.antweb.curate.speciesList; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.Connection; +import java.sql.SQLException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import javax.sql.DataSource; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.struts.action.ActionForm; +import org.apache.struts.action.ActionForward; +import org.apache.struts.action.ActionMapping; +import org.apache.struts.upload.FormFile; +import org.calacademy.antweb.util.DBUtil; +import org.calacademy.antweb.util.Check; +import org.calacademy.antweb.ValidationParseException; + +public class ValidateSpeciesListAction extends SpeciesListSuperAction { + + private static final Log s_log = LogFactory.getLog(ValidateSpeciesListAction.class); + + @Override + public ActionForward execute(ActionMapping mapping, ActionForm form, + HttpServletRequest request, HttpServletResponse response) { + + ActionForward loginCheck = Check.login(request, mapping); + if (loginCheck != null) return loginCheck; + + ValidateSpeciesListForm toolForm = (ValidateSpeciesListForm) form; + FormFile uploadedFile = toolForm.getFile(); + + Connection connection = null; + try { + // Check downloads first, since they won't have the uploaded file! 
+ if ("download".equals(toolForm.getAction()) || "downloadCorrected".equals(toolForm.getAction())) { + ValidateSpeciesReport cachedReport = (ValidateSpeciesReport) request.getSession().getAttribute("validationReport"); + if (cachedReport != null) { + response.setContentType("text/tab-separated-values"); + if ("downloadCorrected".equals(toolForm.getAction())) { + response.setHeader("Content-Disposition", "attachment; filename=\"corrected_species_list_for_upload.tsv\""); + response.getWriter().write(cachedReport.generateCorrectedTsvReport()); + } else { + response.setHeader("Content-Disposition", "attachment; filename=\"validation_report.tsv\""); + response.getWriter().write(cachedReport.generateTsvReport()); + } + return null; + } else { + request.setAttribute("message", "Session expired or no report found. Please re-validate your file."); + return mapping.findForward("validateSpeciesList"); + } + } + + // Initial render or empty submission + if (uploadedFile == null || uploadedFile.getFileSize() == 0) { + return mapping.findForward("validateSpeciesList"); + } + + String filename = uploadedFile.getFileName().toLowerCase(); + if (!filename.endsWith(".txt") && !filename.endsWith(".tsv")) { + request.setAttribute("message", "File must be a tab-delimited .txt or .tsv file."); + return mapping.findForward("validateSpeciesList"); + } + + // 5MB max + if (uploadedFile.getFileSize() > (5 * 1024 * 1024)) { + request.setAttribute("message", "File is too large. 
Maximum size is 5MB (approx 50,000 rows)."); + return mapping.findForward("validateSpeciesList"); + } + + InputStream fileStream = uploadedFile.getInputStream(); + + DataSource ds = getDataSource(request, "longConPool"); + connection = DBUtil.getConnection(ds, "ValidateSpeciesListAction.execute()"); + if (connection == null) { + request.setAttribute("message", "Could not obtain database connection."); + return mapping.findForward("validateSpeciesList"); + } + + // Enforce strictly read-only mode to prevent ALL data modifications. + connection.setReadOnly(true); + + SpeciesListValidator validator = new SpeciesListValidator(connection); + ValidateSpeciesReport report = validator.validate(fileStream, "UTF-8", toolForm.isShowUnmatched()); + + request.getSession().setAttribute("validationReport", report); + request.setAttribute("validationReport", report); + return mapping.findForward("validateSpeciesList"); + + } catch (ValidationParseException e) { + s_log.warn("ValidateSpeciesListAction parse error: " + e.getMessage()); + request.setAttribute("message", e.getMessage()); + return mapping.findForward("validateSpeciesList"); + } catch (SQLException e) { + s_log.error("ValidateSpeciesListAction DB error: " + e); + request.setAttribute("message", "Database error occurred during validation: " + e.getMessage()); + return mapping.findForward("validateSpeciesList"); + } catch (IOException e) { + s_log.error("ValidateSpeciesListAction IO error: " + e); + request.setAttribute("message", "Error reading uploaded file: " + e.getMessage()); + return mapping.findForward("validateSpeciesList"); + } finally { + if (connection != null) { + try { + connection.setReadOnly(false); // Reset to default pool state + } catch (SQLException e) { s_log.error("Failed to reset connection readOnly status", e); } + } + DBUtil.close(connection, this, "ValidateSpeciesListAction.execute()"); + } + } +} diff --git a/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListForm.java 
b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListForm.java
new file mode 100644
index 00000000..2a4bd288
--- /dev/null
+++ b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesListForm.java
@@ -0,0 +1,43 @@
+package org.calacademy.antweb.curate.speciesList;
+
+import javax.servlet.http.HttpServletRequest;
+import org.apache.struts.action.ActionForm;
+import org.apache.struts.action.ActionMapping;
+import org.apache.struts.upload.FormFile;
+
+public class ValidateSpeciesListForm extends ActionForm {
+    private FormFile file;
+    private String action;
+    private boolean showUnmatched;
+
+    public FormFile getFile() {
+        return file;
+    }
+
+    public void setFile(FormFile file) {
+        this.file = file;
+    }
+
+    public String getAction() {
+        return action;
+    }
+
+    public void setAction(String action) {
+        this.action = action;
+    }
+
+    public boolean isShowUnmatched() {
+        return showUnmatched;
+    }
+
+    public void setShowUnmatched(boolean showUnmatched) {
+        this.showUnmatched = showUnmatched;
+    }
+
+    @Override
+    public void reset(ActionMapping mapping, HttpServletRequest request) {
+        this.file = null;
+        this.action = null;
+        this.showUnmatched = false;
+    }
+}
diff --git a/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesReport.java b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesReport.java
new file mode 100644
index 00000000..40207d0b
--- /dev/null
+++ b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesReport.java
@@ -0,0 +1,146 @@
+package org.calacademy.antweb.curate.speciesList;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Collects the per-row results of one species-list validation run and renders them as TSV.
+ * Results are filed by status as they are added: exact matches, format errors, and
+ * everything else ("problems").
+ */
+public class ValidateSpeciesReport {
+    private final List<ValidateSpeciesResultItem> exactMatches = new ArrayList<>();
+    private final List<ValidateSpeciesResultItem> problems = new ArrayList<>();
+    private final List<ValidateSpeciesResultItem> formatErrors = new ArrayList<>();
+
+    // For "Show Unmatched" feature
+    private final List<String> unmatchedValidTaxa = new ArrayList<>();
+
+    private int totalInputRows = 0;
+
+    // Limits
+    private boolean rowLimitExceeded = false;
+
+    /** Counts one row result and files it under the bucket that matches its status. */
+    public void addResult(ValidateSpeciesResultItem item) {
+        totalInputRows++;
+        if (item.getStatus() == ValidateSpeciesResultItem.Status.EXACT_MATCH) {
+            exactMatches.add(item);
+        } else if (item.getStatus() == ValidateSpeciesResultItem.Status.FORMAT_ERROR) {
+            formatErrors.add(item);
+        } else {
+            problems.add(item);
+        }
+    }
+
+    /** Appends one name to the list of valid taxa that the input did not match. */
+    public void addUnmatchedValidTaxon(String taxonName) {
+        this.unmatchedValidTaxa.add(taxonName);
+    }
+
+    public List<ValidateSpeciesResultItem> getExactMatches() { return exactMatches; }
+    public List<ValidateSpeciesResultItem> getProblems() { return problems; }
+    public List<ValidateSpeciesResultItem> getFormatErrors() { return formatErrors; }
+    public List<String> getUnmatchedValidTaxa() { return unmatchedValidTaxa; }
+
+    public int getTotalInputRows() { return totalInputRows; }
+    public int getExactMatchCount() { return exactMatches.size(); }
+    public int getProblemCount() { return problems.size(); }
+    public int getFormatErrorCount() { return formatErrors.size(); }
+    public int getUnmatchedCount() { return unmatchedValidTaxa.size(); }
+
+    public void setRowLimitExceeded(boolean exceeded) { this.rowLimitExceeded = exceeded; }
+    public boolean isRowLimitExceeded() { return rowLimitExceeded; }
+
+    /**
+     * Renders every result, ordered by row number, as a tab-separated report with a header
+     * row. Any recorded unmatched taxa are appended as a trailing section.
+     */
+    public String generateTsvReport() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("Row\tInput Raw\tNormalized Taxon Name\tStatus\tMessage\tSuggestion\n");
+
+        for (ValidateSpeciesResultItem item : allResultsByRow()) {
+            // Sanitize rawLine: replace embedded tabs to avoid corrupting TSV columns
+            String safeRaw = item.getInputRaw().replace("\t", " ");
+            sb.append(item.getRowNum()).append("\t")
+              .append(safeRaw).append("\t")
+              .append(item.getNormalizedName()).append("\t")
+              .append(item.getStatus().name()).append("\t")
+              .append(item.getMessage()).append("\t")
+              .append(item.getSuggestion()).append("\n");
+        }
+
+        if (!unmatchedValidTaxa.isEmpty()) {
+            sb.append("\n\n--- UNMATCHED VALID ANTWEB TAXA ---\n");
+            sb.append("Taxon Name\n");
+            for (String t : unmatchedValidTaxa) {
+                sb.append(t).append("\n");
+            }
+        }
+
+        return sb.toString();
+    }
+
+    /**
+     * Renders a single-column {@code taxon_name} TSV: the normalized name of each exact
+     * match, or, for any other row, its suggestion when that suggestion is shaped like a
+     * taxon name. Rows that yield no name are omitted.
+     */
+    public String generateCorrectedTsvReport() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("taxon_name\n");
+
+        for (ValidateSpeciesResultItem item : allResultsByRow()) {
+            String taxonStr = "";
+            if (item.getStatus() == ValidateSpeciesResultItem.Status.EXACT_MATCH) {
+                taxonStr = item.getNormalizedName();
+            } else if (looksLikeTaxonName(item.getSuggestion())) {
+                taxonStr = item.getSuggestion();
+            }
+            if (!taxonStr.isEmpty()) {
+                sb.append(taxonStr).append("\n");
+            }
+        }
+        return sb.toString();
+    }
+
+    /** Returns the results of all three buckets merged in ascending row-number order. */
+    private List<ValidateSpeciesResultItem> allResultsByRow() {
+        List<ValidateSpeciesResultItem> all = new ArrayList<>();
+        all.addAll(formatErrors);
+        all.addAll(problems);
+        all.addAll(exactMatches);
+        all.sort((a, b) -> Integer.compare(a.getRowNum(), b.getRowNum()));
+        return all;
+    }
+
+    /**
+     * Tells whether a suggestion is shaped like "Genus species [subspecies]": it starts with
+     * an upper-case letter and has two or three space-separated tokens made only of letters,
+     * digits, '-' or '_'. A leading capital alone is not enough, because advisory texts such
+     * as "Check spelling." are capitalized too and must stay out of the corrected list.
+     */
+    private static boolean looksLikeTaxonName(String suggestion) {
+        if (suggestion == null || suggestion.isEmpty()
+                || !Character.isUpperCase(suggestion.charAt(0))) {
+            return false;
+        }
+        String[] tokens = suggestion.split(" ");
+        if (tokens.length < 2 || tokens.length > 3) {
+            return false;
+        }
+        for (String token : tokens) {
+            if (token.isEmpty()) {
+                return false;
+            }
+            for (int i = 0; i < token.length(); i++) {
+                char c = token.charAt(i);
+                if (!Character.isLetterOrDigit(c) && c != '-' && c != '_') {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+}
diff --git a/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesResultItem.java b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesResultItem.java
new file mode 100644
index 00000000..0b145e92
--- /dev/null
+++ b/src/org/calacademy/antweb/curate/speciesList/ValidateSpeciesResultItem.java
@@ -0,0 +1,28 @@
+package org.calacademy.antweb.curate.speciesList;
+
+public final class ValidateSpeciesResultItem {
+    public enum Status { EXACT_MATCH, NOT_FOUND, FORMAT_ERROR, AMBIGUOUS }
+
+    private final int rowNum;
+    private final String inputRaw;
+    private final String normalizedName;
+    private final Status status;
+    private final String message;
+    private final String suggestion;
+
+    public ValidateSpeciesResultItem(int rowNum, String inputRaw, String normalizedName, Status status, String message, String suggestion) {
+        this.rowNum = rowNum;
+        this.inputRaw = inputRaw != null ? inputRaw : "";
+        this.normalizedName = normalizedName != null ? normalizedName : "";
+        this.status = status;
+        this.message = message != null ? message : "";
+        this.suggestion = suggestion != null ? suggestion : "";
+    }
+
+    public int getRowNum() { return rowNum; }
+    public String getInputRaw() { return inputRaw; }
+    public String getNormalizedName() { return normalizedName; }
+    public Status getStatus() { return status; }
+    public String getMessage() { return message; }
+    public String getSuggestion() { return suggestion; }
+}
diff --git a/web/curate/curate-body.jsp b/web/curate/curate-body.jsp
index 7f9f828a..2aa02624 100755
--- a/web/curate/curate-body.jsp
+++ b/web/curate/curate-body.jsp
@@ -489,6 +489,23 @@ Upload Curator File
+<% if (accessLogin.isCurator()) { %>
+
+
+
+

Read-Only Tools

+
+
+
+ +
+<% } %> + <% if (accessLogin.isDeveloper()) { %>
diff --git a/web/curate/speciesList/validateSpeciesList-body.jsp b/web/curate/speciesList/validateSpeciesList-body.jsp new file mode 100644 index 00000000..f5e0f4da --- /dev/null +++ b/web/curate/speciesList/validateSpeciesList-body.jsp @@ -0,0 +1,178 @@ +<%@ page language="java" %> +<%@ page errorPage = "/error.jsp" %> +<%@ page import="java.util.*" %> +<%@ page import="org.calacademy.antweb.*" %> +<%@ page import="org.calacademy.antweb.util.*" %> +<%@ page import="org.calacademy.antweb.curate.speciesList.*" %> + +<%@ taglib uri="/WEB-INF/struts-bean.tld" prefix="bean" %> +<%@ taglib uri="/WEB-INF/struts-html.tld" prefix="html" %> +<%@ taglib uri="/WEB-INF/struts-logic.tld" prefix="logic" %> + + + + +<% + String domainApp = AntwebProps.getDomainApp(); + String message = (String) request.getAttribute("message"); + ValidateSpeciesReport report = (ValidateSpeciesReport) request.getAttribute("validationReport"); +%> + +
+ +

Validate Species List

+
+ Read-Only Validator. Safe to use for reconciliation workflows. No database modifications will occur. +
+ + <% if (message != null && !message.isEmpty()) { %> +
+ <%= message %> +
+ <% } %> + +
+
+

Validation Instructions

+

Upload a tab-delimited .txt or .tsv file. Maximum file size: 5MB (approx. 50,000 rows). The first row must contain column headers.

+ +

Two formats are supported:

+ +
+ Format 1 (Separate Columns): Required headers: genus, species. Optional: subfamily, subspecies.
+
+ subfamily genus species subspecies
+ Myrmicinae Acromyrmex balzani multituber
+ Dorylinae Aenictus clavatus atripennis +
+ + Format 2 (Combined Taxon): Required header: taxon_name (case-insensitive).
+
+ subfamily taxon_name
+ Myrmicinae Acromyrmex balzani multituber
+ Dorylinae Aenictus clavatus atripennis +
+
+ +
+

Download Template File

+
+
+ +
+
+

Upload File for Validation

+ + + + +
+ +
+ +
+ + +
+ +
+ +
+
+
+
+
+ + + <% if (report != null) { %> +
+

Validation Report Summary

+ + + + + + <% if (validateSpeciesListForm.isShowUnmatched()) { %> + + <% } %> +
Total Rows Processed<%= report.getTotalInputRows() %>
Exact Matches<%= report.getExactMatchCount() %>
Not Found / Ambiguous<%= report.getProblemCount() %>
Format Errors<%= report.getFormatErrorCount() %>
Unmatched AntWeb Taxa<%= report.getUnmatchedCount() %>
+ +
+ + + + + + + + + + +
+ + <% if (report.getProblemCount() > 0 || report.getFormatErrorCount() > 0) { %> +

Problems & Format Errors Log

+ + + + + + + + + + + <% + List issues = new ArrayList<>(); + issues.addAll(report.getFormatErrors()); + issues.addAll(report.getProblems()); + issues.sort((a, b) -> Integer.compare(a.getRowNum(), b.getRowNum())); + + for (ValidateSpeciesResultItem item : issues) { + String bg = item.getStatus() == ValidateSpeciesResultItem.Status.FORMAT_ERROR ? "#ffe6e6" : "#fff3cd"; + %> + + + + + + + <% if (item.getSuggestion() != null && !item.getSuggestion().isEmpty()) { %> + + <% } else { %> + + <% } %> + + <% } %> +
RowInput RawNormalized Taxon NameStatusMessageSuggestion
<%= item.getRowNum() %><%= item.getInputRaw() != null ? item.getInputRaw().replace("<", "<").replace(">", ">") : "" %><%= item.getNormalizedName() != null ? item.getNormalizedName() : "" %><%= item.getStatus().name() %><%= item.getMessage() %><%= item.getSuggestion() %> + <% if (item.getStatus() == ValidateSpeciesResultItem.Status.NOT_FOUND) { %> +
↑ Try replacing with this valid name.
+ <% } %> +
+ <% } %> + + <% if (report.getFormatErrorCount() == 0 && report.getProblemCount() == 0) { %> +
+ ✓ All rows matched valid extant taxa exactly. +
+ <% } %> + + <% if (validateSpeciesListForm.isShowUnmatched() && !report.getUnmatchedValidTaxa().isEmpty()) { %> +

Unmatched Valid AntWeb Taxa (Preview)

+
+ <% + int maxPreview = Math.min(100, report.getUnmatchedValidTaxa().size()); + for (int i=0; i"); + } + if (report.getUnmatchedValidTaxa().size() > 100) { + out.println("
... and " + (report.getUnmatchedValidTaxa().size() - 100) + " more (Download TSV for full list)"); + } + %> +
+ <% } %> + +
+ <% } %> + +
diff --git a/web/curate/speciesList/validateSpeciesList.jsp b/web/curate/speciesList/validateSpeciesList.jsp new file mode 100644 index 00000000..7ca6035a --- /dev/null +++ b/web/curate/speciesList/validateSpeciesList.jsp @@ -0,0 +1,14 @@ +<%@ page language="java" %> +<%@ page errorPage = "error.jsp" %> +<%@ taglib uri="/WEB-INF/struts-bean.tld" prefix="bean" %> +<%@ taglib uri="/WEB-INF/struts-html.tld" prefix="html" %> +<%@ taglib uri="/WEB-INF/struts-logic.tld" prefix="logic" %> +<%@ taglib uri="/WEB-INF/struts-tiles.tld" prefix="tiles" %> + +<%@include file="/curate/curatorCheck.jsp" %> +<%@include file="/common/antweb_admin-defs.jsp" %> + + + + + diff --git a/web/data/validateSpeciesList_template.txt b/web/data/validateSpeciesList_template.txt new file mode 100644 index 00000000..c95bc851 --- /dev/null +++ b/web/data/validateSpeciesList_template.txt @@ -0,0 +1,4 @@ +subfamily genus species subspecies +Myrmicinae Acromyrmex balzani multituber +Dorylinae Aenictus clavatus atripennis +Myrmicinae Atta cephalotes lutea