/**
 * Converts an HTML table element into CSV text.
 *
 * The first <tr> is treated as the header row and every later <tr> as a data
 * row. Cell text is whitespace-collapsed, stripped of combining diacritical
 * marks, and quoted when it contains a double quote, comma or line break.
 *
 * When includeUrls is truthy, every column in which at least one cell (in any
 * row) contains an <a href> gets an extra column directly after it: the header
 * row receives "<header text>_url" and each data row receives the absolute URL
 * of the first link in that cell, or an empty field when the cell has no link.
 *
 * @param {Element} table - Table whose <tr> descendants are exported.
 * @param {boolean} [includeUrls] - Whether to emit the extra "_url" columns.
 * @returns {string} CSV lines joined with "\n"; "" when the table has no rows.
 */
function convertTableToCSV(table, includeUrls) {
  const rows = Array.from(table.querySelectorAll("tr"))
  if (rows.length === 0) {
    return ""
  }

  // Normalises cell text: null/undefined become "", diacritics are dropped and
  // every run of whitespace (including a lone newline or tab) becomes one space.
  const cleanText = (text) =>
    String(text == null ? "" : text)
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "")
      // Recompose, otherwise scripts that NFD decomposes without using the
      // U+0300-U+036F marks (Hangul, kana with dakuten, ...) stay decomposed.
      .normalize("NFC")
      .replace(/\s+/g, " ")
      .trim()

  // Cleans a value and applies CSV quoting (embedded quotes are doubled).
  const toCsvField = (text) => {
    const value = cleanText(text)
    return /[",\n\r]/.test(value) ? '"' + value.replace(/"/g, '""') + '"' : value
  }

  // Returns the absolute URL of the first link in a cell, or "" without a link.
  const linkUrl = (cell) => {
    const link = cell.querySelector("a[href]")
    const href = link ? link.getAttribute("href") : null
    if (href == null) {
      return ""
    }
    try {
      // Resolving unconditionally also covers protocol-relative ("//host/x")
      // hrefs and relative paths that merely begin with the letters "http".
      return new URL(href, document.baseURI).href
    } catch (e) {
      // Best effort: an href the URL parser rejects is exported unchanged.
      return href
    }
  }

  // columnHasLink[i] is true when any cell at index i, in any row, has a link.
  // One pass over all cells; indexes beyond the header row's width count too.
  const columnHasLink = []
  if (includeUrls) {
    rows.forEach((row) => {
      Array.from(row.children).forEach((cell, columnIndex) => {
        if (!columnHasLink[columnIndex] && cell.querySelector("a[href]")) {
          columnHasLink[columnIndex] = true
        }
      })
    })
  }

  const csvLines = []
  rows.forEach((row, rowIndex) => {
    const isHeaderRow = rowIndex === 0
    const fields = []
    Array.from(row.children).forEach((cell, columnIndex) => {
      fields.push(toCsvField(cell.textContent))
      if (columnHasLink[columnIndex]) {
        const extra = isHeaderRow ? cleanText(cell.textContent) + "_url" : linkUrl(cell)
        fields.push(toCsvField(extra))
      }
    })
    // Rows without any cells produce no CSV line at all.
    if (fields.length > 0) {
      csvLines.push(fields.join(","))
    }
  })

  return csvLines.join("\n")
}
chrome.tabs.sendMessage(tabs[0].id, { action: "downloadTable", tableIndex: index, includeUrls: false }) + }) + + /* Second button for the same table index: sends the same downloadTable message but with includeUrls: true; its "with-urls" class is the one targeted by the button.with-urls rule in styles.css. */const buttonWithUrls = document.createElement("button") + buttonWithUrls.className = "with-urls" + buttonWithUrls.textContent = "Download Table " + (index + 1) + " (with URLs)" + buttonWithUrls.addEventListener("click", function () { + chrome.tabs.sendMessage(tabs[0].id, { action: "downloadTable", tableIndex: index, includeUrls: true }) }) tablesList.appendChild(button) + tablesList.appendChild(buttonWithUrls) }) }) }) diff --git a/styles.css b/styles.css index f548b34..75233b0 100644 --- a/styles.css +++ b/styles.css @@ -70,3 +70,8 @@ button:active { transform: translateY(2px); transition-duration: 0.35s; } + +/* Sets the background colour and shadow of buttons carrying the with-urls class, which popup.js assigns to the "(with URLs)" download buttons. */button.with-urls { + background-color: #1e6e84; + box-shadow: rgba(38, 144, 173, 0.2) 0 6px 12px; +} \ No newline at end of file