-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextract-issue.js
More file actions
101 lines (89 loc) · 3.83 KB
/
extract-issue.js
File metadata and controls
101 lines (89 loc) · 3.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// Extraction script to be executed in browser context.
//
// Scrapes the currently loaded page and evaluates to a JSON string of the form:
//   {
//     title:    trimmed text of the first <h1> ('' when absent),
//     metadata: { wcag, severity, status, stage, flow, pageUrl, auditDate },
//     sections: { [<h2> text]: plain-text/markdown rendering of what follows it },
//     images:   [{ name, url, alt }] for each <a> whose href contains "/api/gcs"
//   }
//
// NOTE(review): presumably the caller reads the IIFE's return value as the
// script's completion value — keep the IIFE as the final statement; confirm
// against the caller.
(function() {
  // Extensions that are kept as-is on an image name.
  const KNOWN_IMAGE_EXTENSION = /\.(png|jpg|jpeg|gif|svg|webp)$/i;
  // A trailing suffix that looks like a file extension (".bmp", ".tiff").
  // Deliberately does NOT match things like ".0" in "v2.0" or ".1 login form",
  // so descriptive link text containing a dot is no longer truncated.
  const EXTENSION_LIKE_SUFFIX = /\.[a-z][a-z0-9]{1,4}$/i;

  /** Returns capture group 1 of `pattern` in `text`, or '' when there is no match. */
  function firstCapture(text, pattern) {
    const match = text.match(pattern);
    return match ? match[1] : '';
  }

  /** Pulls the "Label: value" metadata fields out of the page's visible text. */
  function extractMetadata(pageText) {
    return {
      wcag: firstCapture(pageText, /WCAG:\s*([\d.]+)/),
      severity: firstCapture(pageText, /Severity:\s*(\w+)/),
      status: firstCapture(pageText, /Status:\s*(\w+)/),
      stage: firstCapture(pageText, /Stage:\s*(\w+)/),
      flow: firstCapture(pageText, /Flow:\s*([^\n]+)/).trim(),
      pageUrl: firstCapture(pageText, /Page URL:\s*([^\n]+)/).trim(),
      auditDate: firstCapture(pageText, /Audit Date:\s*([^\n]+)/).trim(),
    };
  }

  /**
   * Renders the element siblings that follow `heading`, up to the next
   * <h1>/<h2>, as text: paragraphs as blocks, list items as "- item" lines,
   * <pre> as fenced code, bare <code> as inline code, anything else as its
   * trimmed text. Returns the trimmed result ('' when nothing was collected).
   */
  function renderSectionContent(heading) {
    let content = '';
    for (
      let node = heading.nextElementSibling;
      node && node.tagName !== 'H2' && node.tagName !== 'H1';
      node = node.nextElementSibling
    ) {
      const tag = node.tagName;
      if (tag === 'P') {
        content += node.textContent.trim() + '\n\n';
      } else if (tag === 'UL' || tag === 'OL') {
        const items = Array.from(node.querySelectorAll('li'), (li) => '- ' + li.textContent.trim());
        content += items.join('\n') + '\n\n';
      } else if (tag === 'PRE' || (tag === 'CODE' && node.parentElement?.tagName === 'PRE')) {
        const codeText = node.textContent || (node.parentElement?.textContent || '');
        content += '\n```\n' + codeText + '\n```\n\n';
      } else if (tag === 'CODE') {
        content += '`' + node.textContent + '` ';
      } else if (node.textContent.trim()) {
        content += node.textContent.trim() + '\n\n';
      }
    }
    return content.trim();
  }

  /**
   * decodeURIComponent that returns the input unchanged on a malformed
   * percent-escape instead of throwing, so one bad href cannot abort the
   * whole extraction.
   */
  function safeDecode(value) {
    try {
      return decodeURIComponent(value);
    } catch (err) {
      if (err instanceof URIError) return value;
      throw err;
    }
  }

  /**
   * Chooses a filesystem-friendly file name for an evidence link.
   * Preference order: link text, <img alt>, the "evidence%2F<name>" part of
   * the href, then "image-<n>.png". A recognised image extension is kept;
   * otherwise an extension-like suffix is replaced by ".png" (or ".png" is
   * appended). Every character outside [a-zA-Z0-9.-] becomes "_".
   */
  function buildImageName(link, img, index) {
    let name = link.textContent.trim();
    if (!name && img) {
      name = img.getAttribute('alt') || '';
    }
    if (!name) {
      const evidenceMatch = link.getAttribute('href').match(/evidence%2F([^&]+)/);
      name = evidenceMatch ? safeDecode(evidenceMatch[1]) : `image-${index + 1}.png`;
    }
    if (!KNOWN_IMAGE_EXTENSION.test(name)) {
      name = name.replace(EXTENSION_LIKE_SUFFIX, '') + '.png';
    }
    return name.replace(/[^a-zA-Z0-9.-]/g, '_');
  }

  /**
   * Returns an absolute URL for `href`. Absolute http(s) URLs are passed
   * through verbatim; everything else (root-relative, path-relative,
   * protocol-relative) is resolved against the current page location.
   */
  function toAbsoluteUrl(href) {
    if (/^https?:\/\//i.test(href)) return href;
    try {
      return new URL(href, window.location.href).href;
    } catch (err) {
      // The URL constructor throws TypeError on unparsable input; fall back to
      // plain origin-prefixing rather than dropping the image.
      if (err instanceof TypeError) return window.location.origin + href;
      throw err;
    }
  }

  const data = {
    title: document.querySelector('h1')?.textContent?.trim() || '',
    metadata: extractMetadata(document.body.innerText),
    sections: {},
    images: [],
  };

  // Extract sections by finding h2 headings and their content
  for (const h2 of document.querySelectorAll('h2')) {
    const sectionName = h2.textContent.trim();
    if (!sectionName || sectionName === 'Activity') continue; // Skip Activity section

    // For Audit Evidence, just note that images are included (handled separately)
    if (sectionName === 'Audit Evidence') {
      data.sections[sectionName] = 'See images section below.';
      continue;
    }

    const content = renderSectionContent(h2);
    if (content) {
      data.sections[sectionName] = content;
    }
  }

  // Extract images
  Array.from(document.querySelectorAll('a[href*="/api/gcs"]')).forEach((link, index) => {
    const img = link.querySelector('img');
    data.images.push({
      name: buildImageName(link, img, index),
      url: toAbsoluteUrl(link.getAttribute('href')),
      alt: img ? (img.getAttribute('alt') || '') : '',
    });
  });

  return JSON.stringify(data);
})();