|
| 1 | +package org.greencodeinitiative.tools.exporter.infra; |
| 2 | + |
| 3 | +import jakarta.json.Json; |
| 4 | +import jakarta.json.JsonMergePatch; |
| 5 | +import jakarta.json.JsonObject; |
| 6 | +import jakarta.json.JsonObjectBuilder; |
| 7 | +import jakarta.json.JsonReader; |
| 8 | +import jakarta.json.JsonValue; |
| 9 | +import jakarta.json.JsonWriter; |
| 10 | +import org.greencodeinitiative.tools.exporter.domain.Rule; |
| 11 | +import org.jsoup.Jsoup; |
| 12 | + |
| 13 | +import java.io.IOException; |
| 14 | +import java.nio.file.Files; |
| 15 | +import java.nio.file.Path; |
| 16 | +import java.nio.file.StandardCopyOption; |
| 17 | +import java.text.Normalizer; |
| 18 | +import java.util.List; |
| 19 | +import java.util.Locale; |
| 20 | +import java.util.Map; |
| 21 | +import java.util.Optional; |
| 22 | +import java.util.TreeMap; |
| 23 | +import java.util.stream.Collectors; |
| 24 | +import java.util.stream.Stream; |
| 25 | + |
| 26 | +import static java.lang.System.Logger.Level.DEBUG; |
| 27 | + |
| 28 | +public class MetadataWriter implements Runnable { |
| 29 | + private static final System.Logger LOGGER = System.getLogger("MetadataWriter"); |
| 30 | + |
| 31 | + private final Path sourceDir; |
| 32 | + private final Path targetDir; |
| 33 | + private final Path indexFilepath; |
| 34 | + private final Map<String, String> specificationInfo; |
| 35 | + private final int minTermLength; |
| 36 | + |
| 37 | + public MetadataWriter( |
| 38 | + Path sourceDir, |
| 39 | + Path targetDir, |
| 40 | + Map<String, String> specificationInfo, |
| 41 | + Path indexFilepath, |
| 42 | + Integer minTermLength |
| 43 | + ) { |
| 44 | + this.sourceDir = sourceDir; |
| 45 | + this.targetDir = targetDir; |
| 46 | + this.indexFilepath = indexFilepath; |
| 47 | + this.specificationInfo = specificationInfo; |
| 48 | + this.minTermLength = minTermLength; |
| 49 | + } |
| 50 | + |
| 51 | + @Override |
| 52 | + public void run() { |
| 53 | + var rulesMap = new TreeMap<String, JsonObjectBuilder>(); |
| 54 | + |
| 55 | + getResourcesToCopy().forEach(rule -> { |
| 56 | + var rulesByLanguage = rulesMap.computeIfAbsent(rule.ruleKey(), k -> Json.createObjectBuilder()); |
| 57 | + var resultMetadata = mergeOrCopyJsonMetadata(rule.metadata(), rule.specificMetadata(), rule.getMetadataTargetPath(targetDir)); |
| 58 | + |
| 59 | + var htmlDescriptionRelativePath = this.indexFilepath.getParent() |
| 60 | + .relativize(rule.getHtmlDescriptionTargetPath(targetDir)) |
| 61 | + .toString(); |
| 62 | + var resultMetadataBuilder = Json.createObjectBuilder(resultMetadata); |
| 63 | + resultMetadataBuilder.add("key", rule.ruleKey()); |
| 64 | + resultMetadataBuilder.add("language", rule.language()); |
| 65 | + resultMetadataBuilder.add("severity", resultMetadata.getString("defaultSeverity").toUpperCase(Locale.ENGLISH)); |
| 66 | + resultMetadataBuilder.add("htmlDescription", htmlDescriptionRelativePath); |
| 67 | + resultMetadataBuilder.add("terms", extractTermsFromHtmlFile(rule.htmlDescription())); |
| 68 | + |
| 69 | + rulesByLanguage.add(rule.language(), resultMetadataBuilder); |
| 70 | + copyFile(rule.htmlDescription(), rule.getHtmlDescriptionTargetPath(targetDir)); |
| 71 | + }); |
| 72 | + |
| 73 | + writeIndexFile(rulesMap); |
| 74 | + } |
| 75 | + |
| 76 | + private String extractTermsFromHtmlFile(Path htmlFile) { |
| 77 | + try { |
| 78 | + var textContent = Jsoup.parse(htmlFile).select("body").text(); |
| 79 | + return Stream |
| 80 | + .of( |
| 81 | + removeDiacritics(textContent) |
| 82 | + .toLowerCase(Locale.ENGLISH) |
| 83 | + .replaceAll("[^a-zA-Z0-9]", " ") |
| 84 | + .trim() |
| 85 | + .split("[\\s\\n\\r]+") |
| 86 | + ) |
| 87 | + .filter(term -> term.length() >= minTermLength) |
| 88 | + .distinct() |
| 89 | + .sorted() |
| 90 | + .collect(Collectors.joining(" ")); |
| 91 | + } catch (IOException e) { |
| 92 | + throw new ProcessException("Unable to parse HTML file: " + htmlFile, e); |
| 93 | + } |
| 94 | + } |
| 95 | + |
| 96 | + private String removeDiacritics(String text) { |
| 97 | + Normalizer.normalize(text, Normalizer.Form.NFKD); |
| 98 | + return text.replaceAll("[^\\p{ASCII}]", "").replaceAll("\\p{M}", ""); |
| 99 | + } |
| 100 | + |
| 101 | + private void writeIndexFile(TreeMap<String, JsonObjectBuilder> rulesMap) { |
| 102 | + if (indexFilepath == null) { |
| 103 | + return; |
| 104 | + } |
| 105 | + var rules = Json.createObjectBuilder(); |
| 106 | + rulesMap.forEach(rules::add); |
| 107 | + var result = Json.createObjectBuilder(); |
| 108 | + |
| 109 | + var specification = Json.createObjectBuilder(); |
| 110 | + this.specificationInfo.forEach(specification::add); |
| 111 | + result.add("specification", specification); |
| 112 | + |
| 113 | + result.add("rules", rules); |
| 114 | + |
| 115 | + try (var resultJsonWriter = Json.createWriter(Files.newBufferedWriter(indexFilepath))) { |
| 116 | + resultJsonWriter.write(result.build()); |
| 117 | + } catch (IOException e) { |
| 118 | + throw new ProcessException("cannot write file: " + indexFilepath, e); |
| 119 | + } |
| 120 | + } |
| 121 | + |
| 122 | + private List<Rule> getResourcesToCopy() { |
| 123 | + try (Stream<Path> stream = Files.walk(sourceDir)) { |
| 124 | + return stream |
| 125 | + .filter(Files::isRegularFile) |
| 126 | + .map(Rule::createFromHtmlDescription) |
| 127 | + .filter(Optional::isPresent) |
| 128 | + .map(Optional::get) |
| 129 | + .collect(Collectors.toList()); |
| 130 | + } catch (IOException e) { |
| 131 | + throw new IllegalStateException(e); |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + private JsonObject mergeOrCopyJsonMetadata(Path source, Path merge, Path target) { |
| 136 | + try { |
| 137 | + Files.createDirectories(target.getParent()); |
| 138 | + } catch (IOException e) { |
| 139 | + throw new ProcessException("cannot create directory: " + target.getParent(), e); |
| 140 | + } |
| 141 | + if (Files.isRegularFile(merge)) { |
| 142 | + return mergeJsonFile(source, merge, target).asJsonObject(); |
| 143 | + } else { |
| 144 | + copyFile(source, target); |
| 145 | + try (JsonReader targetJsonReader = Json.createReader(Files.newBufferedReader(target))) { |
| 146 | + return targetJsonReader.readObject(); |
| 147 | + } catch (IOException e) { |
| 148 | + throw new ProcessException("cannot process source: " + source, e); |
| 149 | + } |
| 150 | + } |
| 151 | + } |
| 152 | + |
| 153 | + private JsonValue mergeJsonFile(Path source, Path merge, Path target) { |
| 154 | + LOGGER.log(DEBUG, "Merge: {0} and {1} -> {2}", source, merge, target); |
| 155 | + |
| 156 | + try ( |
| 157 | + JsonReader sourceJsonReader = Json.createReader(Files.newBufferedReader(source)); |
| 158 | + JsonReader mergeJsonReader = Json.createReader(Files.newBufferedReader(merge)); |
| 159 | + JsonWriter resultJsonWriter = Json.createWriter(Files.newBufferedWriter(target)); |
| 160 | + ) { |
| 161 | + Files.createDirectories(target.getParent()); |
| 162 | + |
| 163 | + JsonObject sourceJson = sourceJsonReader.readObject(); |
| 164 | + JsonObject mergeJson = mergeJsonReader.readObject(); |
| 165 | + |
| 166 | + JsonMergePatch mergePatch = Json.createMergePatch(mergeJson); |
| 167 | + JsonValue result = mergePatch.apply(sourceJson); |
| 168 | + |
| 169 | + resultJsonWriter.write(result); |
| 170 | + return result; |
| 171 | + } catch (IOException e) { |
| 172 | + throw new ProcessException("cannot process source: " + source, e); |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + private void copyFile(Path source, Path target) { |
| 177 | + try { |
| 178 | + LOGGER.log(DEBUG, "Copy: {0} -> {1}", source, target); |
| 179 | + Files.copy(source, target, StandardCopyOption.REPLACE_EXISTING); |
| 180 | + } catch (IOException e) { |
| 181 | + throw new ProcessException("unable to copy '" + source + "' to '" + target + "'", e); |
| 182 | + } |
| 183 | + } |
| 184 | +} |
0 commit comments