diff --git a/.gitignore b/.gitignore
index bdbf179..0b24114 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,6 @@
target
target/*
*.pdf
-*.DS_STORE
\ No newline at end of file
+*.DS_STORE
+
+.idea
diff --git a/pom.xml b/pom.xml
index 04150f1..3c23455 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,33 +1,47 @@
-
- 4.0.0
- fr.ksahin
- blog
- 0.0.1-SNAPSHOT
- Blog
-
-
-
-
- net.sourceforge.htmlunit
- htmlunit
- 2.19
-
-
- com.fasterxml.jackson.core
- jackson-databind
- 2.9.8
-
-
-
- org.seleniumhq.selenium
- selenium-java
- 3.8.1
-
+
+ 4.0.0
+ fr.ksahin
+ blog
+ 0.0.1-SNAPSHOT
+ Blog
-
- com.github.detro
- phantomjsdriver
- 1.2.0
-
-
-
\ No newline at end of file
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.13.0
+
+ 23
+
+
+
+
+
+
+
+ net.sourceforge.htmlunit
+ htmlunit
+ 2.70.0
+
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ 2.18.0
+
+
+
+ org.seleniumhq.selenium
+ selenium-java
+ 3.8.1
+
+
+
+ com.github.detro
+ phantomjsdriver
+ 1.2.0
+
+
+
diff --git a/src/main/java/blog/article1/Item.java b/src/main/java/blog/article1/Item.java
deleted file mode 100644
index 661edfb..0000000
--- a/src/main/java/blog/article1/Item.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package blog.article1;
-
-import java.math.BigDecimal;
-
-public class Item {
- private String title ;
- private BigDecimal price ;
- private String url ;
-
- public String getTitle() {
- return title;
- }
- public void setTitle(String title) {
- this.title = title;
- }
- public BigDecimal getPrice() {
- return price;
- }
- public void setPrice(BigDecimal price) {
- this.price = price;
- }
- public String getUrl() {
- return url;
- }
- public void setUrl(String url) {
- this.url = url;
- }
-
-
-}
diff --git a/src/main/java/blog/article1/WebScraper.java b/src/main/java/blog/article1/WebScraper.java
deleted file mode 100644
index 226bd48..0000000
--- a/src/main/java/blog/article1/WebScraper.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package blog.article1;
-
-import java.math.BigDecimal;
-import java.net.URLEncoder;
-import java.util.List;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.gargoylesoftware.htmlunit.WebClient;
-import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
-import com.gargoylesoftware.htmlunit.html.HtmlElement;
-import com.gargoylesoftware.htmlunit.html.HtmlPage;
-
-public class WebScraper {
-
- public static void main(String[] args) {
-
- String searchQuery = "iphone 6s" ;
- String baseUrl = "https://newyork.craigslist.org/" ;
- WebClient client = new WebClient();
- client.getOptions().setCssEnabled(false);
- client.getOptions().setJavaScriptEnabled(false);
- try {
- String searchUrl = baseUrl + "search/sss?sort=rel&query=" + URLEncoder.encode(searchQuery, "UTF-8");
- HtmlPage page = client.getPage(searchUrl);
-
- List items = (List) page.getByXPath("//li[@class='result-row']") ;
- if(items.isEmpty()){
- System.out.println("No items found !");
- }else{
- for(HtmlElement htmlItem : items){
- HtmlAnchor itemAnchor = ((HtmlAnchor) htmlItem.getFirstByXPath(".//p[@class='result-info']/a"));
- HtmlElement spanPrice = ((HtmlElement) htmlItem.getFirstByXPath(".//a/span[@class='result-price']")) ;
-
- // It is possible that an item doesn't have any price, we set the price to 0.0 in this case
- String itemPrice = spanPrice == null ? "0.0" : spanPrice.asText() ;
-
- Item item = new Item();
- item.setTitle(itemAnchor.asText());
- item.setUrl( baseUrl + itemAnchor.getHrefAttribute());
-
- item.setPrice(new BigDecimal(itemPrice.replace("$", "")));
-
-
- ObjectMapper mapper = new ObjectMapper();
- String jsonString = mapper.writeValueAsString(item) ;
-
- System.out.println(jsonString);
- }
- }
- } catch(Exception e){
- e.printStackTrace();
- }
-
- }
-
-}
diff --git a/src/main/java/blog/article1/e1_plain/E1_Plain.java b/src/main/java/blog/article1/e1_plain/E1_Plain.java
new file mode 100644
index 0000000..4574b03
--- /dev/null
+++ b/src/main/java/blog/article1/e1_plain/E1_Plain.java
@@ -0,0 +1,63 @@
+package blog.article1.e1_plain;
+
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
+import com.gargoylesoftware.htmlunit.html.HtmlElement;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+
+import java.io.IOException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Runs a fixed search against newyork.craigslist.org and prints every listing found as a line of plain text.
+ */
+class E1_Plain {
+
+    /** Placeholder printed when a listing has no price or no location element. */
+    private static final String NOT_AVAILABLE = "N/A";
+
+    /**
+     * Downloads the search page and prints name, price, location and URL of each listing.
+     *
+     * @param args ignored
+     * @throws IOException if the search page cannot be downloaded
+     */
+    public static void main(String[] args) throws IOException {
+        var searchQuery = "iphone 13";
+        var searchUrl = "https://newyork.craigslist.org/search/moa?query=%s".formatted(URLEncoder.encode(searchQuery, StandardCharsets.UTF_8));
+
+        System.out.println("searchUrl = " + searchUrl);
+
+        try (var client = new WebClient()) {
+            client.getOptions().setCssEnabled(false);
+            client.getOptions().setJavaScriptEnabled(false);
+            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
+            client.getOptions().setThrowExceptionOnScriptError(false);
+
+            HtmlPage page = client.getPage(searchUrl);
+            for (var htmlItem : page.<HtmlElement>getByXPath("//li[contains(@class,'cl-static-search-result')]")) {
+                HtmlAnchor itemAnchor = htmlItem.getFirstByXPath(".//a");
+                HtmlElement itemTitle = htmlItem.getFirstByXPath(".//div[@class='title']");
+                HtmlElement itemPrice = htmlItem.getFirstByXPath(".//div[@class='price']");
+                HtmlElement itemLocation = htmlItem.getFirstByXPath(".//div[@class='location']");
+
+                // Rows without a link or a title are skipped.
+                if (itemAnchor == null || itemTitle == null) {
+                    continue;
+                }
+
+                System.out.printf("Name: %s, Price: %s, Location: %s, URL: %s%n",
+                        itemTitle.asNormalizedText(),
+                        textOrDefault(itemPrice),
+                        textOrDefault(itemLocation),
+                        itemAnchor.getHrefAttribute());
+            }
+        }
+    }
+
+    /** Returns the element's normalized text, or {@value #NOT_AVAILABLE} when the element is absent. */
+    private static String textOrDefault(HtmlElement element) {
+        return (element == null) ? NOT_AVAILABLE : element.asNormalizedText();
+    }
+}
diff --git a/src/main/java/blog/article1/e2_json/E2_ResultsAsJson.java b/src/main/java/blog/article1/e2_json/E2_ResultsAsJson.java
new file mode 100644
index 0000000..174849d
--- /dev/null
+++ b/src/main/java/blog/article1/e2_json/E2_ResultsAsJson.java
@@ -0,0 +1,82 @@
+package blog.article1.e2_json;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
+import com.gargoylesoftware.htmlunit.html.HtmlElement;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * Runs a fixed search against newyork.craigslist.org and prints every listing found as a JSON object.
+ */
+class E2_ResultsAsJson {
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    /**
+     * Downloads the search page, converts each listing to an {@link Item} and prints it as JSON.
+     *
+     * @param args ignored
+     * @throws IOException if the page cannot be downloaded or an item cannot be serialized
+     */
+    public static void main(String[] args) throws IOException {
+        var searchQuery = "iphone 13";
+        var searchUrl = "https://newyork.craigslist.org/search/moa?query=%s".formatted(URLEncoder.encode(searchQuery, StandardCharsets.UTF_8));
+
+        System.out.println("searchUrl = " + searchUrl);
+
+        try (var client = new WebClient()) {
+            client.getOptions().setCssEnabled(false);
+            client.getOptions().setJavaScriptEnabled(false);
+            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
+            client.getOptions().setThrowExceptionOnScriptError(false);
+
+            HtmlPage page = client.getPage(searchUrl);
+            for (var htmlItem : page.<HtmlElement>getByXPath("//li[contains(@class,'cl-static-search-result')]")) {
+                HtmlAnchor itemAnchor = htmlItem.getFirstByXPath(".//a");
+                HtmlElement itemTitle = htmlItem.getFirstByXPath(".//div[@class='title']");
+                HtmlElement itemPrice = htmlItem.getFirstByXPath(".//div[@class='price']");
+                HtmlElement itemLocation = htmlItem.getFirstByXPath(".//div[@class='location']");
+
+                // Rows without a link or a title are skipped.
+                if (itemAnchor == null || itemTitle == null) {
+                    continue;
+                }
+
+                var item = new Item(
+                        itemTitle.asNormalizedText(),
+                        parsePrice(itemPrice),
+                        (itemLocation == null) ? "N/A" : itemLocation.asNormalizedText(),
+                        itemAnchor.getHrefAttribute());
+                System.out.println("item = " + OBJECT_MAPPER.writeValueAsString(item));
+            }
+        }
+    }
+
+    /**
+     * Converts the text of a price element (for example {@code $1,200}) to a number.
+     * <p>
+     * Commas are dropped, not turned into a decimal point: replacing them with {@code .} would read
+     * {@code $1,200} as 1.200.
+     *
+     * @param priceElement the listing's price element, or {@code null} when the listing has none
+     * @return the parsed price, or {@link BigDecimal#ZERO} when the element is missing or its text is blank
+     * @throws NumberFormatException if the remaining text is not a valid decimal number
+     */
+    private static BigDecimal parsePrice(HtmlElement priceElement) {
+        if (priceElement == null) {
+            return BigDecimal.ZERO;
+        }
+        var digits = priceElement.asNormalizedText().replace("$", "").replace(",", "").strip();
+        return digits.isEmpty() ? BigDecimal.ZERO : new BigDecimal(digits);
+    }
+
+    /** One search result: title, price, location text and link of a listing. */
+    record Item(String title, BigDecimal price, String location, String url) {
+    }
+}
diff --git a/src/main/java/blog/article1/e3_multiple_cities/E3_MultipleCities.java b/src/main/java/blog/article1/e3_multiple_cities/E3_MultipleCities.java
new file mode 100644
index 0000000..34bc2ee
--- /dev/null
+++ b/src/main/java/blog/article1/e3_multiple_cities/E3_MultipleCities.java
@@ -0,0 +1,87 @@
+package blog.article1.e3_multiple_cities;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
+import com.gargoylesoftware.htmlunit.html.HtmlElement;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+/**
+ * Runs the same fixed search against several Craigslist city sites and prints every listing as a JSON object.
+ */
+class E3_MultipleCities {
+
+    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+
+    /**
+     * Queries each city in turn with one shared client and prints each listing found as JSON.
+     *
+     * @param args ignored
+     * @throws IOException if a page cannot be downloaded or an item cannot be serialized
+     */
+    public static void main(String[] args) throws IOException {
+        var searchQuery = "iphone 13";
+        var cities = List.of("newyork", "boston", "washingtondc");
+
+        try (var client = new WebClient()) {
+            client.getOptions().setCssEnabled(false);
+            client.getOptions().setJavaScriptEnabled(false);
+            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
+            client.getOptions().setThrowExceptionOnScriptError(false);
+
+            for (String city : cities) {
+                var searchUrl = "https://%s.craigslist.org/search/moa?query=%s".formatted(city, URLEncoder.encode(searchQuery, StandardCharsets.UTF_8));
+
+                System.out.println("searchUrl = " + searchUrl);
+
+                HtmlPage page = client.getPage(searchUrl);
+                for (var htmlItem : page.<HtmlElement>getByXPath("//li[contains(@class,'cl-static-search-result')]")) {
+                    HtmlAnchor itemAnchor = htmlItem.getFirstByXPath(".//a");
+                    HtmlElement itemTitle = htmlItem.getFirstByXPath(".//div[@class='title']");
+                    HtmlElement itemPrice = htmlItem.getFirstByXPath(".//div[@class='price']");
+                    HtmlElement itemLocation = htmlItem.getFirstByXPath(".//div[@class='location']");
+
+                    // Rows without a link or a title are skipped.
+                    if (itemAnchor == null || itemTitle == null) {
+                        continue;
+                    }
+
+                    var item = new Item(
+                            itemTitle.asNormalizedText(),
+                            parsePrice(itemPrice),
+                            (itemLocation == null) ? "N/A" : itemLocation.asNormalizedText(),
+                            itemAnchor.getHrefAttribute());
+                    System.out.println("item = " + OBJECT_MAPPER.writeValueAsString(item));
+                }
+            }
+        }
+    }
+
+    /**
+     * Converts the text of a price element (for example {@code $1,200}) to a number.
+     * <p>
+     * Commas are dropped, not turned into a decimal point: replacing them with {@code .} would read
+     * {@code $1,200} as 1.200.
+     *
+     * @param priceElement the listing's price element, or {@code null} when the listing has none
+     * @return the parsed price, or {@link BigDecimal#ZERO} when the element is missing or its text is blank
+     * @throws NumberFormatException if the remaining text is not a valid decimal number
+     */
+    private static BigDecimal parsePrice(HtmlElement priceElement) {
+        if (priceElement == null) {
+            return BigDecimal.ZERO;
+        }
+        var digits = priceElement.asNormalizedText().replace("$", "").replace(",", "").strip();
+        return digits.isEmpty() ? BigDecimal.ZERO : new BigDecimal(digits);
+    }
+
+    /** One search result: title, price, location text and link of a listing. */
+    record Item(String title, BigDecimal price, String location, String url) {
+    }
+}
diff --git a/src/main/java/blog/article1/e4_multiple_outputs/E4_MultipleOutputTypes.java b/src/main/java/blog/article1/e4_multiple_outputs/E4_MultipleOutputTypes.java
new file mode 100644
index 0000000..ba4105f
--- /dev/null
+++ b/src/main/java/blog/article1/e4_multiple_outputs/E4_MultipleOutputTypes.java
@@ -0,0 +1,162 @@
+package blog.article1.e4_multiple_outputs;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
+import com.gargoylesoftware.htmlunit.html.HtmlElement;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.math.BigDecimal;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Runs a fixed search against several Craigslist city sites and prints all results as JSON or CSV.
+ */
+class E4_MultipleOutputTypes {
+
+    /**
+     * Fetches the listings for every city, then prints them in the format named by the single argument.
+     *
+     * @param args exactly one argument, {@code json} or {@code csv} (case-insensitive); anything else
+     *             prints {@code unknown output type}
+     */
+    public static void main(String[] args) {
+        timed(() -> {
+            var outputType = args.length == 1 ? args[0].toLowerCase() : "";
+            var searchQuery = "iphone 13";
+            var cities = List.of("newyork", "boston", "washingtondc", "losangeles", "chicago", "sanfrancisco", "seattle", "miami", "dallas", "denver");
+
+            var results = fetchCities(cities, searchQuery);
+
+            switch (outputType) {
+                case "json" -> asJson(results);
+                case "csv" -> asCsv(results);
+                default -> System.out.println("unknown output type");
+            }
+        });
+    }
+
+    /** Runs {@code action} and prints how long it took, in milliseconds. */
+    private static void timed(Runnable action) {
+        // nanoTime is monotonic, so the measurement is unaffected by wall-clock adjustments.
+        var start = System.nanoTime();
+        action.run();
+        var elapsedMillis = (System.nanoTime() - start) / 1_000_000;
+        System.out.printf("time = %dms%n", elapsedMillis);
+    }
+
+    /** Prints the whole city-to-listings map as one JSON document. */
+    private static void asJson(Map<String, List<Item>> results) {
+        var objectMapper = new ObjectMapper();
+        try {
+            System.out.println(objectMapper.writeValueAsString(results));
+        } catch (JsonProcessingException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /** Prints a header row followed by one CSV row per listing. */
+    private static void asCsv(Map<String, List<Item>> results) {
+        System.out.println("city,title,price,location,url");
+        for (var entry : results.entrySet()) {
+            for (Item item : entry.getValue()) {
+                System.out.println(String.join(",",
+                        csvField(entry.getKey()),
+                        csvField(item.title()),
+                        csvField(item.price().toPlainString()),
+                        csvField(item.location()),
+                        csvField(item.url())));
+            }
+        }
+    }
+
+    /**
+     * Escapes one CSV field: a value containing a comma, a double quote or a line break is wrapped in
+     * double quotes, with embedded double quotes doubled. Other values are returned unchanged.
+     */
+    private static String csvField(String value) {
+        boolean needsQuoting = value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r");
+        return needsQuoting ? "\"" + value.replace("\"", "\"\"") + "\"" : value;
+    }
+
+    /**
+     * Queries every city one after another with a single client.
+     *
+     * @param cities      Craigslist sub-domain names, e.g. {@code newyork}
+     * @param searchQuery the search text; it is URL-encoded before use
+     * @return the listings found, keyed by city; a city without listings has no entry
+     * @throws UncheckedIOException if a search page cannot be downloaded
+     */
+    private static Map<String, List<Item>> fetchCities(List<String> cities, String searchQuery) {
+        try (var client = new WebClient()) {
+            client.getOptions().setCssEnabled(false);
+            client.getOptions().setJavaScriptEnabled(false);
+            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
+            client.getOptions().setThrowExceptionOnScriptError(false);
+
+            Map<String, List<Item>> items = new HashMap<>();
+
+            for (String city : cities) {
+                var searchUrl = "https://%s.craigslist.org/search/moa?query=%s".formatted(city, URLEncoder.encode(searchQuery, StandardCharsets.UTF_8));
+
+                System.out.println("searchUrl = " + searchUrl);
+
+                try {
+                    HtmlPage page = client.getPage(searchUrl);
+                    for (var htmlItem : page.<HtmlElement>getByXPath("//li[contains(@class,'cl-static-search-result')]")) {
+                        HtmlAnchor itemAnchor = htmlItem.getFirstByXPath(".//a");
+                        HtmlElement itemTitle = htmlItem.getFirstByXPath(".//div[@class='title']");
+                        HtmlElement itemPrice = htmlItem.getFirstByXPath(".//div[@class='price']");
+                        HtmlElement itemLocation = htmlItem.getFirstByXPath(".//div[@class='location']");
+
+                        // Rows without a link or a title are skipped.
+                        if (itemAnchor == null || itemTitle == null) {
+                            continue;
+                        }
+
+                        items.computeIfAbsent(city, _ -> new ArrayList<>())
+                                .add(new Item(
+                                        itemTitle.asNormalizedText(),
+                                        parsePrice(itemPrice),
+                                        (itemLocation == null) ? "N/A" : itemLocation.asNormalizedText(),
+                                        itemAnchor.getHrefAttribute()));
+                    }
+                } catch (IOException e) {
+                    throw new UncheckedIOException("Failed to fetch " + searchUrl, e);
+                }
+            }
+
+            return items;
+        }
+    }
+
+    /**
+     * Converts the text of a price element (for example {@code $1,200}) to a number.
+     * <p>
+     * Commas are dropped, not turned into a decimal point: replacing them with {@code .} would read
+     * {@code $1,200} as 1.200.
+     *
+     * @param priceElement the listing's price element, or {@code null} when the listing has none
+     * @return the parsed price, or {@link BigDecimal#ZERO} when the element is missing or its text is blank
+     * @throws NumberFormatException if the remaining text is not a valid decimal number
+     */
+    private static BigDecimal parsePrice(HtmlElement priceElement) {
+        if (priceElement == null) {
+            return BigDecimal.ZERO;
+        }
+        var digits = priceElement.asNormalizedText().replace("$", "").replace(",", "").strip();
+        return digits.isEmpty() ? BigDecimal.ZERO : new BigDecimal(digits);
+    }
+
+    /** One search result: title, price, location text and link of a listing. */
+    record Item(String title, BigDecimal price, String location, String url) {
+    }
+}
diff --git a/src/main/java/blog/article1/e5_parallel/E5_ParallelExecution.java b/src/main/java/blog/article1/e5_parallel/E5_ParallelExecution.java
new file mode 100644
index 0000000..92876d4
--- /dev/null
+++ b/src/main/java/blog/article1/e5_parallel/E5_ParallelExecution.java
@@ -0,0 +1,180 @@
+package blog.article1.e5_parallel;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.gargoylesoftware.htmlunit.WebClient;
+import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
+import com.gargoylesoftware.htmlunit.html.HtmlElement;
+import com.gargoylesoftware.htmlunit.html.HtmlPage;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.math.BigDecimal;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
+
+/**
+ * Runs a fixed search against several Craigslist city sites concurrently and prints all results as JSON or CSV.
+ */
+class E5_ParallelExecution {
+
+    /**
+     * Fetches the listings for every city, then prints them in the format named by the single argument.
+     *
+     * @param args exactly one argument, {@code json} or {@code csv} (case-insensitive); anything else
+     *             prints {@code unknown output type}
+     */
+    public static void main(String[] args) {
+        timed(() -> {
+            var outputType = args.length == 1 ? args[0].toLowerCase() : "";
+            var searchQuery = "iphone 13";
+            var cities = List.of("newyork", "boston", "washingtondc", "losangeles", "chicago", "sanfrancisco", "seattle", "miami", "dallas", "denver");
+
+            var results = fetchCities(cities, searchQuery);
+
+            switch (outputType) {
+                case "json" -> asJson(results);
+                case "csv" -> asCsv(results);
+                default -> System.out.println("unknown output type");
+            }
+        });
+    }
+
+    /** Prints the whole city-to-listings map as one JSON document. */
+    private static void asJson(Map<String, List<Item>> results) {
+        var objectMapper = new ObjectMapper();
+        try {
+            System.out.println(objectMapper.writeValueAsString(results));
+        } catch (JsonProcessingException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    /** Prints a header row followed by one CSV row per listing. */
+    private static void asCsv(Map<String, List<Item>> results) {
+        System.out.println("city,title,price,location,url");
+        for (var entry : results.entrySet()) {
+            for (Item item : entry.getValue()) {
+                System.out.println(String.join(",",
+                        csvField(entry.getKey()),
+                        csvField(item.title()),
+                        csvField(item.price().toPlainString()),
+                        csvField(item.location()),
+                        csvField(item.url())));
+            }
+        }
+    }
+
+    /**
+     * Escapes one CSV field: a value containing a comma, a double quote or a line break is wrapped in
+     * double quotes, with embedded double quotes doubled. Other values are returned unchanged.
+     */
+    private static String csvField(String value) {
+        boolean needsQuoting = value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r");
+        return needsQuoting ? "\"" + value.replace("\"", "\"\"") + "\"" : value;
+    }
+
+    /**
+     * Starts one asynchronous fetch per city on a virtual thread and waits for all of them.
+     *
+     * @param cities      Craigslist sub-domain names, e.g. {@code newyork}; must not contain duplicates
+     * @param searchQuery the search text; it is URL-encoded before use
+     * @return the listings found, keyed by city (a city without listings maps to an empty list)
+     * @throws java.util.concurrent.CompletionException if any fetch fails
+     */
+    private static Map<String, List<Item>> fetchCities(List<String> cities, String searchQuery) {
+        // One executor for all tasks; try-with-resources closes it once every task has finished.
+        try (var executor = Executors.newVirtualThreadPerTaskExecutor()) {
+            // Collect to a list first so that every task is submitted before the first join() blocks.
+            var pending = cities.stream()
+                    .map(city -> Map.entry(city, CompletableFuture.supplyAsync(() -> fetchCity(city, searchQuery), executor)))
+                    .toList();
+
+            return pending.stream()
+                    .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().join()));
+        }
+    }
+
+    /**
+     * Downloads one city's search page and converts its listings to {@link Item}s.
+     * <p>
+     * Each call opens its own {@link WebClient}: HtmlUnit documents a WebClient instance as not
+     * thread-safe, and this method is invoked from several threads at once.
+     *
+     * @param city        Craigslist sub-domain name
+     * @param searchQuery the search text; it is URL-encoded before use
+     * @return the listings found on the page, possibly empty
+     * @throws UncheckedIOException if the page cannot be downloaded
+     */
+    private static List<Item> fetchCity(String city, String searchQuery) {
+        var searchUrl = "https://%s.craigslist.org/search/moa?query=%s".formatted(city, URLEncoder.encode(searchQuery, StandardCharsets.UTF_8));
+
+        System.out.println("fetching: " + searchUrl);
+
+        try (var client = new WebClient()) {
+            client.getOptions().setCssEnabled(false);
+            client.getOptions().setJavaScriptEnabled(false);
+            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
+            client.getOptions().setThrowExceptionOnScriptError(false);
+
+            var results = new ArrayList<Item>();
+            HtmlPage page = client.getPage(searchUrl);
+            for (var htmlItem : page.<HtmlElement>getByXPath("//li[contains(@class,'cl-static-search-result')]")) {
+                HtmlAnchor itemAnchor = htmlItem.getFirstByXPath(".//a");
+                HtmlElement itemTitle = htmlItem.getFirstByXPath(".//div[@class='title']");
+                HtmlElement itemPrice = htmlItem.getFirstByXPath(".//div[@class='price']");
+                HtmlElement itemLocation = htmlItem.getFirstByXPath(".//div[@class='location']");
+
+                // Rows without a link or a title are skipped.
+                if (itemAnchor == null || itemTitle == null) {
+                    continue;
+                }
+
+                results.add(new Item(
+                        itemTitle.asNormalizedText(),
+                        parsePrice(itemPrice),
+                        (itemLocation == null) ? "N/A" : itemLocation.asNormalizedText(),
+                        itemAnchor.getHrefAttribute()));
+            }
+            return results;
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to fetch " + searchUrl, e);
+        }
+    }
+
+    /**
+     * Converts the text of a price element (for example {@code $1,200}) to a number.
+     * <p>
+     * Commas are dropped, not turned into a decimal point: replacing them with {@code .} would read
+     * {@code $1,200} as 1.200.
+     *
+     * @param priceElement the listing's price element, or {@code null} when the listing has none
+     * @return the parsed price, or {@link BigDecimal#ZERO} when the element is missing or its text is blank
+     * @throws NumberFormatException if the remaining text is not a valid decimal number
+     */
+    private static BigDecimal parsePrice(HtmlElement priceElement) {
+        if (priceElement == null) {
+            return BigDecimal.ZERO;
+        }
+        var digits = priceElement.asNormalizedText().replace("$", "").replace(",", "").strip();
+        return digits.isEmpty() ? BigDecimal.ZERO : new BigDecimal(digits);
+    }
+
+    /** One search result: title, price, location text and link of a listing. */
+    record Item(String title, BigDecimal price, String location, String url) {
+    }
+
+    /** Runs {@code action} and prints how long it took, in milliseconds. */
+    private static void timed(Runnable action) {
+        // nanoTime is monotonic, so the measurement is unaffected by wall-clock adjustments.
+        var start = System.nanoTime();
+        action.run();
+        var elapsedMillis = (System.nanoTime() - start) / 1_000_000;
+        System.out.printf("time: %dms%n", elapsedMillis);
+    }
+}
diff --git a/src/main/java/blog/article3/BillDownloader.java b/src/main/java/blog/article3/BillDownloader.java
index 7bdb091..d05845f 100644
--- a/src/main/java/blog/article3/BillDownloader.java
+++ b/src/main/java/blog/article3/BillDownloader.java
@@ -17,7 +17,6 @@
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlTable;
import com.gargoylesoftware.htmlunit.html.HtmlTableRow;
-import com.gargoylesoftware.htmlunit.javascript.host.URL;
import blog.article2.Authenticator;
@@ -31,21 +30,21 @@ public static void main(String[] args) {
WebClient client = Authenticator.autoLogin(baseUrl + "/login", login, password);
HtmlPage page = client.getPage("https://cloud.digitalocean.com/settings/billing");
- if(page.asText().contains("You need to sign in for access to this page")){
+ if(page.asNormalizedText().contains("You need to sign in for access to this page")){
throw new Exception(String.format("Error during login on %s , check your credentials", baseUrl));
}
List bills = new ArrayList();
HtmlTable billsTable = (HtmlTable) page.getFirstByXPath("//table[@class='listing Billing--history']");
for(HtmlTableRow row : billsTable.getBodies().get(0).getRows()){
- String label = row.getCell(1).asText();
+ String label = row.getCell(1).asNormalizedText();
// We only want the invoice row, not the payment one
if(!label.contains("Invoice")){
continue ;
}
- Date date = new SimpleDateFormat("MMMM d, yyyy", Locale.ENGLISH).parse(row.getCell(0).asText());
- BigDecimal amount =new BigDecimal(row.getCell(2).asText().replace("$", ""));
+ Date date = new SimpleDateFormat("MMMM d, yyyy", Locale.ENGLISH).parse(row.getCell(0).asNormalizedText());
+ BigDecimal amount =new BigDecimal(row.getCell(2).asNormalizedText().replace("$", ""));
String url = ((HtmlAnchor) row.getCell(3).getFirstChild()).getHrefAttribute();
Bill bill = new Bill(label, amount, date, url);
diff --git a/src/main/java/blog/article6/SchemaScraper.java b/src/main/java/blog/article6/SchemaScraper.java
index b04e258..e1b9dec 100644
--- a/src/main/java/blog/article6/SchemaScraper.java
+++ b/src/main/java/blog/article6/SchemaScraper.java
@@ -5,7 +5,6 @@
import java.net.MalformedURLException;
import java.net.URL;
-import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.WebClient;
@@ -14,7 +13,7 @@
public class SchemaScraper {
- public static void main(String[] args) throws FailingHttpStatusCodeException, MalformedURLException, IOException {
+ public static void main(String[] args) throws FailingHttpStatusCodeException, IOException {
WebClient client = new WebClient();
client.getOptions().setCssEnabled(false);
client.getOptions().setJavaScriptEnabled(false);
@@ -27,10 +26,10 @@ public static void main(String[] args) throws FailingHttpStatusCodeException, Ma
.getAttribute("src"));
HtmlElement offers = ((HtmlElement) productNode.getFirstByXPath("./span[@itemprop='offers']"));
- BigDecimal price = new BigDecimal(((HtmlElement) offers.getFirstByXPath("./span[@itemprop='price']")).asText());
- String productName = (((HtmlElement) productNode.getFirstByXPath("./span[@itemprop='name']")).asText());
+ BigDecimal price = new BigDecimal(((HtmlElement) offers.getFirstByXPath("./span[@itemprop='price']")).asNormalizedText());
+ String productName = (((HtmlElement) productNode.getFirstByXPath("./span[@itemprop='name']")).asNormalizedText());
String currency = (((HtmlElement) offers.getFirstByXPath("./*[@itemprop='priceCurrency']")).getAttribute("content"));
- String productSKU = (((HtmlElement) productNode.getFirstByXPath("./span[@itemprop='sku']")).asText());
+ String productSKU = (((HtmlElement) productNode.getFirstByXPath("./span[@itemprop='sku']")).asNormalizedText());
Product product = new Product(price, productName, productSKU, imageUrl, currency);
ObjectMapper mapper = new ObjectMapper();