Skip to content

Commit b545699

Browse files
init. extract logo and favicon automatically
1 parent 0477ea9 commit b545699

File tree

7 files changed

+434
-40
lines changed

7 files changed

+434
-40
lines changed

backend/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ dependencies {
4343
implementation group: 'org.apache.xmlgraphics', name: 'batik-transcoder', version: '1.17'
4444
implementation group: 'com.github.gotson', name: 'webp-imageio', version: '0.2.2'
4545

46+
implementation group: 'org.jsoup', name: 'jsoup', version: '1.18.1'
47+
4648
// For dotenv file
4749
implementation group: 'io.github.cdimascio', name: 'dotenv-java', version: '3.0.1'
4850

backend/src/main/java/io/easystartup/suggestfeature/dto/OrganizationRequest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ public class OrganizationRequest {
1414
@NotBlank
1515
private String organizationSlug;
1616

17+
private String favicon;
18+
private String logo;
19+
private String websiteUrl;
20+
1721
public OrganizationRequest() {
1822
}
1923

@@ -32,4 +36,28 @@ public String getOrganizationSlug() {
3236
public void setOrganizationSlug(String organizationSlug) {
3337
this.organizationSlug = organizationSlug;
3438
}
39+
40+
public String getFavicon() {
41+
return favicon;
42+
}
43+
44+
public void setFavicon(String favicon) {
45+
this.favicon = favicon;
46+
}
47+
48+
public String getLogo() {
49+
return logo;
50+
}
51+
52+
public void setLogo(String logo) {
53+
this.logo = logo;
54+
}
55+
56+
public String getWebsiteUrl() {
57+
return websiteUrl;
58+
}
59+
60+
public void setWebsiteUrl(String websiteUrl) {
61+
this.websiteUrl = websiteUrl;
62+
}
3563
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package io.easystartup.suggestfeature.dto;
2+
3+
4+
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
5+
6+
/*
7+
* @author indianBond
8+
*/
9+
@JsonIgnoreProperties(ignoreUnknown = true)
10+
public class WebPageDetailsDTO {
11+
12+
private String url;
13+
14+
public WebPageDetailsDTO() {
15+
}
16+
17+
public String getUrl() {
18+
return url;
19+
}
20+
21+
public void setUrl(String url) {
22+
this.url = url;
23+
}
24+
}

backend/src/main/java/io/easystartup/suggestfeature/rest/admin/PagesRestApi.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ public Response editOrg(Organization organization) {
131131

132132
String returnToSiteUrl = organization.getReturnToSiteUrl();
133133
if (StringUtils.isNotBlank(returnToSiteUrl) && !returnToSiteUrl.startsWith("http")) {
134+
if (returnToSiteUrl.startsWith("mailto:")) {
135+
throw new UserVisibleException("Invalid return to site url");
136+
}
134137
returnToSiteUrl = "https://" + returnToSiteUrl;
135138
}
136139
existingOrg.setReturnToSiteUrl(returnToSiteUrl);

backend/src/main/java/io/easystartup/suggestfeature/rest/admin/UserRestApi.java

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77
import io.easystartup.suggestfeature.dto.CreateMemberRequest;
88
import io.easystartup.suggestfeature.dto.LoginResponse;
99
import io.easystartup.suggestfeature.dto.OrganizationRequest;
10+
import io.easystartup.suggestfeature.dto.WebPageDetailsDTO;
1011
import io.easystartup.suggestfeature.filters.UserContext;
1112
import io.easystartup.suggestfeature.filters.UserVisibleException;
1213
import io.easystartup.suggestfeature.services.AuthService;
1314
import io.easystartup.suggestfeature.services.ValidationService;
1415
import io.easystartup.suggestfeature.services.db.MongoTemplateFactory;
1516
import io.easystartup.suggestfeature.utils.JacksonMapper;
17+
import io.easystartup.suggestfeature.utils.WebPageExtractorUtil;
1618
import jakarta.validation.constraints.NotBlank;
1719
import jakarta.ws.rs.*;
1820
import jakarta.ws.rs.core.Response;
@@ -135,6 +137,29 @@ public Response fetchOrgsForUser() {
135137
return Response.ok(JacksonMapper.toJson(safeReturn)).build();
136138
}
137139

140+
@POST
141+
@Path("/fetch-web-page-details")
142+
@Consumes("application/json")
143+
@Produces("application/json")
144+
public Response details(WebPageDetailsDTO req) {
145+
String url = req.getUrl();
146+
if (req == null || url == null || url.isEmpty()) {
147+
throw new UserVisibleException("Invalid URL");
148+
}
149+
UserContext userContext = UserContext.current();
150+
String userId = userContext.getUserId();
151+
if (url.startsWith("http://") || url.startsWith("https://")) {
152+
// Do nothing
153+
} else {
154+
url = "https://" + url;
155+
}
156+
WebPageExtractorUtil.WebPageData pageData = WebPageExtractorUtil.getPageData(userId, null, url);
157+
if (pageData == null) {
158+
return Response.ok("{}").build();
159+
}
160+
return Response.ok(JacksonMapper.toJson(pageData)).build();
161+
}
162+
138163
@POST
139164
@Path("/create-org")
140165
@Consumes("application/json")
@@ -154,6 +179,20 @@ public Response createOrg(OrganizationRequest req) {
154179
organization.setSlug(req.getOrganizationSlug());
155180
organization.setName(req.getOrganizationName());
156181

182+
if (StringUtils.isNotBlank(req.getFavicon())) {
183+
organization.setFavicon(req.getFavicon());
184+
}
185+
if (StringUtils.isNotBlank(req.getLogo())) {
186+
organization.setLogo(req.getLogo());
187+
}
188+
String websiteUrl = req.getWebsiteUrl();
189+
if (StringUtils.isNotBlank(websiteUrl) && !websiteUrl.startsWith("http")) {
190+
websiteUrl = "https://" + websiteUrl;
191+
}
192+
if (StringUtils.isNotBlank(websiteUrl)) {
193+
organization.setReturnToSiteUrl(websiteUrl);
194+
}
195+
157196
Organization.SSOSettings ssoSettings = new Organization.SSOSettings();
158197
ssoSettings.setPrimaryKey(UUID.randomUUID().toString());
159198
ssoSettings.setSecondaryKey(UUID.randomUUID().toString());
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
package io.easystartup.suggestfeature.utils;
2+
3+
import io.easystartup.suggestfeature.loggers.Logger;
4+
import io.easystartup.suggestfeature.loggers.LoggerFactory;
5+
import org.apache.commons.lang3.StringUtils;
6+
import org.jsoup.Jsoup;
7+
import org.jsoup.nodes.Document;
8+
import org.jsoup.nodes.Element;
9+
import org.jsoup.select.Elements;
10+
11+
import java.io.IOException;
12+
import java.net.MalformedURLException;
13+
import java.net.URL;
14+
15+
/*
16+
* @author indianBond
17+
*/
18+
public class WebPageExtractorUtil {
19+
20+
private static final Logger LOGGER = LoggerFactory.getLogger(WebPageExtractorUtil.class);
21+
private static final String USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36";
22+
23+
public static WebPageData getPageData(String userId, String orgId, String url) {
24+
try {
25+
WebPageData webPageData = extractWebPageInfo(url);
26+
if (webPageData == null) {
27+
return null;
28+
}
29+
String favicon = webPageData.getFavicon();
30+
if (StringUtils.isNotBlank(favicon)) {
31+
// Save favicon to S3
32+
favicon = Util.uploadCopy(userId, orgId, favicon);
33+
}
34+
String logo = webPageData.getLogo();
35+
if (StringUtils.isNotBlank(logo)) {
36+
// Save logo to S3
37+
logo = Util.uploadCopy(userId, orgId, logo);
38+
}
39+
40+
return new WebPageData(favicon, webPageData.getTitle(), webPageData.getOrgName(), logo);
41+
} catch (Throwable e) {
42+
LOGGER.error("Error extracting web page info " + url, e);
43+
}
44+
return null;
45+
}
46+
47+
private static WebPageData extractWebPageInfo(String url) throws IOException {
48+
Document doc = null;
49+
try {
50+
doc = Jsoup.connect(url).userAgent(USER_AGENT).get();
51+
} catch (Exception e) {
52+
LOGGER.error("Error extracting web page info " + url, e);
53+
}
54+
55+
if (doc == null) {
56+
return null;
57+
}
58+
59+
// Extract favicon
60+
String favicon = extractFavicon(doc, url);
61+
62+
// Extract title
63+
String title = doc.title();
64+
65+
// Extract organization name
66+
String orgName = extractOrganizationName(doc);
67+
68+
// Extract logo
69+
String logo = extractLogo(doc, url);
70+
71+
return new WebPageData(favicon, title, orgName, logo);
72+
}
73+
74+
private static String extractFavicon(Document doc, String url) {
75+
Elements favicons = doc.select("link[rel~=^(shortcut )?icon]");
76+
if (!favicons.isEmpty()) {
77+
return favicons.first().absUrl("href");
78+
}
79+
// If no favicon is specified in HTML, try the default location
80+
try {
81+
return new URL(new URL(url), "/favicon.ico").toString();
82+
} catch (MalformedURLException e) {
83+
return null;
84+
}
85+
}
86+
87+
private static String extractOrganizationName(Document doc) {
88+
// Try to extract from meta tags
89+
String orgName = doc.select("meta[property=og:site_name]").attr("content");
90+
if (!orgName.isEmpty()) {
91+
return orgName;
92+
}
93+
94+
// If not found, try to extract from schema.org metadata
95+
Elements schema = doc.select("script[type=application/ld+json]");
96+
for (Element element : schema) {
97+
if (element.data().contains("\"name\"")) {
98+
// This is a simple extraction and might need more robust parsing for complex JSON
99+
String[] parts = element.data().split("\"name\":");
100+
if (parts.length > 1) {
101+
return parts[1].split("\"")[1];
102+
}
103+
}
104+
}
105+
106+
// If still not found, return the domain name as a fallback
107+
try {
108+
return new URL(doc.location()).getHost();
109+
} catch (Exception e) {
110+
return null;
111+
}
112+
}
113+
114+
private static String extractLogo(Document doc, String url) {
115+
// Try to extract from Open Graph meta tag
116+
String logo = doc.select("meta[property=og:image]").attr("content");
117+
if (!logo.isEmpty()) {
118+
return logo;
119+
}
120+
121+
// Try to extract from schema.org metadata
122+
Elements schema = doc.select("script[type=application/ld+json]");
123+
for (Element element : schema) {
124+
if (element.data().contains("\"logo\"")) {
125+
// This is a simple extraction and might need more robust parsing for complex JSON
126+
String[] parts = element.data().split("\"logo\":");
127+
if (parts.length > 1) {
128+
return parts[1].split("\"")[1];
129+
}
130+
}
131+
}
132+
133+
// If not found, try to find a prominent image
134+
Elements images = doc.select("img[src~=(?i)\\.(png|jpe?g)]");
135+
for (Element image : images) {
136+
if (image.hasAttr("alt") && image.attr("alt").toLowerCase().contains("logo")) {
137+
return image.absUrl("src");
138+
}
139+
}
140+
141+
return null;
142+
}
143+
144+
public static class WebPageData {
145+
private String favicon;
146+
private String title;
147+
private String orgName;
148+
private String logo;
149+
150+
public WebPageData(String favicon, String title, String orgName, String logo) {
151+
this.favicon = favicon;
152+
this.title = title;
153+
this.orgName = orgName;
154+
this.logo = logo;
155+
}
156+
157+
public String getFavicon() {
158+
return favicon;
159+
}
160+
161+
public String getTitle() {
162+
return title;
163+
}
164+
165+
public String getOrgName() {
166+
return orgName;
167+
}
168+
169+
public String getLogo() {
170+
return logo;
171+
}
172+
}
173+
}

0 commit comments

Comments
 (0)