Skip to content

Commit b10037b

Browse files
committed
- Turing Connector Initial Commit
1 parent e2ab67d commit b10037b

File tree

21 files changed

+828
-106
lines changed

21 files changed

+828
-106
lines changed

turing-app/src/main/resources/application.properties

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,10 @@ spring.servlet.multipart.max-request-size=1024MB
9999

100100
## Others
101101
spring.mvc.async.request-timeout=3600000
102+
spring.output.ansi.enabled=always
102103

103104
jasypt.encryptor.bean=turEncryptor
104105

105-
spring.output.ansi.enabled=always
106-
107106
management.endpoints.web.exposure.include=*
108107

109108
#spring.security.oauth2.client.registration.keycloak.client-id=demo-app

turing-commons/src/main/java/com/viglet/turing/commons/file/TurFileAttributes.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import lombok.*;
2121

2222
import java.util.Date;
23+
import java.util.HashMap;
2324
import java.util.Map;
2425

2526
/**
@@ -41,9 +42,7 @@ public class TurFileAttributes {
4142
private String name;
4243
private String title;
4344
private String extension;
44-
private TurFileSize size;
45-
private Date lastModified;
46-
private Map<String, String> metadata;
47-
48-
45+
private TurFileSize size = new TurFileSize();
46+
private Date lastModified = new Date();
47+
private Map<String, String> metadata = new HashMap<>();
4948
}

turing-connector/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
/target/
2+
/connector-app/target/
3+
/connector-commons/target/
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
4+
<modelVersion>4.0.0</modelVersion>
5+
<parent>
6+
<artifactId>turing-connector</artifactId>
7+
<groupId>com.viglet.turing</groupId>
8+
<version>${revision}</version>
9+
</parent>
10+
11+
<artifactId>connector-app</artifactId>
12+
<name>Turing - Connector</name>
13+
14+
<properties>
15+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16+
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
17+
<java.version>21</java.version>
18+
<maven.compiler.source>21</maven.compiler.source>
19+
<maven.compiler.target>21</maven.compiler.target>
20+
</properties>
21+
22+
<dependencies>
23+
<dependency>
24+
<groupId>ch.qos.logback</groupId>
25+
<artifactId>logback-classic</artifactId>
26+
</dependency>
27+
<dependency>
28+
<groupId>com.h2database</groupId>
29+
<artifactId>h2</artifactId>
30+
</dependency>
31+
<dependency>
32+
<groupId>org.projectlombok</groupId>
33+
<artifactId>lombok</artifactId>
34+
<optional>true</optional>
35+
</dependency>
36+
<dependency>
37+
<groupId>com.viglet.turing</groupId>
38+
<artifactId>turing-spring</artifactId>
39+
<version>${revision}</version>
40+
</dependency>
41+
<dependency>
42+
<groupId>com.google.inject</groupId>
43+
<artifactId>guice</artifactId>
44+
<version>7.0.0</version>
45+
<scope>compile</scope>
46+
</dependency>
47+
<dependency>
48+
<groupId>com.fasterxml.jackson.datatype</groupId>
49+
<artifactId>jackson-datatype-hibernate5-jakarta</artifactId>
50+
</dependency>
51+
<dependency>
52+
<groupId>com.viglet.turing</groupId>
53+
<artifactId>turing-java-sdk</artifactId>
54+
</dependency>
55+
<dependency>
56+
<groupId>com.viglet.turing</groupId>
57+
<artifactId>wc-commons</artifactId>
58+
<version>${revision}</version>
59+
</dependency>
60+
<dependency>
61+
<groupId>com.sezinkarli</groupId>
62+
<artifactId>random-user-agent-generator</artifactId>
63+
<version>1.3</version>
64+
</dependency>
65+
<dependency>
66+
<groupId>io.swagger.core.v3</groupId>
67+
<artifactId>swagger-annotations-jakarta</artifactId>
68+
<version>2.2.26</version>
69+
<scope>compile</scope>
70+
</dependency>
71+
<dependency>
72+
<groupId>org.springframework.boot</groupId>
73+
<artifactId>spring-boot-starter-data-jpa</artifactId>
74+
</dependency>
75+
<dependency>
76+
<groupId>org.springframework.boot</groupId>
77+
<artifactId>spring-boot-starter-web</artifactId>
78+
</dependency>
79+
<dependency>
80+
<groupId>org.springframework.boot</groupId>
81+
<artifactId>spring-boot-starter-test</artifactId>
82+
<scope>test</scope>
83+
</dependency>
84+
<dependency>
    <groupId>com.viglet.turing</groupId>
    <artifactId>connector-commons</artifactId>
    <!-- Use the shared revision property, as the other internal com.viglet.turing
         dependencies in this POM do, instead of a hard-coded release number that
         goes stale whenever the project version changes.
         NOTE(review): assumes connector-commons is versioned with the same
         ${revision} as this module's parent - confirm against its pom.xml. -->
    <version>${revision}</version>
    <scope>compile</scope>
</dependency>
90+
</dependencies>
91+
<dependencyManagement>
92+
<dependencies>
93+
<dependency>
94+
<groupId>org.springframework.boot</groupId>
95+
<artifactId>spring-boot-dependencies</artifactId>
96+
<version>${spring-boot.version}</version>
97+
<type>pom</type>
98+
<scope>import</scope>
99+
</dependency>
100+
</dependencies>
101+
</dependencyManagement>
102+
<build>
103+
<finalName>turing-connector</finalName>
104+
<pluginManagement>
105+
<plugins>
106+
<plugin>
107+
<groupId>org.apache.maven.plugins</groupId>
108+
<artifactId>maven-compiler-plugin</artifactId>
109+
<version>3.13.0</version>
110+
<configuration>
111+
<parameters>true</parameters>
112+
</configuration>
113+
</plugin>
114+
</plugins>
115+
</pluginManagement>
116+
<plugins>
117+
<plugin>
118+
<groupId>org.springframework.boot</groupId>
119+
<artifactId>spring-boot-maven-plugin</artifactId>
120+
<version>${spring-boot.version}</version>
121+
<configuration>
122+
<layout>ZIP</layout>
123+
<skip>false</skip>
124+
<executable>true</executable>
125+
</configuration>
126+
<executions>
127+
<execution>
128+
<goals>
129+
<goal>repackage</goal>
130+
</goals>
131+
</execution>
132+
</executions>
133+
</plugin>
134+
</plugins>
135+
</build>
136+
</project>
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
package com.viglet.turing.connector;
2+
3+
import com.fasterxml.jackson.databind.Module;
4+
import com.fasterxml.jackson.datatype.hibernate5.jakarta.Hibernate5JakartaModule;
5+
import lombok.extern.slf4j.Slf4j;
6+
import org.springframework.boot.SpringApplication;
7+
import org.springframework.boot.autoconfigure.SpringBootApplication;
8+
import org.springframework.boot.web.servlet.FilterRegistrationBean;
9+
import org.springframework.context.annotation.Bean;
10+
import org.springframework.scheduling.annotation.EnableScheduling;
11+
import org.springframework.web.filter.CharacterEncodingFilter;
12+
13+
@Slf4j
14+
@SpringBootApplication
15+
@EnableScheduling
16+
public class TurConnectorApplication {
17+
public static final String UTF_8 = "UTF-8";
18+
19+
public static void main(String[] args) {
20+
21+
log.info(":: Starting Turing Connector ...");
22+
SpringApplication.run(TurConnectorApplication.class, args);
23+
log.info(":: Started Turing Connector");
24+
}
25+
26+
@Bean
27+
FilterRegistrationBean<CharacterEncodingFilter> filterRegistrationBean() {
28+
FilterRegistrationBean<CharacterEncodingFilter> registrationBean = new FilterRegistrationBean<>();
29+
CharacterEncodingFilter characterEncodingFilter = new CharacterEncodingFilter();
30+
characterEncodingFilter.setForceEncoding(true);
31+
characterEncodingFilter.setEncoding(UTF_8);
32+
registrationBean.setFilter(characterEncodingFilter);
33+
return registrationBean;
34+
}
35+
@Bean
36+
Module hibernate5Module() {
37+
return new Hibernate5JakartaModule();
38+
}
39+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
*
3+
* Copyright (C) 2016-2024 the original author or authors.
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
17+
*/
18+
19+
package com.viglet.turing.connector;
20+
21+
import com.google.common.collect.Iterators;
22+
import com.google.inject.Inject;
23+
import com.viglet.turing.client.auth.credentials.TurApiKeyCredentials;
24+
import com.viglet.turing.client.sn.TurSNServer;
25+
import com.viglet.turing.client.sn.job.TurSNJobItem;
26+
import com.viglet.turing.client.sn.job.TurSNJobItems;
27+
import com.viglet.turing.client.sn.job.TurSNJobUtils;
28+
import com.viglet.turing.connector.plugin.TurConnectorPlugin;
29+
import lombok.extern.slf4j.Slf4j;
30+
import org.springframework.beans.factory.annotation.Value;
31+
import org.springframework.stereotype.Component;
32+
33+
import java.net.MalformedURLException;
34+
import java.net.URI;
35+
import java.util.*;
36+
37+
@Slf4j
38+
@Component
39+
public class TurConnectorProcess {
40+
public static final String ID_ATTR = "id";
41+
private final String turingUrl;
42+
private final String turingApiKey;
43+
private TurSNJobItems turSNJobItems = new TurSNJobItems();
44+
private final Set<String> visitedLinks = new HashSet<>();
45+
private final Queue<TurSNJobItem> queueLinks = new LinkedList<>();
46+
private final int timeout;
47+
private final int jobSize;
48+
@Inject
49+
public TurConnectorProcess(@Value("${turing.url}") String turingUrl,
50+
@Value("${turing.apiKey}") String turingApiKey,
51+
@Value("${turing.connector.timeout:5000}") int timeout,
52+
@Value("${turing.connector.job.size:50}") int jobSize) {
53+
this.turingUrl = turingUrl;
54+
this.turingApiKey = turingApiKey;
55+
this.timeout = timeout;
56+
this.jobSize = jobSize;
57+
}
58+
59+
public void start(TurConnectorPlugin plugin) {
60+
reset();
61+
TurSNJobItem currentItem = plugin.getNext();
62+
63+
if (currentItem != null) {
64+
queueLinks.offer(currentItem);
65+
getPagesFromQueue(plugin);
66+
}
67+
if (turSNJobItems.size() > 0) {
68+
sendToTuring();
69+
getInfoQueue();
70+
}
71+
}
72+
private void sendToTuring() {
73+
if (log.isDebugEnabled()) {
74+
for (TurSNJobItem turSNJobItem : turSNJobItems) {
75+
log.debug("TurSNJobItem Id: {}", turSNJobItem.getAttributes().get(ID_ATTR));
76+
}
77+
}
78+
try {
79+
TurSNJobUtils.importItems(turSNJobItems,
80+
new TurSNServer(URI.create(turingUrl).toURL(), null,
81+
new TurApiKeyCredentials(turingApiKey)),
82+
false);
83+
} catch (MalformedURLException e) {
84+
log.error(e.getMessage(), e);
85+
}
86+
87+
}
88+
private void reset() {
89+
turSNJobItems = new TurSNJobItems();
90+
visitedLinks.clear();
91+
}
92+
93+
private void getInfoQueue() {
94+
log.info("Total Job Item: {}", Iterators.size(turSNJobItems.iterator()));
95+
log.info("Total Visited Links: {}", (long) visitedLinks.size());
96+
log.info("Queue Size: {}", (long) queueLinks.size());
97+
}
98+
99+
public void getPagesFromQueue(TurConnectorPlugin plugin) {
100+
while (!queueLinks.isEmpty()) {
101+
turSNJobItems.add(queueLinks.poll());
102+
sendToTuringWhenMaxSize();
103+
getInfoQueue();
104+
}
105+
}
106+
107+
private void sendToTuringWhenMaxSize() {
108+
if (turSNJobItems.size() >= jobSize) {
109+
sendToTuring();
110+
turSNJobItems = new TurSNJobItems();
111+
}
112+
}
113+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
*
3+
* Copyright (C) 2016-2024 the original author or authors.
4+
*
5+
* This program is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License as published by
7+
* the Free Software Foundation, either version 3 of the License, or
8+
* (at your option) any later version.
9+
*
10+
* This program is distributed in the hope that it will be useful,
11+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
* GNU General Public License for more details.
14+
*
15+
* You should have received a copy of the GNU General Public License
16+
* along with this program. If not, see <https://www.gnu.org/licenses/>.
17+
*/
18+
19+
package com.viglet.turing.connector;
20+
21+
import com.google.inject.Inject;
22+
import com.viglet.turing.connector.persistence.repository.TurConnectorConfigVarRepository;
23+
import com.viglet.turing.connector.plugin.TurConnectorPlugin;
24+
import lombok.extern.slf4j.Slf4j;
25+
import org.springframework.scheduling.annotation.Scheduled;
26+
import org.springframework.stereotype.Component;
27+
28+
import java.text.SimpleDateFormat;
29+
import java.util.Date;
30+
import java.util.concurrent.TimeUnit;
31+
32+
@Slf4j
33+
@Component
34+
public class TurConnectorScheduledTasks {
35+
private final TurConnectorConfigVarRepository turConnectorConfigVarRepository;
36+
private final TurConnectorProcess turConnectorProcess;
37+
private final SimpleDateFormat dateFormat = new SimpleDateFormat("HH:mm:ss");
38+
public static final String FIRST_TIME = "FIRST_TIME";
39+
40+
@Inject
41+
public TurConnectorScheduledTasks(TurConnectorConfigVarRepository turConnectorConfigVarRepository,
42+
TurConnectorProcess turConnectorProcess) {
43+
this.turConnectorConfigVarRepository = turConnectorConfigVarRepository;
44+
this.turConnectorProcess = turConnectorProcess;
45+
}
46+
47+
@Scheduled(fixedDelay = 60, timeUnit = TimeUnit.MINUTES)
48+
public void executeWebCrawler(TurConnectorPlugin plugin) {
49+
log.info("The time is now {}", dateFormat.format(new Date()));
50+
if (turConnectorConfigVarRepository.findById(FIRST_TIME).isEmpty()) {
51+
log.info("This is the first time, waiting next schedule.");
52+
} else {
53+
log.info("Starting indexing");
54+
turConnectorProcess.start(plugin);
55+
}
56+
}
57+
}

0 commit comments

Comments
 (0)