Skip to content

Commit 5c31edd

Browse files
committed
Initial version
0 parents  commit 5c31edd

File tree

72 files changed

+14073
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+14073
-0
lines changed

.gitignore

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
*.class
2+
*.versionsBackup
3+
4+
# Mobile Tools for Java (J2ME)
5+
.mtj.tmp/
6+
7+
# Package Files #
8+
*.jar
9+
*.war
10+
*.ear
11+
output
12+
target
13+
.settings
14+
.project
15+
.classpath
16+
.metadata
17+
.factorypath
18+
19+
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
20+
hs_err_pid*

pom.xml

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<parent>
6+
<groupId>io.skullabs.trip</groupId>
7+
<version>1.2.4</version>
8+
<artifactId>trip-project</artifactId>
9+
</parent>
10+
11+
<groupId>io.skullabs.stalkr</groupId>
12+
<artifactId>stalkr-parent</artifactId>
13+
<version>0.1.0-SNAPSHOT</version>
14+
<name>Stalkr: The crawler tool kit for Java</name>
15+
<packaging>pom</packaging>
16+
17+
<properties>
18+
<jdk.source.version>1.8</jdk.source.version>
19+
</properties>
20+
21+
<modules>
22+
<module>stalkr-http</module>
23+
<module>stalkr-binder</module>
24+
<module>stalkr-death-by-captcha</module>
25+
<module>stalkr-commons</module>
26+
<module>stalkr-crawler</module>
27+
</modules>
28+
29+
<dependencyManagement>
30+
<dependencies>
31+
<dependency>
32+
<groupId>com.ning</groupId>
33+
<artifactId>async-http-client</artifactId>
34+
<version>1.8.14</version>
35+
</dependency>
36+
<dependency>
37+
<groupId>com.typesafe</groupId>
38+
<artifactId>config</artifactId>
39+
<version>1.2.0</version>
40+
</dependency>
41+
<dependency>
42+
<groupId>org.jsoup</groupId>
43+
<artifactId>jsoup</artifactId>
44+
<version>1.8.1</version>
45+
</dependency>
46+
47+
<!-- modules as dependencies -->
48+
<dependency>
49+
<groupId>${project.groupId}</groupId>
50+
<version>${project.version}</version>
51+
<artifactId>stalkr-commons</artifactId>
52+
</dependency>
53+
<dependency>
54+
<groupId>${project.groupId}</groupId>
55+
<version>${project.version}</version>
56+
<artifactId>stalkr-http</artifactId>
57+
</dependency>
58+
<dependency>
59+
<groupId>${project.groupId}</groupId>
60+
<version>${project.version}</version>
61+
<artifactId>stalkr-binder</artifactId>
62+
</dependency>
63+
<dependency>
64+
<groupId>${project.groupId}</groupId>
65+
<version>${project.version}</version>
66+
<artifactId>stalkr-crawler</artifactId>
67+
</dependency>
68+
</dependencies>
69+
</dependencyManagement>
70+
71+
<url>https://github.com/Skullabs/stalkr</url>
72+
73+
<licenses>
74+
<license>
75+
<name>The Apache Software License, Version 2.0</name>
76+
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
77+
<distribution>repo</distribution>
78+
</license>
79+
</licenses>
80+
81+
<scm>
82+
<connection>scm:git:git@github.com:Skullabs/stalkr.git</connection>
83+
<developerConnection>scm:git:git@github.com:Skullabs/stalkr.git</developerConnection>
84+
<url>git@github.com:Skullabs/stalkr.git</url>
85+
</scm>
86+
87+
<developers>
88+
<developer>
89+
<id>miere</id>
90+
<email>miere.teixeira@gmail.com</email>
91+
<name>Miere Liniel Teixeira</name>
92+
</developer>
93+
</developers>
94+
95+
</project>

stalkr-binder/pom.xml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<parent>
6+
<groupId>io.skullabs.stalkr</groupId>
7+
<artifactId>stalkr-parent</artifactId>
8+
<version>0.1.0-SNAPSHOT</version>
9+
</parent>
10+
11+
<artifactId>stalkr-binder</artifactId>
12+
<name>Stalkr: Java data binding</name>
13+
<packaging>jar</packaging>
14+
15+
<dependencies>
16+
<dependency>
17+
<groupId>com.typesafe</groupId>
18+
<artifactId>config</artifactId>
19+
</dependency>
20+
<dependency>
21+
<groupId>org.jsoup</groupId>
22+
<artifactId>jsoup</artifactId>
23+
</dependency>
24+
</dependencies>
25+
</project>
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package stalkr.html;
2+
3+
import java.lang.reflect.Field;
4+
5+
import lombok.RequiredArgsConstructor;
6+
7+
import org.jsoup.nodes.Element;
8+
9+
@RequiredArgsConstructor
10+
public class AttributeElementSetter implements Setter {
11+
12+
final Field field;
13+
final String selector;
14+
final String attribute;
15+
16+
@Override
17+
public void bind( final Element document, final Object instance ) {
18+
try {
19+
final Element element = document.select( selector ).first();
20+
field.set( instance, element.attr( attribute ) );
21+
} catch ( IllegalArgumentException | IllegalAccessException e ) {
22+
throw new RuntimeException( e );
23+
}
24+
}
25+
26+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package stalkr.html;
2+
3+
import java.lang.annotation.ElementType;
4+
import java.lang.annotation.Retention;
5+
import java.lang.annotation.RetentionPolicy;
6+
import java.lang.annotation.Target;
7+
8+
/**
 * Marks a field whose value is taken from an attribute of an HTML element.
 *
 * <p>Both members are mandatory; the annotation is kept at runtime so it can
 * be discovered through reflection.</p>
 */
@Target( { ElementType.FIELD } )
@Retention( value = RetentionPolicy.RUNTIME )
public @interface BindableAttribute {

    /**
     * @return the selector that locates the element holding the attribute
     */
    String selector();

    /**
     * @return the name of the attribute whose value is bound to the field
     */
    String attribute();
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package stalkr.html;
2+
3+
import java.lang.reflect.Constructor;
4+
import java.lang.reflect.InvocationTargetException;
5+
import java.util.List;
6+
7+
import lombok.RequiredArgsConstructor;
8+
9+
import org.jsoup.nodes.Element;
10+
11+
/**
12+
* Stores the data stripped from classes able to be binded against HTML nodes.
13+
*
14+
* @author Miere Teixeira
15+
*/
16+
@RequiredArgsConstructor
17+
class BindableClass {
18+
19+
final Class<?> type;
20+
final List<Setter> values;
21+
22+
public void bind( final Element document, final Object instance ) {
23+
for ( final Setter setter : values )
24+
setter.bind( document, instance );
25+
}
26+
27+
public Object newInstanceOf() {
28+
try {
29+
final Constructor<?> constructor = type.getConstructor();
30+
constructor.setAccessible( true );
31+
return constructor.newInstance();
32+
} catch ( InstantiationException | IllegalAccessException
33+
| NoSuchMethodException | SecurityException
34+
| IllegalArgumentException | InvocationTargetException e ) {
35+
throw new RuntimeException( e );
36+
}
37+
}
38+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
package stalkr.html;
2+
3+
import java.lang.reflect.Field;
4+
import java.util.ArrayList;
5+
import java.util.List;
6+
import java.util.Map;
7+
import java.util.concurrent.ConcurrentHashMap;
8+
9+
import lombok.val;
10+
import trip.spi.Singleton;
11+
12+
@Singleton
13+
public class BindableClassFactory {
14+
15+
final Map<Class<?>, BindableClass> cache = new ConcurrentHashMap<>();
16+
17+
BindableClass createNewBindableClass( final Class<?> clazz ) {
18+
final List<Setter> fields = stripBindableFieldsOfClazz( clazz );
19+
return new BindableClass( clazz, fields );
20+
}
21+
22+
List<Setter> stripBindableFieldsOfClazz( Class<?> clazz ) {
23+
final List<Setter> list = new ArrayList<Setter>();
24+
while ( !clazz.equals( Object.class ) ) {
25+
for ( val field : clazz.getDeclaredFields() ) {
26+
field.setAccessible( true );
27+
memorizeBindableField( clazz, list, field );
28+
}
29+
clazz = clazz.getSuperclass();
30+
}
31+
return list;
32+
}
33+
34+
void memorizeBindableField( final Class<?> clazz, final List<Setter> list, final Field field )
35+
{
36+
val textAnnotation = field.getAnnotation( BindableText.class );
37+
if ( textAnnotation != null )
38+
list.add( memorizeBindableTextField( clazz, field, textAnnotation ) );
39+
val attributeAnnotation = field.getAnnotation( BindableAttribute.class );
40+
if ( attributeAnnotation != null )
41+
list.add( memorizeBindableAttributeField( clazz, field, attributeAnnotation ) );
42+
val repeatableAnnotation = field.getAnnotation( BindableManyTimes.class );
43+
if ( repeatableAnnotation != null )
44+
list.add( memorizeRepeatableBinding( clazz, field, repeatableAnnotation ) );
45+
}
46+
47+
Setter memorizeBindableTextField( final Class<?> clazz, final Field field, final BindableText annotation ) {
48+
final String selector = annotation.value();
49+
return new TextElementSetter( field, selector );
50+
}
51+
52+
Setter memorizeBindableAttributeField( final Class<?> clazz, final Field field, final BindableAttribute annotation ) {
53+
final String selector = annotation.selector();
54+
final String attribute = annotation.attribute();
55+
return new AttributeElementSetter( field, selector, attribute );
56+
}
57+
58+
Setter memorizeRepeatableBinding( final Class<?> clazz, final Field field, final BindableManyTimes annotation ) {
59+
final String selector = annotation.selector();
60+
final BindableClass bindableClass = getBindableClassFor( annotation.model() );
61+
return new RepeatableElementSetter( selector, field, bindableClass );
62+
}
63+
64+
public <T> BindableClass getBindableClassFor( final Class<T> type ) {
65+
BindableClass bindableData = cache.get( type );
66+
if ( bindableData == null )
67+
synchronized ( cache ) {
68+
bindableData = cache.get( type );
69+
if ( bindableData == null )
70+
cache.put( type, bindableData = createNewBindableClass( type ) );
71+
}
72+
return bindableData;
73+
}
74+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package stalkr.html;
2+
3+
import java.lang.annotation.ElementType;
4+
import java.lang.annotation.Retention;
5+
import java.lang.annotation.RetentionPolicy;
6+
import java.lang.annotation.Target;
7+
8+
/**
 * Marks a field that is bound through a selector together with a model class.
 *
 * <p>Both members are mandatory; the annotation is kept at runtime so it can
 * be discovered through reflection.</p>
 */
@Target( { ElementType.FIELD } )
@Retention( value = RetentionPolicy.RUNTIME )
public @interface BindableManyTimes {

    /**
     * @return the selector associated with the annotated field
     */
    String selector();

    /**
     * @return the model class associated with the annotated field
     */
    Class<?> model();
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package stalkr.html;
2+
3+
import java.lang.annotation.ElementType;
4+
import java.lang.annotation.Retention;
5+
import java.lang.annotation.RetentionPolicy;
6+
import java.lang.annotation.Target;
7+
8+
/**
 * Marks a field that is bound through a single selector.
 *
 * <p>The annotation is kept at runtime so it can be discovered through
 * reflection.</p>
 */
@Target( { ElementType.FIELD } )
@Retention( value = RetentionPolicy.RUNTIME )
public @interface BindableText {

    /**
     * @return the selector associated with the annotated field
     */
    String value();
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package stalkr.html;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.function.Consumer;
6+
7+
import org.jsoup.Jsoup;
8+
import org.jsoup.nodes.Document;
9+
import org.jsoup.nodes.Element;
10+
import org.jsoup.select.Elements;
11+
12+
import trip.spi.Provided;
13+
import trip.spi.Singleton;
14+
15+
@Singleton
16+
public class HtmlBinder {
17+
18+
@Provided
19+
BindableClassFactory factory;
20+
21+
public <T> T bind( final String data, final Class<T> type ) {
22+
final Document document = Jsoup.parse( data );
23+
final BindableClass bindableClass = factory.getBindableClassFor( type );
24+
return newBindedDataFrom( bindableClass, document );
25+
}
26+
27+
public <T> List<T> bind( final String data, final Class<T> type, final String selector ) {
28+
final Elements elements = Jsoup.parse( data ).select( selector );
29+
final BindableClass bindableClass = factory.getBindableClassFor( type );
30+
final List<T> list = new ArrayList<T>();
31+
for ( final Element element : elements )
32+
list.add( newBindedDataFrom( bindableClass, element ) );
33+
return list;
34+
}
35+
36+
@SuppressWarnings( "unchecked" )
37+
<T> T newBindedDataFrom( final BindableClass bindableClass, final Element element ) {
38+
final T instance = (T)bindableClass.newInstanceOf();
39+
bindableClass.bind( element, instance );
40+
return instance;
41+
}
42+
43+
public List<String> select( final String data, final String selector ) {
44+
final List<String> list = new ArrayList<>();
45+
selectElements( data, selector, ( element ) -> {
46+
list.add( element.text() );
47+
} );
48+
return list;
49+
}
50+
51+
public List<String> selectAttr( final String data, final String selector, final String attr ) {
52+
final List<String> list = new ArrayList<>();
53+
selectElements( data, selector, ( element ) -> {
54+
list.add( element.attr( attr ) );
55+
} );
56+
return list;
57+
}
58+
59+
public void selectElements( final String data, final String selector, final Consumer<Element> consumer ) {
60+
final Document document = Jsoup.parse( data );
61+
final Elements elements = document.select( selector );
62+
for ( final Element element : elements )
63+
consumer.accept( element );
64+
}
65+
}

0 commit comments

Comments
 (0)