Skip to content

Commit fadc291

Browse files
committed
完成PandaTv弹幕抓取
1 parent edbaa29 commit fadc291

File tree

11 files changed

+739
-83
lines changed

11 files changed

+739
-83
lines changed

CrawlPandaDanmu/.idea/libraries/Maven__org_json_json_20151123.xml

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CrawlPandaDanmu/.idea/libraries/Maven__org_jsoup_jsoup_1_8_3.xml

Lines changed: 13 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CrawlPandaDanmu/.idea/misc.xml

Lines changed: 3 additions & 50 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CrawlPandaDanmu/.idea/workspace.xml

Lines changed: 354 additions & 33 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

CrawlPandaDanmu/CrawlPandaDanmu.iml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,5 +11,7 @@
1111
</content>
1212
<orderEntry type="inheritedJdk" />
1313
<orderEntry type="sourceFolder" forTests="false" />
14+
<orderEntry type="library" name="Maven: org.jsoup:jsoup:1.8.3" level="project" />
15+
<orderEntry type="library" name="Maven: org.json:json:20151123" level="project" />
1416
</component>
1517
</module>

CrawlPandaDanmu/pom.xml

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,5 +8,18 @@
88
<artifactId>PandaDanmu</artifactId>
99
<version>1.0-SNAPSHOT</version>
1010

11+
<dependencies>
12+
<dependency>
13+
<groupId>org.jsoup</groupId>
14+
<artifactId>jsoup</artifactId>
15+
<version>1.8.3</version>
16+
</dependency>
17+
<dependency>
18+
<groupId>org.json</groupId>
19+
<artifactId>json</artifactId>
20+
<version>20151123</version>
21+
</dependency>
22+
</dependencies>
23+
1124

1225
</project>
Lines changed: 164 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,164 @@
1+
import org.json.JSONArray;
2+
import org.json.JSONObject;
3+
import org.jsoup.Jsoup;
4+
import org.jsoup.nodes.Document;
5+
6+
import java.io.ByteArrayOutputStream;
7+
import java.io.IOException;
8+
import java.io.OutputStream;
9+
import java.net.Socket;
10+
import java.util.Arrays;
11+
import java.util.List;
12+
13+
/**
14+
* Created by geekgao on 16-1-29.
15+
*/
16+
public class Crawl extends Thread {
17+
18+
//获取弹幕需要发送的内容
19+
private String rid;
20+
private String appid;
21+
private String k = "1";
22+
private String t = "300";
23+
private String ts;
24+
private String sign;
25+
private String authType;
26+
27+
//与弹幕服务器联系的socket
28+
private Socket socket;
29+
//弹幕服务器ip
30+
private String serverIp;
31+
//弹幕服务器端口
32+
private int port;
33+
34+
/**
35+
*
36+
* @return 返回结果表示是否初始化成功
37+
* @throws IOException
38+
*/
39+
public boolean init() throws IOException {
40+
String roomId = Utils.getRoomId();
41+
String time = String.valueOf(System.currentTimeMillis());
42+
43+
String url1 = "http://www.panda.tv/ajax_chatroom?roomid=" + roomId + "&_=" + time;
44+
Document doc1 = Jsoup.connect(url1).header("User-Agent","Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36").ignoreContentType(true).get();
45+
JSONObject jsonObject1 = new JSONObject(doc1.toString().split("<body>",2)[1].split("</body>",2)[0]);
46+
47+
String _sign;
48+
String _roomid;
49+
String _rid;
50+
String _ts;
51+
52+
int errno = jsonObject1.getInt("errno");
53+
if (errno != 0) {
54+
System.out.println("-----------------------");
55+
System.out.println("第一步获取数据出错,程序将退出");
56+
System.out.println("url:" + url1);
57+
System.out.println("json数据:");
58+
System.out.println(jsonObject1);
59+
System.out.println("-----------------------");
60+
return false;
61+
} else {
62+
JSONObject j = jsonObject1.getJSONObject("data");
63+
_sign = j.getString("sign");
64+
_roomid = String.valueOf(j.getLong("roomid"));
65+
_rid = String.valueOf(j.getLong("rid"));
66+
_ts = String.valueOf(j.getLong("ts"));
67+
}
68+
69+
String url2 = "http://api.homer.panda.tv/chatroom/getinfo?rid=" + _rid + "&roomid=" + _roomid + "&retry=0&sign=" + _sign + "&ts=" + _ts + "&_=" + System.currentTimeMillis();
70+
Document doc2 = Jsoup.connect(url2).header("User-Agent","Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36").ignoreContentType(true).get();
71+
JSONObject jsonObject2 = new JSONObject(doc2.toString().split("<body>",2)[1].split("</body>",2)[0]);
72+
73+
errno = jsonObject2.getInt("errno");
74+
if (errno != 0) {
75+
System.out.println("-----------------------");
76+
System.out.println("第二步获取数据出错,程序将退出");
77+
System.out.println("url:" + url2);
78+
System.out.println("json数据:");
79+
System.out.println(jsonObject2);
80+
System.out.println("-----------------------");
81+
return false;
82+
} else {
83+
JSONObject j = jsonObject2.getJSONObject("data");
84+
rid = String.valueOf(j.getLong("rid"));
85+
appid = j.getString("appid");
86+
ts = String.valueOf(j.getLong("ts"));
87+
sign = j.getString("sign");
88+
authType = j.getString("authType");
89+
90+
JSONArray chat_addr_list = j.getJSONArray("chat_addr_list");
91+
for (Object o:chat_addr_list) {
92+
serverIp = ((String) o).split(":",2)[0];
93+
port = Integer.valueOf(((String) o).split(":", 2)[1]);
94+
break;
95+
}
96+
}
97+
98+
return true;
99+
}
100+
101+
/**
102+
* 与弹幕服务器取得联系,相当于登录弹幕服务器
103+
*/
104+
public void login() throws IOException {
105+
socket = new Socket(serverIp,port);
106+
System.out.println("连接弹幕服务器:" + serverIp + ":" + port);
107+
ByteArrayOutputStream byteArray = new ByteArrayOutputStream();
108+
byte[] b = new byte[]{0x00, 0x06, 0x00, 0x02, 0x00, 0x60, 0x75, 0x3a};
109+
byteArray.write(b);
110+
111+
String msg = rid + "@" + appid + "\n" +
112+
"k:" + k + "\n" +
113+
"t:" + t + "\n" +
114+
"ts:" + ts + "\n" +
115+
"sign:" + sign + "\n" +
116+
"authtype:" + authType;
117+
byteArray.write(msg.getBytes("ISO-8859-1"));
118+
OutputStream outputStream = socket.getOutputStream();
119+
outputStream.write(byteArray.toByteArray());
120+
121+
b = new byte[]{0x00, 0x06, 0x00, 0x00};
122+
outputStream.write(b);
123+
}
124+
125+
@Override
126+
public void run() {
127+
MessageHandler messageHandler;
128+
OutputStream outputStream;
129+
130+
try {
131+
init();
132+
login();
133+
134+
messageHandler = new MessageHandler(socket);
135+
outputStream = socket.getOutputStream();
136+
137+
long start = System.currentTimeMillis();
138+
while (true) {
139+
List<String> msgs = messageHandler.read();
140+
for (String s:msgs) {
141+
String type = s.split("\\{\"type\":\"",2)[1].split("\"",2)[0];
142+
//发言弹幕
143+
if (type.equals("1")) {
144+
String nickname = s.split("nickName\":\"",2)[1].split("\"")[0];
145+
String content = s.split("content\":\"",2)[1].split("\"",2)[0];
146+
System.out.println("[" + nickname + "]:" + content);
147+
}
148+
}
149+
150+
long end = System.currentTimeMillis();
151+
//心跳包
152+
if (end - start > 60000) {
153+
outputStream.write(new byte[]{0x00, 0x06, 0x00, 0x00});
154+
}
155+
156+
Thread.sleep(1);
157+
}
158+
} catch (IOException e) {
159+
e.printStackTrace();
160+
} catch (InterruptedException e) {
161+
e.printStackTrace();
162+
}
163+
}
164+
}
Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
import java.io.IOException;
2+
3+
/**
4+
* Created by geekgao on 16-1-29.
5+
*/
6+
public class Main {
7+
public static void main(String[] args) throws IOException {
8+
Crawl c = new Crawl();
9+
c.start();
10+
}
11+
}

0 commit comments

Comments
 (0)