-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathAppendix 1 2_reddit_praw_arizona.py
More file actions
68 lines (51 loc) · 2.16 KB
/
Appendix 1 2_reddit_praw_arizona.py
File metadata and controls
68 lines (51 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os

import pandas as pd
import praw
# Scrape r/arizona posts matching immigration-related search terms, together
# with their comments, and write one CSV row per (submission, comment) pair.

# Reddit API credentials. Each value can be overridden through an environment
# variable so real secrets do not have to be committed to the source file; the
# literals below are only the fallback defaults (replace with your own).
client_id = os.environ.get('REDDIT_CLIENT_ID', 'SUCUWf9SRoqx-yN4gJ3WNg')
client_secret = os.environ.get('REDDIT_CLIENT_SECRET', 'deleted')
user_agent = os.environ.get('REDDIT_USER_AGENT', '2526564987854')

# Create a Reddit instance
reddit = praw.Reddit(client_id=client_id,
                     client_secret=client_secret,
                     user_agent=user_agent)

# Access the Arizona subreddit
subreddit = reddit.subreddit('arizona')

# Search for posts related to immigrants or immigration
search_terms = ['immigrants', 'immigration', 'migrants']

# Destination file for the scraped rows
output_path = 'reddit_arizona_data_with_comments.csv'

# Column order of the output; fixed up front so that an empty result set
# still produces a DataFrame (and CSV header) with the expected columns.
columns = [
    'post_id',
    'title',
    'author',
    'created_utc',
    'url',
    'selftext',
    'num_comments',
    'score',
    'comment_id',
    'comment_author',
    'comment_body',
    'comment_score',
    'comment_created_utc',
]

# Initialize a list to store scraped data
data = []

# Iterate through search results
for submission in subreddit.search(' OR '.join(search_terms)):
    print(f"Processing submission: {submission.title}")

    # Extract relevant information about the submission
    submission_data = {
        'post_id': submission.id,
        'title': submission.title,
        # `author` is falsy for deleted accounts, hence the 'N/A' fallback.
        'author': submission.author.name if submission.author else 'N/A',
        'created_utc': submission.created_utc,
        'url': submission.url,
        'selftext': submission.selftext,
        'num_comments': submission.num_comments,
        'score': submission.score,
    }

    # Extract comments from the submission.
    print(f"Fetching comments for submission: {submission.title}")
    # limit=0 drops every 'MoreComments' placeholder without expanding it, so
    # only the comments delivered with the initial request are captured.
    # (limit=None would expand them all, at the cost of many more API calls.)
    submission.comments.replace_more(limit=0)
    for comment in submission.comments.list():
        comment_data = {
            'post_id': submission.id,
            'comment_id': comment.id,
            'comment_author': comment.author.name if comment.author else 'N/A',
            'comment_body': comment.body,
            'comment_score': comment.score,
            'comment_created_utc': comment.created_utc,
        }
        # One row per comment, carrying the parent submission's fields.
        # Submissions without any comments therefore contribute no rows.
        data.append({**submission_data, **comment_data})

# Create a Pandas DataFrame
df = pd.DataFrame(data, columns=columns)

# Convert Unix timestamps (seconds) to datetime. Skipped when nothing was
# scraped, so an empty search result yields a header-only CSV, not an error.
if not df.empty:
    df['created_utc'] = pd.to_datetime(df['created_utc'], unit='s')
    df['comment_created_utc'] = pd.to_datetime(df['comment_created_utc'], unit='s')

# Save data to a CSV file
df.to_csv(output_path, index=False)
print(f'Data scraped and saved to {output_path}')