Skip to content

Commit f96f168

Browse files
committed
chore(bot): fix lobste.rs story scraping
1 parent 76d207a commit f96f168

File tree

2 files changed

+82
-10
lines changed

2 files changed

+82
-10
lines changed

crates/bots/rostra-bot/src/main.rs

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ pub enum BotError {
4242
Publisher { source: PublisherError },
4343
#[snafu(display("Logging initialization failed"))]
4444
Logging,
45+
#[snafu(display("Secret file is required for bot operation"))]
46+
MissingSecretFile,
4547
}
4648

4749
pub type BotResult<T> = std::result::Result<T, BotError>;
@@ -67,9 +69,12 @@ impl std::fmt::Display for Source {
6769
#[derive(Debug, Parser)]
6870
#[command(version, about, long_about = None)]
6971
pub struct Opts {
72+
#[command(subcommand)]
73+
pub command: Option<Command>,
74+
7075
/// Path to the secret file for authentication
71-
#[arg(long, required = true)]
72-
pub secret_file: PathBuf,
76+
#[arg(long)]
77+
pub secret_file: Option<PathBuf>,
7378

7479
/// Interval between scraping runs in minutes
7580
#[arg(long, default_value = "30")]
@@ -92,13 +97,45 @@ pub struct Opts {
9297
pub source: Source,
9398
}
9499

100+
#[derive(Debug, Parser)]
101+
pub enum Command {
102+
/// Development commands
103+
Dev {
104+
#[command(subcommand)]
105+
dev_command: DevCommand,
106+
},
107+
}
108+
109+
#[derive(Debug, Parser)]
110+
pub enum DevCommand {
111+
/// Test scraping functionality
112+
Test {
113+
/// Source to scrape from
114+
#[arg(long, value_enum, default_value = "hn")]
115+
source: Source,
116+
},
117+
}
118+
95119
#[snafu::report]
96120
#[tokio::main]
97121
async fn main() -> BotResult<()> {
98122
init_logging()?;
99123

100124
let opts = Opts::parse();
101125

126+
match opts.command {
127+
Some(Command::Dev { dev_command }) => {
128+
handle_dev_command(dev_command).await
129+
}
130+
None => {
131+
// Default behavior - run the bot
132+
let secret_file = opts.secret_file.clone().ok_or_else(|| BotError::MissingSecretFile)?;
133+
run_bot(opts, secret_file).await
134+
}
135+
}
136+
}
137+
138+
async fn run_bot(opts: Opts, secret_file: PathBuf) -> BotResult<()> {
102139
info!(target: LOG_TARGET, "Starting Rostra Bot for {}", opts.source);
103140
info!(
104141
target: LOG_TARGET,
@@ -109,7 +146,7 @@ async fn main() -> BotResult<()> {
109146
"Bot configuration"
110147
);
111148

112-
let secret = Client::read_id_secret(&opts.secret_file)
149+
let secret = Client::read_id_secret(&secret_file)
113150
.await
114151
.context(SecretSnafu)?;
115152

@@ -154,6 +191,42 @@ async fn main() -> BotResult<()> {
154191
run_bot_loop(&opts, &db, scraper.as_ref(), &publisher).await
155192
}
156193

194+
async fn handle_dev_command(dev_command: DevCommand) -> BotResult<()> {
195+
match dev_command {
196+
DevCommand::Test { source } => {
197+
info!(target: LOG_TARGET, "Testing scraper for {}", source);
198+
199+
let scraper = create_scraper(&source);
200+
201+
match scraper.scrape_frontpage().await {
202+
Ok(articles) => {
203+
println!("Successfully scraped {} articles from {}:", articles.len(), source);
204+
println!();
205+
206+
for (i, article) in articles.iter().enumerate() {
207+
println!("Article {}: ", i + 1);
208+
println!(" ID: {}", article.id);
209+
println!(" Title: {}", article.title);
210+
println!(" Score: {}", article.score);
211+
println!(" Author: {}", article.author);
212+
println!(" Source: {}", article.source);
213+
println!(" URL: {}", article.url.as_deref().unwrap_or("None"));
214+
println!(" Source URL: {}", article.source_url);
215+
println!(" Scraped at: {:?}", article.scraped_at);
216+
println!();
217+
}
218+
219+
Ok(())
220+
}
221+
Err(e) => {
222+
eprintln!("Failed to scrape {}: {}", source, e);
223+
Err(BotError::Scraper { source: e })
224+
}
225+
}
226+
}
227+
}
228+
}
229+
157230
async fn run_bot_loop(
158231
opts: &Opts,
159232
db: &BotDatabase,

crates/bots/rostra-bot/src/scraper.rs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -236,15 +236,14 @@ impl LobstersScraper {
236236
let title = title_link.inner_html();
237237
let url = title_link.value().attr("href").map(|s| s.to_string());
238238

239-
// Extract score from vote div
240-
let score_selector =
241-
Selector::parse("div.score").map_err(|_| ScraperError::HtmlParse)?;
239+
// Extract score from voting element - correct selector for Lobsters
240+
let vote_selector = Selector::parse("div.voters > a.upvoter").map_err(|_| ScraperError::HtmlParse)?;
242241
let score = story_element
243-
.select(&score_selector)
242+
.select(&vote_selector)
244243
.next()
245-
.and_then(|score_elem| {
246-
let score_text = score_elem.inner_html();
247-
score_text.trim().parse::<u32>().ok()
244+
.and_then(|vote_elem| {
245+
let vote_text = vote_elem.inner_html();
246+
vote_text.trim().parse::<u32>().ok()
248247
})
249248
.unwrap_or(0);
250249

0 commit comments

Comments
 (0)