Skip to content

Commit 7b06b88

Browse files
committed
support OPML importing
1 parent 3ccbbb3 commit 7b06b88

File tree

11 files changed

+509
-24
lines changed

11 files changed

+509
-24
lines changed

Cargo.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ chrono = { version = "0.4.13" }
3131
md-5 = "0.9.1"
3232
feed-rs = "0.4.0"
3333
serde_urlencoded = "0.6.1"
34+
quick-xml = "0.18.1"
35+
log = "0.4.11"
36+
femme = "2.1.0"
37+
url = "2.1.1"
3438

3539
[dev-dependencies]
3640
rand = "0.7"

fixtures/flat.opml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<opml version="2.0">
3+
<head>
4+
<title>Test OPML: Flat</title>
5+
</head>
6+
<body>
7+
<outline xmlUrl="https://example.com/feed1" title="Feed 1 Title" type="rss" text="Feed 1 Text" htmlUrl="http://example.com/site1" />
8+
<outline xmlUrl="https://example.com/feed2" title="Feed 2 Title" type="rss" text="Feed 2 Text" htmlUrl="http://example.com/site2" />
9+
</body>
10+
</opml>

fixtures/missing.opml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<opml version="2.0">
3+
<head>
4+
<title>Test OPML: Missing Metadata</title>
5+
</head>
6+
<body>
7+
<outline text="Group 1 Text" title="Group 1 Title">
8+
<outline xmlUrl="__REPLACE__/rust.xml" />
9+
</outline>
10+
</body>
11+
</opml>

fixtures/normal.opml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<opml version="2.0">
3+
<head>
4+
<title>Test OPML: Normal</title>
5+
</head>
6+
<body>
7+
<outline text="Group 1 Text" title="Group 1 Title">
8+
<outline xmlUrl="https://example.com/feed1" title="Feed 1 Title" type="rss" text="Feed 1 Text" htmlUrl="http://example.com/site1" />
9+
<outline xmlUrl="https://example.com/feed2" type="rss" text="Feed 2 Text" htmlUrl="http://example.com/site2" />
10+
</outline>
11+
<outline text="Group 2 Text">
12+
<outline xmlUrl="https://example.com/feed3" title="Feed 3 Title" type="rss" htmlUrl="http://example.com/site3" />
13+
<outline xmlUrl="https://example.com/feed4" text="Feed 4 Text" htmlUrl="http://example.com/site4" />
14+
</outline>
15+
</body>
16+
</opml>

src/cli.rs

Lines changed: 93 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
1-
use crate::model::{Feed, FeedGroup, Group, Item, ModelExt};
2-
use crate::state::State;
31
use anyhow::{anyhow, Context, Result};
42
use async_std::prelude::FutureExt;
3+
use futures::stream::{self, StreamExt};
4+
use log::{info, warn};
55
use prettytable::{cell, format, row, Table};
66
use std::path::PathBuf;
77
use structopt::StructOpt;
88

9+
use crate::model::{Feed, FeedGroup, Group, Item, ModelExt};
10+
use crate::opml;
11+
use crate::remote::RemoteFeed;
12+
use crate::state::State;
13+
914
#[derive(Debug, StructOpt)]
1015
pub enum FeedCommand {
1116
/// Lists all feeds
@@ -23,6 +28,9 @@ pub enum FeedCommand {
2328

2429
/// Crawls a feed manually
2530
Crawl { id: u32 },
31+
32+
/// Imports OPML file
33+
Import { file: PathBuf },
2634
}
2735

2836
impl FeedCommand {
@@ -53,26 +61,14 @@ impl FeedCommand {
5361
return Err(anyhow!("Feed `{}` already exists!", url));
5462
}
5563

56-
let bytes = surf::get(&url)
57-
.await
58-
.map_err(|err| anyhow!("unable to fetch {}: {:?}", &url, err))?
59-
.body_bytes()
60-
.await?;
61-
let raw_feed = feed_rs::parser::parse(&bytes[..])?;
64+
let remote = RemoteFeed::new(&url).await?;
65+
6266
let feed = Feed::new(
63-
raw_feed
64-
.title
65-
.map(|t| t.content)
67+
remote
68+
.get_title()
6669
.ok_or_else(|| anyhow!("Feed doesn't have a title"))?,
6770
url.clone(),
68-
raw_feed
69-
.links
70-
.iter()
71-
.map(|l| l.href.as_str())
72-
.filter(|&link| link != url)
73-
.next()
74-
.map(|l| l.to_string())
75-
.unwrap_or(url),
71+
remote.get_site_url().unwrap_or(url),
7672
);
7773
let feed = {
7874
let conn = state.db.get()?;
@@ -111,12 +107,81 @@ impl FeedCommand {
111107
Ok(())
112108
}
113109

110+
/// Imports the feeds listed in the OPML file at `file` into the database.
///
/// The work happens in two phases:
/// 1. `opml::from_file` yields `(group, feeds)` pairs; every feed is passed
///    through `update()` and `validate()` and then converted into a `Feed`.
/// 2. Each group title is resolved to an existing or newly inserted `Group`,
///    every feed is inserted, and the feed is attached to the group if there
///    is one.
///
/// # Errors
///
/// Only two steps abort the import: reading/parsing the OPML file and
/// obtaining a connection from `state.db`. Every per-feed and per-group
/// failure is logged with `warn!` and skipped, so `Ok(())` does not imply
/// that every entry was imported.
async fn import(state: State, file: PathBuf) -> Result<()> {
111+
let imports = opml::from_file(&file)?;
112+
113+
// NOTE(review): `then`/`filter_map` appear to drive one future at a time,
// so feeds would be updated sequentially — confirm against the futures docs.
let imports: Vec<_> = stream::iter(imports)
114+
.then(|(group, feeds)| async move {
115+
// normalize feeds
116+
let feeds = stream::iter(feeds)
117+
.filter_map(|mut feed| async move {
118+
// A failed update is not fatal: the feed still goes on to validation
// with whatever data it already has.
// NOTE(review): presumably `update` fills in metadata from the remote
// feed — confirm in the `opml` module.
if let Err(e) = feed.update().await {
119+
warn!("failed to update feed {}: {:?}", feed, e);
120+
}
121+
122+
// Feeds that fail validation are dropped from the import.
if let Err(e) = feed.validate() {
123+
warn!("invalid feed ({}): {:?}", feed, e);
124+
None
125+
} else {
126+
Some(feed)
127+
}
128+
})
129+
.map(Feed::from)
130+
.collect::<Vec<Feed>>()
131+
.await;
132+
133+
(group, feeds)
134+
})
135+
.collect()
136+
.await;
137+
138+
// The connection is acquired only after all async normalization is done.
let conn = state.db.get()?;
139+
for (group, feeds) in imports.into_iter() {
140+
// `group` is an optional title; `None` means the feeds are imported
// without being attached to any group.
let group = group.and_then(|title| {
141+
// NOTE(review): any `Err` from `get_by_name` is treated as "group does
// not exist" and falls through to an insert — verify this is intended
// for errors other than a missing row.
if let Ok(group) = Group::get_by_name(&conn, &title) {
142+
Some(group)
143+
} else {
144+
let group = Group::new(title.clone());
145+
match group.insert(&conn) {
146+
Ok(group) => Some(group),
147+
Err(e) => {
148+
// Group creation failed: the feeds below are still inserted,
// just without a group.
warn!("unable to create group {}: {:?}", title, e);
149+
None
150+
}
151+
}
152+
}
153+
});
154+
155+
for feed in feeds {
156+
let feed = match feed.insert(&conn) {
157+
Err(e) => {
158+
warn!("unable to create feed: {:?}", e);
159+
continue;
160+
}
161+
Ok(feed) => feed,
162+
};
163+
164+
if let Some(group) = group.as_ref() {
165+
// The feed has already been inserted at this point; a failure here is
// only logged, so the feed presumably stays in the database ungrouped.
if let Err(e) = group.add_feed(&conn, feed) {
166+
warn!("unable to add feed to group {:?}: {:?}", group, e);
167+
// NOTE(review): this `continue` is the last statement of the loop body
// and has no effect.
continue;
168+
}
169+
}
170+
}
171+
}
172+
173+
info!("import completed.");
174+
175+
Ok(())
176+
}
177+
114178
/// Executes this subcommand against `state`, consuming `self`.
///
/// Each variant is forwarded to the associated function of the same name,
/// and that function's `Result` is returned unchanged. `List` and `Delete`
/// are called directly; `Add`, `Crawl` and `Import` are awaited.
async fn run(self, state: State) -> Result<()> {
115179
match self {
116180
Self::List => Self::list(state),
117181
Self::Add { url, group } => Self::add(state, url, group).await,
118182
Self::Delete { id } => Self::delete(state, id),
119183
Self::Crawl { id } => Self::crawl(state, id).await,
184+
Self::Import { file } => Self::import(state, file).await,
120185
}
121186
}
122187
}
@@ -260,6 +325,9 @@ pub struct Options {
260325
)]
261326
database: PathBuf,
262327

328+
#[structopt(long)]
329+
debug: bool,
330+
263331
#[structopt(subcommand)]
264332
command: SubCommand,
265333
}
@@ -287,6 +355,12 @@ impl Options {
287355
let pool = crate::model::get_pool(&self.database)?;
288356
let state = crate::state::State::new(pool);
289357

358+
if self.debug {
359+
femme::with_level(log::LevelFilter::Debug);
360+
} else {
361+
femme::with_level(log::LevelFilter::Info);
362+
}
363+
290364
match self.command {
291365
SubCommand::Feed(cmd) => cmd.run(state).await,
292366
SubCommand::Group(cmd) => cmd.run(state).await,

src/error.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,19 @@ pub enum Error {
4040
#[error("Feed error")]
4141
FeedError(#[from] feed_rs::parser::ParseFeedError),
4242

43+
#[error("XML error")]
44+
XmlError(#[from] quick_xml::Error),
45+
46+
#[error("XML error at position {position}: {source}")]
47+
XmlErrorWithPosition {
48+
#[source]
49+
source: quick_xml::Error,
50+
position: usize,
51+
},
52+
53+
#[error("url parsing error")]
54+
UrlError(#[from] url::ParseError),
55+
4356
#[error("{}", _0)]
4457
Message(String),
4558
}
@@ -50,4 +63,10 @@ impl Error {
5063
}
5164
}
5265

66+
/// Builds an `Error::XmlErrorWithPosition` from a `(source, position)` tuple,
/// so a `quick_xml::Error` paired with a position can be converted with
/// `.into()` or `?`.
///
/// NOTE(review): `position` is presumably the XML reader's position when the
/// error occurred — confirm at the call sites that build the tuple.
impl From<(quick_xml::Error, usize)> for Error {
67+
fn from((source, position): (quick_xml::Error, usize)) -> Self {
68+
Error::XmlErrorWithPosition { source, position }
69+
}
70+
}
71+
5372
pub type Result<T, E = Error> = std::result::Result<T, E>;

src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ mod error;
66
mod api;
77
mod cli;
88
mod crawler;
9-
mod model;
9+
pub mod model;
10+
mod opml;
11+
mod remote;
1012
mod state;
1113
mod utils;
1214

0 commit comments

Comments
 (0)