Skip to content

Experiment: Add support for fetch-post to wp_rs_cli #834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 14 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions wp_rs_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ serde_json = { workspace = true }
tokio = { workspace = true, features = ["full"] }
tokio-stream = { workspace = true }
wp_api = { path = "../wp_api", features = [ "reqwest-request-executor" ] }
async-trait = { workspace = true }
url = { workspace = true }

[[bin]]
name = "wp_rs_cli"
33 changes: 33 additions & 0 deletions wp_rs_cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,39 @@ wp_rs_cli --help
## Commands

- `discover-login-url`: Tries connecting to the given URL, and prints the library's relevant error message if unable to.
- `fetch-post`: Fetch a post and its comments, supporting WordPress.com (Bearer token) and WordPress.org/Jetpack (Application Password) sites.

### fetch-post examples

```bash
# WordPress.com (Bearer) by post URL (the site is derived automatically from the URL's host)
wp_rs_cli fetch-post \
--url https://example.wordpress.com/2024/07/01/my-post \
--bearer "$WP_BEARER_TOKEN" \
--pretty

# WordPress.com (Bearer) by explicit site and post id
wp_rs_cli fetch-post \
--wpcom-site example.wordpress.com \
--post-id 123 \
--bearer "$WP_BEARER_TOKEN" \
--pretty

# WordPress.org/Jetpack (Application Password) by post URL (auto-discover /wp-json)
wp_rs_cli fetch-post \
--url https://yoursite.com/blog/2024/07/01/my-post \
--username "$WP_USERNAME" \
--password "$WP_APP_PASSWORD" \
--pretty

# WordPress.org/Jetpack (Application Password) by explicit API root and post id
wp_rs_cli fetch-post \
--api-root https://yoursite.com/wp-json \
--post-id 123 \
--username "$WP_USERNAME" \
--password "$WP_APP_PASSWORD" \
--pretty
```

## License

Expand Down
305 changes: 303 additions & 2 deletions wp_rs_cli/src/bin/wp_rs_cli/main.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
use anyhow::Result;
use clap::{Parser, Subcommand};
use anyhow::{Result, anyhow};
use clap::{ArgGroup, Args, Parser, Subcommand};
use colored::Colorize;
use csv::Writer;
use futures::stream::StreamExt;
use std::{fmt::Display, fs::File, sync::Arc, time::Duration};
use url::Url;
use wp_api::{
comments::CommentListParams,
parsed_url::ParsedUrl,
posts::{PostId, PostRetrieveParams},
request::endpoint::WpOrgSiteApiUrlResolver,
wp_com::{WpComBaseUrl, endpoint::WpComDotOrgApiUrlResolver},
};
use wp_api::{
login::url_discovery::{
AutoDiscoveryAttemptFailure, FetchAndParseApiRootFailure, FindApiRootFailure,
Expand All @@ -29,6 +37,67 @@ enum Commands {
input_file: String,
output_file: String,
},
/// Fetch a single post and its comments
FetchPost(FetchPostArgs),
}

// Site-selection and credential options, flattened into `FetchPostArgs`.
//
// NOTE: maintainer notes in this struct use plain `//` comments on purpose: `///` doc
// comments on clap-derived items are used by clap as `--help` text.
//
// How the fields are consumed (see `SiteApiType`):
// - `wpcom_site` / `api_root` pick the API flavour in `SiteApiType::detect_from_args`.
// - `bearer` is only read for WordPress.com sites; `username` / `password` only for
//   wp.org/Jetpack sites (`SiteApiType::auth_provider`).
// - The environment fallbacks named in the help text are applied by `env_or_arg`,
//   not by clap itself.
#[derive(Debug, Args, Clone)]
struct AuthArgs {
    /// WordPress.com site (e.g. example.wordpress.com or numeric ID)
    #[arg(long)]
    wpcom_site: Option<String>,

    /// WordPress.org/Jetpack API root (must end with /wp-json)
    // Parsed with `ParsedUrl::try_from` in `SiteApiType::detect_from_args`.
    #[arg(long)]
    api_root: Option<String>,

    /// Bearer token for WordPress.com (fallback env: WP_BEARER_TOKEN)
    #[arg(long)]
    bearer: Option<String>,

    /// Application Password username for wp.org/Jetpack (fallback env: WP_USERNAME)
    #[arg(long)]
    username: Option<String>,

    /// Application Password for wp.org/Jetpack (fallback env: WP_APP_PASSWORD)
    #[arg(long)]
    password: Option<String>,
}

// Arguments of the `fetch-post` subcommand (`Commands::FetchPost`).
//
// NOTE: maintainer notes in this struct use plain `//` comments on purpose: `///` doc
// comments on clap-derived items are used by clap as `--help` text.
//
// Argument groups declared below:
// - `site_type`: `wpcom_site` and `api_root` (both flattened in from `AuthArgs`) plus `url`.
//   The group is not marked required. Presumably clap's default (non-`multiple`) group
//   semantics make its members mutually exclusive — TODO confirm against the clap
//   version in use.
// - `post_ref`: required, so the post must be identified by `post_id` or by `url`.
// `url` is a member of both groups.
#[derive(Debug, Parser)]
#[command(group(
    ArgGroup::new("site_type")
        .args(["wpcom_site", "api_root", "url"]),
), group(
    ArgGroup::new("post_ref")
        .required(true)
        .args(["post_id", "url"]),
))]
struct FetchPostArgs {
    /// Common authentication parameters
    #[command(flatten)]
    auth: AuthArgs,

    /// Full post URL (alternative to --post-id)
    /// When provided, this URL is used to infer the site (wp.com) or autodiscover the API root (wp.org/Jetpack).
    // Also the input of `resolve_post_id` when no `--post-id` was given.
    #[arg(long)]
    url: Option<String>,

    /// The post ID to fetch
    // Converted from the raw argument string by `parse_post_id`.
    #[arg(long, value_parser = parse_post_id)]
    post_id: Option<PostId>,

    /// Password for the post if it is password-protected
    // Forwarded as `PostRetrieveParams::password` when the post is retrieved.
    #[arg(long)]
    post_password: Option<String>,

    /// Max items per page when fetching comments
    // Forwarded as `CommentListParams::per_page` for the first comments request.
    #[arg(long, default_value_t = 100)]
    per_page: u32,

    /// Output pretty-printed JSON
    // Selects `serde_json::to_string_pretty` over `serde_json::to_string` for the output.
    #[arg(long, default_value_t = false)]
    pretty: bool,
}

#[tokio::main]
Expand All @@ -46,6 +115,9 @@ async fn main() -> Result<()> {
} => {
batch_test_autodiscovery(&login_client, input_file.as_str(), output_file).await?;
}
Commands::FetchPost(args) => {
fetch_post_and_comments(args).await?;
}
}

Ok(())
Expand Down Expand Up @@ -221,3 +293,232 @@ fn csv_error_type(failure: &AutoDiscoveryAttemptFailure) -> String {
},
}
}

/// The flavour of REST API a site is reached through.
///
/// The variant decides both which URL resolver is built (`api_url_resolver`) and which
/// kind of credentials are required (`auth_provider`).
#[derive(Debug)]
enum SiteApiType {
    /// A WordPress.com site; `site` is the value handed to `WpComDotOrgApiUrlResolver`
    /// (either the `--wpcom-site` argument or the host of the post URL).
    WpCom { site: String },
    /// A wp.org/Jetpack site addressed through its API root, which is either supplied
    /// via `--api-root` or found by autodiscovery.
    WpOrg { api_root: Arc<ParsedUrl> },
}

impl SiteApiType {
async fn detect_from_args(
args: &AuthArgs,
url: &Option<String>,
request_executor: &Arc<ReqwestRequestExecutor>,
) -> Result<Self> {
if let Some(site) = &args.wpcom_site {
// Explicit WordPress.com site takes priority
return Ok(SiteApiType::WpCom { site: site.clone() });
}
if let Some(api_root) = &args.api_root {
// Explicit api_root takes priority for wp.org/Jetpack
let parsed = ParsedUrl::try_from(api_root.as_str()).map_err(|_| {
anyhow!("Invalid api_root URL: must be a valid URL ending with /wp-json")
})?;
return Ok(SiteApiType::WpOrg {
api_root: Arc::new(parsed),
});
}
if let Some(url) = url {
// Derive from URL if possible
if let Ok(u) = Url::parse(url.as_str()) {
let host = u.host_str().unwrap_or("");
if host.ends_with(".wordpress.com") {
return Ok(SiteApiType::WpCom {
site: host.to_string(),
});
}

// Attempt autodiscovery of API root from URL
let login_client =
WpLoginClient::new_with_default_middleware_pipeline(request_executor.clone());
match login_client
.api_discovery(url.clone())
.await
.combined_result()
.cloned()
{
Ok(success) => Ok(SiteApiType::WpOrg {
api_root: success.api_root_url,
}),
Err(_) => Err(anyhow!(
"Could not autodiscover API root from URL. Please provide --api-root explicitly."
)),
}
} else {
Err(anyhow!("Invalid URL; could not parse"))
}
} else {
Err(anyhow!(
"Provide either --wpcom-site, or --api-root, or a wordpress.com URL"
))
}
}

fn api_url_resolver(&self) -> Arc<dyn ApiUrlResolver> {
match self {
SiteApiType::WpCom { site } => Arc::new(WpComDotOrgApiUrlResolver::new(
site.clone(),
WpComBaseUrl::Production,
)),
SiteApiType::WpOrg { api_root } => {
Arc::new(WpOrgSiteApiUrlResolver::new(api_root.clone()))
}
}
}

fn auth_provider(&self, args: &AuthArgs) -> Result<Arc<WpAuthenticationProvider>> {
match self {
SiteApiType::WpCom { .. } => {
let token = env_or_arg(&args.bearer, "WP_BEARER_TOKEN").ok_or_else(|| {
anyhow!("Missing bearer token. Provide --bearer or set WP_BEARER_TOKEN")
})?;
Ok(Arc::new(WpAuthenticationProvider::static_with_auth(
WpAuthentication::Bearer { token },
)))
}
SiteApiType::WpOrg { .. } => {
let username = env_or_arg(&args.username, "WP_USERNAME").ok_or_else(|| {
anyhow!("Missing username. Provide --username or set WP_USERNAME")
})?;
let password = env_or_arg(&args.password, "WP_APP_PASSWORD").ok_or_else(|| {
anyhow!(
"Missing application password. Provide --password or set WP_APP_PASSWORD"
)
})?;
Ok(Arc::new(
WpAuthenticationProvider::static_with_username_and_password(username, password),
))
}
}
}
}

/// Resolves a setting from an explicit command-line value, falling back to an
/// environment variable.
///
/// * `value`: the value given on the command line, if any.
/// * `var`: name of the environment variable consulted when `value` is absent.
///
/// Empty strings are treated as "not provided" at both levels. A variable that is set
/// but empty (e.g. `export WP_BEARER_TOKEN=`) would otherwise yield `Some("")`, which
/// skips the caller's "missing credential" error and sends an empty credential instead.
///
/// Returns `None` when neither source holds a non-empty value (this includes an
/// environment variable that is unset or not valid Unicode).
fn env_or_arg(value: &Option<String>, var: &str) -> Option<String> {
    value
        .as_deref()
        .filter(|explicit| !explicit.is_empty())
        .map(str::to_owned)
        .or_else(|| {
            std::env::var(var)
                .ok()
                .filter(|from_env| !from_env.is_empty())
        })
}

/// Assembles a ready-to-use [`WpApiClient`] for the site described by `args` and `url`.
///
/// The site flavour is detected via `SiteApiType::detect_from_args`; the URL resolver
/// and authentication provider are then derived from it. Requests go through a
/// `ReqwestRequestExecutor` created with a 60-second `Duration` and the default
/// middleware pipeline.
///
/// # Errors
///
/// Propagates failures from site detection and from credential lookup.
async fn build_api_client(args: &AuthArgs, url: &Option<String>) -> Result<WpApiClient> {
    /// Notifier for the CLI: on rejected credentials it prints a hint to stderr and
    /// terminates the process with exit code 1.
    #[derive(Debug)]
    struct CliAppNotifier;

    #[async_trait::async_trait]
    impl WpAppNotifier for CliAppNotifier {
        async fn requested_with_invalid_authentication(&self) {
            eprintln!(
                "Authentication failed. Please verify your credentials or token and try again."
            );
            std::process::exit(1);
        }
    }

    let executor = Arc::new(ReqwestRequestExecutor::new(false, Duration::from_secs(60)));
    let pipeline = Arc::new(WpApiMiddlewarePipeline::default());

    // The same executor is reused for autodiscovery and for the client itself.
    let site = SiteApiType::detect_from_args(args, url, &executor).await?;
    let url_resolver: Arc<dyn ApiUrlResolver> = site.api_url_resolver();
    let credentials: Arc<WpAuthenticationProvider> = site.auth_provider(args)?;

    let delegate = WpApiClientDelegate {
        auth_provider: credentials,
        request_executor: executor,
        middleware_pipeline: pipeline,
        app_notifier: Arc::new(CliAppNotifier),
    };
    Ok(WpApiClient::new(url_resolver, delegate))
}

fn parse_post_id(s: &str) -> Result<PostId, String> {
s.parse::<i64>()
.map(PostId)
.map_err(|e| format!("Invalid post id '{s}': {e}"))
}

async fn resolve_post_id(client: &WpApiClient, post_url: &str) -> Result<PostId> {
// Strategy: retrieve by slug via posts list API when possible.
// For wp.com, the resolver requires site context; for wp.org, api_root is given.
// We'll try to parse the URL and extract a last path segment as potential slug.
let url = Url::parse(post_url).map_err(|e| anyhow!("Invalid url: {e}"))?;
let slug_candidate = url
.path_segments()
.and_then(|segs| segs.rev().find(|s| !s.is_empty()))
.map(|s| s.trim_end_matches('/'))
.unwrap_or("")
.to_string();

if slug_candidate.is_empty() {
return Err(anyhow!("Could not parse a slug from url"));
}

// Query posts by slug; returns an array, take first match.
// Using view context to ensure public content shape.
let params = wp_api::posts::PostListParams {
slug: vec![slug_candidate.clone()],
per_page: Some(1),
..Default::default()
};
let resp = client.posts().list_with_view_context(&params).await?;
if let Some(p) = resp.data.into_iter().map(|sp| sp.id).next() {
return Ok(p);
}

Err(anyhow!(
"No post found for slug '{slug}' parsed from URL '{url}'",
slug = slug_candidate,
url = post_url
))
}

/// Implements the `fetch-post` subcommand.
///
/// Builds an API client, determines the post ID (taken from `--post-id`, otherwise
/// resolved from `--url`), retrieves the post in view context, collects every page of
/// its comments, and prints a JSON object with the keys `post` and `comments` to stdout
/// (pretty-printed when `args.pretty` is set).
///
/// # Errors
///
/// Propagates failures from client construction, post-ID resolution, any API request,
/// and JSON serialization.
async fn fetch_post_and_comments(args: FetchPostArgs) -> Result<()> {
    let client = build_api_client(&args.auth, &args.url).await?;

    let post_id = match args.post_id {
        Some(explicit_id) => explicit_id,
        None => {
            let post_url = args
                .url
                .as_ref()
                .ok_or_else(|| anyhow!("Either --post-id or --url must be provided"))?;
            resolve_post_id(&client, post_url.as_str()).await?
        }
    };

    let retrieve_params = PostRetrieveParams {
        password: args.post_password.clone(),
    };
    let post = client
        .posts()
        .retrieve_with_view_context(&post_id, &retrieve_params)
        .await?;

    // First page is requested explicitly; every later page is driven by the
    // `next_page_params` returned with the previous one.
    let mut all_comments = Vec::new();
    let first_page_params = CommentListParams {
        post: vec![post_id],
        per_page: Some(args.per_page),
        ..Default::default()
    };
    let mut page = client
        .comments()
        .list_with_view_context(&first_page_params)
        .await?;
    loop {
        all_comments.extend(page.data);
        match page.next_page_params.take() {
            Some(next_params) => {
                page = client
                    .comments()
                    .list_with_view_context(&next_params)
                    .await?;
            }
            None => break,
        }
    }

    let out = serde_json::json!({
        "post": post,
        "comments": all_comments,
    });
    let rendered = if args.pretty {
        serde_json::to_string_pretty(&out)?
    } else {
        serde_json::to_string(&out)?
    };
    println!("{}", rendered);
    Ok(())
}