diff --git a/Cargo.lock b/Cargo.lock index e45328ecd..8ecf868ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4799,6 +4799,7 @@ name = "wordpress-rs" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "clap", "colored", "csv", @@ -4807,6 +4808,7 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", + "url", "wp_api", ] diff --git a/wp_rs_cli/Cargo.toml b/wp_rs_cli/Cargo.toml index b32f615b6..e4fcc3a9d 100644 --- a/wp_rs_cli/Cargo.toml +++ b/wp_rs_cli/Cargo.toml @@ -15,6 +15,8 @@ serde_json = { workspace = true } tokio = { workspace = true, features = ["full"] } tokio-stream = { workspace = true } wp_api = { path = "../wp_api", features = [ "reqwest-request-executor" ] } +async-trait = { workspace = true } +url = { workspace = true } [[bin]] name = "wp_rs_cli" diff --git a/wp_rs_cli/README.md b/wp_rs_cli/README.md index 920cd880a..9e365df6b 100644 --- a/wp_rs_cli/README.md +++ b/wp_rs_cli/README.md @@ -32,6 +32,39 @@ wp_rs_cli --help ## Commands - `discover-login-url`: Tries connecting to the given URL, and prints the library's relevant error message if unable to. +- `fetch-post`: Fetch a post and its comments, supporting WordPress.com (Bearer token) and WordPress.org/Jetpack (Application Password) sites. 
+ +### fetch-post examples + +```bash +# WordPress.com (Bearer) by post URL (auto-derive site) +wp_rs_cli fetch-post \ + --url https://example.wordpress.com/2024/07/01/my-post \ + --bearer "$WP_BEARER_TOKEN" \ + --pretty + +# WordPress.com (Bearer) by explicit site and post id +wp_rs_cli fetch-post \ + --wpcom-site example.wordpress.com \ + --post-id 123 \ + --bearer "$WP_BEARER_TOKEN" \ + --pretty + +# WordPress.org/Jetpack (Application Password) by post URL (auto-discover /wp-json) +wp_rs_cli fetch-post \ + --url https://yoursite.com/blog/2024/07/01/my-post \ + --username "$WP_USERNAME" \ + --password "$WP_APP_PASSWORD" \ + --pretty + +# WordPress.org/Jetpack (Application Password) by explicit API root and post id +wp_rs_cli fetch-post \ + --api-root https://yoursite.com/wp-json \ + --post-id 123 \ + --username "$WP_USERNAME" \ + --password "$WP_APP_PASSWORD" \ + --pretty +``` ## License diff --git a/wp_rs_cli/src/bin/wp_rs_cli/main.rs b/wp_rs_cli/src/bin/wp_rs_cli/main.rs index c6e09cd29..a9dc1ce25 100644 --- a/wp_rs_cli/src/bin/wp_rs_cli/main.rs +++ b/wp_rs_cli/src/bin/wp_rs_cli/main.rs @@ -1,9 +1,17 @@ -use anyhow::Result; -use clap::{Parser, Subcommand}; +use anyhow::{Result, anyhow}; +use clap::{ArgGroup, Args, Parser, Subcommand}; use colored::Colorize; use csv::Writer; use futures::stream::StreamExt; use std::{fmt::Display, fs::File, sync::Arc, time::Duration}; +use url::Url; +use wp_api::{ + comments::CommentListParams, + parsed_url::ParsedUrl, + posts::{PostId, PostRetrieveParams}, + request::endpoint::WpOrgSiteApiUrlResolver, + wp_com::{WpComBaseUrl, endpoint::WpComDotOrgApiUrlResolver}, +}; use wp_api::{ login::url_discovery::{ AutoDiscoveryAttemptFailure, FetchAndParseApiRootFailure, FindApiRootFailure, @@ -29,6 +37,67 @@ enum Commands { input_file: String, output_file: String, }, + /// Fetch a single post and its comments + FetchPost(FetchPostArgs), +} + +#[derive(Debug, Args, Clone)] +struct AuthArgs { + /// WordPress.com site (e.g. 
example.wordpress.com or numeric ID) + #[arg(long)] + wpcom_site: Option, + + /// WordPress.org/Jetpack API root (must end with /wp-json) + #[arg(long)] + api_root: Option, + + /// Bearer token for WordPress.com (fallback env: WP_BEARER_TOKEN) + #[arg(long)] + bearer: Option, + + /// Application Password username for wp.org/Jetpack (fallback env: WP_USERNAME) + #[arg(long)] + username: Option, + + /// Application Password for wp.org/Jetpack (fallback env: WP_APP_PASSWORD) + #[arg(long)] + password: Option, +} + +#[derive(Debug, Parser)] +#[command(group( + ArgGroup::new("site_type") + .args(["wpcom_site", "api_root", "url"]), +), group( + ArgGroup::new("post_ref") + .required(true) + .args(["post_id", "url"]), +))] +struct FetchPostArgs { + /// Common authentication parameters + #[command(flatten)] + auth: AuthArgs, + + /// Full post URL (alternative to --post-id) + /// When provided, this URL is used to infer the site (wp.com) or autodiscover the API root (wp.org/Jetpack). + #[arg(long)] + url: Option, + + /// The post ID to fetch + #[arg(long, value_parser = parse_post_id)] + post_id: Option, + + /// Password for the post if it is password-protected + #[arg(long)] + post_password: Option, + + /// Max items per page when fetching comments + #[arg(long, default_value_t = 100)] + per_page: u32, + + /// Output pretty-printed JSON + #[arg(long, default_value_t = false)] + pretty: bool, } #[tokio::main] @@ -46,6 +115,9 @@ async fn main() -> Result<()> { } => { batch_test_autodiscovery(&login_client, input_file.as_str(), output_file).await?; } + Commands::FetchPost(args) => { + fetch_post_and_comments(args).await?; + } } Ok(()) @@ -221,3 +293,232 @@ fn csv_error_type(failure: &AutoDiscoveryAttemptFailure) -> String { }, } } + +#[derive(Debug)] +enum SiteApiType { + WpCom { site: String }, + WpOrg { api_root: Arc }, +} + +impl SiteApiType { + async fn detect_from_args( + args: &AuthArgs, + url: &Option, + request_executor: &Arc, + ) -> Result { + if let Some(site) = 
&args.wpcom_site { + // Explicit WordPress.com site takes priority + return Ok(SiteApiType::WpCom { site: site.clone() }); + } + if let Some(api_root) = &args.api_root { + // Explicit api_root takes priority for wp.org/Jetpack + let parsed = ParsedUrl::try_from(api_root.as_str()).map_err(|_| { + anyhow!("Invalid api_root URL: must be a valid URL ending with /wp-json") + })?; + return Ok(SiteApiType::WpOrg { + api_root: Arc::new(parsed), + }); + } + if let Some(url) = url { + // Derive from URL if possible + if let Ok(u) = Url::parse(url.as_str()) { + let host = u.host_str().unwrap_or(""); + if host.ends_with(".wordpress.com") { + return Ok(SiteApiType::WpCom { + site: host.to_string(), + }); + } + + // Attempt autodiscovery of API root from URL + let login_client = + WpLoginClient::new_with_default_middleware_pipeline(request_executor.clone()); + match login_client + .api_discovery(url.clone()) + .await + .combined_result() + .cloned() + { + Ok(success) => Ok(SiteApiType::WpOrg { + api_root: success.api_root_url, + }), + Err(_) => Err(anyhow!( + "Could not autodiscover API root from URL. Please provide --api-root explicitly." + )), + } + } else { + Err(anyhow!("Invalid URL; could not parse")) + } + } else { + Err(anyhow!( + "Provide either --wpcom-site, or --api-root, or a wordpress.com URL" + )) + } + } + + fn api_url_resolver(&self) -> Arc { + match self { + SiteApiType::WpCom { site } => Arc::new(WpComDotOrgApiUrlResolver::new( + site.clone(), + WpComBaseUrl::Production, + )), + SiteApiType::WpOrg { api_root } => { + Arc::new(WpOrgSiteApiUrlResolver::new(api_root.clone())) + } + } + } + + fn auth_provider(&self, args: &AuthArgs) -> Result> { + match self { + SiteApiType::WpCom { .. } => { + let token = env_or_arg(&args.bearer, "WP_BEARER_TOKEN").ok_or_else(|| { + anyhow!("Missing bearer token. 
Provide --bearer or set WP_BEARER_TOKEN") + })?; + Ok(Arc::new(WpAuthenticationProvider::static_with_auth( + WpAuthentication::Bearer { token }, + ))) + } + SiteApiType::WpOrg { .. } => { + let username = env_or_arg(&args.username, "WP_USERNAME").ok_or_else(|| { + anyhow!("Missing username. Provide --username or set WP_USERNAME") + })?; + let password = env_or_arg(&args.password, "WP_APP_PASSWORD").ok_or_else(|| { + anyhow!( + "Missing application password. Provide --password or set WP_APP_PASSWORD" + ) + })?; + Ok(Arc::new( + WpAuthenticationProvider::static_with_username_and_password(username, password), + )) + } + } + } +} + +fn env_or_arg(value: &Option, var: &str) -> Option { + value.clone().or_else(|| std::env::var(var).ok()) +} + +async fn build_api_client(args: &AuthArgs, url: &Option) -> Result { + let request_executor = Arc::new(ReqwestRequestExecutor::new(false, Duration::from_secs(60))); + let middleware_pipeline = Arc::new(WpApiMiddlewarePipeline::default()); + // Determine resolver and auth provider + let api_type = SiteApiType::detect_from_args(args, url, &request_executor).await?; + let resolver: Arc = api_type.api_url_resolver(); + let auth_provider: Arc = api_type.auth_provider(args)?; + + #[derive(Debug)] + struct CliAppNotifier; + #[async_trait::async_trait] + impl WpAppNotifier for CliAppNotifier { + async fn requested_with_invalid_authentication(&self) { + eprintln!( + "Authentication failed. Please verify your credentials or token and try again." + ); + std::process::exit(1); + } + } + + Ok(WpApiClient::new( + resolver, + WpApiClientDelegate { + auth_provider, + request_executor, + middleware_pipeline, + app_notifier: Arc::new(CliAppNotifier), + }, + )) +} + +fn parse_post_id(s: &str) -> Result { + s.parse::() + .map(PostId) + .map_err(|e| format!("Invalid post id '{s}': {e}")) +} + +async fn resolve_post_id(client: &WpApiClient, post_url: &str) -> Result { + // Strategy: retrieve by slug via posts list API when possible. 
+ // For wp.com, the resolver requires site context; for wp.org, api_root is given. + // We'll try to parse the URL and extract a last path segment as potential slug. + let url = Url::parse(post_url).map_err(|e| anyhow!("Invalid url: {e}"))?; + let slug_candidate = url + .path_segments() + .and_then(|segs| segs.rev().find(|s| !s.is_empty())) + .map(|s| s.trim_end_matches('/')) + .unwrap_or("") + .to_string(); + + if slug_candidate.is_empty() { + return Err(anyhow!("Could not parse a slug from url")); + } + + // Query posts by slug; returns an array, take first match. + // Using view context to ensure public content shape. + let params = wp_api::posts::PostListParams { + slug: vec![slug_candidate.clone()], + per_page: Some(1), + ..Default::default() + }; + let resp = client.posts().list_with_view_context(&params).await?; + if let Some(p) = resp.data.into_iter().map(|sp| sp.id).next() { + return Ok(p); + } + + Err(anyhow!( + "No post found for slug '{slug}' parsed from URL '{url}'", + slug = slug_candidate, + url = post_url + )) +} + +async fn fetch_post_and_comments(args: FetchPostArgs) -> Result<()> { + let client = build_api_client(&args.auth, &args.url).await?; + + let post_id = if let Some(id) = args.post_id { + id + } else { + let post_url = args + .url + .as_ref() + .ok_or_else(|| anyhow!("Either --post-id or --url must be provided"))?; + resolve_post_id(&client, post_url.as_str()).await? 
+ }; + + let post = client + .posts() + .retrieve_with_view_context( + &post_id, + &PostRetrieveParams { + password: args.post_password.clone(), + }, + ) + .await?; + + let mut all_comments = Vec::new(); + let mut page = client + .comments() + .list_with_view_context(&CommentListParams { + post: vec![post_id], + per_page: Some(args.per_page), + ..Default::default() + }) + .await?; + all_comments.extend(page.data); + while let Some(next_params) = page.next_page_params.take() { + page = client + .comments() + .list_with_view_context(&next_params) + .await?; + all_comments.extend(page.data); + } + + let out = serde_json::json!({ + "post": post, + "comments": all_comments, + }); + if args.pretty { + println!("{}", serde_json::to_string_pretty(&out)?); + } else { + println!("{}", serde_json::to_string(&out)?); + } + Ok(()) +}