@@ -40,39 +40,136 @@ fn _feedparser_rs(m: &Bound<'_, PyModule>) -> PyResult<()> {
4040 Ok ( ( ) )
4141}
4242
43- /// Parse an RSS/Atom/JSON Feed from bytes or string
43+ /// Parse an RSS/Atom/JSON Feed from bytes, string, or URL
44+ ///
45+ /// Automatically detects whether `source` is a URL (http://, https://) or content.
46+ /// For URLs, fetches and parses the feed. For content, parses directly.
47+ ///
48+ /// # Arguments
49+ ///
50+ /// * `source` - URL string, feed content string, or bytes
51+ /// * `etag` - Optional ETag from previous fetch (for URLs with conditional GET)
52+ /// * `modified` - Optional Last-Modified timestamp (for URLs with conditional GET)
53+ /// * `user_agent` - Optional custom User-Agent header (for URLs)
54+ ///
55+ /// # Examples
56+ ///
57+ /// ```python
58+ /// import feedparser_rs
59+ ///
60+ /// # Parse from URL (auto-detected)
61+ /// feed = feedparser_rs.parse("https://example.com/feed.xml")
62+ ///
63+ /// # Parse from content
64+ /// feed = feedparser_rs.parse("<rss>...</rss>")
65+ ///
66+ /// # Parse from URL with caching
67+ /// feed = feedparser_rs.parse(
68+ /// "https://example.com/feed.xml",
69+ /// etag=cached_etag,
70+ /// modified=cached_modified
71+ /// )
72+ /// ```
4473#[ pyfunction]
45- #[ pyo3( signature = ( source, /) ) ]
46- fn parse ( py : Python < ' _ > , source : & Bound < ' _ , PyAny > ) -> PyResult < PyParsedFeed > {
47- parse_with_limits ( py, source, None )
74+ #[ pyo3( signature = ( source, /, etag=None , modified=None , user_agent=None ) ) ]
75+ fn parse (
76+ py : Python < ' _ > ,
77+ source : & Bound < ' _ , PyAny > ,
78+ etag : Option < & str > ,
79+ modified : Option < & str > ,
80+ user_agent : Option < & str > ,
81+ ) -> PyResult < PyParsedFeed > {
82+ parse_internal ( py, source, etag, modified, user_agent, None )
4883}
4984
5085/// Parse with custom resource limits for DoS protection
86+ ///
87+ /// Like `parse()` but allows specifying custom limits for untrusted feeds.
88+ ///
89+ /// # Arguments
90+ ///
91+ /// * `source` - URL string, feed content string, or bytes
92+ /// * `etag` - Optional ETag from previous fetch (for URLs)
93+ /// * `modified` - Optional Last-Modified timestamp (for URLs)
94+ /// * `user_agent` - Optional custom User-Agent header (for URLs)
95+ /// * `limits` - Optional parser limits for DoS protection
96+ ///
97+ /// # Examples
98+ ///
99+ /// ```python
100+ /// import feedparser_rs
101+ ///
102+ /// limits = feedparser_rs.ParserLimits.strict()
103+ ///
104+ /// # Parse from URL with limits
105+ /// feed = feedparser_rs.parse_with_limits(
106+ /// "https://example.com/feed.xml",
107+ /// limits=limits
108+ /// )
109+ ///
110+ /// # Parse from content with limits
111+ /// feed = feedparser_rs.parse_with_limits("<rss>...</rss>", limits=limits)
112+ /// ```
51113#[ pyfunction]
52- #[ pyo3( signature = ( source, limits=None ) ) ]
114+ #[ pyo3( signature = ( source, / , etag= None , modified= None , user_agent= None , limits=None ) ) ]
53115fn parse_with_limits (
54116 py : Python < ' _ > ,
55117 source : & Bound < ' _ , PyAny > ,
118+ etag : Option < & str > ,
119+ modified : Option < & str > ,
120+ user_agent : Option < & str > ,
56121 limits : Option < & PyParserLimits > ,
57122) -> PyResult < PyParsedFeed > {
58- let bytes: Vec < u8 > = if let Ok ( s) = source. extract :: < String > ( ) {
123+ parse_internal ( py, source, etag, modified, user_agent, limits)
124+ }
125+
126+ /// Internal parse function that handles both URL and content sources
127+ fn parse_internal (
128+ py : Python < ' _ > ,
129+ source : & Bound < ' _ , PyAny > ,
130+ etag : Option < & str > ,
131+ modified : Option < & str > ,
132+ user_agent : Option < & str > ,
133+ limits : Option < & PyParserLimits > ,
134+ ) -> PyResult < PyParsedFeed > {
135+ // Try to extract as string first
136+ if let Ok ( s) = source. extract :: < String > ( ) {
137+ // Check if it's a URL
59138 if s. starts_with ( "http://" ) || s. starts_with ( "https://" ) {
60- return Err ( pyo3:: exceptions:: PyNotImplementedError :: new_err (
61- "URL fetching not implemented. Use requests.get(url).content" ,
62- ) ) ;
139+ // Handle URL - requires http feature
140+ #[ cfg( feature = "http" ) ]
141+ {
142+ let parser_limits = limits. map ( |l| l. to_core_limits ( ) ) . unwrap_or_default ( ) ;
143+ let parsed =
144+ core:: parse_url_with_limits ( & s, etag, modified, user_agent, parser_limits)
145+ . map_err ( convert_feed_error) ?;
146+ return PyParsedFeed :: from_core ( py, parsed) ;
147+ }
148+ #[ cfg( not( feature = "http" ) ) ]
149+ {
150+ return Err ( pyo3:: exceptions:: PyNotImplementedError :: new_err (
151+ "URL fetching requires the 'http' feature. Build with: maturin develop --features http" ,
152+ ) ) ;
153+ }
63154 }
64- s. into_bytes ( )
65- } else if let Ok ( b) = source. extract :: < Vec < u8 > > ( ) {
66- b
67- } else {
68- return Err ( pyo3:: exceptions:: PyTypeError :: new_err (
69- "source must be str or bytes" ,
70- ) ) ;
71- } ;
72155
73- let parser_limits = limits. map ( |l| l. to_core_limits ( ) ) . unwrap_or_default ( ) ;
74- let parsed = core:: parse_with_limits ( & bytes, parser_limits) . map_err ( convert_feed_error) ?;
75- PyParsedFeed :: from_core ( py, parsed)
156+ // Parse as content
157+ let parser_limits = limits. map ( |l| l. to_core_limits ( ) ) . unwrap_or_default ( ) ;
158+ let parsed =
159+ core:: parse_with_limits ( s. as_bytes ( ) , parser_limits) . map_err ( convert_feed_error) ?;
160+ return PyParsedFeed :: from_core ( py, parsed) ;
161+ }
162+
163+ // Try to extract as bytes
164+ if let Ok ( b) = source. extract :: < Vec < u8 > > ( ) {
165+ let parser_limits = limits. map ( |l| l. to_core_limits ( ) ) . unwrap_or_default ( ) ;
166+ let parsed = core:: parse_with_limits ( & b, parser_limits) . map_err ( convert_feed_error) ?;
167+ return PyParsedFeed :: from_core ( py, parsed) ;
168+ }
169+
170+ Err ( pyo3:: exceptions:: PyTypeError :: new_err (
171+ "source must be str, bytes, or URL" ,
172+ ) )
76173}
77174
78175/// Detect feed format without full parsing
0 commit comments