Skip to content

Commit aefcdc7

Browse files
committed
feat(py): add FeedParserDict field mapping for backward compatibility
Implement Phase 1 of Python feedparser compatibility improvements:
- Add deprecated field aliases (description→subtitle, tagline→subtitle, modified→updated, copyright→rights, date→updated/published, url→link)
- Add entry aliases (guid→id, description→summary, issued→published, modified→updated, date→updated/published)
- Add container aliases (channel→feed, items→entries)
- Use once_cell::Lazy<HashMap> for O(1) alias lookups
- Add __getattr__ methods to PyFeedMeta, PyEntry, PyParsedFeed
- Add comprehensive Python tests (19 test cases)

This allows users migrating from Python feedparser to access data using familiar deprecated field names, while the modern field names remain the primary API.
1 parent 6168185 commit aefcdc7

File tree

9 files changed

+661
-0
lines changed

9 files changed

+661
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ memchr = "2.7"
2929
mockito = "1.6"
3030
napi = "3.7"
3131
napi-derive = "3.4"
32+
once_cell = "1.20"
3233
pyo3 = "0.27"
3334
quick-xml = "0.38"
3435
regex = "1.11"

crates/feedparser-rs-py/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ crate-type = ["cdylib"]
1818
feedparser-rs = { path = "../feedparser-rs-core" }
1919
pyo3 = { workspace = true, features = ["extension-module", "chrono"] }
2020
chrono = { workspace = true, features = ["clock"] }
21+
once_cell = { workspace = true }
2122

2223
[features]
2324
default = ["http"]
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
//! Python feedparser backward compatibility field mappings.
2+
//!
3+
//! This module provides field alias mappings for deprecated Python feedparser field names.
4+
//! Old field names map to new field names for backward compatibility.
5+
//!
6+
//! Example: `feed.description` → `feed.subtitle`,
7+
//! `entry.guid` → `entry.id`
8+
use std::collections::HashMap;
9+
use once_cell::sync::Lazy;
10+
11+
/// Feed-level field mappings: old name → list of new names (tried in order).
12+
///
13+
/// Some aliases can map to multiple fields (e.g., description → subtitle OR summary).
14+
/// The resolver tries each new field in order until it finds a non-None value.
15+
pub static FEED_FIELD_MAP: Lazy<HashMap<&'static str, Vec<&'static str>>> = Lazy::new(|| {
16+
let mut map = HashMap::new();
17+
18+
// Description aliases
19+
map.insert("description", vec!["subtitle", "summary"]);
20+
map.insert("description_detail", vec!["subtitle_detail", "summary_detail"]);
21+
22+
// Tagline aliases (old Atom 0.3 field)
23+
map.insert("tagline", vec!["subtitle"]);
24+
map.insert("tagline_detail", vec!["subtitle_detail"]);
25+
26+
// Info alias (RSS 1.0)
27+
map.insert("info", vec!["subtitle"]);
28+
map.insert("info_detail", vec!["subtitle_detail"]);
29+
30+
// Copyright alias
31+
map.insert("copyright", vec!["rights"]);
32+
map.insert("copyright_detail", vec!["rights_detail"]);
33+
34+
// Modified alias
35+
map.insert("modified", vec!["updated"]);
36+
map.insert("modified_parsed", vec!["updated_parsed"]);
37+
38+
// Date alias (generic fallback)
39+
map.insert("date", vec!["updated", "published"]);
40+
map.insert("date_parsed", vec!["updated_parsed", "published_parsed"]);
41+
42+
// URL alias
43+
map.insert("url", vec!["link"]);
44+
45+
map
46+
});
47+
48+
/// Entry-level field mappings: old name → list of new names (tried in order).
49+
pub static ENTRY_FIELD_MAP: Lazy<HashMap<&'static str, Vec<&'static str>>> = Lazy::new(|| {
50+
let mut map = HashMap::new();
51+
52+
// GUID alias (RSS)
53+
map.insert("guid", vec!["id"]);
54+
55+
// Description alias
56+
map.insert("description", vec!["summary"]);
57+
map.insert("description_detail", vec!["summary_detail"]);
58+
59+
// Issued alias (old feedparser field)
60+
map.insert("issued", vec!["published"]);
61+
map.insert("issued_parsed", vec!["published_parsed"]);
62+
63+
// Modified alias
64+
map.insert("modified", vec!["updated"]);
65+
map.insert("modified_parsed", vec!["updated_parsed"]);
66+
67+
// Date alias (generic fallback)
68+
map.insert("date", vec!["updated", "published"]);
69+
map.insert("date_parsed", vec!["updated_parsed", "published_parsed"]);
70+
71+
map
72+
});
73+
74+
/// Container-level field mappings for PyParsedFeed.
75+
pub static CONTAINER_FIELD_MAP: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
76+
let mut map = HashMap::new();
77+
78+
// RSS uses <channel>, Atom uses <feed>
79+
map.insert("channel", "feed");
80+
81+
// RSS uses <item>, Atom uses <entry>
82+
map.insert("items", "entries");
83+
84+
map
85+
});
86+
87+
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the ordered candidate list registered for `alias`, if any.
    fn candidates<'m>(
        map: &'m HashMap<&'static str, Vec<&'static str>>,
        alias: &str,
    ) -> Option<&'m [&'static str]> {
        map.get(alias).map(Vec::as_slice)
    }

    #[test]
    fn test_feed_field_map_description() {
        // Order matters: `subtitle` must be tried before `summary`.
        assert_eq!(
            candidates(&FEED_FIELD_MAP, "description"),
            Some(&["subtitle", "summary"][..])
        );
    }

    #[test]
    fn test_feed_field_map_modified() {
        assert_eq!(candidates(&FEED_FIELD_MAP, "modified"), Some(&["updated"][..]));
    }

    #[test]
    fn test_entry_field_map_guid() {
        assert_eq!(candidates(&ENTRY_FIELD_MAP, "guid"), Some(&["id"][..]));
    }

    #[test]
    fn test_entry_field_map_issued() {
        assert_eq!(candidates(&ENTRY_FIELD_MAP, "issued"), Some(&["published"][..]));
    }

    #[test]
    fn test_container_field_map_channel() {
        assert_eq!(CONTAINER_FIELD_MAP.get("channel").copied(), Some("feed"));
    }
}

crates/feedparser-rs-py/src/types/entry.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use feedparser_rs::Entry as CoreEntry;
22
use pyo3::prelude::*;
3+
use pyo3::exceptions::PyAttributeError;
34

5+
use super::compat::ENTRY_FIELD_MAP;
46
use super::common::{PyContent, PyEnclosure, PyLink, PyPerson, PySource, PyTag, PyTextConstruct};
57
use super::datetime::optional_datetime_to_struct_time;
68
use super::geo::PyGeoLocation;
@@ -301,4 +303,58 @@ impl PyEntry {
301303
self.inner.id.as_deref().unwrap_or("no-id")
302304
)
303305
}
306+
307+
/// Provides backward compatibility for deprecated Python feedparser field names.
308+
///
309+
/// Maps old field names to their modern equivalents:
310+
/// - `guid` → `id`
311+
/// - `description` → `summary`
312+
/// - `issued` → `published`
313+
/// - `modified` → `updated`
314+
/// - `date` → `updated` (or `published` as fallback)
315+
///
316+
/// This method is called by Python when normal attribute lookup fails.
317+
fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
318+
// Check if this is a deprecated field name
319+
if let Some(new_names) = ENTRY_FIELD_MAP.get(name) {
320+
// Try each new field name in order
321+
for new_name in new_names {
322+
let value: Option<Py<PyAny>> = match *new_name {
323+
"id" => self.inner.id.as_deref().and_then(|v| {
324+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
325+
}),
326+
"summary" => self.inner.summary.as_deref().and_then(|v| {
327+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
328+
}),
329+
"summary_detail" => self.inner.summary_detail.as_ref().and_then(|tc| {
330+
Py::new(py, PyTextConstruct::from_core(tc.clone())).ok().map(|p: Py<PyTextConstruct>| p.into_any())
331+
}),
332+
"published" => self.inner.published.and_then(|dt| {
333+
dt.to_rfc3339().into_pyobject(py).map(|o| o.unbind().into()).ok()
334+
}),
335+
"published_parsed" => {
336+
optional_datetime_to_struct_time(py, &self.inner.published).ok().flatten()
337+
},
338+
"updated" => self.inner.updated.and_then(|dt| {
339+
dt.to_rfc3339().into_pyobject(py).map(|o| o.unbind().into()).ok()
340+
}),
341+
"updated_parsed" => {
342+
optional_datetime_to_struct_time(py, &self.inner.updated).ok().flatten()
343+
},
344+
_ => None,
345+
};
346+
347+
// If we found a value, return it
348+
if let Some(v) = value {
349+
return Ok(v);
350+
}
351+
}
352+
}
353+
354+
// Field not found - raise AttributeError
355+
Err(PyAttributeError::new_err(format!(
356+
"'Entry' object has no attribute '{}'",
357+
name
358+
)))
359+
}
304360
}

crates/feedparser-rs-py/src/types/feed_meta.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use feedparser_rs::FeedMeta as CoreFeedMeta;
22
use pyo3::prelude::*;
3+
use pyo3::exceptions::PyAttributeError;
34

5+
use super::compat::FEED_FIELD_MAP;
46
use super::common::{PyGenerator, PyImage, PyLink, PyPerson, PyTag, PyTextConstruct};
57
use super::datetime::optional_datetime_to_struct_time;
68
use super::geo::PyGeoLocation;
@@ -252,4 +254,71 @@ impl PyFeedMeta {
252254
self.inner.link.as_deref().unwrap_or("no-link")
253255
)
254256
}
257+
258+
/// Provides backward compatibility for deprecated Python feedparser field names.
259+
///
260+
/// Maps old field names to their modern equivalents:
261+
/// - `description` → `subtitle` (or `summary` as fallback)
262+
/// - `tagline` → `subtitle`
263+
/// - `modified` → `updated`
264+
/// - `copyright` → `rights`
265+
/// - `date` → `updated` (or `published` as fallback)
266+
/// - `url` → `link`
267+
///
268+
/// This method is called by Python when normal attribute lookup fails.
269+
fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
270+
// Check if this is a deprecated field name
271+
if let Some(new_names) = FEED_FIELD_MAP.get(name) {
272+
// Try each new field name in order
273+
for new_name in new_names {
274+
let value: Option<Py<PyAny>> = match *new_name {
275+
"subtitle" => self.inner.subtitle.as_deref().and_then(|v| {
276+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
277+
}),
278+
"subtitle_detail" => self.inner.subtitle_detail.as_ref().and_then(|tc| {
279+
Py::new(py, PyTextConstruct::from_core(tc.clone())).ok().map(|p: Py<PyTextConstruct>| p.into_any())
280+
}),
281+
"summary" => self.inner.subtitle.as_deref().and_then(|v| {
282+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
283+
}),
284+
"summary_detail" => self.inner.subtitle_detail.as_ref().and_then(|tc| {
285+
Py::new(py, PyTextConstruct::from_core(tc.clone())).ok().map(|p: Py<PyTextConstruct>| p.into_any())
286+
}),
287+
"rights" => self.inner.rights.as_deref().and_then(|v| {
288+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
289+
}),
290+
"rights_detail" => self.inner.rights_detail.as_ref().and_then(|tc| {
291+
Py::new(py, PyTextConstruct::from_core(tc.clone())).ok().map(|p: Py<PyTextConstruct>| p.into_any())
292+
}),
293+
"updated" => self.inner.updated.and_then(|dt| {
294+
dt.to_rfc3339().into_pyobject(py).map(|o| o.unbind().into()).ok()
295+
}),
296+
"updated_parsed" => {
297+
optional_datetime_to_struct_time(py, &self.inner.updated).ok().flatten()
298+
},
299+
"published" => self.inner.published.and_then(|dt| {
300+
dt.to_rfc3339().into_pyobject(py).map(|o| o.unbind().into()).ok()
301+
}),
302+
"published_parsed" => {
303+
optional_datetime_to_struct_time(py, &self.inner.published).ok().flatten()
304+
},
305+
"link" => self.inner.link.as_deref().and_then(|v| {
306+
v.into_pyobject(py).map(|o| o.unbind().into()).ok()
307+
}),
308+
_ => None,
309+
};
310+
311+
// If we found a value, return it
312+
if let Some(v) = value {
313+
return Ok(v);
314+
}
315+
}
316+
}
317+
318+
// Field not found - raise AttributeError
319+
Err(PyAttributeError::new_err(format!(
320+
"'FeedMeta' object has no attribute '{}'",
321+
name
322+
)))
323+
}
255324
}

crates/feedparser-rs-py/src/types/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pub mod common;
2+
pub mod compat;
23
pub mod datetime;
34
pub mod entry;
45
pub mod feed_meta;

crates/feedparser-rs-py/src/types/parsed_feed.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use feedparser_rs::ParsedFeed as CoreParsedFeed;
22
use pyo3::prelude::*;
3+
use pyo3::exceptions::PyAttributeError;
34
use pyo3::types::PyDict;
45

6+
use super::compat::CONTAINER_FIELD_MAP;
57
use super::entry::PyEntry;
68
use super::feed_meta::PyFeedMeta;
79

@@ -141,4 +143,41 @@ impl PyParsedFeed {
141143
fn __str__(&self) -> String {
142144
self.__repr__()
143145
}
146+
147+
/// Provides backward compatibility for deprecated Python feedparser container names.
148+
///
149+
/// Maps old container names to their modern equivalents:
150+
/// - `channel` → `feed` (RSS uses <channel>, Atom uses <feed>)
151+
/// - `items` → `entries` (RSS uses <item>, Atom uses <entry>)
152+
///
153+
/// This method is called by Python when normal attribute lookup fails.
154+
fn __getattr__(&self, py: Python<'_>, name: &str) -> PyResult<Py<PyAny>> {
155+
// Check if this is a deprecated container name
156+
if let Some(new_name) = CONTAINER_FIELD_MAP.get(name) {
157+
match *new_name {
158+
"feed" => {
159+
// Convert Py<PyFeedMeta> to Py<PyAny>
160+
Ok(self.feed.clone_ref(py).into())
161+
},
162+
"entries" => {
163+
// Convert Vec<Py<PyEntry>> to Py<PyAny> (as Python list)
164+
let entries: Vec<_> = self.entries.iter().map(|e| e.clone_ref(py)).collect();
165+
match entries.into_pyobject(py) {
166+
Ok(list) => Ok(list.unbind()),
167+
Err(e) => Err(e),
168+
}
169+
},
170+
_ => Err(PyAttributeError::new_err(format!(
171+
"'FeedParserDict' object has no attribute '{}'",
172+
name
173+
))),
174+
}
175+
} else {
176+
// Field not found - raise AttributeError
177+
Err(PyAttributeError::new_err(format!(
178+
"'FeedParserDict' object has no attribute '{}'",
179+
name
180+
)))
181+
}
182+
}
144183
}

0 commit comments

Comments
 (0)