Skip to content

Commit 9fd4ea2

Browse files
committed
Add OKP module
This patch adds an OKP module with functions to help process OKP files. Signed-off-by: Lucas Alvares Gomes <lucasagomes@gmail.com>
1 parent fa72d05 commit 9fd4ea2

File tree

3 files changed

+267
-0
lines changed

3 files changed

+267
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ coverage.xml
5050
.hypothesis/
5151
.pytest_cache/
5252
cover/
53+
tests/test_results
5354

5455
# Translations
5556
*.mo

src/lightspeed_rag_content/okp.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/usr/bin/env python3
2+
3+
"""Utility methods processing OKP files."""
4+
5+
import re
6+
from pathlib import Path
7+
from typing import Any, Generator
8+
import tomllib
9+
10+
from lightspeed_rag_content.metadata_processor import MetadataProcessor
11+
12+
13+
def is_file_related_to_projects(metadata: dict[str, Any], projects: list[str]) -> bool:
    """Tell whether an OKP file's products mention any of the given projects.

    The product names are read from ``metadata["extra"]["portal_product_names"]``
    (a missing key is treated as an empty list). Matching is case-insensitive
    and substring-based: a project matches when its lowercased name occurs
    anywhere inside a lowercased product name.

    Args:
        metadata: Parsed metadata of the OKP file.
        projects: Project names to look for.

    Returns:
        True if at least one project matches at least one product name.
    """
    extra = metadata.get("extra", {})
    lowered_products = [name.lower() for name in extra.get("portal_product_names", [])]
    wanted = [project.lower() for project in projects]

    for project in wanted:
        for product in lowered_products:
            if project in product:
                return True
    return False
23+
24+
25+
def metadata_has_url_and_title(metadata: dict[str, Any]) -> bool:
    """Check if the metadata contains a reference URL and a non-blank title.

    Malformed metadata (an ``extra`` value that is not a table, or a title
    that is not a string) is reported as ``False`` instead of raising, so a
    single odd file cannot abort a scan over a whole directory.

    Args:
        metadata: Parsed metadata of the OKP file.

    Returns:
        True when ``metadata["extra"]`` has a ``reference_url`` key and
        ``metadata["title"]`` is a string with non-whitespace content.
    """
    extra = metadata.get("extra", {})
    if not isinstance(extra, dict) or "reference_url" not in extra:
        return False

    # TOML is typed, so a title such as `title = 123` arrives as an int and
    # has no .strip(); treat anything that is not a string as "no title".
    title = metadata.get("title")
    return isinstance(title, str) and title.strip() != ""
32+
33+
34+
def yield_files_related_to_projects(
    directory: Path, projects: list[str]
) -> Generator[Path, None, None]:
    """Iterate over the OKP files in a directory that belong to given projects.

    Only ``*.md`` files directly inside ``directory`` are considered. A file is
    yielded when its metadata has both a URL and a title and is related to at
    least one of ``projects``. Files whose processing raises ``ValueError`` are
    reported on stdout and skipped.

    Args:
        directory: Directory to scan (non-recursive).
        projects: Project names used to filter the files.

    Yields:
        The path of each matching OKP file.
    """
    for candidate in directory.glob("*.md"):
        try:
            md = parse_metadata(candidate)
            if not metadata_has_url_and_title(md):
                continue
            if not is_file_related_to_projects(md, projects):
                continue
            yield candidate
        except ValueError as err:
            print(f"Skipping file {candidate}: {err}")
47+
48+
49+
def parse_metadata(filepath: Path) -> dict[str, Any]:
50+
"""Extract metadata from the OKP file."""
51+
with open(filepath, "rb") as f:
52+
content = f.read()
53+
54+
# Extract everything between the +++ markers
55+
match = re.search(rb"\+{3,}\s*(.*?)\s*\+{3,}", content, re.S)
56+
if not match:
57+
raise ValueError(f"No metadata found in {filepath}")
58+
59+
metadata_block = match.group(1)
60+
return tomllib.loads(metadata_block.decode("utf-8"))
61+
62+
63+
class OKPMetadataProcessor(MetadataProcessor):
    """MetadataProcessor that reads URL and title from OKP file metadata."""

    def url_function(self, file_path: str) -> Any:
        """Look up the reference URL stored in the OKP file's metadata.

        Args:
            file_path: Location of the OKP file.

        Returns:
            The ``reference_url`` entry of the metadata's ``extra`` table.
        """
        return parse_metadata(file_path)["extra"]["reference_url"]

    def get_file_title(self, file_path: str) -> Any:
        """Look up the title stored in the OKP file's metadata.

        Args:
            file_path: Location of the OKP file.

        Returns:
            The ``title`` entry of the metadata.
        """
        return parse_metadata(file_path)["title"]

tests/test_okp.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# Copyright 2025 Red Hat, Inc.
2+
# All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
5+
# not use this file except in compliance with the License. You may obtain
6+
# a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
# License for the specific language governing permissions and limitations
14+
# under the License.
15+
16+
import unittest
17+
from unittest import mock
18+
19+
from lightspeed_rag_content import okp
20+
21+
22+
class TestOKP(unittest.TestCase):
    """Test cases for OKP utility methods."""

    def test_metadata_has_url_and_title(self):
        """Test that metadata with both URL and title is accepted."""
        md = {
            "title": "Example Title",
            "extra": {
                "reference_url": "https://fake.url/for/example",
            },
        }
        self.assertTrue(okp.metadata_has_url_and_title(md))

    def test_metadata_has_url_and_title_false(self):
        """Test that metadata missing the URL or the title is rejected."""
        # No URL
        md = {
            "title": "Example Title",
            "extra": {},
        }
        self.assertFalse(okp.metadata_has_url_and_title(md))

        # No title
        md = {
            "extra": {
                "reference_url": "https://fake.url/for/example",
            },
        }
        self.assertFalse(okp.metadata_has_url_and_title(md))

    def test_is_file_related_to_projects(self):
        """Test if the file is related to specific projects."""
        metadata = {
            "extra": {
                "portal_product_names": ["Project Foo", "Project Bar"],
            },
        }
        projects = ["foo", "bar"]
        self.assertTrue(okp.is_file_related_to_projects(metadata, projects))

        projects = ["spongebob"]
        self.assertFalse(okp.is_file_related_to_projects(metadata, projects))

    def test_parse_metadata(self):
        """Test parsing metadata from an OKP file."""
        content = b"""
+++
title = '''Example Title'''
path = "/errata/FAKE-1234"
template = "erratum.html"
[extra]
document_kind="errata"
original_title='''FAKE-1234 - Bugs in fake project'''
solr_index="true"
modified="2003-02-05T00:00:00Z"
issued="2003-02-06T00:00:00Z"
id="FAKE-1234"
reference_url="https://fake.url/for/example"
view_uri="/errata/FAKE-1234"
portal_advisory_type="Bug Fix Advisory"
portal_synopsis='''Bugs in fake project'''
portal_severity="None"
portal_product_names=["Product Foo","Product Bar"]
portal_product_filter=["Product Foo|Product Bar|2|ia64","Product Foo|Product Bar|2|ia64"]
+++
"""

        m = mock.mock_open(read_data=content)
        with mock.patch("builtins.open", m):
            metadata = okp.parse_metadata("fake_file.md")

        # Check if the metadata is parsed correctly
        expected_metadata = {
            "title": "Example Title",
            "path": "/errata/FAKE-1234",
            "template": "erratum.html",
            "extra": {
                "document_kind": "errata",
                "original_title": "FAKE-1234 - Bugs in fake project",
                "solr_index": "true",
                "modified": "2003-02-05T00:00:00Z",
                "issued": "2003-02-06T00:00:00Z",
                "id": "FAKE-1234",
                "reference_url": "https://fake.url/for/example",
                "view_uri": "/errata/FAKE-1234",
                "portal_advisory_type": "Bug Fix Advisory",
                "portal_synopsis": "Bugs in fake project",
                "portal_severity": "None",
                "portal_product_names": ["Product Foo", "Product Bar"],
                "portal_product_filter": [
                    "Product Foo|Product Bar|2|ia64",
                    "Product Foo|Product Bar|2|ia64",
                ],
            },
        }
        self.assertEqual(metadata, expected_metadata)

    def test_yield_files_related_to_projects(self):
        """Test yielding files related to specific projects."""
        directory = mock.MagicMock()
        directory.glob.return_value = [
            "file1.md",
            "file2.md",
            "file3.md",  # Should be ignored, missing reference_url
        ]

        parsed_metadata = [
            {
                "title": "File 1",
                "extra": {
                    "reference_url": "https://example.com/file1",
                    "portal_product_names": ["Project Foo"],
                },
            },
            {
                "title": "File 2",
                "extra": {
                    "reference_url": "https://example.com/file2",
                    "portal_product_names": ["Project Bar"],
                },
            },
            {
                "title": "File 3",
                "extra": {
                    "portal_product_names": ["Project Baz"],
                },
            },
        ]

        projects = ["foo", "bar"]
        # Patch through a context manager so the real parse_metadata is
        # restored afterwards; assigning the mock directly onto the module
        # would leak it into every test that runs later in the same process.
        with mock.patch.object(
            okp, "parse_metadata", side_effect=parsed_metadata
        ) as mock_parse_metadata:
            files = list(okp.yield_files_related_to_projects(directory, projects))

        # Check that the correct files are yielded
        self.assertEqual(len(files), 2)
        self.assertIn("file1.md", files)
        self.assertIn("file2.md", files)

        # Check that parse_metadata was called with the correct file paths
        mock_parse_metadata.assert_any_call("file1.md")
        mock_parse_metadata.assert_any_call("file2.md")
        mock_parse_metadata.assert_any_call("file3.md")
164+
165+
166+
@mock.patch(
    "lightspeed_rag_content.okp.parse_metadata",
    return_value={
        "title": "Test Title",
        "extra": {
            "reference_url": "https://example.com",
        },
    },
)
class TestOKPMetadataProcessor(unittest.TestCase):
    """Test cases for OKPMetadataProcessor class.

    The class-level patch replaces ``okp.parse_metadata`` for every test
    method and passes the mock in as the extra ``mock_parse_metadata``
    argument.
    """

    def setUp(self):
        """Set up the test case."""
        self.okp_mp = okp.OKPMetadataProcessor()

    def test_url_function(self, mock_parse_metadata):
        """Test the URL function of OKPMetadataProcessor."""
        file_path = "/fake/path/errata_file.md"
        expected_url = "https://example.com"
        self.assertEqual(self.okp_mp.url_function(file_path), expected_url)
        # Make sure the metadata was read from the file that was asked for
        mock_parse_metadata.assert_called_once_with(file_path)

    def test_get_file_title(self, mock_parse_metadata):
        """Test the get_file_title function of OKPMetadataProcessor."""
        file_path = "/fake/path/errata_file.md"
        expected_title = "Test Title"
        self.assertEqual(self.okp_mp.get_file_title(file_path), expected_title)
        # Make sure the metadata was read from the file that was asked for
        mock_parse_metadata.assert_called_once_with(file_path)

0 commit comments

Comments
 (0)