1+ /*
2+ * SkyTube
3+ * Copyright (C) 2026 Zsombor Gegesy
4+ *
5+ * This program is free software: you can redistribute it and/or modify
6+ * it under the terms of the GNU General Public License as published by
7+ * the Free Software Foundation (version 3 of the License).
8+ *
9+ * This program is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+ * GNU General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU General Public License
15+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16+ */
17+ package free .rm .skytube .businessobjects .opml ;
18+
19+ import org .xmlpull .v1 .XmlPullParser ;
20+ import org .xmlpull .v1 .XmlPullParserException ;
21+ import org .xmlpull .v1 .XmlPullParserFactory ;
22+
23+ import java .io .ByteArrayInputStream ;
24+ import java .io .IOException ;
25+ import java .io .InputStream ;
26+ import java .util .ArrayList ;
27+ import java .util .List ;
28+ import java .util .regex .Matcher ;
29+ import java .util .regex .Pattern ;
30+
31+ import javax .annotation .Nullable ;
32+
33+ /**
34+ * OPML parser for importing YouTube subscriptions from OPML files.
35+ * Provides static methods for parsing OPML content without requiring Android context.
36+ */
37+ public class OpmlParser {
38+
39+ // Patterns for extracting YouTube channel IDs from various URL formats
40+ private static final Pattern YOUTUBE_CHANNEL_PATTERN = Pattern .compile (".*youtube\\ .com/(?:user/|channel/|c/)?([^&]+)" );
41+ private static final Pattern CHANNEL_ID_PATTERN = Pattern .compile (".*channel_id=([^&]+)" );
42+
43+ /**
44+ * Represents a parsed YouTube channel from OPML
45+ */
46+ public static class ParsedChannel {
47+ private final String channelId ;
48+ private final String title ;
49+ private final String sourceUrl ;
50+
51+ public ParsedChannel (String channelId , String title , String sourceUrl ) {
52+ this .channelId = channelId ;
53+ this .title = title ;
54+ this .sourceUrl = sourceUrl ;
55+ }
56+
57+ public String getChannelId () {
58+ return channelId ;
59+ }
60+
61+ public String getTitle () {
62+ return title ;
63+ }
64+
65+ public String getSourceUrl () {
66+ return sourceUrl ;
67+ }
68+
69+ @ Override
70+ public String toString () {
71+ return title + " (" + channelId + ") from " + sourceUrl ;
72+ }
73+
74+ @ Override
75+ public boolean equals (Object o ) {
76+ if (this == o ) return true ;
77+ if (o == null || getClass () != o .getClass ()) return false ;
78+ ParsedChannel that = (ParsedChannel ) o ;
79+ return channelId .equals (that .channelId ) &&
80+ title .equals (that .title ) &&
81+ sourceUrl .equals (that .sourceUrl );
82+ }
83+
84+ @ Override
85+ public int hashCode () {
86+ return channelId .hashCode () + title .hashCode () + sourceUrl .hashCode ();
87+ }
88+ }
89+
90+ /**
91+ * Parses OPML content from an InputStream and returns a list of parsed channels.
92+ *
93+ * @param inputStream InputStream containing OPML data
94+ * @return List of parsed channels
95+ * @throws IOException If there's an error reading the input
96+ * @throws XmlPullParserException If there's an error parsing the XML
97+ */
98+ public static List <ParsedChannel > parseOpml (InputStream inputStream ) throws IOException , XmlPullParserException {
99+ List <ParsedChannel > channels = new ArrayList <>();
100+
101+
102+ try {
103+ XmlPullParserFactory factory = XmlPullParserFactory .newInstance ();
104+ XmlPullParser parser = factory .newPullParser ();
105+ parser .setInput (inputStream , null );
106+
107+ int event = parser .getEventType ();
108+ while (event != XmlPullParser .END_DOCUMENT ) {
109+ if (event == XmlPullParser .START_TAG && "outline" .equals (parser .getName ())) {
110+ ParsedChannel parsedChannel = parseOutlineTag (parser );
111+ if (parsedChannel != null ) {
112+ channels .add (parsedChannel );
113+ }
114+ }
115+ event = parser .next ();
116+ }
117+ } finally {
118+ if (inputStream != null ) {
119+ try {
120+ inputStream .close ();
121+ } catch (IOException e ) {
122+ // Ignore close exceptions
123+ }
124+ }
125+ }
126+
127+ return channels ;
128+ }
129+
130+ /**
131+ * Parses an outline tag and extracts channel information if it's a YouTube channel.
132+ *
133+ * @param parser the XML pull parser positioned at a start tag
134+ * @return a parsed channel or null
135+ */
136+ private static @ Nullable ParsedChannel parseOutlineTag (XmlPullParser parser ) {
137+ String xmlUrl = parser .getAttributeValue (null , "xmlUrl" );
138+ String htmlUrl = parser .getAttributeValue (null , "htmlUrl" );
139+ String title = parser .getAttributeValue (null , "text" ); // Use "text" attribute, not "title"
140+ String type = parser .getAttributeValue (null , "type" );
141+
142+ // Skip if this is not a YouTube-related outline (e.g., folders)
143+ if (type != null && !"rss" .equals (type )) {
144+ return null ;
145+ }
146+
147+ String channelId = null ;
148+ String sourceUrl = null ;
149+
150+ // Try to extract from xmlUrl first
151+ if (xmlUrl != null ) {
152+ channelId = tryExtractChannelId (xmlUrl , CHANNEL_ID_PATTERN , YOUTUBE_CHANNEL_PATTERN );
153+ if (channelId != null ) {
154+ sourceUrl = xmlUrl ;
155+ }
156+ }
157+
158+ // Fallback to htmlUrl if xmlUrl didn't yield a channel ID
159+ if (channelId == null && htmlUrl != null ) {
160+ channelId = tryExtractChannelId (htmlUrl , YOUTUBE_CHANNEL_PATTERN , null );
161+ if (channelId != null ) {
162+ sourceUrl = htmlUrl ;
163+ }
164+ }
165+
166+ // Add channel if we found an ID (title can be empty but not null)
167+ if (channelId != null ) {
168+ // Use empty string if title is null
169+ if (title == null ) {
170+ title = "" ;
171+ }
172+ return new ParsedChannel (channelId , title , sourceUrl );
173+ }
174+ return null ;
175+ }
176+
177+ /**
178+ * Attempts to extract a channel ID from a URL using the provided patterns.
179+ */
180+ private static String tryExtractChannelId (String url , Pattern primaryPattern , Pattern fallbackPattern ) {
181+ Matcher matcher = primaryPattern .matcher (url );
182+ if (matcher .find ()) {
183+ String channelId = matcher .group (1 );
184+ // Clean query parameters from channel ID
185+ int questionMark = channelId .indexOf ('?' );
186+ if (questionMark != -1 ) {
187+ channelId = channelId .substring (0 , questionMark );
188+ }
189+ return channelId ;
190+ }
191+
192+ // Try fallback pattern if provided
193+ if (fallbackPattern != null ) {
194+ matcher = fallbackPattern .matcher (url );
195+ if (matcher .find ()) {
196+ String channelId = matcher .group (1 );
197+ // Clean query parameters from channel ID
198+ int questionMark = channelId .indexOf ('?' );
199+ if (questionMark != -1 ) {
200+ channelId = channelId .substring (0 , questionMark );
201+ }
202+ return channelId ;
203+ }
204+ }
205+
206+ return null ;
207+ }
208+
209+ /**
210+ * Convenience method that parses OPML from a string.
211+ *
212+ * @param opmlString String containing OPML data
213+ * @return List of parsed channels
214+ * @throws IOException If there's an error reading the input
215+ * @throws XmlPullParserException If there's an error parsing the XML
216+ */
217+ public static List <ParsedChannel > parseOpml (String opmlString ) throws IOException , XmlPullParserException {
218+ // Using "UTF-8" is intentional, so we can keep the same code in SkytubeLegacy too.
219+ try (InputStream inputStream = new ByteArrayInputStream (opmlString .getBytes ("UTF-8" ))) {
220+ return parseOpml (inputStream );
221+ }
222+ }
223+ }
0 commit comments