Skip to content

Commit da96fe1

Browse files
committed
🔧 Fix video previews and add Motherless parser 🔧
Video previews:
- Remove hardcoded type="video/mp4" to support WebM and other formats
- Add error handling to gracefully hide broken previews
- Add Referer and User-Agent headers to proxy requests

Preferences:
- Remove redirect to home after saving preferences
- Users can now use browser back button to return to search results

Motherless engine:
- Create dedicated parser based on actual HTML structure
- Extract thumbnails (direct src, skip placeholders)
- Extract titles from img alt and link title
- Parse duration and views from plain text elements
- Extract uploader from /m/ links
- Note: Motherless does NOT provide video preview URLs

Debug improvements:
- Track field extraction stats in engines
- Log extraction stats when DEBUG=true

Files changed:
- src/server/service/engine/motherless.go
- src/server/service/parser/motherless.go
1 parent f8e7aaf commit da96fe1

File tree

2 files changed

+204
-4
lines changed

2 files changed

+204
-4
lines changed

‎src/server/service/engine/motherless.go‎

Lines changed: 82 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,26 +3,104 @@ package engine
33

44
import (
55
"context"
6+
"fmt"
7+
"net/url"
68

9+
"github.com/PuerkitoBio/goquery"
710
"github.com/apimgr/vidveil/src/config"
811
"github.com/apimgr/vidveil/src/server/model"
12+
"github.com/apimgr/vidveil/src/server/service/parser"
913
)
1014

1115
// MotherlessEngine searches Motherless
12-
type MotherlessEngine struct{ *BaseEngine }
16+
type MotherlessEngine struct {
17+
*BaseEngine
18+
parser *parser.MotherlessParser
19+
}
1320

1421
// NewMotherlessEngine creates a new Motherless engine
1522
func NewMotherlessEngine(appConfig *config.AppConfig) *MotherlessEngine {
16-
return &MotherlessEngine{NewBaseEngine("motherless", "Motherless", "https://motherless.com", 3, appConfig)}
23+
return &MotherlessEngine{
24+
BaseEngine: NewBaseEngine("motherless", "Motherless", "https://motherless.com", 3, appConfig),
25+
parser: parser.NewMotherlessParser(),
26+
}
1727
}
1828

1929
// Search performs a search on Motherless
2030
func (e *MotherlessEngine) Search(ctx context.Context, query string, page int) ([]model.VideoResult, error) {
21-
searchURL := e.BuildSearchURL("/term/videos/{query}?page={page}", query, page)
22-
return genericSearch(ctx, e.BaseEngine, searchURL, "div.thumb-container.video")
31+
searchURL := fmt.Sprintf("%s/term/videos/%s?page=%d",
32+
e.baseURL, url.QueryEscape(query), page)
33+
34+
resp, err := e.MakeRequest(ctx, searchURL)
35+
if err != nil {
36+
return nil, err
37+
}
38+
defer resp.Body.Close()
39+
40+
doc, err := goquery.NewDocumentFromReader(resp.Body)
41+
if err != nil {
42+
return nil, err
43+
}
44+
45+
var results []model.VideoResult
46+
fieldStats := map[string]int{
47+
"preview": 0,
48+
"thumb": 0,
49+
"quality": 0,
50+
"views": 0,
51+
}
52+
53+
doc.Find(e.parser.ItemSelector()).Each(func(i int, s *goquery.Selection) {
54+
item := e.parser.Parse(s)
55+
if item != nil && item.Title != "" && item.URL != "" && !item.IsPremium {
56+
result := e.convertToResult(item)
57+
results = append(results, result)
58+
59+
// Track field extraction stats
60+
if item.PreviewURL != "" {
61+
fieldStats["preview"]++
62+
}
63+
if item.Thumbnail != "" {
64+
fieldStats["thumb"]++
65+
}
66+
if item.Quality != "" {
67+
fieldStats["quality"]++
68+
}
69+
if item.ViewsCount > 0 {
70+
fieldStats["views"]++
71+
}
72+
}
73+
})
74+
75+
// Log extraction stats for debugging
76+
DebugLogEngineParseResult(e.Name(), len(results), fieldStats)
77+
78+
return results, nil
79+
}
80+
81+
// convertToResult converts VideoItem to model.VideoResult
82+
func (e *MotherlessEngine) convertToResult(item *parser.VideoItem) model.VideoResult {
83+
return model.VideoResult{
84+
ID: GenerateResultID(item.URL, e.Name()),
85+
URL: item.URL,
86+
Title: item.Title,
87+
Thumbnail: item.Thumbnail,
88+
PreviewURL: item.PreviewURL,
89+
DownloadURL: item.DownloadURL,
90+
Duration: item.Duration,
91+
DurationSeconds: item.DurationSeconds,
92+
Views: item.Views,
93+
ViewsCount: item.ViewsCount,
94+
Quality: item.Quality,
95+
Source: e.Name(),
96+
SourceDisplay: e.DisplayName(),
97+
Tags: item.Tags,
98+
Performer: item.Uploader,
99+
}
23100
}
24101

25102
// SupportsFeature returns whether the engine supports a feature
103+
// Note: Motherless does NOT provide video previews in search results
26104
func (e *MotherlessEngine) SupportsFeature(feature Feature) bool {
27105
return feature == FeaturePagination
28106
}
‎src/server/service/parser/motherless.go‎

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
// SPDX-License-Identifier: MIT
2+
package parser
3+
4+
import (
5+
"strings"
6+
7+
"github.com/PuerkitoBio/goquery"
8+
)
9+
10+
// MotherlessParser handles Motherless HTML parsing.
type MotherlessParser struct {
	// BaseURL is the site root used to turn relative video links into
	// absolute URLs.
	BaseURL string
}
14+
15+
// NewMotherlessParser creates a new Motherless parser
16+
func NewMotherlessParser() *MotherlessParser {
17+
return &MotherlessParser{BaseURL: "https://motherless.com"}
18+
}
19+
20+
// ItemSelector returns the CSS selector for video items
21+
// Motherless uses div.thumb-container for video cards
22+
func (p *MotherlessParser) ItemSelector() string {
23+
return "div.thumb-container, div.thumb"
24+
}
25+
26+
// Parse extracts video data from a selection
27+
// Motherless structure: <a href="/CODE"><img src="thumb" alt="title"/></a>
28+
// Note: Motherless does NOT provide video preview URLs in search results
29+
func (p *MotherlessParser) Parse(s *goquery.Selection) *VideoItem {
30+
item := &VideoItem{}
31+
32+
// Get link - the main anchor wrapping the thumbnail
33+
link := s.Find("a").First()
34+
href := ExtractAttr(link, "href")
35+
if href == "" {
36+
return nil
37+
}
38+
item.URL = MakeAbsoluteURL(href, p.BaseURL)
39+
40+
// Get thumbnail - direct src (not lazy loaded)
41+
// Format: https://cdn5-thumbs.motherlessmedia.com/thumbs/CODE-small.jpg
42+
img := s.Find("img").First()
43+
item.Thumbnail = ExtractAttr(img, "src")
44+
45+
// Skip placeholder images
46+
if strings.Contains(item.Thumbnail, "plc.gif") || item.Thumbnail == "" {
47+
// Try data-src as fallback
48+
item.Thumbnail = ExtractAttr(img, "data-src", "data-original")
49+
}
50+
51+
if item.Thumbnail != "" {
52+
item.Thumbnail = MakeAbsoluteURL(item.Thumbnail, "https:")
53+
}
54+
55+
// Get title from img alt or link title
56+
item.Title = ExtractAttr(img, "alt")
57+
if item.Title == "" {
58+
item.Title = ExtractAttr(link, "title")
59+
}
60+
if item.Title == "" {
61+
return nil
62+
}
63+
64+
// Motherless does NOT provide preview URLs in search results
65+
// Leave PreviewURL empty
66+
67+
// Get duration - plain text in the card
68+
// Look for common duration patterns
69+
durElem := s.Find(".duration, .dur, .time, .video-time")
70+
if durElem.Length() > 0 {
71+
item.Duration, item.DurationSeconds = ParseDuration(CleanText(durElem.First().Text()))
72+
}
73+
74+
// Also try finding duration in any span/small element with time format
75+
if item.DurationSeconds == 0 {
76+
s.Find("span, small, div").Each(func(i int, el *goquery.Selection) {
77+
text := CleanText(el.Text())
78+
// Check if text looks like duration (MM:SS or H:MM:SS)
79+
if len(text) >= 4 && len(text) <= 8 && strings.Contains(text, ":") {
80+
dur, secs := ParseDuration(text)
81+
if secs > 0 && item.DurationSeconds == 0 {
82+
item.Duration = dur
83+
item.DurationSeconds = secs
84+
}
85+
}
86+
})
87+
}
88+
89+
// Get views - displayed as text (e.g., "14.7K")
90+
viewsElem := s.Find(".views, .view-count, .stats")
91+
if viewsElem.Length() > 0 {
92+
item.Views, item.ViewsCount = ParseViews(CleanText(viewsElem.First().Text()))
93+
}
94+
95+
// Try to find views in text containing "K" or "M" or "views"
96+
if item.ViewsCount == 0 {
97+
s.Find("span, small, div").Each(func(i int, el *goquery.Selection) {
98+
text := CleanText(el.Text())
99+
textLower := strings.ToLower(text)
100+
if strings.Contains(textLower, "view") ||
101+
(len(text) <= 10 && (strings.Contains(text, "K") || strings.Contains(text, "M"))) {
102+
views, count := ParseViews(text)
103+
if count > 0 && item.ViewsCount == 0 {
104+
item.Views = views
105+
item.ViewsCount = count
106+
}
107+
}
108+
})
109+
}
110+
111+
// Get uploader - linked username
112+
// Format: <a href="/m/Username">
113+
uploaderLink := s.Find("a[href*='/m/'], a[href*='/u/']")
114+
if uploaderLink.Length() > 0 {
115+
item.Uploader = CleanText(uploaderLink.First().Text())
116+
}
117+
118+
// Check for premium content
119+
item.IsPremium = IsPremiumContent(s, item.URL)
120+
121+
return item
122+
}

0 commit comments

Comments
 (0)