🔧 Fix video previews and add Motherless parser 🔧

casjay · casjay · commit da96fe18f8c6 · 2026-01-31T19:02:41.000-05:00
Video previews:
- Remove hardcoded type="video/mp4" to support WebM and other formats
- Add error handling to gracefully hide broken previews
- Add Referer and User-Agent headers to proxy requests
Preferences:
- Remove redirect to home after saving preferences
- Users can now use browser back button to return to search results
Motherless engine:
- Create dedicated parser based on actual HTML structure
- Extract thumbnails (direct src, skip placeholders)
- Extract titles from img alt and link title
- Parse duration and views from plain text elements
- Extract uploader from the first /m/ (or /u/) link in the card
- Note: Motherless does NOT provide video preview URLs in search results
Debug improvements:
- Track field extraction stats in engines
- Log extraction stats when DEBUG=true

src/server/service/engine/motherless.go
src/server/service/parser/motherless.go
diff --git a/src/server/service/engine/motherless.go b/src/server/service/engine/motherless.go
@@ -3,26 +3,104 @@ package engine
 
 import (
 	"context"
+	"fmt"
+	"net/url"
 
+	"github.com/PuerkitoBio/goquery"
 	"github.com/apimgr/vidveil/src/config"
 	"github.com/apimgr/vidveil/src/server/model"
+	"github.com/apimgr/vidveil/src/server/service/parser"
 )
 
 // MotherlessEngine searches Motherless
-type MotherlessEngine struct{ *BaseEngine }
+type MotherlessEngine struct {
+	*BaseEngine                     // embedded base engine; provides the baseURL read by Search
+	parser *parser.MotherlessParser // supplies the item selector and per-card parsing used by Search
+}
 
 // NewMotherlessEngine creates a new Motherless engine
 func NewMotherlessEngine(appConfig *config.AppConfig) *MotherlessEngine {
-	return &MotherlessEngine{NewBaseEngine("motherless", "Motherless", "https://motherless.com", 3, appConfig)}
+	return &MotherlessEngine{
+		BaseEngine: NewBaseEngine("motherless", "Motherless", "https://motherless.com", 3, appConfig),
+		parser:     parser.NewMotherlessParser(), // dedicated parser for Motherless result cards
+	}
 }
 
 // Search performs a search on Motherless
 func (e *MotherlessEngine) Search(ctx context.Context, query string, page int) ([]model.VideoResult, error) {
-	searchURL := e.BuildSearchURL("/term/videos/{query}?page={page}", query, page)
-	return genericSearch(ctx, e.BaseEngine, searchURL, "div.thumb-container.video")
+	searchURL := fmt.Sprintf("%s/term/videos/%s?page=%d",
+		e.baseURL, url.QueryEscape(query), page) // NOTE(review): QueryEscape writes spaces as "+" inside a path segment; confirm the site expects that rather than url.PathEscape
+
+	resp, err := e.MakeRequest(ctx, searchURL)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close() // NOTE(review): resp.StatusCode is never inspected here; confirm MakeRequest rejects non-2xx responses
+
+	doc, err := goquery.NewDocumentFromReader(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	var results []model.VideoResult // stays nil when no card qualifies
+	fieldStats := map[string]int{
+		"preview": 0,
+		"thumb":   0,
+		"quality": 0,
+		"views":   0,
+	}
+
+	doc.Find(e.parser.ItemSelector()).Each(func(i int, s *goquery.Selection) {
+		item := e.parser.Parse(s)
+		if item != nil && item.Title != "" && item.URL != "" && !item.IsPremium { // keep only parsed, titled, linked, non-premium cards
+			result := e.convertToResult(item)
+			results = append(results, result)
+
+			// Count, over accepted results only, how many carry each optional field; "preview" and "quality" can only rise if the parser sets PreviewURL/Quality
+			if item.PreviewURL != "" {
+				fieldStats["preview"]++
+			}
+			if item.Thumbnail != "" {
+				fieldStats["thumb"]++
+			}
+			if item.Quality != "" {
+				fieldStats["quality"]++
+			}
+			if item.ViewsCount > 0 {
+				fieldStats["views"]++
+			}
+		}
+	})
+
+	// Log extraction stats for debugging (result count plus the per-field counters above)
+	DebugLogEngineParseResult(e.Name(), len(results), fieldStats)
+
+	return results, nil
+}
+
+// convertToResult copies a parsed parser.VideoItem into a model.VideoResult, stamping it with this engine's name and display name
+func (e *MotherlessEngine) convertToResult(item *parser.VideoItem) model.VideoResult {
+	return model.VideoResult{
+		ID:              GenerateResultID(item.URL, e.Name()), // derived from the item URL and the engine name
+		URL:             item.URL,
+		Title:           item.Title,
+		Thumbnail:       item.Thumbnail,
+		PreviewURL:      item.PreviewURL,
+		DownloadURL:     item.DownloadURL,
+		Duration:        item.Duration,
+		DurationSeconds: item.DurationSeconds,
+		Views:           item.Views,
+		ViewsCount:      item.ViewsCount,
+		Quality:         item.Quality,
+		Source:          e.Name(),
+		SourceDisplay:   e.DisplayName(),
+		Tags:            item.Tags,
+		Performer:       item.Uploader, // the parsed uploader is surfaced as the result's performer
+	}
 }
 
 // SupportsFeature returns whether the engine supports a feature
+// Only FeaturePagination is reported; Motherless does NOT provide video previews in search results
 func (e *MotherlessEngine) SupportsFeature(feature Feature) bool {
 	return feature == FeaturePagination
 }
diff --git a/src/server/service/parser/motherless.go b/src/server/service/parser/motherless.go
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: MIT
+package parser
+
+import (
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// MotherlessParser extracts VideoItem values from Motherless result-card HTML
+type MotherlessParser struct {
+	BaseURL string // base passed to MakeAbsoluteURL when Parse resolves a card's link href
+}
+
+// NewMotherlessParser creates a new Motherless parser with BaseURL preset to https://motherless.com
+func NewMotherlessParser() *MotherlessParser {
+	return &MotherlessParser{BaseURL: "https://motherless.com"}
+}
+
+// ItemSelector returns the CSS selector for video items
+// It matches div.thumb-container (the video card) and, additionally, any div.thumb
+func (p *MotherlessParser) ItemSelector() string {
+	return "div.thumb-container, div.thumb" // NOTE(review): a div.thumb nested inside a div.thumb-container would match twice; confirm the markup cannot yield duplicate items
+}
+
+// Parse extracts video data from one card selection; it returns nil when no link href or no title can be extracted
+// Motherless structure: <a href="/CODE"><img src="thumb" alt="title"/></a>
+// Note: Motherless does NOT provide video preview URLs in search results, so PreviewURL is never set here
+func (p *MotherlessParser) Parse(s *goquery.Selection) *VideoItem {
+	item := &VideoItem{}
+
+	// Get link - the first anchor in the card (expected to be the one wrapping the thumbnail)
+	link := s.Find("a").First()
+	href := ExtractAttr(link, "href")
+	if href == "" {
+		return nil
+	}
+	item.URL = MakeAbsoluteURL(href, p.BaseURL)
+
+	// Get thumbnail - direct src of the first img (not lazy loaded)
+	// Format: https://cdn5-thumbs.motherlessmedia.com/thumbs/CODE-small.jpg
+	img := s.Find("img").First()
+	item.Thumbnail = ExtractAttr(img, "src")
+
+	// Placeholder (plc.gif) or missing src: replace it with a lazy-load attribute value
+	if strings.Contains(item.Thumbnail, "plc.gif") || item.Thumbnail == "" {
+		// Try data-src / data-original as fallback (presumably the first non-empty one wins; verify against ExtractAttr)
+		item.Thumbnail = ExtractAttr(img, "data-src", "data-original")
+	}
+
+	if item.Thumbnail != "" {
+		item.Thumbnail = MakeAbsoluteURL(item.Thumbnail, "https:") // presumably completes scheme-relative thumbnail URLs; verify against MakeAbsoluteURL
+	}
+
+	// Get title from img alt, falling back to the link's title attribute
+	item.Title = ExtractAttr(img, "alt")
+	if item.Title == "" {
+		item.Title = ExtractAttr(link, "title")
+	}
+	if item.Title == "" {
+		return nil
+	}
+
+	// Motherless does NOT provide preview URLs in search results
+	// Leave PreviewURL empty
+
+	// Get duration - plain text in the card
+	// Look for elements with common duration class names first
+	durElem := s.Find(".duration, .dur, .time, .video-time")
+	if durElem.Length() > 0 {
+		item.Duration, item.DurationSeconds = ParseDuration(CleanText(durElem.First().Text()))
+	}
+
+	// Otherwise scan every span/small/div in the card for text in a time format
+	if item.DurationSeconds == 0 {
+		s.Find("span, small, div").Each(func(i int, el *goquery.Selection) {
+			text := CleanText(el.Text())
+			// Check if text looks like duration (M:SS up to HH:MM:SS): 4-8 bytes long and containing a colon
+			if len(text) >= 4 && len(text) <= 8 && strings.Contains(text, ":") {
+				dur, secs := ParseDuration(text)
+				if secs > 0 && item.DurationSeconds == 0 { // first positive match wins; later elements are still visited but ignored
+					item.Duration = dur
+					item.DurationSeconds = secs
+				}
+			}
+		})
+	}
+
+	// Get views - displayed as text (e.g., "14.7K")
+	viewsElem := s.Find(".views, .view-count, .stats")
+	if viewsElem.Length() > 0 {
+		item.Views, item.ViewsCount = ParseViews(CleanText(viewsElem.First().Text()))
+	}
+
+	// Otherwise scan span/small/div text containing "view" (any case) or, when at most 10 bytes long, an uppercase "K" or "M"
+	if item.ViewsCount == 0 {
+		s.Find("span, small, div").Each(func(i int, el *goquery.Selection) {
+			text := CleanText(el.Text())
+			textLower := strings.ToLower(text)
+			if strings.Contains(textLower, "view") ||
+			   (len(text) <= 10 && (strings.Contains(text, "K") || strings.Contains(text, "M"))) {
+				views, count := ParseViews(text)
+				if count > 0 && item.ViewsCount == 0 { // first positive match wins
+					item.Views = views
+					item.ViewsCount = count
+				}
+			}
+		})
+	}
+
+	// Get uploader - text of the first link whose href contains /m/ or /u/
+	// Format: <a href="/m/Username">
+	uploaderLink := s.Find("a[href*='/m/'], a[href*='/u/']")
+	if uploaderLink.Length() > 0 {
+		item.Uploader = CleanText(uploaderLink.First().Text())
+	}
+
+	// Check for premium content (the flag is only recorded here; the caller decides whether to drop the item)
+	item.IsPremium = IsPremiumContent(s, item.URL)
+
+	return item
+}