Skip to content

Commit b720a1e

Browse files
committed
Fetch X share context before digital analysis
1 parent 14f868e commit b720a1e

File tree

4 files changed

+196
-11
lines changed

4 files changed

+196
-11
lines changed

report-analyze-pipeline/openai/openai.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -242,12 +242,17 @@ func (c *Client) AnalyzeImage(imageData []byte, description string) (string, err
242242
Text: description,
243243
}
244244

245-
imagePrompt := ImageContent{
246-
Type: "image_url",
247-
ImageURL: ImageURL{
248-
URL: encodeImageToBase64(imageData),
249-
},
245+
userContent := make([]any, 0, 2)
246+
if len(imageData) > 0 {
247+
imagePrompt := ImageContent{
248+
Type: "image_url",
249+
ImageURL: ImageURL{
250+
URL: encodeImageToBase64(imageData),
251+
},
252+
}
253+
userContent = append(userContent, imagePrompt)
250254
}
255+
userContent = append(userContent, descriptionPrompt)
251256

252257
reqBody := ChatRequest{
253258
Model: c.model,
@@ -259,11 +264,8 @@ func (c *Client) AnalyzeImage(imageData []byte, description string) (string, err
259264
},
260265
},
261266
{
262-
Role: "user",
263-
Content: []any{
264-
imagePrompt,
265-
descriptionPrompt,
266-
},
267+
Role: "user",
268+
Content: userContent,
267269
},
268270
},
269271
}

report-analyze-pipeline/service/service.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ func (s *Service) AnalyzeReport(report *database.Report) error {
183183
// Use the image from database and other fields from the report message
184184
log.Printf("Analyzing report %d with image size: %d bytes", report.Seq, len(imageData))
185185

186-
analysisInput := buildAnalysisInput(report)
186+
analysisInput := s.enrichAnalysisInput(report, buildAnalysisInput(report))
187187

188188
// Call OpenAI API with assistant for initial analysis in English
189189
response, err := s.llmClient.AnalyzeImage(imageData, analysisInput)

report-analyze-pipeline/service/service_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,33 @@ func TestBuildAnalysisInputPrefersShareContextOverGenericHumanDescription(t *tes
150150
}
151151
}
152152
}
153+
154+
func TestExtractTextFromOEmbedHTML(t *testing.T) {
155+
html := `<blockquote class="twitter-tweet"><p lang="en" dir="ltr">Claude is down again and credits look wrong.<br>Several users impacted.</p>&mdash; tester (@tester) <a href="https://twitter.com/tester/status/123">March 24, 2026</a></blockquote>`
156+
got := extractTextFromOEmbedHTML(html)
157+
for _, expected := range []string{
158+
"Claude is down again and credits look wrong.",
159+
"Several users impacted.",
160+
} {
161+
if !strings.Contains(got, expected) {
162+
t.Fatalf("extractTextFromOEmbedHTML() missing %q in %q", expected, got)
163+
}
164+
}
165+
if strings.Contains(got, "March 24, 2026") {
166+
t.Fatalf("extractTextFromOEmbedHTML() should not include footer metadata: %q", got)
167+
}
168+
}
169+
170+
func TestEnrichAnalysisInputAddsThinEvidenceWarning(t *testing.T) {
171+
svc := &Service{}
172+
report := &database.Report{
173+
Seq: 1182538,
174+
Description: "Human report submission",
175+
SourceURL: "notaurl",
176+
}
177+
178+
got := svc.enrichAnalysisInput(report, buildAnalysisInput(report))
179+
if !strings.Contains(got, "Do not invent specific bug details") {
180+
t.Fatalf("enrichAnalysisInput() = %q, want evidence warning", got)
181+
}
182+
}
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package service
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"html"
7+
"io"
8+
"log"
9+
"net/http"
10+
neturl "net/url"
11+
"regexp"
12+
"strings"
13+
"time"
14+
15+
"report-analyze-pipeline/database"
16+
)
17+
18+
// lineBreakTagPattern matches tags that are rewritten to a newline before the
// remaining markup is stripped: <br> variants (case-insensitive) and the
// closing </p>, </div> and </blockquote> tags.
var lineBreakTagPattern = regexp.MustCompile(`(?i)<br\s*/?>|</p>|</div>|</blockquote>`)

// htmlTagPattern matches any single HTML tag, including tags that span lines.
var htmlTagPattern = regexp.MustCompile(`(?s)<[^>]*>`)

// whitespacePattern matches a run of one or more whitespace characters.
var whitespacePattern = regexp.MustCompile(`\s+`)
23+
24+
// xOEmbedResponse holds the fields decoded from the JSON body returned by the
// oEmbed endpoint; any other keys in the response are ignored.
type xOEmbedResponse struct {
	// HTML is the embeddable markup; the post text is extracted from it.
	HTML string `json:"html"`

	// AuthorName is decoded from the "author_name" key.
	AuthorName string `json:"author_name"`

	// AuthorURL is decoded from the "author_url" key.
	AuthorURL string `json:"author_url"`
}
29+
30+
func (s *Service) enrichAnalysisInput(report *database.Report, baseInput string) string {
31+
shareContext, err := fetchShareContext(report)
32+
if err != nil {
33+
log.Printf("Report %d: failed to fetch share context for %q: %v", report.Seq, report.SourceURL, err)
34+
}
35+
36+
parts := make([]string, 0, 3)
37+
if trimmed := strings.TrimSpace(baseInput); trimmed != "" {
38+
parts = append(parts, trimmed)
39+
}
40+
if shareContext != "" {
41+
parts = append(parts, shareContext)
42+
}
43+
if shouldWarnAboutThinEvidence(report, shareContext) {
44+
parts = append(parts, "Evidence quality note: there is no attached image and no retrievable remote post/page content. Do not invent specific bug details, metrics, IDs, or impacted products. If evidence is insufficient, say so clearly and keep the analysis generic.")
45+
}
46+
return strings.Join(parts, "\n\n")
47+
}
48+
49+
func shouldWarnAboutThinEvidence(report *database.Report, shareContext string) bool {
50+
if len(strings.TrimSpace(shareContext)) > 0 {
51+
return false
52+
}
53+
if strings.TrimSpace(report.SharedText) != "" {
54+
return false
55+
}
56+
description := strings.TrimSpace(report.Description)
57+
if description == "" {
58+
return true
59+
}
60+
return strings.EqualFold(description, "Human report submission")
61+
}
62+
63+
func fetchShareContext(report *database.Report) (string, error) {
64+
sourceURL := strings.TrimSpace(report.SourceURL)
65+
if sourceURL == "" {
66+
return "", nil
67+
}
68+
69+
parsed, err := neturl.Parse(sourceURL)
70+
if err != nil {
71+
return "", fmt.Errorf("invalid source url: %w", err)
72+
}
73+
74+
host := strings.ToLower(strings.TrimPrefix(parsed.Hostname(), "www."))
75+
switch host {
76+
case "x.com", "twitter.com":
77+
return fetchXOEmbedContext(sourceURL)
78+
default:
79+
return "", nil
80+
}
81+
}
82+
83+
func fetchXOEmbedContext(sourceURL string) (string, error) {
84+
query := neturl.Values{}
85+
query.Set("url", sourceURL)
86+
query.Set("omit_script", "true")
87+
query.Set("dnt", "true")
88+
89+
endpoint := "https://publish.twitter.com/oembed?" + query.Encode()
90+
client := &http.Client{Timeout: 5 * time.Second}
91+
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
92+
if err != nil {
93+
return "", err
94+
}
95+
req.Header.Set("User-Agent", "CleanAppAnalyzer/1.0")
96+
97+
resp, err := client.Do(req)
98+
if err != nil {
99+
return "", err
100+
}
101+
defer resp.Body.Close()
102+
103+
if resp.StatusCode != http.StatusOK {
104+
body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024))
105+
return "", fmt.Errorf("oembed status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
106+
}
107+
108+
var payload xOEmbedResponse
109+
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
110+
return "", err
111+
}
112+
113+
postText := extractTextFromOEmbedHTML(payload.HTML)
114+
if postText == "" {
115+
return "", nil
116+
}
117+
118+
parts := []string{"Fetched X post context:"}
119+
if author := strings.TrimSpace(payload.AuthorName); author != "" {
120+
parts = append(parts, "Author: "+author)
121+
}
122+
if authorURL := strings.TrimSpace(payload.AuthorURL); authorURL != "" {
123+
parts = append(parts, "Author profile: "+authorURL)
124+
}
125+
parts = append(parts, "Post text: "+postText)
126+
return strings.Join(parts, "\n"), nil
127+
}
128+
129+
func extractTextFromOEmbedHTML(fragment string) string {
130+
fragment = strings.TrimSpace(fragment)
131+
if fragment == "" {
132+
return ""
133+
}
134+
135+
normalized := lineBreakTagPattern.ReplaceAllString(fragment, "\n")
136+
normalized = htmlTagPattern.ReplaceAllString(normalized, " ")
137+
normalized = html.UnescapeString(normalized)
138+
139+
lines := strings.Split(normalized, "\n")
140+
cleaned := make([]string, 0, len(lines))
141+
for _, line := range lines {
142+
line = whitespacePattern.ReplaceAllString(strings.TrimSpace(line), " ")
143+
if line == "" {
144+
continue
145+
}
146+
if strings.HasPrefix(line, "— ") || strings.HasPrefix(line, "&mdash; ") {
147+
continue
148+
}
149+
cleaned = append(cleaned, line)
150+
}
151+
152+
return strings.Join(cleaned, "\n")
153+
}

0 commit comments

Comments
 (0)