1+ import { action , internalAction } from "./_generated/server" ;
2+ import { v } from "convex/values" ;
3+ import { internal } from "./_generated/api" ;
4+
/**
 * Public action: fetch metadata for a URL.
 *
 * Accepts a single string argument `url` and forwards it unchanged to
 * `internal.metadataFetcher.fetchMetadataInternal`, returning whatever that
 * internal action resolves to.
 */
export const fetchMetadata = action({
  args: { url: v.string() },
  handler: async (ctx, { url }): Promise<any> => {
    const result = await ctx.runAction(
      internal.metadataFetcher.fetchMetadataInternal,
      { url },
    );
    return result;
  },
});
14+
/**
 * Parses the raw attribute text of an HTML tag (everything between the tag
 * name and the closing `>`) into a map of attribute name to value.
 *
 * Supported value forms: double-quoted, single-quoted and unquoted. Whitespace
 * around `=` is tolerated. Attribute names are lower-cased; a valueless
 * (boolean) attribute maps to the empty string. When a name occurs more than
 * once, the last occurrence wins.
 *
 * @param attributeString Raw attribute text, e.g. `name="description" content='x'`.
 * @returns Map from lower-cased attribute name to its (undecoded) value.
 */
function parseAttributes(attributeString: string): Record<string, string> {
  const attrs: Record<string, string> = {};

  // Name: any run of characters that cannot delimit an attribute, so that
  // hyphenated / namespaced names such as `http-equiv` or `xml:lang` stay whole.
  // Value (optional): "double quoted" | 'single quoted' | unquoted.
  const attrPattern =
    /([^\s"'<>\/=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+)))?/g;

  for (const match of attributeString.matchAll(attrPattern)) {
    const [, name, doubleQuoted, singleQuoted, unquoted] = match;
    // Exactly one of the three value groups is set when a value is present;
    // `??` (not `||`) so an explicitly empty quoted value is handled the same
    // way as any other matched value.
    attrs[name.toLowerCase()] = doubleQuoted ?? singleQuoted ?? unquoted ?? '';
  }

  return attrs;
}
28+
/**
 * Internal action: download a page and extract its metadata with regular
 * expressions (no DOM parser is used).
 *
 * On success the result contains:
 *  - `url`, `fetchedAt` (ISO timestamp)
 *  - `title`    — trimmed text of the first `<title>` element, when one is found
 *  - `ogp`      — `<meta property="og:*">` values keyed without the `og:` prefix
 *  - `twitter`  — `<meta name="twitter:*">` values keyed without the prefix
 *  - `meta`     — every other `<meta name=...>`
 *  - `other`    — every other `<meta property=...>`
 *  - `links`    — `<link>` tags that have both `rel` and `href`
 *  - `jsonLd`   — parsed `application/ld+json` script bodies, when any parse
 *
 * Any failure (invalid URL, non-2xx response, network error) is logged and
 * reported as `{ error, url, fetchedAt }` instead of being thrown.
 */
export const fetchMetadataInternal = internalAction({
  args: {
    url: v.string(),
  },
  handler: async (_, args): Promise<any> => {
    try {
      // The URL is caller-supplied: reject anything that is not plain http(s)
      // before issuing the request. `new URL` throws on malformed input, which
      // the surrounding catch turns into the regular error result.
      const target = new URL(args.url);
      if (target.protocol !== 'http:' && target.protocol !== 'https:') {
        throw new Error(`Unsupported URL scheme: ${target.protocol}`);
      }

      // Fetch the HTML.
      const response = await fetch(args.url);
      if (!response.ok) {
        throw new Error(`Failed to fetch: ${response.status}`);
      }
      const html = await response.text();
      const fetchedAt = new Date().toISOString();

      // Tag-level patterns. Each captures the raw attribute text of the tag.
      const metaTagPattern = /<meta\s+([^>]*?)>/gi;
      const linkTagPattern = /<link\s+([^>]*?)>/gi;
      // Allow attributes on <title> (e.g. `<title data-rh="true">`).
      const titlePattern = /<title\b[^>]*>([^<]*)<\/title>/i;
      // `type` may appear anywhere among the attributes and be quoted either
      // way; the body is matched lazily up to the closing tag so JSON that
      // contains `<` inside string values is not dropped.
      const jsonLdPattern =
        /<script\b[^>]*?\stype\s*=\s*["']?application\/ld\+json["']?[^>]*>([\s\S]*?)<\/script>/gi;

      const meta: Record<string, string> = {};
      const ogp: Record<string, string> = {};
      const twitter: Record<string, string> = {};
      const other: Record<string, string> = {};
      const links: Array<{
        rel: string;
        href: string;
        type: string | undefined;
        title: string | undefined;
      }> = [];

      // <title>
      const title = html.match(titlePattern)?.[1]?.trim();

      // <meta> tags, routed into one bucket each. Order of the checks matters:
      // `property="og:*"` wins over `name`, and `name` wins over other
      // `property` values.
      for (const [, rawAttributes] of html.matchAll(metaTagPattern)) {
        const attributes = parseAttributes(rawAttributes);
        const content = attributes.content ?? '';

        if (attributes.property?.startsWith('og:')) {
          ogp[attributes.property.slice('og:'.length)] = content;
        } else if (attributes.name?.startsWith('twitter:')) {
          twitter[attributes.name.slice('twitter:'.length)] = content;
        } else if (attributes.name) {
          meta[attributes.name] = content;
        } else if (attributes.property) {
          other[attributes.property] = content;
        }
      }

      // <link> tags (canonical, RSS feeds, ...). Tags lacking rel or href are skipped.
      for (const [, rawAttributes] of html.matchAll(linkTagPattern)) {
        const attributes = parseAttributes(rawAttributes);
        if (attributes.rel && attributes.href) {
          links.push({
            rel: attributes.rel,
            href: attributes.href,
            type: attributes.type,
            title: attributes.title,
          });
        }
      }

      // Structured data (JSON-LD).
      const jsonLd: unknown[] = [];
      for (const [, rawJson] of html.matchAll(jsonLdPattern)) {
        try {
          jsonLd.push(JSON.parse(rawJson.trim()));
        } catch {
          // Deliberately best-effort: a malformed JSON-LD block must not
          // discard the rest of the page's metadata.
        }
      }

      const metadata: Record<string, unknown> = {
        url: args.url,
        fetchedAt,
        meta,
        ogp,
        twitter,
        other,
        links,
      };
      // `title` and `jsonLd` are only present when something was found.
      if (title !== undefined) {
        metadata.title = title;
      }
      if (jsonLd.length > 0) {
        metadata.jsonLd = jsonLd;
      }
      return metadata;
    } catch (error) {
      console.error('Error fetching metadata:', error);
      return {
        error: error instanceof Error ? error.message : 'Unknown error',
        url: args.url,
        fetchedAt: new Date().toISOString(),
      };
    }
  },
});
130+
/**
 * Smoke-test action: takes the first `<item>` of the Zenn RSS feed
 * (https://zenn.dev/feed), runs the internal metadata fetcher on that
 * article's URL, and returns the article URL, the raw metadata and a short
 * summary of what was found.
 *
 * Returns `{ error }` when the feed request is not successful, when the feed
 * has no `<item>`, or when the first item has no usable `<link>`.
 */
export const verifyZennMetadata = action({
  args: {},
  handler: async (ctx): Promise<any> => {
    // Fetch the feed; report an HTTP failure explicitly rather than letting an
    // error page fall through to the "No item found" branch below.
    const rssResponse = await fetch("https://zenn.dev/feed");
    if (!rssResponse.ok) {
      return { error: `Failed to fetch RSS feed: ${rssResponse.status}` };
    }
    const rssText = await rssResponse.text();

    // First <item> element of the feed.
    const itemMatch = rssText.match(/<item>[\s\S]*?<\/item>/);
    if (!itemMatch) {
      return { error: "No item found in RSS feed" };
    }

    // The item's <link>, tolerating surrounding whitespace and an optional
    // CDATA wrapper around the URL.
    const linkMatch = itemMatch[0].match(
      /<link>\s*(?:<!\[CDATA\[)?\s*([^<\]\s]+)\s*(?:\]\]>)?\s*<\/link>/,
    );
    if (!linkMatch || !linkMatch[1]) {
      return { error: "No article URL found in RSS item" };
    }

    const articleUrl = linkMatch[1];
    console.log('Verifying metadata for article:', articleUrl);

    // Fetch the metadata through the internal action.
    const metadata = await ctx.runAction(internal.metadataFetcher.fetchMetadataInternal, {
      url: articleUrl,
    });

    // Every field is read defensively because `metadata` may be the fetcher's
    // error result, which carries none of these sections.
    return {
      articleUrl,
      metadata,
      summary: {
        hasOGP: Object.keys(metadata.ogp || {}).length > 0,
        hasTwitterCard: Object.keys(metadata.twitter || {}).length > 0,
        hasJsonLd: Boolean(metadata.jsonLd),
        metaTagsCount: Object.keys(metadata.meta || {}).length,
        linksCount: metadata.links?.length ?? 0,
      },
    };
  },
});
0 commit comments