66using System . Net . Sockets ;
77using System . Text ;
88using System . Threading . Tasks ;
9+ using AngleSharp . Dom ;
910using Serilog ;
10- using static System . Net . Mime . MediaTypeNames ;
11+ using DeepLClient . Models ;
12+ using Newtonsoft . Json ;
1113
1214namespace DeepLClient . Managers
1315{
@@ -19,33 +21,35 @@ internal static class UrlManager
1921 /// <param name="url"></param>
2022 /// <param name="isLocaLFile"></param>
2123 /// <returns></returns>
22- internal static async Task < ( bool success , bool isReadable , string content , string error ) > GetReadableContentAsync ( string url , bool isLocaLFile = false )
24+ internal static async Task < WebpageResult > GetReadableContentAsync ( string url , bool isLocaLFile = false )
2325 {
26+ var webpageResult = new WebpageResult ( ) ;
27+
2428 try
2529 {
2630 // fetch the content if it's a local file, otherwise download it
2731 var reader = isLocaLFile
2832 ? new Reader ( url , await File . ReadAllTextAsync ( url ) )
2933 : new Reader ( url ) ;
30-
34+
3135 // get the article
3236 var article = await reader . GetArticleAsync ( ) ;
33-
37+
3438 // readable?
3539 if ( ! article . IsReadable )
3640 {
3741 // nope
3842 Log . Warning ( "[URL] Fetching readable text failed: {url}" , url ) ;
39- return ( true , false , article . Content , "unable to determine what part of the site is relevant text" ) ;
43+ return webpageResult . SetReadableFailed ( article . Content , article . Title ) ;
4044 }
41-
42- // yep
43- return ( true , true , article . Content , string . Empty ) ;
45+
46+ // done
47+ return webpageResult . SetSuccess ( article . Content , article . Title ) ;
4448 }
4549 catch ( UriFormatException ex )
4650 {
4751 Log . Fatal ( ex , "[URL] Unable to parse host '{url}': {err}" , url , ex . Message ) ;
48- return ( false , false , string . Empty , "provided url is in the wrong format" ) ;
52+ return webpageResult . SetFailed ( "provided url is in the wrong format" ) ;
4953 }
5054 catch ( HttpRequestException ex )
5155 {
@@ -57,25 +61,25 @@ internal static class UrlManager
5761
5862 return exc . SocketErrorCode switch
5963 {
60- SocketError . HostNotFound => ( false , false , string . Empty , "the remote host address wasn't found" ) ,
61- SocketError . AccessDenied => ( false , false , string . Empty , "access denied by the remote host" ) ,
62- SocketError . TimedOut => ( false , false , string . Empty , "the host didn't respond" ) ,
63- SocketError . HostDown => ( false , false , string . Empty , "the remote host is offline" ) ,
64- SocketError . HostUnreachable or SocketError . NetworkUnreachable => ( false , false , string . Empty , "the remote host couldn't be reached" ) ,
65- SocketError . NotConnected => ( false , false , string . Empty , "no internet connection" ) ,
66- _ => ( false , false , string . Empty , $ "error trying to contact the host: { exc . SocketErrorCode . ToString ( ) . ToLower ( ) } ")
64+ SocketError . HostNotFound => webpageResult . SetFailed ( "the remote host address wasn't found" ) ,
65+ SocketError . AccessDenied => webpageResult . SetFailed ( "access denied by the remote host" ) ,
66+ SocketError . TimedOut => webpageResult . SetFailed ( "the host didn't respond" ) ,
67+ SocketError . HostDown => webpageResult . SetFailed ( "the remote host is offline" ) ,
68+ SocketError . HostUnreachable or SocketError . NetworkUnreachable => webpageResult . SetFailed ( "the remote host couldn't be reached" ) ,
69+ SocketError . NotConnected => webpageResult . SetFailed ( "no internet connection" ) ,
70+ _ => webpageResult . SetFailed ( $ "error trying to contact the host: { exc . SocketErrorCode . ToString ( ) . ToLower ( ) } ")
6771 } ;
6872 }
6973
7074 var statusCode = ex . StatusCode . ToString ( ) ;
71- return ! string . IsNullOrWhiteSpace ( statusCode )
72- ? ( false , false , string . Empty , $ "error trying to contact the host: { statusCode } ")
73- : ( false , false , string . Empty , "unknown error trying to contact the host" ) ;
75+ return ! string . IsNullOrWhiteSpace ( statusCode )
76+ ? webpageResult . SetFailed ( $ "error trying to contact the host: { statusCode } ")
77+ : webpageResult . SetFailed ( "unknown error trying to contact the host" ) ;
7478 }
7579 catch ( Exception ex )
7680 {
7781 Log . Fatal ( ex , "[URL] Unable to process host '{url}': {err}" , url , ex . Message ) ;
78- return ( false , false , string . Empty , "unknown error trying to contact the host" ) ;
82+ return webpageResult . SetFailed ( "unknown error trying to contact the host" ) ;
7983 }
8084 }
8185
@@ -88,6 +92,9 @@ internal static string CleanText(string rawText)
8892 {
8993 if ( string . IsNullOrEmpty ( rawText ) ) return string . Empty ;
9094
95+ if ( rawText . StartsWith ( "\" " ) ) rawText = rawText . Remove ( 0 , 1 ) ;
96+ if ( rawText . EndsWith ( "\" " ) ) rawText = rawText . Remove ( rawText . Length - 1 , 1 ) ;
97+
9198 rawText = rawText . Replace ( "\\ n\\ n" , Environment . NewLine ) ;
9299 rawText = rawText . Replace ( "\\ n" , Environment . NewLine ) ;
93100 rawText = rawText . Replace ( "\\ \" " , "\" " ) ;
@@ -115,5 +122,29 @@ internal static bool IsLocalOrNetworkFile(string value)
115122 if ( string . IsNullOrWhiteSpace ( value ) ) return false ;
116123 return value . Substring ( 1 , 2 ) == ":\\ " || value [ ..2 ] == "\\ \\ " ;
117124 }
125+
/// <summary>
/// Wraps the provided content into a minimal reading mode html page, and adds the title.
/// </summary>
/// <remarks>
/// The generated page carries a small inline stylesheet for link colors and a dark
/// background/foreground pair set directly on the body element.
/// Only the title is html-encoded; the content is treated as markup and written as-is.
/// </remarks>
/// <param name="content">Markup placed verbatim inside the body element (not encoded here).</param>
/// <param name="title">Text for the title element; html-encoded before it is inserted.</param>
/// <returns>The complete html document as a single string.</returns>
internal static string WrapContentInHtml(string content, string title)
{
    // The title is plain text, so encode it: an unencoded '<', '&' or "</title>" would
    // otherwise break out of the title element and inject markup into the page.
    // NOTE(review): presumably the title originates from a fetched web page - confirm against callers.
    var safeTitle = System.Net.WebUtility.HtmlEncode(title) ?? string.Empty;

    var htmlContent = new StringBuilder();
    htmlContent.AppendLine("<html>");
    htmlContent.AppendLine("<head>");
    htmlContent.AppendLine($"<title>{safeTitle}</title>");
    htmlContent.AppendLine("<style>");
    htmlContent.AppendLine("a:link, a:visited, a:hover, a:active { color: #6590fd }");
    htmlContent.AppendLine("</style>");
    htmlContent.AppendLine("</head>");
    htmlContent.AppendLine("<body style=\"background-color: #3f3f46; color: #f1f1f1;\">");

    // content is expected to already be html, so it is intentionally not encoded
    htmlContent.AppendLine(content);
    htmlContent.AppendLine("</body>");
    htmlContent.AppendLine("</html>");

    return htmlContent.ToString();
}
118149 }
119150}
0 commit comments