ceee · alexeib · Jun 27, 2015
diff --git a/PortablePorts/NReadability/NReadabilityTranscoder.cs b/PortablePorts/NReadability/NReadabilityTranscoder.cs
@@ -136,7 +136,7 @@ private class LinkData
     private static readonly Regex _MailtoHrefRegex = new Regex("^\\s*mailto\\s*:", RegexOptions.IgnoreCase);
     private static readonly Regex _TitleWhitespacesCleanUpRegex = new Regex("\\s+");
 
-    private static readonly Dictionary<Regex, string> _articleContentElementHints = new Dictionary<Regex, string>
+    private readonly Dictionary<Regex, string> _articleContentElementHints = new Dictionary<Regex, string>
     {
       { new Regex("^https?://(www|mobile)\\.theverge.com", RegexOptions.IgnoreCase), ".entry-body" },
       { new Regex("^https?://(www|blog)\\.bufferapp.com", RegexOptions.IgnoreCase), ".post" },
@@ -146,7 +146,8 @@ private class LinkData
       { new Regex("^https?://(m\\.|www\\.)?youtube.com", RegexOptions.IgnoreCase), "#watch-description-content" },
       { new Regex("^https?://(www.)?vimeo.com", RegexOptions.IgnoreCase), ".description_wrapper" },
       { new Regex("^https?://(www.)?zdnet.com", RegexOptions.IgnoreCase), ".storyBody" },
-      { new Regex("^https?://(www.)?recode.net", RegexOptions.IgnoreCase), ".postarea" }
+      { new Regex("^https?://(www.)?recode.net", RegexOptions.IgnoreCase), ".postarea" },
+      { new Regex("^https?://(.*\\.)?slashdot.org", RegexOptions.IgnoreCase), ".body" }
     };
 
     #endregion
@@ -172,20 +173,49 @@ private class LinkData
     private Func<AttributeTransformationInput, AttributeTransformationResult> _anchorHrefTransformer;
     private Func<AttributeTransformationInput, AttributeTransformationResult> _metaTransformer;
 
-    #endregion
-
-    #region Constructor(s)
+        #endregion
+
+        #region Constructor(s)
+
+      /// <summary>
+      /// Initializes a new instance of NReadabilityTranscoder. Allows setting all options, including additional article content element hints.
+      /// </summary>
+      /// <param name="dontStripUnlikelys">Determines whether elements that are unlikely to be a part of main content will be removed.</param>
+      /// <param name="dontNormalizeSpacesInTextContent">Determines whether spaces in InnerText properties of elements will be normalized automatically (eg. whether double spaces will be replaced with single spaces).</param>
+      /// <param name="dontWeightClasses">Determines whether 'weight-class' algorithm will be used when cleaning content.</param>
+      /// <param name="readingStyle">Styling for the extracted article.</param>
+      /// <param name="readingMargin">Margin for the extracted article.</param>
+      /// <param name="readingSize">Font size for the extracted article.</param>
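+      /// <param name="articleElementHints">Hints for sites with difficult-to-find article bodies, in the form of a dictionary mapping a regex that matches the URL of the content to the selector of the element containing the article. May be null, in which case only the built-in hints are used; otherwise each entry is set on this instance's hint dictionary alongside the built-in hints.</param>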
+      public NReadabilityTranscoder(
+          bool dontStripUnlikelys,
+          bool dontNormalizeSpacesInTextContent,
+          bool dontWeightClasses,
+          ReadingStyle readingStyle,
+          ReadingMargin readingMargin,
+          ReadingSize readingSize,
+          IDictionary<Regex, string> articleElementHints)
+            : this(dontStripUnlikelys, dontNormalizeSpacesInTextContent, dontWeightClasses, readingStyle, readingMargin, readingSize)
+        {
+          if (articleElementHints != null)
+          {
+              foreach (var kvp in articleElementHints)
+              {
+                  _articleContentElementHints[kvp.Key] = kvp.Value;
+              }
+          }
+        }
 
-    /// <summary>
-    /// Initializes a new instance of NReadabilityTranscoder. Allows setting all options.
-    /// </summary>
-    /// <param name="dontStripUnlikelys">Determines whether elements that are unlikely to be a part of main content will be removed.</param>
-    /// <param name="dontNormalizeSpacesInTextContent">Determines whether spaces in InnerText properties of elements will be normalized automatically (eg. whether double spaces will be replaced with single spaces).</param>
-    /// <param name="dontWeightClasses">Determines whether 'weight-class' algorithm will be used when cleaning content.</param>
-    /// <param name="readingStyle">Styling for the extracted article.</param>
-    /// <param name="readingMargin">Margin for the extracted article.</param>
-    /// <param name="readingSize">Font size for the extracted article.</param>
-    public NReadabilityTranscoder(
+        /// <summary>
+        /// Initializes a new instance of NReadabilityTranscoder. Allows setting all options.
+        /// </summary>
+        /// <param name="dontStripUnlikelys">Determines whether elements that are unlikely to be a part of main content will be removed.</param>
+        /// <param name="dontNormalizeSpacesInTextContent">Determines whether spaces in InnerText properties of elements will be normalized automatically (eg. whether double spaces will be replaced with single spaces).</param>
+        /// <param name="dontWeightClasses">Determines whether 'weight-class' algorithm will be used when cleaning content.</param>
+        /// <param name="readingStyle">Styling for the extracted article.</param>
+        /// <param name="readingMargin">Margin for the extracted article.</param>
+        /// <param name="readingSize">Font size for the extracted article.</param>
+        public NReadabilityTranscoder(
       bool dontStripUnlikelys,
       bool dontNormalizeSpacesInTextContent,
       bool dontWeightClasses,
@@ -1928,7 +1958,7 @@ private static XElement TryFindArticleContentElement(XDocument document, string
       }
     }
 
-    private static string GetArticleContentElementHint(string url)
+    private string GetArticleContentElementHint(string url)
     {
       if (string.IsNullOrEmpty(url))
       {

diff --git a/ReadSharp/Models/TranscoderOptions.cs b/ReadSharp/Models/TranscoderOptions.cs
@@ -0,0 +1,18 @@
+using System.Collections.Generic;
+using System.Text.RegularExpressions;
+
+namespace ReadSharp.Models
+{
+    public class TranscoderOptions
+    {
+        /// <summary>
+        /// A dictionary mapping a URL-matching regex (key) to an HTML element selector (value); each entry is a hint that helps the transcoder find the actual article content within the downloaded HTML.
+        /// </summary>
+        public IDictionary<Regex, string> ArticleElementHints { get; set; }
+
+        public TranscoderOptions()
+        {
+            ArticleElementHints = new Dictionary<Regex, string>();
+        }
+    }
+}
diff --git a/ReadSharp/ReadSharp.csproj b/ReadSharp/ReadSharp.csproj
@@ -86,6 +86,7 @@
     <Compile Include="Models\ArticleImage.cs" />
     <Compile Include="Models\HttpOptions.cs" />
     <Compile Include="Models\ReadOptions.cs" />
+    <Compile Include="Models\TranscoderOptions.cs" />
     <Compile Include="Reader.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />
     <Compile Include="Models\Response.cs" />

diff --git a/ReadSharp/Reader.cs b/ReadSharp/Reader.cs
@@ -10,6 +10,7 @@
 using System.Threading;
 using System.Threading.Tasks;
 using System.Xml.Linq;
+using ReadSharp.Models;
 
 
 namespace ReadSharp
@@ -63,26 +64,23 @@ public class Reader : IReader
     /// Initializes a new instance of the <see cref="Reader" /> class.
     /// </summary>
     /// <param name="options">The HTTP options.</param>
-    public Reader(HttpOptions options = null)
-    {
-      // initialize transcoder
-      _transcoder = new NReadabilityTranscoder(
-        dontStripUnlikelys: false,
-        dontNormalizeSpacesInTextContent: true,
-        dontWeightClasses: false,
-        readingStyle: ReadingStyle.Ebook,
-        readingMargin: ReadingMargin.Narrow,
-        readingSize: ReadingSize.Medium
-      );
-
-      // get default HTTP options if none available
+    public Reader(HttpOptions options = null, TranscoderOptions transcoderOptions = null)
+    {      
+        // get default HTTP options if none available
       if (options == null)
       {
         options = HttpOptions.CreateDefault();
       }
 
+        if (transcoderOptions == null)
+        {
+            transcoderOptions = new TranscoderOptions();
+        }
+
       _options = options;
 
+      _transcoder = CreateTranscoder(transcoderOptions);
+
       // initialize custom encoder
       _encoder = new Encodings.Encoder(true);
 
@@ -113,8 +111,7 @@ public Reader(HttpOptions options = null)
     }
 
 
-
-    /// <summary>
+      /// <summary>
     /// Reads article content from the given URI.
     /// </summary>
     /// <param name="uri">An URI to extract the content from.</param>
@@ -245,7 +242,18 @@ protected TranscodingResult ExtractReadableInformation(
       return _transcoder.Transcode(transcodingInput);
     }
 
-
+      private static NReadabilityTranscoder CreateTranscoder(TranscoderOptions transcoderOptions)
+      {
+          return new NReadabilityTranscoder(
+              dontStripUnlikelys: false,
+              dontNormalizeSpacesInTextContent: true,
+              dontWeightClasses: false,
+              readingStyle: ReadingStyle.Ebook,
+              readingMargin: ReadingMargin.Narrow,
+              readingSize: ReadingSize.Medium,
+              articleElementHints: transcoderOptions.ArticleElementHints
+              );
+      }
 
     /// <summary>
     /// Reverses the deep links.