Skip to content

Commit 01a89be

Browse files
committed
Update Website file with Hashtable version
1 parent 462586f commit 01a89be

File tree

13 files changed

+277
-436
lines changed

13 files changed

+277
-436
lines changed

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[![NuGet](https://img.shields.io/nuget/vpre/Shard.WebsiteScraper)](https://www.nuget.org/packages/Shard.WebsiteScraper) [![Downloads](https://img.shields.io/nuget/dt/Shard.WebsiteScraper)](https://www.nuget.org/packages/Shard.WebsiteScraper) [![License](https://img.shields.io/github/license/typnull/WebsiteScraper.svg)](https://github.com/typnull/downloadassistant/blob/master/LICENSE)
1+
[![NuGet](https://img.shields.io/nuget/vpre/Shard.WebsiteScraper)](https://www.nuget.org/packages/Shard.WebsiteScraper) [![Downloads](https://img.shields.io/nuget/dt/Shard.WebsiteScraper)](https://www.nuget.org/packages/Shard.WebsiteScraper) [![License](https://img.shields.io/github/license/typnull/WebsiteScraper.svg)](https://github.com/TypNull/WebsiteScraper/blob/master/LICENSE.txt)
22
# WebsiteScraper
33

44
WebsiteScraper is a powerful library that allows you to easily download comics and manga from various websites. With its intuitive interface and advanced parsing capabilities, you can quickly fetch and save your favorite content for offline reading. This readme provides an overview of how to use the WebsiteScraper library and includes some example code snippets to get you started.
@@ -53,6 +53,8 @@ An example for a website is provided in the repository. You can refer to this ex
5353

5454
Please note that the example provided may need to be modified based on the structure and requirements of the specific website you are targeting.
5555

56+
Easily add your own websites with the Website Creator WPF application.
57+
5658
## Contributing
5759

5860
Contributions to the WebsiteScraper library are welcome. If you encounter any bugs, have feature requests, or want to improve the library in any way, please feel free to open an issue or submit a pull request.

WebsiteScraper.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UnitTest", "UnitTest\UnitTe
77
EndProject
88
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebsiteScraper", "WebsiteScraper\WebsiteScraper.csproj", "{059340C7-558E-4AC3-AE88-6AF78231903B}"
99
EndProject
10+
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "WebsiteCreator", "WebsiteCreator\WebsiteCreator.csproj", "{4DA4106B-DBD2-42D6-AB68-C80D7C2644E5}"
11+
EndProject
1012
Global
1113
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1214
Debug|Any CPU = Debug|Any CPU
@@ -21,6 +23,10 @@ Global
2123
{059340C7-558E-4AC3-AE88-6AF78231903B}.Debug|Any CPU.Build.0 = Debug|Any CPU
2224
{059340C7-558E-4AC3-AE88-6AF78231903B}.Release|Any CPU.ActiveCfg = Release|Any CPU
2325
{059340C7-558E-4AC3-AE88-6AF78231903B}.Release|Any CPU.Build.0 = Release|Any CPU
26+
{4DA4106B-DBD2-42D6-AB68-C80D7C2644E5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
27+
{4DA4106B-DBD2-42D6-AB68-C80D7C2644E5}.Debug|Any CPU.Build.0 = Debug|Any CPU
28+
{4DA4106B-DBD2-42D6-AB68-C80D7C2644E5}.Release|Any CPU.ActiveCfg = Release|Any CPU
29+
{4DA4106B-DBD2-42D6-AB68-C80D7C2644E5}.Release|Any CPU.Build.0 = Release|Any CPU
2430
EndGlobalSection
2531
GlobalSection(SolutionProperties) = preSolution
2632
HideSolutionNode = FALSE

WebsiteScraper/Downloadable/Books/Chapter.cs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,29 @@ public class Chapter
1616
public int Order { get; private set; }
1717
public DateTime UploadDateTime { get; init; }
1818
public Comic HoldingComic { get; init; }
19+
private Dictionary<string, string>? _dictionary;
1920
public void SetOrder(int i)
2021
{
2122
if (Order == 0)
2223
Order = i;
2324
}
2425

26+
private string? GetValue(string key)
27+
{
28+
if (_dictionary == null)
29+
_dictionary = HoldingComic?.HoldingWebsite?.GetValue<Dictionary<string, string>>("ComicChapter");
30+
return _dictionary?.GetValueOrDefault(key);
31+
}
32+
2533
public Chapter(Comic holdingComic) => HoldingComic = holdingComic;
2634

2735
public Task<ProgressableContainer<LoadRequest>> DownloadAsync(string destination, string? tempDestination = null, CancellationToken? token = null, Action? finished = null)
2836
{
2937
if (!string.IsNullOrWhiteSpace(DownloadURL))
3038
return DownloadImageFromFileAsync(token);
31-
if (HoldingComic?.HoldingWebsite?.InputDictionary["Chapter"].GetValueOrDefault("ListExtension") != null)
39+
if (GetValue("AddToListUrl") != null)
3240
return DownloadImageListAsync(destination, tempDestination, token, finished);
33-
else if (HoldingComic?.HoldingWebsite?.InputDictionary["Chapter"].GetValueOrDefault("PageExtension") != null)
41+
else if (GetValue("AddToPagedUrl") != null)
3442
return DownloadImagePageAsync(destination, tempDestination, token, finished);
3543
else throw new Exception("Can not donwload this object");
3644
}
@@ -40,10 +48,10 @@ private async Task<ProgressableContainer<LoadRequest>> DownloadImageListAsync(st
4048
ProgressableContainer<LoadRequest> container = new();
4149
await new OwnRequest(async DToken =>
4250
{
43-
string? selector = HoldingComic?.HoldingWebsite.InputDictionary.GetValueOrDefault("Chapter")?.GetValueOrDefault("ImageList");
51+
string? selector = GetValue("ListImageQuery");
4452
if (selector == null)
4553
return false;
46-
using HttpRequestMessage? msg = new(HttpMethod.Get, Url + (HoldingComic?.HoldingWebsite.InputDictionary.GetValueOrDefault("Chapter")?.GetValueOrDefault("ListExtension") ?? string.Empty));
54+
using HttpRequestMessage? msg = new(HttpMethod.Get, Url + (GetValue("AddToListUrl") ?? string.Empty));
4755
using HttpResponseMessage res = await HttpGet.HttpClient.SendAsync(msg, HttpCompletionOption.ResponseHeadersRead, DToken);
4856
if (!res.IsSuccessStatusCode)
4957
return false;
@@ -81,12 +89,12 @@ private async Task<ProgressableContainer<LoadRequest>> DownloadImagePageAsync(st
8189
await new OwnRequest(async DToken =>
8290
{
8391
bool stop = false;
84-
string? selector = HoldingComic?.HoldingWebsite.InputDictionary?.GetValueOrDefault("Chapter")?.GetValueOrDefault("ImagePage");
92+
string? selector = GetValue("PageImageQuery");
8593
if (selector == null)
8694
stop = true;
8795
for (int i = 1; !stop; i++)
8896
{
89-
using HttpRequestMessage? msg = new(System.Net.Http.HttpMethod.Get, Url + HoldingComic?.HoldingWebsite?.InputDictionary["Chapter"].GetValueOrDefault("PageExtension")?.Replace("[page]", i.ToString()));
97+
using HttpRequestMessage? msg = new(HttpMethod.Get, Url + GetValue("AddToPagedUrl")?.Replace("[page]", i.ToString()));
9098
using HttpResponseMessage res = await HttpGet.HttpClient.SendAsync(msg, HttpCompletionOption.ResponseContentRead, DToken);
9199
if (!res.IsSuccessStatusCode)
92100
break;

WebsiteScraper/Downloadable/Books/Comic.cs

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public class Comic : BookObject, ISearchable<Comic>
2121
[JsonIgnore]
2222
public Website HoldingWebsite { get; private init; }
2323
public Comic(string url, string title, Website website) : base(url, title) { HoldingWebsite = website; }
24-
protected Dictionary<string, string> WebsiteKeys => HoldingWebsite.InputDictionary[nameof(Comic)];
24+
protected Dictionary<string, string> WebsiteKeys => HoldingWebsite.GetValue<Dictionary<string, string>>(nameof(Comic)) ?? new();
2525

2626
protected bool _isSearchObject;
2727
public string? CoverUrl { get; set; }
@@ -40,18 +40,18 @@ private static string Replace(string filename)
4040

4141
public static Comic CreateSearch(IElement html, Website website)
4242
{
43-
Dictionary<string, string>? searchId = website.InputDictionary.GetValueOrDefault(nameof(Comic) + "Search");
43+
Dictionary<string, string>? searchId = website.GetValue<Dictionary<string, string>>(nameof(Comic) + "Search");
4444
string dateSelector = string.Empty;
45-
string format = searchId?.GetValueOrDefault(nameof(LastUpdated))?.GetDateFormat(out dateSelector!) ?? string.Empty;
46-
Comic comic = new(GetParsed(html, searchId?.GetValueOrDefault(nameof(Url))),
47-
GetParsed(html, searchId?.GetValueOrDefault(nameof(Title))), website)
45+
string format = searchId?.GetValueOrDefault("LastUpdatedQuery")?.GetDateFormat(out dateSelector!) ?? string.Empty;
46+
Comic comic = new(GetParsed(html, searchId?.GetValueOrDefault("LinkQuery")),
47+
GetParsed(html, searchId?.GetValueOrDefault("TitleQuery")), website)
4848
{
49-
Description = GetParsed(html, searchId?.GetValueOrDefault(nameof(Description))),
50-
AlternativeTitles = GetParsedArray(html, searchId?.GetValueOrDefault(nameof(AlternativeTitles)) ?? string.Empty).Select((x) => x.UnicodeToText()).ToArray(),
51-
Genres = GetParsedArray(html, searchId?.GetValueOrDefault(nameof(Genres)) ?? string.Empty).Select((x) => x.UnicodeToText()).ToArray(),
49+
Description = GetParsed(html, searchId?.GetValueOrDefault("DescriptionQuery")),
50+
AlternativeTitles = GetParsedArray(html, searchId?.GetValueOrDefault("AlternativeTitlesQuery") ?? string.Empty).Select((x) => x.UnicodeToText()).ToArray(),
51+
Genres = GetParsedArray(html, searchId?.GetValueOrDefault("GenresQuery") ?? string.Empty).Select((x) => x.UnicodeToText()).ToArray(),
5252
LastUpdated = StringToDateTime(GetParsed(html, dateSelector), format),
53-
Status = StringToStatus(GetParsed(html, searchId?.GetValueOrDefault(nameof(Status)))),
54-
CoverUrl = GetParsed(html, searchId?.GetValueOrDefault("Cover")),
53+
Status = StringToStatus(GetParsed(html, searchId?.GetValueOrDefault("StatusQuery"))),
54+
CoverUrl = GetParsed(html, searchId?.GetValueOrDefault("CoverQuery")),
5555
_isSearchObject = true,
5656
HoldingWebsite = website
5757
};
@@ -67,24 +67,24 @@ private void Update(IElement? html)
6767
{
6868
if (html == null)
6969
return;
70-
IElement? container = WebsiteKeys.GetValueOrDefault("Container")?.GetElement(html);
70+
IElement? container = WebsiteKeys.GetValueOrDefault("ContainerQuery")?.GetElement(html);
7171
if (container == null)
7272
return;
73-
Title = GetParsed(container, WebsiteKeys.GetValueOrDefault(nameof(Title)));
73+
Title = GetParsed(container, WebsiteKeys.GetValueOrDefault("TitleQuery"));
7474
List<Chapter> chapters = new();
7575

76-
IElement[] chapterContainer = WebsiteKeys.GetValueOrDefault("ChapterContainer")?.GetAllElements(container) ?? Array.Empty<IElement>();
77-
string? chapterDateSelector = WebsiteKeys.GetValueOrDefault("ChapterDate");
76+
IElement[] chapterContainer = WebsiteKeys.GetValueOrDefault("ChapterContainerQuery")?.GetAllElements(container) ?? Array.Empty<IElement>();
77+
string? chapterDateSelector = WebsiteKeys.GetValueOrDefault("ChapterDateQuery");
7878
string format = chapterDateSelector.GetDateFormat(out chapterDateSelector);
7979
foreach (IElement chapter in chapterContainer)
8080
{
81-
_ = float.TryParse(GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterNumber")).Replace(',', '.'), NumberStyles.Any, CultureInfo.InvariantCulture, out float num);
81+
_ = float.TryParse(GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterNumberQuery")).Replace(',', '.'), NumberStyles.Any, CultureInfo.InvariantCulture, out float num);
8282
Chapter foundChapter = new(this)
8383
{
84-
Url = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterUrl")),
85-
Title = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterTitle")),
84+
Url = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterLinkQuery")),
85+
Title = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterTitleQuery")),
8686
UploadDateTime = StringToDateTime(GetParsed(chapter, chapterDateSelector), format),
87-
DownloadURL = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterDownloadUrl")),
87+
DownloadURL = GetParsed(chapter, WebsiteKeys.GetValueOrDefault("ChapterDownloadLinkQuery")),
8888
Number = num
8989
};
9090
chapters.Add(foundChapter);
@@ -96,19 +96,19 @@ private void Update(IElement? html)
9696
for (int i = 0; i < chapters.Count; i++)
9797
chapters[i].SetOrder(1 + i);
9898

99-
string? dateSelector = WebsiteKeys.GetValueOrDefault(nameof(LastUpdated));
99+
string? dateSelector = WebsiteKeys.GetValueOrDefault("DateQuery");
100100
format = dateSelector.GetDateFormat(out dateSelector);
101-
Description = RemoveHTML(GetParsed(html, WebsiteKeys.GetValueOrDefault(nameof(Description))));
102-
Genres = GetParsedArray(html, WebsiteKeys[nameof(Genres)]).Select((x) => x.UnicodeToText()).ToArray();
103-
Author = GetParsed(container, WebsiteKeys.GetValueOrDefault(nameof(Author)));
101+
Description = RemoveHTML(GetParsed(html, WebsiteKeys.GetValueOrDefault("DescriptionQuery")));
102+
Genres = GetParsedArray(html, WebsiteKeys["GenresQuery"]).Select((x) => x.UnicodeToText()).ToArray();
103+
Author = GetParsed(container, WebsiteKeys.GetValueOrDefault("AuthorQuery"));
104104
LastUpdated = StringToDateTime(GetParsed(html, dateSelector), format);
105-
Status = StringToStatus(GetParsed(html, WebsiteKeys.GetValueOrDefault(nameof(Status))));
105+
Status = StringToStatus(GetParsed(html, WebsiteKeys.GetValueOrDefault("StatusQuery")));
106106

107-
AlternativeTitles = GetParsedArray(html, WebsiteKeys.GetValueOrDefault(nameof(AlternativeTitles))).Select((x) => x.UnicodeToText()).ToArray();
107+
AlternativeTitles = GetParsedArray(html, WebsiteKeys.GetValueOrDefault("AlternativeTitlesQuery")).Select((x) => x.UnicodeToText()).ToArray();
108108

109109
Chapter = chapters.ToArray();
110110
_isSearchObject = false;
111-
CoverUrl = GetParsed(html, WebsiteKeys.GetValueOrDefault("Cover"));
111+
CoverUrl = GetParsed(html, WebsiteKeys.GetValueOrDefault("CoverQuery"));
112112

113113
OnPropertyChanged("Update");
114114
}

WebsiteScraper/Downloadable/DownloadableObject.cs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System.ComponentModel;
2-
using System.Diagnostics;
32
using System.Globalization;
43
using System.Runtime.CompilerServices;
54
using System.Text.RegularExpressions;
@@ -88,14 +87,14 @@ public static bool TryStringToDateTime(string value, string? format, out DateTim
8887
dateTime = default;
8988
if (value == null || value == string.Empty)
9089
return false;
91-
Debug.Assert(true, "Proof ParseDate in Downloadable Object");
92-
if ((format == string.Empty || format == null) && DateTime.TryParse(value, out dateTime))
90+
if (DateTime.TryParseExact(value, format, CultureInfo.InvariantCulture, DateTimeStyles.None, out dateTime))
91+
{
9392
return true;
94-
else if (DateTime.TryParseExact(value,
95-
format,
96-
CultureInfo.InvariantCulture,
97-
DateTimeStyles.None, out dateTime))
93+
}
94+
else if (DateTime.TryParse(value, out dateTime))
95+
{
9896
return true;
97+
}
9998
else if (new Regex(@"\d+\s?((min(ute)?(s)?)|(hour(s)?)|(day(s)?))\s?ago").IsMatch(value))
10099
{
101100
dateTime = default;

WebsiteScraper/ElementPaser.cs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,10 @@ private static string CssQuery(IElement element, string selector, string atribut
116116

117117

118118
}
119-
catch (Exception)
119+
catch (Exception ex)
120120
{
121121
Debug.WriteLine(selector);
122+
Debug.WriteLine(ex);
122123
}
123124
return found;
124125
}
@@ -160,8 +161,9 @@ public static string Parse(this string selector, IElement element)
160161
{
161162
return element.QuerySelector(selector);
162163
}
163-
catch (Exception)
164+
catch (Exception ex)
164165
{
166+
Debug.WriteLine(ex);
165167
}
166168
return null;
167169
}
@@ -178,9 +180,9 @@ public static IElement[] GetAllElements(this string selector, IElement element)
178180
{
179181
return element.QuerySelectorAll(selector).ToArray();
180182
}
181-
catch (Exception)
183+
catch (Exception ex)
182184
{
183-
185+
Debug.WriteLine(ex);
184186
}
185187
return Array.Empty<IElement>();
186188
}

WebsiteScraper/WebsiteScraper.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
<Title>WebsiteScraper</Title>
99
<Authors>Meyn</Authors>
1010
<Company>Shard</Company>
11-
<Version>0.1.1</Version>
11+
<Version>1.0.0</Version>
1212
<Description>WebsiteScraper is a powerful tool that allows you to effortlessly download your favorite comics and manga from various websites.
1313
You can easily provide a JSON file containing the website links, and the scraper will do the rest.
1414
Using the advanced parsing capabilities of AngleSharp and Regex, the tool efficiently extracts the desired content from the website and saves it onto your device for offline reading.
@@ -41,6 +41,6 @@ With WebsiteScraper, you can enjoy your favorite comics and manga anytime, anywh
4141

4242
<ItemGroup>
4343
<PackageReference Include="AngleSharp" Version="1.0.7" />
44-
<PackageReference Include="Shard.DownloadAssistant" Version="0.1.1" />
44+
<PackageReference Include="Shard.DownloadAssistant" Version="0.1.2" />
4545
</ItemGroup>
4646
</Project>
Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,20 @@
1-
using WebsiteScraper.Downloadable;
2-
3-
namespace WebsiteScraper.WebsiteUtilities
1+
namespace WebsiteScraper.WebsiteUtilities
42
{
5-
public enum DisableTagState
6-
{
7-
None = 0,
8-
Enabeld = 1,
9-
Disabled = 2,
10-
}
11-
public enum TagState
12-
{
13-
None = 0,
14-
Enabeld = 1,
15-
}
16-
173
public class SearchInfo
184
{
19-
public SearchInfo(string search) => Search = search;
5+
public SearchInfo(string search, Website website)
6+
{
7+
Search = search;
8+
EnableAbleTags = website.EnableTags?.Select(x => x with { }).ToDictionary(x => x.Key) ?? new Dictionary<string, EnableAbleTag>();
9+
DisableAbleTags = website.DisableTags?.Select(x => x with { }).ToDictionary(x => x.Key) ?? new Dictionary<string, DisableAbleTag>();
10+
TextTags = website.TextTags?.Select(x => x with { }).ToDictionary(x => x.Key) ?? new Dictionary<string, TextTag>();
11+
RadioTags = website.RadioTags?.Select(x => x with { }).ToDictionary(x => x.Key) ?? new Dictionary<string, RadioTag>();
12+
}
2013
public string Search { get; set; }
2114
public bool IsDirect => (Search.StartsWith('\'') && Search.EndsWith('\'')) || (Search.StartsWith('\"') && Search.EndsWith('\"'));
22-
public string? Author { get; set; }
23-
public Status Status { get; set; }
24-
public Dictionary<string, TagState> Tags { get; set; } = new();
25-
public Dictionary<string, DisableTagState> DisableTags { get; set; } = new();
26-
15+
public IReadOnlyDictionary<string, TextTag> TextTags { get; }
16+
public IReadOnlyDictionary<string, EnableAbleTag> EnableAbleTags { get; }
17+
public IReadOnlyDictionary<string, DisableAbleTag> DisableAbleTags { get; }
18+
public IReadOnlyDictionary<string, RadioTag> RadioTags { get; }
2719
}
2820
}

0 commit comments

Comments
 (0)