Skip to content

Commit cedc8c3

Browse files
committed
Add Bluesky blog (user feed) crawler
1 parent 63391fc commit cedc8c3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+2990
-47
lines changed

src/TumblThree/SharedAssemblyInfo.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@
55

66
[assembly: AssemblyCompany("")]
77
[assembly: AssemblyProduct("TumblThree")]
8-
[assembly: AssemblyCopyright("Copyright (C) 2016-2024 Johannes Meyer zum Alten Borgloh and others")]
8+
[assembly: AssemblyCopyright("Copyright (C) 2016-2025 Johannes Meyer zum Alten Borgloh and others")]
99
[assembly: AssemblyTrademark("")]
1010
[assembly: AssemblyCulture("")]
1111
[assembly: CLSCompliant(false)] // The WinRT API is not CLS-compliant (e.g. usage of uint)
1212

1313
[assembly: ComVisible(false)]
1414
[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)]
15-
[assembly: AssemblyVersion("2.17.0.0")]
16-
[assembly: AssemblyFileVersion("2.17.0.0")]
15+
[assembly: AssemblyVersion("2.18.0.0")]
16+
[assembly: AssemblyFileVersion("2.18.0.0")]

src/TumblThree/TumblThree.Applications/Controllers/CrawlerController.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ private async Task RunCrawlerTasksAsync(PauseToken pt, CancellationToken ct)
248248

249249
if (pt.IsPaused)
250250
{
251-
pt.WaitWhilePausedWithResponseAsyc().Wait();
251+
pt.WaitWhilePausedWithResponseAsync().Wait();
252252
}
253253

254254
bool lockTaken = false;

src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,11 @@ private IReadOnlyList<IBlog> GetIBlogsCore(string directory)
341341
blog = new NewTumblBlog().Load(filename, bufferSize);
342342
}
343343

344+
if (filename.EndsWith(BlogTypes.bluesky.ToString()))
345+
{
346+
blog = new BlueskyBlog().Load(filename, bufferSize);
347+
}
348+
344349
if (blog != null)
345350
{
346351
if (!validCollectionIds.Contains(blog.CollectionId))
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using System;
2+
using Newtonsoft.Json;
3+
using Newtonsoft.Json.Linq;
4+
5+
namespace TumblThree.Applications.Converter
{
    /// <summary>
    /// Json.NET converter for a value that may arrive either as a JSON object or as some other token.
    /// A JSON object is deserialized into <typeparamref name="T"/>; any other token is returned
    /// as the result of its <c>ToString()</c> call.
    /// </summary>
    /// <typeparam name="T">Type produced when the incoming token is a JSON object.</typeparam>
    public class ObjectOrStringConverter<T> : JsonConverter
    {
        /// <summary>
        /// Always returns <c>false</c>, regardless of <paramref name="objectType"/>.
        /// </summary>
        /// <param name="objectType">Type being queried (unused).</param>
        /// <returns><c>false</c>.</returns>
        public override bool CanConvert(Type objectType)
        {
            // CanConvert is not called when the [JsonConverter] attribute is used
            return false;
        }

        /// <summary>
        /// Loads the next token from <paramref name="reader"/> and converts it.
        /// </summary>
        /// <param name="reader">Reader positioned at the value to convert.</param>
        /// <param name="objectType">Declared type of the target member (unused).</param>
        /// <param name="existingValue">Current value of the target member (unused).</param>
        /// <param name="serializer">Serializer used to materialize <typeparamref name="T"/>.</param>
        /// <returns>
        /// An instance of <typeparamref name="T"/> for a JSON object, otherwise the token's string form.
        /// </returns>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            JToken parsed = JToken.Load(reader);

            // NOTE(review): every non-object token (string, number, array, null, ...) takes this
            // path and is stringified - confirm that this is intended for null/array inputs.
            if (parsed.Type != JTokenType.Object)
            {
                return parsed.ToString();
            }

            return parsed.ToObject<T>(serializer);
        }

        /// <summary>
        /// Writes <paramref name="value"/> by delegating to <paramref name="serializer"/>.
        /// </summary>
        /// <param name="writer">Destination writer.</param>
        /// <param name="value">Value to serialize.</param>
        /// <param name="serializer">Serializer that performs the actual write.</param>
        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
        {
            serializer.Serialize(writer, value);
        }
    }
}

src/TumblThree/TumblThree.Applications/Crawler/AbstractCrawler.cs

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,14 +265,15 @@ protected async Task<string> PostDataAsync(string url, string referer, Dictionar
265265
}
266266
}
267267

268-
public virtual T ConvertJsonToClassNew<T>(string json) where T : new()
268+
public virtual T ConvertJsonToClassNew<T>(string json, bool ignoreMetadata = false) where T : new()
269269
{
270270
try
271271
{
272272
json = json.Replace(":undefined", ":null");
273273
using (var ms = new MemoryStream(Encoding.UTF8.GetBytes(json)))
274274
{
275275
var deserializer = new Newtonsoft.Json.JsonSerializer();
276+
deserializer.MetadataPropertyHandling = ignoreMetadata ? Newtonsoft.Json.MetadataPropertyHandling.Ignore : Newtonsoft.Json.MetadataPropertyHandling.Default;
276277
deserializer.Converters.Add(new SingleOrArrayConverter<T>());
277278
using (StreamReader sr = new StreamReader(ms))
278279
using (var jsonTextReader = new Newtonsoft.Json.JsonTextReader(sr))
@@ -393,7 +394,8 @@ protected static string FileName(string url)
393394

394395
[System.Diagnostics.CodeAnalysis.SuppressMessage("Globalization", "CA1305:Specify IFormatProvider", Justification = "<Pending>")]
395396
protected virtual string BuildFileNameCore(string url, string blogName, DateTime date, int timestamp, int index, string type, string id,
396-
List<string> tags, string slug, string title, string rebloggedFromName, string rebloggedRootName, string reblogKey, int noteCount)
397+
List<string> tags, string slug, string title, string rebloggedFromName, string rebloggedRootName, string reblogKey, int noteCount,
398+
bool extraReserve = false)
397399
{
398400
/*
399401
* Replaced are:
@@ -507,7 +509,7 @@ protected virtual string BuildFileNameCore(string url, string blogName, DateTime
507509
}
508510

509511
int tokenLength = ContainsCI(filename, "%p") ? 2 : 0;
510-
int maxCharacters = ShellService.IsLongPathSupported ? MaximumComponentLength : MAX_PATH - 1;
512+
int maxCharacters = (ShellService.IsLongPathSupported ? MaximumComponentLength : MAX_PATH - 1) - (extraReserve ? 4 : 0);
511513
int intendedLength = ShellService.IsLongPathSupported ? filename.Length : Path.Combine(Blog.DownloadLocation(), filename).Length;
512514
// without long path support: 259 (max path minus NULL) - current filename length + 2 chars (%p) - chars for numbering
513515
int charactersLeft = maxCharacters - intendedLength + tokenLength - neededCharactersForNumbering;
@@ -568,7 +570,7 @@ protected void CheckIfShouldPause()
568570
{
569571
if (Pt.IsPaused)
570572
{
571-
Pt.WaitWhilePausedWithResponseAsyc().Wait();
573+
Pt.WaitWhilePausedWithResponseAsync().Wait();
572574
}
573575
}
574576

@@ -607,7 +609,8 @@ protected bool HandleNotFoundWebException(WebException webException)
607609
protected enum LimitExceededSource
608610
{
609611
tumblr,
610-
twitter
612+
twitter,
613+
bluesky
611614
}
612615

613616
protected bool HandleLimitExceededWebException(WebException webException, LimitExceededSource source = LimitExceededSource.tumblr)

src/TumblThree/TumblThree.Applications/Crawler/AbstractTumblrCrawler.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ protected string[] AddTumblrVideoUrl(string text, Post post)
275275
url = "https://vtt.tumblr.com/" + url + ".mp4";
276276
var filename = BuildFileName(url, post, -1);
277277
AddDownloadedMedia(url, filename, post);
278-
AddToDownloadList(new VideoPost(url, post.Id, post.UnixTimestamp.ToString(), filename));
278+
AddToDownloadList(new VideoPost(url, null, post.Id, post.UnixTimestamp.ToString(), filename));
279279
list.Add(url);
280280
}
281281

@@ -295,7 +295,7 @@ protected void AddInlineTumblrVideoUrl(string post, Regex regexVideo, Regex rege
295295
videoUrl += "_480";
296296
}
297297

298-
AddToDownloadList(new VideoPost(videoUrl + ".mp4", Guid.NewGuid().ToString("N"), FileName(videoUrl + ".mp4")));
298+
AddToDownloadList(new VideoPost(videoUrl + ".mp4", null, Guid.NewGuid().ToString("N"), FileName(videoUrl + ".mp4")));
299299
}
300300
}
301301

@@ -304,7 +304,7 @@ protected void AddInlineTumblrVideoUrl(string post, Regex regexVideo, Regex rege
304304
foreach (Match match in regexThumbnail.Matches(post))
305305
{
306306
string thumbnailUrl = match.Groups[1].Value;
307-
AddToDownloadList(new VideoPost(thumbnailUrl, Guid.NewGuid().ToString("N"), FileName(thumbnailUrl)));
307+
AddToDownloadList(new VideoPost(thumbnailUrl, null, Guid.NewGuid().ToString("N"), FileName(thumbnailUrl)));
308308
}
309309
}
310310
}
@@ -326,7 +326,7 @@ protected string[] AddInlineTumblrVideoUrl(string text, Post post)
326326
url += ".mp4";
327327
var filename = BuildFileName(url, post, -1);
328328
AddDownloadedMedia(url, filename, post);
329-
AddToDownloadList(new VideoPost(url, post.Id, post.UnixTimestamp.ToString(), filename));
329+
AddToDownloadList(new VideoPost(url, null, post.Id, post.UnixTimestamp.ToString(), filename));
330330
list.Add(url);
331331
}
332332
}
@@ -361,7 +361,7 @@ protected void AddGenericVideoUrl(string text, Post post)
361361
{
362362
if (TumblrParser.IsTumblrUrl(videoUrl)) { continue; }
363363

364-
AddToDownloadList(new VideoPost(videoUrl, post.Id, post.UnixTimestamp.ToString(), FileName(videoUrl)));
364+
AddToDownloadList(new VideoPost(videoUrl, null, post.Id, post.UnixTimestamp.ToString(), FileName(videoUrl)));
365365
}
366366
}
367367

0 commit comments

Comments
 (0)