Skip to content

Commit 252710b

Browse files
committed
* prefer I?ReadOnlyDictionary<,> over I?Dictionary<,>
* add field for auto prop `SplitEntities` @ RevisionWithSplitting.cs
* using primary ctor @ SaverChangeSet.cs
@ c#/crawler
1 parent 1bccc4e commit 252710b

File tree

12 files changed

+56
-55
lines changed

12 files changed

+56
-55
lines changed

c#/crawler/src/Db/Revision/RevisionWithSplitting.cs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,29 @@ namespace tbm.Crawler.Db.Revision;
33
public abstract class RevisionWithSplitting<TBaseRevision> : IRevision
44
where TBaseRevision : class, IRevision
55
{
6+
private readonly Dictionary<Type, TBaseRevision> _splitEntities = [];
7+
68
public uint TakenAt { get; set; }
79
public ushort? NullFieldsBitMask { get; set; }
8-
public IDictionary<Type, TBaseRevision> SplitEntities { get; } = new Dictionary<Type, TBaseRevision>();
10+
public IReadOnlyDictionary<Type, TBaseRevision> SplitEntities => _splitEntities;
911

1012
public virtual bool IsAllFieldsIsNullExceptSplit() => throw new NotSupportedException();
1113

1214
protected TValue? GetSplitEntityValue<TSplitEntity, TValue>
1315
(Func<TSplitEntity, TValue?> valueSelector)
1416
where TSplitEntity : class, TBaseRevision =>
15-
SplitEntities.TryGetValue(typeof(TSplitEntity), out var entity)
17+
_splitEntities.TryGetValue(typeof(TSplitEntity), out var entity)
1618
? valueSelector((TSplitEntity)entity)
1719
: default;
1820

1921
protected void SetSplitEntityValue<TSplitEntity, TValue>
2022
(TValue? value, Action<TSplitEntity, TValue?> valueSetter, Func<TSplitEntity> entityFactory)
2123
where TSplitEntity : class, TBaseRevision
2224
{
23-
if (SplitEntities.TryGetValue(typeof(TSplitEntity), out var entity))
25+
if (_splitEntities.TryGetValue(typeof(TSplitEntity), out var entity))
2426
valueSetter((TSplitEntity)entity, value);
2527
else
26-
SplitEntities[typeof(TSplitEntity)] = entityFactory();
28+
_splitEntities[typeof(TSplitEntity)] = entityFactory();
2729
}
2830

2931
public class ModelBuilderExtension(ModelBuilder builder, string baseTableName)

c#/crawler/src/Tieba/ClientRequester.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class ClientRequester(
1919
public async Task<JsonElement> RequestJson(
2020
string url,
2121
string clientVersion,
22-
IDictionary<string, string> postParam,
22+
IReadOnlyDictionary<string, string> postParam,
2323
CancellationToken stoppingToken = default) =>
2424
await Request(() => PostJson(url, postParam, clientVersion, stoppingToken), stream =>
2525
{
@@ -84,7 +84,7 @@ private static async Task<T> Request<T>
8484

8585
private async Task<HttpResponseMessage> PostJson(
8686
string url,
87-
IDictionary<string, string> postParam,
87+
IReadOnlyDictionary<string, string> postParam,
8888
string clientVersion,
8989
CancellationToken stoppingToken = default)
9090
{

c#/crawler/src/Tieba/Crawl/CrawlPost.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ public async Task<SavedThreadsList> CrawlThreads
4646
var threadsLatestReplyPostedAt = currentPageChangeSet.AllAfter
4747
.Select(th => th.LatestReplyPostedAt).ToList();
4848
minLatestReplyPostedAt = threadsLatestReplyPostedAt.Min();
49-
if (crawlingPage == 1) _latestReplyPostedAtCheckpointCache[fid] = threadsLatestReplyPostedAt.Max();
49+
if (crawlingPage == 1)
50+
_latestReplyPostedAtCheckpointCache[fid] = threadsLatestReplyPostedAt.Max();
5051
}
5152
else
5253
{ // retry this page
@@ -98,7 +99,7 @@ await Task.WhenAll(shouldCrawlParentPosts.Select(async tid =>
9899
}
99100

100101
public async Task CrawlSubReplies(
101-
IDictionary<Tid, SaverChangeSet<ReplyPost>> savedRepliesKeyByTid,
102+
IReadOnlyDictionary<Tid, SaverChangeSet<ReplyPost>> savedRepliesKeyByTid,
102103
Fid fid,
103104
CancellationToken stoppingToken = default)
104105
{

c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ protected virtual void ThrowIfEmptyUsersEmbedInPosts() { }
122122
protected virtual void PostParseHook(
123123
TResponse response,
124124
CrawlRequestFlag flag,
125-
IDictionary<PostId, TPost> parsedPostsInResponse) { }
125+
IReadOnlyDictionary<PostId, TPost> parsedPostsInResponse) { }
126126
protected virtual void BeforeCommitSaveHook(CrawlerDbContext db, UserSaver userSaver) { }
127127
protected virtual void PostCommitSaveHook(
128128
SaverChangeSet<TPost> savedPosts,

c#/crawler/src/Tieba/Crawl/Facade/SubReplyCrawlFacade.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public class SubReplyCrawlFacade(
2424
protected override void PostParseHook(
2525
SubReplyResponse response,
2626
CrawlRequestFlag flag,
27-
IDictionary<PostId, SubReplyPost> parsedPostsInResponse)
27+
IReadOnlyDictionary<PostId, SubReplyPost> parsedPostsInResponse)
2828
{
2929
foreach (var sr in parsedPostsInResponse.Values)
3030
{

c#/crawler/src/Tieba/Crawl/Facade/ThreadArchiveCrawlFacade.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ public class ThreadArchiveCrawlFacade(
1919
protected override void PostParseHook(
2020
ThreadResponse response,
2121
CrawlRequestFlag flag,
22-
IDictionary<PostId, ThreadPost> parsedPostsInResponse)
22+
IReadOnlyDictionary<PostId, ThreadPost> parsedPostsInResponse)
2323
{ // the second response with flag is the same as the first one, so just skip it
2424
if (flag == CrawlRequestFlag.ThreadClientVersion602) return;
2525
var data = response.Data;

c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,23 @@ protected override void BeforeCommitSaveHook(CrawlerDbContext db, UserSaver user
4242
_ = db.Users.UpsertRange(newLatestRepliersExceptLocked).NoUpdate().Run();
4343
}
4444

45+
protected override void PostParseHook(
46+
ThreadResponse response,
47+
CrawlRequestFlag flag,
48+
IReadOnlyDictionary<PostId, ThreadPost> parsedPostsInResponse)
49+
{
50+
var data = response.Data;
51+
if (flag == CrawlRequestFlag.ThreadClientVersion602) FillFromRequestingWith602(data.ThreadList);
52+
if (flag != CrawlRequestFlag.None) return;
53+
UserParser.Parse(data.UserList);
54+
UserParser.ResetUsersIcon();
55+
ParseLatestRepliers(data.ThreadList);
56+
57+
// remove livepost threads since their real parent forum may not match with current crawling fid
58+
data.ThreadList.Where(th => th.LivePostType != "")
59+
.ForEach(th => Posts.TryRemove((Tid)th.Tid, out _));
60+
}
61+
4562
protected void ParseLatestRepliers(IEnumerable<Thread> threads) =>
4663
threads.Select(th => th.LastReplyer ?? null) // LastReplyer will be null when LivePostType != ""
4764
.OfType<TbClient.User>() // filter out nulls
@@ -67,21 +84,4 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid
6784
// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value
6885
t.parsed.LatestReplierUid = t.inResponse.LastReplyer?.Uid;
6986
});
70-
71-
protected override void PostParseHook(
72-
ThreadResponse response,
73-
CrawlRequestFlag flag,
74-
IDictionary<PostId, ThreadPost> parsedPostsInResponse)
75-
{
76-
var data = response.Data;
77-
if (flag == CrawlRequestFlag.ThreadClientVersion602) FillFromRequestingWith602(data.ThreadList);
78-
if (flag != CrawlRequestFlag.None) return;
79-
UserParser.Parse(data.UserList);
80-
UserParser.ResetUsersIcon();
81-
ParseLatestRepliers(data.ThreadList);
82-
83-
// remove livepost threads since their real parent forum may not match with current crawling fid
84-
data.ThreadList.Where(th => th.LivePostType != "")
85-
.ForEach(th => Posts.TryRemove((Tid)th.Tid, out _));
86-
}
8787
}

c#/crawler/src/Tieba/Crawl/Facade/ThreadLateCrawlFacade.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class ThreadLateCrawlFacade(
88
public delegate ThreadLateCrawlFacade New(Fid fid);
99

1010
public async Task CrawlThenSave(
11-
IDictionary<Tid, FailureCount> failureCountsKeyByTid,
11+
IReadOnlyDictionary<Tid, FailureCount> failureCountsKeyByTid,
1212
CancellationToken stoppingToken = default)
1313
{
1414
var threads = await Task.WhenAll(

c#/crawler/src/Tieba/Crawl/Saver/IRevisionProperties.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ namespace tbm.Crawler.Tieba.Crawl.Saver;
22

33
public interface IRevisionProperties
44
{
5-
protected static IDictionary<Type, IDictionary<string, PropertyInfo>> Cache { get; } = GetPropsKeyByType(
5+
protected static IReadOnlyDictionary<Type, IReadOnlyDictionary<string, PropertyInfo>> Cache { get; } = GetPropsKeyByType(
66
[typeof(ThreadRevision), typeof(ReplyRevision), typeof(SubReplyRevision), typeof(UserRevision)]);
77

8-
private static IDictionary<Type, IDictionary<string, PropertyInfo>> GetPropsKeyByType(IEnumerable<Type> types) =>
8+
private static IReadOnlyDictionary<Type, IReadOnlyDictionary<string, PropertyInfo>> GetPropsKeyByType(IEnumerable<Type> types) =>
99
types.ToDictionary(type => type, type =>
10-
(IDictionary<string, PropertyInfo>)type.GetProperties().ToDictionary(prop => prop.Name));
10+
(IReadOnlyDictionary<string, PropertyInfo>)type.GetProperties().ToDictionary(prop => prop.Name));
1111
}

c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,24 @@
22

33
namespace tbm.Crawler.Tieba.Crawl.Saver;
44

5-
public class SaverChangeSet<TPost> where TPost : class, IPost
5+
public class SaverChangeSet<TPost>(
6+
IReadOnlyCollection<TPost> existingBefore,
7+
ICollection<TPost> existingAfterAndNewlyAdded,
8+
Func<TPost, PostId> postIdSelector)
9+
where TPost : class, IPost
610
{
7-
public SaverChangeSet(
8-
IReadOnlyCollection<TPost> existingBefore,
9-
ICollection<TPost> existingAfterAndNewlyAdded,
10-
Func<TPost, PostId> postIdSelector)
11-
{
12-
Existing = existingBefore
13-
.OrderBy(postIdSelector)
14-
.EquiZip(existingAfterAndNewlyAdded
11+
public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; } = existingBefore
12+
.OrderBy(postIdSelector)
13+
.EquiZip(existingAfterAndNewlyAdded
1514
.IntersectBy(existingBefore.Select(postIdSelector), postIdSelector)
1615
.OrderBy(postIdSelector),
17-
(before, after) => (before, after))
18-
.ToList().AsReadOnly();
19-
NewlyAdded = existingAfterAndNewlyAdded
20-
.ExceptBy(existingBefore.Select(postIdSelector), postIdSelector)
21-
.ToList().AsReadOnly();
22-
AllAfter = existingAfterAndNewlyAdded.ToList().AsReadOnly();
23-
}
16+
(before, after) => (before, after))
17+
.ToList().AsReadOnly();
2418

25-
public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; }
26-
public IReadOnlyCollection<TPost> NewlyAdded { get; }
27-
public IReadOnlyCollection<TPost> AllAfter { get; }
19+
public IReadOnlyCollection<TPost> NewlyAdded { get; } = existingAfterAndNewlyAdded
20+
.ExceptBy(existingBefore.Select(postIdSelector), postIdSelector)
21+
.ToList().AsReadOnly();
22+
23+
public IReadOnlyCollection<TPost> AllAfter { get; } = existingAfterAndNewlyAdded
24+
.ToList().AsReadOnly();
2825
}

0 commit comments

Comments
 (0)