Skip to content

Commit 57838cf

Browse files
committed
int to long
1 parent 21ae9ed commit 57838cf

2 files changed

Lines changed: 15 additions & 14 deletions

File tree

src/CommonCrawl/Handlers/StatisticsHandler.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ public async Task<StatisticsInfo> GetStatisticsAsync(string collectionId,
2626
await foreach (var line in lines)
2727
{
2828
// Parse each line of data and populate result with it
29-
var parts = line.Split('\t');
29+
// BUG: CC-MAIN-2016-36 has a few lines that do not start with '['
30+
var parts = (line.StartsWith('[') ? line : $"[{line}").Split('\t');
3031
if (parts.Length != 2) continue;
3132
var args1 = JsonSerializer.Deserialize<JsonElement[]>(parts[0]);
3233
var type = args1![0].GetString();
@@ -41,7 +42,7 @@ public async Task<StatisticsInfo> GetStatisticsAsync(string collectionId,
4142
{
4243

4344
var counts = parts[1].StartsWith('[')
44-
? JsonSerializer.Deserialize<int[]>(parts[1])!
45+
? JsonSerializer.Deserialize<long[]>(parts[1])!
4546
:
4647
[
4748
int.Parse(parts[1]),
@@ -73,8 +74,8 @@ public async Task<StatisticsInfo> GetStatisticsAsync(string collectionId,
7374
case "tld":
7475
{
7576
var counts = parts[1].StartsWith('[')
76-
? JsonSerializer.Deserialize<int[]>(parts[1])!
77-
: [int.Parse(parts[1])];
77+
? JsonSerializer.Deserialize<long[]>(parts[1])!
78+
: [long.Parse(parts[1])];
7879
var domain = args1![1]!.GetString()!;
7980
}
8081

src/CommonCrawl/Models/StatisticsInfo.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,36 @@
22
{
33
public class StatisticsInfo
44
{
5-
public Dictionary<int, int> HttpStatusCodes { get; } = new();
6-
public Dictionary<string, int> PageCharsets { get; } = new();
7-
public Dictionary<string, int> UrlCharsets { get; } = new();
8-
public Dictionary<string, int> PageLanguages { get; } = new();
9-
public Dictionary<string, int> UrlLanguages { get; } = new();
5+
public Dictionary<int, long> HttpStatusCodes { get; } = new();
6+
public Dictionary<string, long> PageCharsets { get; } = new();
7+
public Dictionary<string, long> UrlCharsets { get; } = new();
8+
public Dictionary<string, long> PageLanguages { get; } = new();
9+
public Dictionary<string, long> UrlLanguages { get; } = new();
1010
public long SurtDomainTotal { set; get; }
1111
public long DomainTotal { set; get; }
1212
public long HostTotal { set; get; }
1313
public long FetchTotal { set; get; }
1414
public long PageTotal { set; get; }
1515
public long UrlTotal { set; get; }
16-
public void AddHttpStatusCode(int statusCode, int result)
16+
public void AddHttpStatusCode(int statusCode, long result)
1717
{
1818
HttpStatusCodes[statusCode] = result;
1919
}
2020

21-
public void AddPageCharset(string charset, int count)
21+
public void AddPageCharset(string charset, long count)
2222
{
2323
PageCharsets[charset] = count;
2424
}
25-
public void AddUrlCharset(string charset, int count)
25+
public void AddUrlCharset(string charset, long count)
2626
{
2727
UrlCharsets[charset] = count;
2828
}
2929

30-
public void AddPageLanguage(string language, int count)
30+
public void AddPageLanguage(string language, long count)
3131
{
3232
PageLanguages[language] = count;
3333
}
34-
public void AddUrlLanguage(string language, int count)
34+
public void AddUrlLanguage(string language, long count)
3535
{
3636
UrlLanguages[language] = count;
3737
}

0 commit comments

Comments
 (0)