Skip to content

Commit 6781b80

Browse files
committed
generating data.json. next see how to visualize
1 parent af2aa02 commit 6781b80

File tree

3 files changed

+4
-106
lines changed

3 files changed

+4
-106
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"dates":["2018-03","2018-04","2018-05","2018-09","2018-10","2018-11","2018-12","2019-01","2019-02","2019-03","2019-04","2019-05","2019-06","2019-07","2019-08","2019-09","2019-10","2019-11","2019-12","2020-01","2020-02","2020-03","2020-04","2020-05","2020-06","2020-07","2020-08","2020-09","2020-10","2020-12","2021-01","2021-02","2021-03","2021-04","2021-05","2021-06","2021-07","2021-08","2021-09","2021-10","2021-11","2021-12","2022-01","2022-02","2022-03","2022-04","2022-05","2022-06","2022-07","2022-08","2022-09","2022-10","2022-11","2022-12","2023-01","2023-02","2023-03","2023-04","2023-05","2023-06","2023-07","2023-08","2023-09","2023-10","2023-11","2023-12","2024-01","2024-02","2024-03","2024-04","2024-05","2024-06","2024-07","2024-08","2024-09","2024-10","2024-11","2024-12","2025-01"],"counts":[[4,4],[9,0],[3,0],[4,0],[8,1],[10,0],[17,2],[6,0],[32,8],[12,0],[6,0],[11,0],[17,0],[8,0],[4,0],[6,1],[13,1],[5,0],[7,0],[15,4],[11,0],[14,2],[35,4],[24,1],[9,0],[6,0],[4,0],[5,0],[10,1],[13,5],[2,0],[14,7],[12,6],[1,0],[6,0],[7,0],[21,9],[10,1],[8,1],[16,2],[10,0],[20,0],[35,2],[43,2],[11,1],[29,1],[26,6],[8,2],[13,2],[7,2],[3,0],[16,3],[16,1],[18,0],[28,3],[42,3],[26,0],[21,3],[18,2],[12,6],[15,0],[10,3],[25,2],[21,2],[28,0],[7,1],[26,3],[21,0],[30,8],[9,0],[28,3],[21,3],[20,3],[17,4],[26,3],[32,0],[22,1],[16,4],[31,2]]}

repositories/data.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

scripts/repositories.jl

Lines changed: 2 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ if isfile(joinpath(@__DIR__, "dev.env"))
2121
println("Loaded GH secret IG")
2222
end
2323

24-
const DATA_DIR = joinpath(dirname(@__DIR__), "docs", "repositories")
24+
const DATA_DIR = joinpath(dirname(@__DIR__), "repositories")
2525

2626
function Repository(repo; since, until, my_auth)
2727
println("Getting : ", repo)
@@ -45,50 +45,6 @@ function get_repos(since, until)
4545
)
4646
end
4747

48-
function download_stats(file)
49-
url = "https://julialang-logs.s3.amazonaws.com/public_outputs/current/$(file).csv.gz"
50-
output = joinpath(dirname(@__DIR__), "data", "$(file).csv.gz")
51-
Downloads.download(url, output)
52-
return output
53-
end
54-
55-
function load_stats(file, uuids)
56-
out = download_stats(file)
57-
df = CSV.read(out, DataFrames.DataFrame)
58-
uuid_to_name = DataFrames.DataFrame(
59-
package_uuid = collect(keys(uuids)),
60-
name = collect(values(uuids)),
61-
)
62-
df = DataFrames.leftjoin(df, uuid_to_name; on = :package_uuid)
63-
filter!(df) do row
64-
return !ismissing(row.client_type) &&
65-
row.client_type == "user" &&
66-
!ismissing(row.name) &&
67-
occursin("ERGO-Code/", row.name) &&
68-
row.status in (200, 301, 302)
69-
end
70-
return DataFrames.select(df, [:name, :date, :request_count])
71-
end
72-
73-
function get_historical_downloads(
74-
filename::String = joinpath(DATA_DIR, "download_stats.json"),
75-
)
76-
current = JSON.parsefile(filename; use_mmap = false)
77-
name = String[]
78-
date = Dates.Date[]
79-
request_count_sum = Int[]
80-
for (pkg, results) in current
81-
append!(name, fill("ERGO-Code/$pkg", length(results["requests"])))
82-
append!(date, Dates.Date.(results["dates"]))
83-
append!(request_count_sum, results["requests"])
84-
end
85-
return DataFrames.DataFrame(
86-
name = name,
87-
date = date,
88-
request_count_sum = request_count_sum,
89-
)
90-
end
91-
9248
function get_pkg_uuids()
9349
pkg_uuids = Dict{String,String}()
9450
r = first(Pkg.Registry.reachable_registries())
@@ -101,28 +57,6 @@ function get_pkg_uuids()
10157
return pkg_uuids
10258
end
10359

104-
function update_download_statistics()
105-
pkg_uuids = get_pkg_uuids()
106-
df = load_stats("package_requests_by_region_by_date", pkg_uuids)
107-
new_df = sort!(combine(groupby(df, [:name, :date]), :request_count => sum))
108-
new_df.name = String.(new_df.name)
109-
current = get_historical_downloads()
110-
append!(current, new_df)
111-
unique!(current)
112-
sort!(current, [:name, :date])
113-
data = Dict{String,Dict{String,Any}}()
114-
for g in groupby(current, :name)
115-
key = replace(g[1, :name], "ERGO-Code/" => "")
116-
data[key] = Dict{String,Any}(
117-
"dates" => string.(collect(g.date)),
118-
"requests" => collect(g.request_count_sum),
119-
)
120-
end
121-
open(joinpath(DATA_DIR, "download_stats.json"), "w") do io
122-
return write(io, JSON.json(data))
123-
end
124-
return
125-
end
12660

12761
function update_package_statistics()
12862
since = "2013-01-01T00:00:00"
@@ -246,50 +180,12 @@ function prs_by_user(user)
246180
return prs_by_user
247181
end
248182

249-
# function state_of_jump_statistics()
250-
# old_date = Dates.today() - Dates.Year(1)
251-
# # Downloads
252-
# df = get_historical_downloads()
253-
# n_downloads = sum(df[df.date.>=old_date, :].request_count_sum)
254-
# # PRs and issues
255-
# data = JSON.parsefile(joinpath(DATA_DIR, "data.json"))
256-
# prs_opened, issues_opened, contributors = 0, 0, Set{String}()
257-
# for (pkg, items) in data, item in items
258-
# if Dates.DateTime(item["date"]) >= old_date && item["type"] == "opened"
259-
# if item["is_pr"]
260-
# push!(contributors, item["user"])
261-
# prs_opened += 1
262-
# else
263-
# issues_opened += 1
264-
# end
265-
# end
266-
# end
267-
# open(joinpath(DATA_DIR, "summary.json"), "w") do io
268-
# summary = Dict(
269-
# "n_downloads" => n_downloads,
270-
# "prs_opened" => prs_opened,
271-
# "issues_opened" => issues_opened,
272-
# "num_contributors" => length(contributors),
273-
# )
274-
# write(io, JSON.json(summary))
275-
# return
276-
# end
277-
# println("""
278-
# Downloads : >$n_downloads
279-
# Pull requests opened : $prs_opened
280-
# Issues opened : $issues_opened
281-
# Unique contributors : $(length(contributors))
282-
# """)
283-
# return
284-
# end
285183

286184
has_arg(arg) = any(isequal(arg), ARGS)
287185

288186
if has_arg("--update")
289-
# update_download_statistics()
290187
update_package_statistics()
291-
# update_contributor_prs_over_time()
292-
# # state_of_jump_statistics()
188+
update_contributor_prs_over_time()
293189
end
294190

295191

0 commit comments

Comments
 (0)