Skip to content

Commit ee07152

Browse files
committed
in midst of merging some user records
1 parent b4388a4 commit ee07152

File tree

1 file changed

+157
-75
lines changed

1 file changed

+157
-75
lines changed

scripts/database_migration.exs

Lines changed: 157 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,8 @@ defmodule DatabaseMigration do
6060
]
6161

6262
@index_fields [
63-
{"GithubUser", ["id", "user_id"]},
6463
{"User", ["id"]},
65-
{"Org", ["id"]},
64+
{"GithubUser", ["id", "user_id"]},
6665
{"Bounty", ["id", "task_id"]},
6766
{"Task", ["id"]},
6867
{"Claim", ["id", "bounty_id"]},
@@ -203,53 +202,57 @@ defmodule DatabaseMigration do
203202
}
204203
end
205204

206-
defp transform({"Org", User}, row, _db) do
207-
%{
208-
"id" => row["id"],
209-
"provider" => row["github_handle"] && "github",
210-
"provider_id" => row["github_id"],
211-
"provider_login" => row["github_handle"],
212-
"provider_meta" => row["github_data"] && deserialize_value(row["github_data"]),
213-
"email" => nil,
214-
"display_name" => row["name"],
215-
"handle" => row["handle"],
216-
"avatar_url" => update_url(row["avatar_url"]),
217-
"external_homepage_url" => nil,
218-
"type" => "organization",
219-
"bio" => row["description"],
220-
"location" => nil,
221-
"country" => nil,
222-
"timezone" => nil,
223-
"stargazers_count" => row["stargazers_count"],
224-
"domain" => row["domain"],
225-
"tech_stack" => row["tech"],
226-
"featured" => row["featured"],
227-
"priority" => row["priority"],
228-
"fee_pct" => row["fee_pct"],
229-
"seeded" => row["seeded"],
230-
"activated" => row["active"],
231-
"max_open_attempts" => row["max_open_attempts"],
232-
"manual_assignment" => row["manual_assignment"],
233-
"bounty_mode" => nil,
234-
"hourly_rate_min" => nil,
235-
"hourly_rate_max" => nil,
236-
"hours_per_week" => nil,
237-
"website_url" => row["website_url"],
238-
"twitter_url" => row["twitter_url"],
239-
"github_url" => nil,
240-
"youtube_url" => row["youtube_url"],
241-
"twitch_url" => nil,
242-
"discord_url" => row["discord_url"],
243-
"slack_url" => row["slack_url"],
244-
"linkedin_url" => nil,
245-
"og_title" => nil,
246-
"og_image_url" => nil,
247-
"last_context" => nil,
248-
"need_avatar" => nil,
249-
"inserted_at" => row["created_at"],
250-
"updated_at" => row["updated_at"],
251-
"is_admin" => false
252-
}
205+
# Builds the output user record for an "Org" source row.
#
# The row id is first resolved through the "_MergedUser" index. When the
# resolved entity satisfies user?/1, this clause returns nil and emits no
# separate organization record. Otherwise the org row is mapped field by
# field onto the user shape with "type" fixed to "organization".
defp transform({"Org", User}, row, db) do
  canonical = find_by_index(db, "_MergedUser", "id", row["id"])

  if user?(canonical) do
    nil
  else
    github_handle = row["github_handle"]
    github_data = row["github_data"]

    %{
      # Identity / provider linkage; provider fields stay nil without GitHub data.
      "id" => row["id"],
      "provider" => github_handle && "github",
      "provider_id" => row["github_id"],
      "provider_login" => github_handle,
      "provider_meta" => github_data && deserialize_value(github_data),
      "email" => nil,

      # Profile
      "display_name" => row["name"],
      "handle" => row["handle"],
      "avatar_url" => update_url(row["avatar_url"]),
      "external_homepage_url" => nil,
      "type" => "organization",
      "bio" => row["description"],
      "location" => nil,
      "country" => nil,
      "timezone" => nil,
      "stargazers_count" => row["stargazers_count"],
      "domain" => row["domain"],
      "tech_stack" => row["tech"],

      # Settings copied from the source row
      "featured" => row["featured"],
      "priority" => row["priority"],
      "fee_pct" => row["fee_pct"],
      "seeded" => row["seeded"],
      "activated" => row["active"],
      "max_open_attempts" => row["max_open_attempts"],
      "manual_assignment" => row["manual_assignment"],
      "bounty_mode" => nil,
      "hourly_rate_min" => nil,
      "hourly_rate_max" => nil,
      "hours_per_week" => nil,

      # Links
      "website_url" => row["website_url"],
      "twitter_url" => row["twitter_url"],
      "github_url" => nil,
      "youtube_url" => row["youtube_url"],
      "twitch_url" => nil,
      "discord_url" => row["discord_url"],
      "slack_url" => row["slack_url"],
      "linkedin_url" => nil,

      # Fields with no source value
      "og_title" => nil,
      "og_image_url" => nil,
      "last_context" => nil,
      "need_avatar" => nil,

      # Timestamps and flags
      "inserted_at" => row["created_at"],
      "updated_at" => row["updated_at"],
      "is_admin" => false
    }
  end
end
254257

255258
defp transform({"GithubUser", User}, row, _db) do
@@ -324,10 +327,16 @@ defmodule DatabaseMigration do
324327
}
325328
end
326329

327-
defp transform({"OrgMember", Member}, row, _db) do
330+
defp transform({"OrgMember", Member}, row, db) do
331+
owner = find_by_index(db, "_MergedUser", "id", row["org_id"])
332+
333+
if !owner do
334+
raise "Owner not found: #{inspect(row)}"
335+
end
336+
328337
%{
329338
"id" => row["id"],
330-
"org_id" => row["org_id"],
339+
"org_id" => owner["id"],
331340
"role" => row["role"],
332341
"user_id" => row["user_id"],
333342
"inserted_at" => row["created_at"],
@@ -337,15 +346,20 @@ defmodule DatabaseMigration do
337346

338347
defp transform({"Bounty", Bounty}, row, db) do
339348
reward = find_by_index(db, "Reward", "bounty_id", row["id"])
349+
owner = find_by_index(db, "_MergedUser", "id", row["org_id"])
340350

341351
amount = if reward, do: Money.from_integer(String.to_integer(reward["amount"]), reward["currency"])
342352

353+
if !owner do
354+
raise "Owner not found: #{inspect(row)}"
355+
end
356+
343357
if row["type"] != "tip" do
344358
%{
345359
"id" => row["id"],
346360
"amount" => amount,
347361
"ticket_id" => row["task_id"],
348-
"owner_id" => row["org_id"],
362+
"owner_id" => owner["id"],
349363
"creator_id" => row["poster_id"],
350364
"inserted_at" => row["created_at"],
351365
"updated_at" => row["updated_at"],
@@ -434,7 +448,7 @@ defmodule DatabaseMigration do
434448
end
435449

436450
defp transform({"BountyCharge", Transaction}, row, db) do
437-
user = find_by_index(db, "Org", "id", row["org_id"])
451+
user = find_by_index(db, "_MergedUser", "id", row["org_id"])
438452

439453
amount = Money.from_integer(String.to_integer(row["amount"]), row["currency"])
440454

@@ -483,7 +497,7 @@ defmodule DatabaseMigration do
483497
claim = find_by_index(db, "Claim", "id", row["claim_id"])
484498
github_user = find_by_index(db, "GithubUser", "id", claim["github_user_id"])
485499
bounty = find_by_index(db, "Bounty", "id", claim["bounty_id"])
486-
500+
owner = find_by_index(db, "_MergedUser", "id", bounty["org_id"])
487501
user_id = or_else(github_user["user_id"], github_user["id"])
488502
amount = Money.from_integer(String.to_integer(row["amount"]), row["currency"])
489503

@@ -495,13 +509,17 @@ defmodule DatabaseMigration do
495509
raise "User not found: #{inspect(row)}"
496510
end
497511

512+
if !owner do
513+
raise "Owner not found: #{inspect(row)}"
514+
end
515+
498516
if bounty["type"] == "tip" do
499517
%{
500518
"id" => bounty["id"] <> user_id,
501519
"amount" => amount,
502520
"status" => nil,
503521
"ticket_id" => bounty["task_id"],
504-
"owner_id" => bounty["org_id"],
522+
"owner_id" => owner["id"],
505523
"creator_id" => bounty["poster_id"],
506524
"recipient_id" => user_id,
507525
"inserted_at" => bounty["created_at"],
@@ -514,7 +532,7 @@ defmodule DatabaseMigration do
514532
claim = find_by_index(db, "Claim", "id", row["claim_id"])
515533
bounty = find_by_index(db, "Bounty", "id", claim["bounty_id"])
516534
github_user = find_by_index(db, "GithubUser", "id", claim["github_user_id"])
517-
org = find_by_index(db, "Org", "id", bounty["org_id"])
535+
org = find_by_index(db, "_MergedUser", "id", bounty["org_id"])
518536
bounty_charge = find_by_index(db, "BountyCharge", "id", row["bounty_charge_id"])
519537

520538
user_id = or_else(github_user["user_id"], github_user["id"])
@@ -566,7 +584,13 @@ defmodule DatabaseMigration do
566584
)
567585
end
568586

569-
defp transform({"GithubInstallation", Installation}, row, _db) do
587+
defp transform({"GithubInstallation", Installation}, row, db) do
588+
connected_user = find_by_index(db, "_MergedUser", "id", row["org_id"])
589+
590+
if !connected_user do
591+
raise "Connected user not found: #{inspect(row)}"
592+
end
593+
570594
%{
571595
"id" => row["id"],
572596
"provider" => "github",
@@ -575,7 +599,7 @@ defmodule DatabaseMigration do
575599
"avatar_url" => nil,
576600
"repository_selection" => nil,
577601
"owner_id" => nil,
578-
"connected_user_id" => row["org_id"],
602+
"connected_user_id" => connected_user["id"],
579603
"inserted_at" => row["created_at"],
580604
"updated_at" => row["updated_at"],
581605
"provider_user_id" => nil
@@ -605,29 +629,41 @@ defmodule DatabaseMigration do
605629
}
606630
end
607631

608-
defp transform({"StripeCustomer", Customer}, row, _db) do
609-
if row["org_id"] not in ["clfqtao4h0001mo0gkp9az0bn", "cm251pvg40007ld031q5t2hj2", "cljo6j981000el60f1k1cvtns"] do
632+
# Builds the output customer record for a "StripeCustomer" source row.
#
# The row's "org_id" is resolved through the "_MergedUser" index; a missing
# entry raises. Rows whose resolved owner id is in the hard-coded exclusion
# list produce nil; all others are mapped with "user_id" set to the resolved
# owner id.
defp transform({"StripeCustomer", Customer}, row, db) do
  excluded_owner_ids = [
    "clfqtao4h0001mo0gkp9az0bn",
    "cm251pvg40007ld031q5t2hj2",
    "cljo6j981000el60f1k1cvtns"
  ]

  owner =
    find_by_index(db, "_MergedUser", "id", row["org_id"]) ||
      raise "Owner not found: #{inspect(row)}"

  owner_id = owner["id"]

  if owner_id in excluded_owner_ids do
    nil
  else
    %{
      "id" => row["id"],
      "provider" => "stripe",
      "provider_id" => row["stripe_id"],
      "provider_meta" => nil,
      "name" => row["name"],
      "user_id" => owner_id,
      "inserted_at" => row["created_at"],
      "updated_at" => row["updated_at"]
    }
  end
end
622652

623653
defp transform({"StripePaymentMethod", PaymentMethod}, row, db) do
624-
if row["org_id"] not in ["clfqtao4h0001mo0gkp9az0bn", "cm251pvg40007ld031q5t2hj2", "cljo6j981000el60f1k1cvtns"] do
625-
customer = find_by_index(db, "StripeCustomer", "org_id", row["org_id"])
654+
owner = find_by_index(db, "_MergedUser", "id", row["org_id"])
626655

627-
if !customer do
628-
raise "StripeCustomer not found: #{inspect(row)}"
629-
end
656+
if !owner do
657+
raise "Owner not found: #{inspect(row)}"
658+
end
659+
660+
customer = find_by_index(db, "StripeCustomer", "org_id", row["org_id"])
661+
662+
if !customer do
663+
raise "StripeCustomer not found: #{inspect(row)}"
664+
end
630665

666+
if owner["id"] not in ["clfqtao4h0001mo0gkp9az0bn", "cm251pvg40007ld031q5t2hj2", "cljo6j981000el60f1k1cvtns"] do
631667
%{
632668
"id" => row["id"],
633669
"provider" => "stripe",
@@ -765,6 +801,8 @@ defmodule DatabaseMigration do
765801
|> Stream.run()
766802
end
767803

804+
# A row is treated as a user when its "email" field is not nullish
# (as decided by nullish?/1).
defp user?(row) do
  email = row["email"]
  not nullish?(email)
end
805+
768806
defp collect_data(input_file) do
769807
db =
770808
input_file
@@ -790,7 +828,49 @@ defmodule DatabaseMigration do
790828
Map.put(acc, table, table_indexes)
791829
end)
792830

793-
Map.put(db, :indexes, indexes)
831+
db = Map.put(db, :indexes, indexes)
832+
833+
put_in(db, [:indexes, "_MergedUser"], %{"id" => index_merged_users(db)})
834+
end
835+
836+
# Builds the "id" index for the synthetic "_MergedUser" table: a map from
# every User/Org row id to the row that represents it after merging.
#
# Rows from db["User"] and db["Org"] are grouped by the key computed in
# merged_user_group_key/2. Each group is then resolved by
# merged_user_entries/1 into {row_id, representative_row} pairs.
#
# Raises if a group holds several rows and none of them satisfies user?/1.
defp index_merged_users(db) do
  (db["User"] ++ db["Org"])
  |> Enum.group_by(&merged_user_group_key(db, &1))
  |> Enum.flat_map(fn {_key, entities} -> merged_user_entries(entities) end)
  |> Map.new()
end

# Computes the grouping key for one User/Org row.
#
# For rows satisfying user?/1 the login comes from the GithubUser row indexed
# by "user_id"; for all other rows it comes from the row's own
# "github_handle". Rows without a usable login get a key unique to the row
# ("algora_" <> id), so they never merge with anything.
#
# NOTE(review): keys are compared case-sensitively; if the two login sources
# can differ in letter case the rows will not be grouped together. Confirm
# whether that matters for this data.
defp merged_user_group_key(db, row) do
  login =
    if user?(row) do
      case find_by_index(db, "GithubUser", "user_id", row["id"]) do
        nil -> nil
        github_user -> github_user["login"]
      end
    else
      row["github_handle"]
    end

  # The explicit nil check keeps `<>` from ever receiving nil.
  if is_nil(login) or nullish?(login) do
    "algora_" <> row["id"]
  else
    "github_" <> login
  end
end

# Resolves one group of rows sharing a key into {row_id, representative} pairs.
#
# If any row satisfies user?/1, the first such row represents the whole
# group, and each redirected id is logged. Otherwise the group must be a
# single row, which represents itself.
defp merged_user_entries(entities) do
  case Enum.find(entities, &user?/1) do
    nil ->
      case entities do
        [sole] -> [{sole["id"], sole}]
        # Two or more non-user rows share a key and there is no user to merge into.
        _ -> raise "Unexpected number of users for #{inspect(entities)}"
      end

    user ->
      Enum.map(entities, fn row ->
        if row["id"] != user["id"] do
          Logger.info("#{row["handle"]} -> #{user["handle"]}")
        end

        {row["id"], user}
      end)
  end
end
795875

796876
defp index_by_field(data, field) do
@@ -907,13 +987,11 @@ defmodule DatabaseMigration do
907987
end
908988

909989
# Replaces fields[:handle] with the value returned by get_unique_handle/1.
# When the handle is nullish (per nullish?/1), `fields` is returned unchanged.
defp ensure_unique_handle(fields) do
  handle = fields[:handle]

  if nullish?(handle),
    do: fields,
    else: Map.put(fields, :handle, get_unique_handle(handle))
end
919997

@@ -925,6 +1003,10 @@ defmodule DatabaseMigration do
9251003
new_handle = if count > 0, do: "#{handle}#{count + 1}", else: handle
9261004
Process.put(:handles, Map.put(handles, downcased_handle, count + 1))
9271005

1006+
if count > 0 do
1007+
Logger.warning("Unique handle collision: #{handle} -> #{new_handle}")
1008+
end
1009+
9281010
new_handle
9291011
end
9301012

@@ -1144,7 +1226,7 @@ defmodule DatabaseMigration do
11441226
:ok = time_step("Processing dump", fn -> process_dump(input_file, output_file) end)
11451227
:ok = time_step("Clearing tables", fn -> clear_tables!() end)
11461228
{:ok, _} = time_step("Importing new data", fn -> psql(["-f", output_file]) end)
1147-
:ok = time_step("Backfilling repositories", fn -> Algora.Admin.backfill_repos!() end)
1229+
# :ok = time_step("Backfilling repositories", fn -> Algora.Admin.backfill_repos!() end)
11481230
end)
11491231

11501232
IO.puts("\n✓ Migration completed successfully in #{total_time / 1_000_000} seconds")

0 commit comments

Comments
 (0)