Skip to content

Commit 641d44a

Browse files
committed
add llm
1 parent c563cfd commit 641d44a

File tree

6 files changed

+181
-1
lines changed

6 files changed

+181
-1
lines changed

lib/algora/admin/admin.ex

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,154 @@ defmodule Algora.Admin do
2424

2525
require Logger
2626

27+
defmodule JobPostingPrediction do
  @moduledoc false
  # Response model passed to Instructor as `response_model` when classifying
  # raw job posting text, plus helpers that persist the predicted postings
  # (and the organizations they belong to) through the Repo.

  use Ecto.Schema
  use Instructor.Validator

  alias Algora.Organizations

  # NOTE(review): nothing in this module reads @llm_doc directly; confirm that
  # `use Instructor.Validator` is what registers it with Instructor.
  @llm_doc """
  ## Field Descriptions:
  - tech_stack: List of technologies used in the job posting (e.g. ["Ruby", "Rails", "PostgreSQL"])
  - countries: List of 2-letter ISO country codes (e.g. ["US", "CA"])
  - regions: List of regions (e.g. ["EMEA", "LATAM"])
  - location: Location of the job posting (e.g. "Remote", "San Francisco, CA", "London/Berlin")
  - seniority: Seniority level (e.g. "Senior", "Mid-Senior", "Entry-Level")
  - company_url: Company website URL which can be derived from email (e.g. example.com)
  """
  @primary_key false
  embedded_schema do
    embeds_many :job_postings, JobPosting, primary_key: false do
      field(:title, :string)
      field(:description, :string)
      field(:tech_stack, {:array, :string})
      field(:company_name, :string)
      field(:company_url, :string)
      field(:location, :string)
      field(:countries, {:array, :string})
      field(:regions, {:array, :string})
      field(:compensation, :string)
      field(:seniority, :string)
    end
  end

  # No extra validation is applied; the changeset is returned untouched.
  @impl true
  def validate_changeset(changeset) do
    changeset
  end

  @doc """
  Seeds every job in `jobs` concurrently via `seed/1`.

  Runs up to 50 seeds at a time with no per-task timeout and returns the
  collected `Task.async_stream/3` results as a list.
  """
  # NOTE(review): each seed talks to the Repo; confirm the connection pool can
  # serve 50 concurrent tasks.
  def seed_jobs(jobs) do
    jobs
    |> Task.async_stream(&seed/1, timeout: :infinity, max_concurrency: 50)
    |> Enum.to_list()
  end

  @doc """
  Persists a single predicted job posting.

  Derives a domain from `job.company_url`, fetches or creates the matching
  organization, fills in organization fields that are still empty, and inserts
  a `JobPosting` with status `:processing`.

  Returns the result of `Repo.insert/1` on success. If no domain can be derived
  the function returns `nil`; if fetching/creating or updating the organization
  does not return `{:ok, _}`, that value is returned unchanged.
  """
  def seed(job) do
    with domain when not is_nil(domain) <- to_domain(job.company_url),
         {:ok, org} <- fetch_or_create_user(domain, %{hiring: true, tech_stack: job.tech_stack}),
         {:ok, org} <- org |> change(org_attrs(org, domain, job)) |> Repo.update() do
      Repo.insert(%JobPosting{
        status: :processing,
        id: Nanoid.generate(),
        user_id: org.id,
        company_name: org.name,
        company_url: org.website_url,
        title: job.title,
        description: job.description,
        tech_stack: job.tech_stack,
        location: job.location,
        compensation: job.compensation,
        seniority: job.seniority,
        countries: job.countries,
        regions: job.regions
      })
    end
  end

  # Builds the attributes applied to the organization before the posting is
  # inserted. Values already present on `org` win over values from `job`; a
  # handle is only generated when the organization does not have one yet.
  defp org_attrs(org, domain, job) do
    base = %{
      domain: org.domain || domain,
      hiring_subscription: :trial,
      billing_name: org.billing_name || job.company_name,
      billing_address: org.billing_address || job.location,
      executive_name: org.executive_name || job.company_name,
      executive_role: org.executive_role || job.seniority
    }

    handle_attrs =
      if org.handle do
        %{}
      else
        %{handle: Organizations.ensure_unique_org_handle(job.company_name)}
      end

    Map.merge(base, handle_attrs)
  end

  # Extracts a bare, lower-cased host (without a leading "www.") from a URL or
  # a scheme-less value such as "example.com/careers". Returns nil when no host
  # can be found, so blank or malformed values never become a domain.
  defp to_domain(nil), do: nil

  defp to_domain(url) when is_binary(url) do
    trimmed = String.trim(url)

    # URI.parse/1 only fills in :host when a scheme is present, so add one for
    # values like "example.com".
    candidate =
      if String.match?(trimmed, ~r{^[a-z][a-z0-9+.-]*://}i) do
        trimmed
      else
        "https://" <> trimmed
      end

    case URI.parse(candidate) do
      %URI{host: host} when is_binary(host) and host != "" ->
        host
        |> String.downcase()
        |> String.trim_leading("www.")

      _ ->
        nil
    end
  end

  @doc """
  Looks up a `User` by `domain`; when none exists, onboards a new organization
  for that domain through `Organizations.onboard_organization_from_domain/2`.

  Returns `{:ok, user}` for an existing user, otherwise whatever the onboarding
  call returns.
  """
  def fetch_or_create_user(domain, opts) do
    case Repo.one(from o in User, where: o.domain == ^domain, limit: 1) do
      %User{} = user ->
        {:ok, user}

      _ ->
        Organizations.onboard_organization_from_domain(domain, opts)
    end
  end
end
131+
132+
@doc """
Classifies `jobs` in batches of 10 and returns the predicted postings from all
batches as one flat list.

Each batch is sent to `classify_batch/2` together with its zero-based position.
A batch that comes back as `{:error, _}` is logged and contributes nothing to
the result.
"""
def classify_jobs(jobs) do
  jobs
  |> Enum.chunk_every(10)
  |> Enum.with_index()
  |> Enum.flat_map(fn {batch, batch_index} ->
    outcome = classify_batch(batch, batch_index)

    case outcome do
      {:ok, prediction} ->
        prediction.job_postings

      {:error, reason} ->
        Logger.error("Failed to classify batch #{batch_index}: #{inspect(reason)}")
        []
    end
  end)
end
148+
149+
@doc """
Asks the LLM to turn one batch of raw job posting texts into a
`JobPostingPrediction`.

`jobs` is joined with blank lines into a single prompt, so its elements must be
strings (or other chardata). `index` is the position of the batch and is part
of the cache key.

Returns whatever `Github.Client.run_cached/2` yields for the wrapped
`Instructor.chat_completion/1` call; `classify_jobs/1` expects
`{:ok, prediction}` or `{:error, reason}`.
"""
def classify_batch(jobs, index) do
  text = Enum.join(jobs, "\n\n")

  # Key the cache on the batch content as well as its position, so a different
  # set of postings at the same index is not answered with an earlier result.
  # NOTE(review): assumes run_cached memoises by key — confirm.
  digest =
    :sha256
    |> :crypto.hash(text)
    |> Base.encode16(case: :lower)
    |> binary_part(0, 16)

  Github.Client.run_cached("classify_jobs_#{index}_#{digest}", fn ->
    Instructor.chat_completion(
      model: "gpt-4o-mini",
      response_model: JobPostingPrediction,
      max_retries: 2,
      messages: [
        %{
          role: "user",
          content: """
          Your purpose is to turn arbitrary job postings into structured data.

          Return a distinct entry for each job. Some paragraphs may contain multiple jobs.

          Turn following job postings into structured data:

          #{text}
          """
        }
      ]
    )
  end)
end
174+
27175
def seed_job(opts \\ %{}) do
28176
with {:ok, user} <- Repo.fetch_by(User, handle: opts.org.handle),
29177
{:ok, user} <- user |> change(opts.org) |> Repo.update(),

lib/algora/organizations/organizations.ex

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ defmodule Algora.Organizations do
163163
org -> org.handle
164164
end
165165

166+
dbg(params)
167+
166168
case org do
167169
nil ->
168170
%User{type: :organization}

lib/algora_web/live/hn_jobs_live.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ defmodule AlgoraWeb.HNJobsLive do
2121
def mount(_params, _session, socket) do
2222
jobs =
2323
JobPosting
24-
|> where([j], j.id in ^Settings.get_hn_job_ids())
24+
# |> where([j], j.id in ^Settings.get_hn_job_ids())
2525
|> order_by([j], desc: j.inserted_at)
2626
|> Repo.all()
2727
|> Repo.preload(:user)

mix.exs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ defmodule Algora.MixProject do
9999
{:sobelow, "~> 0.13", only: [:dev, :test], runtime: false},
100100
{:cmark, "~> 0.10"},
101101
{:csv, "~> 3.2"},
102+
{:instructor, "~> 0.1.0"},
102103
# ex_aws
103104
{:ex_aws, "~> 2.1"},
104105
{:ex_aws_s3, "~> 2.0"},

mix.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@
4646
"hpax": {:hex, :hpax, "1.0.2", "762df951b0c399ff67cc57c3995ec3cf46d696e41f0bba17da0518d94acd4aac", [:mix], [], "hexpm", "2f09b4c1074e0abd846747329eaa26d535be0eb3d189fa69d812bfb8bfefd32f"},
4747
"httpoison": {:hex, :httpoison, "2.2.1", "87b7ed6d95db0389f7df02779644171d7319d319178f6680438167d7b69b1f3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "51364e6d2f429d80e14fe4b5f8e39719cacd03eb3f9a9286e61e216feac2d2df"},
4848
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
49+
"instructor": {:hex, :instructor, "0.1.0", "ca587fa11b9de7dff68b6f0a28ee17682d35f67efa20f71aef61bbb528444562", [:mix], [{:ecto, "~> 3.12", [hex: :ecto, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:jaxon, "~> 2.0", [hex: :jaxon, repo: "hexpm", optional: false]}, {:req, "~> 0.5 or ~> 1.0", [hex: :req, repo: "hexpm", optional: false]}], "hexpm", "05a7020a460ca43dc1123c7903e928b678360cd37eac761f472bd8e0787fefcb"},
4950
"jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
51+
"jaxon": {:hex, :jaxon, "2.0.8", "00951a79d354260e28d7e36f956c3de94818124768a4b22e0fc55559d1b3bfe7", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "74532853b1126609615ea98f0ceb5009e70465ca98027afbbd8ed314d887e82d"},
5052
"joken": {:hex, :joken, "2.6.2", "5daaf82259ca603af4f0b065475099ada1b2b849ff140ccd37f4b6828ca6892a", [:mix], [{:jose, "~> 1.11.10", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "5134b5b0a6e37494e46dbf9e4dad53808e5e787904b7c73972651b51cce3d72b"},
5153
"jose": {:hex, :jose, "1.11.10", "a903f5227417bd2a08c8a00a0cbcc458118be84480955e8d251297a425723f83", [:mix, :rebar3], [], "hexpm", "0d6cd36ff8ba174db29148fc112b5842186b68a90ce9fc2b3ec3afe76593e614"},
5254
"live_svelte": {:hex, :live_svelte, "0.14.1", "78fcd3bb7eb1c349138ebcaef5b61653bf1a818e42129c847482717895af8f70", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}, {:nodejs, "~> 3.1", [hex: :nodejs, repo: "hexpm", optional: false]}, {:phoenix, ">= 1.7.0", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, ">= 3.3.1", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_live_view, ">= 0.18.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}], "hexpm", "f96b06456957fbc6d25f71fc984f8f949664d5b61fbfcb3db43d5b82699c39a8"},
@@ -82,6 +84,7 @@
8284
"postgrex": {:hex, :postgrex, "0.20.0", "363ed03ab4757f6bc47942eff7720640795eb557e1935951c1626f0d303a3aed", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "d36ef8b36f323d29505314f704e21a1a038e2dc387c6409ee0cd24144e187c0f"},
8385
"ranch": {:hex, :ranch, "2.2.0", "25528f82bc8d7c6152c57666ca99ec716510fe0925cb188172f41ce93117b1b0", [:make, :rebar3], [], "hexpm", "fa0b99a1780c80218a4197a59ea8d3bdae32fbff7e88527d7d8a4787eff4f8e7"},
8486
"redirect": {:hex, :redirect, "0.4.0", "98b46053504ee517bc3ad2fd04c064b64b48d339e1e18266355b30c4f8bb52b0", [:mix], [{:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}, {:plug, "~> 1.8.3 or ~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "dfa29a8ecbad066ed0b73b34611cf24c78101719737f37bdf750f39197d67b97"},
87+
"req": {:hex, :req, "0.5.10", "a3a063eab8b7510785a467f03d30a8d95f66f5c3d9495be3474b61459c54376c", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "8a604815743f8a2d3b5de0659fa3137fa4b1cffd636ecb69b30b2b9b2c2559be"},
8588
"reverse_proxy_plug": {:hex, :reverse_proxy_plug, "3.0.2", "38fde2f59bca8b219ef4f1ec0c0849a67c6d9705160e426a2354f35399db5c7b", [:mix], [{:finch, "~> 0.18", [hex: :finch, repo: "hexpm", optional: true]}, {:httpoison, "~> 1.2 or ~> 2.0", [hex: :httpoison, repo: "hexpm", optional: true]}, {:plug, "~> 1.6", [hex: :plug, repo: "hexpm", optional: false]}, {:req, "~> 0.3.0 or ~> 0.4.0 or ~> 0.5.0", [hex: :req, repo: "hexpm", optional: true]}, {:tesla, "~> 1.4", [hex: :tesla, repo: "hexpm", optional: true]}], "hexpm", "31ae5e068f7f504fba1b5c17c31c87966c720809ac15140c6c181440fbd24eda"},
8689
"rustler": {:hex, :rustler, "0.36.1", "2d4b1ff57ea2789a44756a40dbb5fbb73c6ee0a13d031dcba96d0a5542598a6a", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:toml, "~> 0.7", [hex: :toml, repo: "hexpm", optional: false]}], "hexpm", "f3fba4ad272970e0d1bc62972fc4a99809651e54a125c5242de9bad4574b2d02"},
8790
"rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"},
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
defmodule Algora.Repo.Migrations.UpdateJobPostingDescription do
  use Ecto.Migration

  # Columns switched to :text, grouped by table and kept in migration order.
  @job_posting_columns [:description, :company_name, :company_url]
  @user_columns [
    :linkedin_url,
    :website_url,
    :discord_url,
    :github_url,
    :twitter_url,
    :youtube_url,
    :slack_url,
    :twitch_url,
    :og_image_url
  ]

  # Changes the listed job_postings and users columns to :text.
  def up do
    to_text(:job_postings, @job_posting_columns)
    to_text(:users, @user_columns)
  end

  # Intentionally empty: rolling back leaves the columns as :text.
  def down do
  end

  # Issues one ALTER for `table_name`, modifying each column to :text.
  defp to_text(table_name, columns) do
    alter table(table_name) do
      Enum.each(columns, &modify(&1, :text))
    end
  end
end

0 commit comments

Comments
 (0)