-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathOpenAI_JobScreener.py
More file actions
116 lines (94 loc) · 4.75 KB
/
OpenAI_JobScreener.py
File metadata and controls
116 lines (94 loc) · 4.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import json
import time
from openai import OpenAI
from concurrent.futures import ThreadPoolExecutor, as_completed
from threading import Lock
import tiktoken
# Client-side rate-limit budgets. analyze_listing compares its shared counters
# against these values before it sends each request.
# NOTE(review): presumably chosen to match the account's OpenAI limits - confirm.
TPM_LIMIT = 200000 # Token budget per minute
RPM_LIMIT = 500 # Request budget per minute
MAX_WORKERS = 3 # Worker thread count; the listings are also split into this many chunks
# Tokenizer for the gpt-4o-mini model; used to count the tokens in each job description
encoder = tiktoken.encoding_for_model("gpt-4o-mini")
# Function to load job listings from file and count tokens in descriptions
def extract_listings_with_tokens(json_file):
    """Read job listings from a JSON file and pair each with a token count.

    Args:
        json_file: Path of a UTF-8 JSON file. The parsed document is iterated,
            and every element is indexed by the keys 'title', 'description'
            and 'url'.

    Returns:
        A list of (title, description, url, token_count) tuples in file
        order, where token_count is the number of tokens the module-level
        encoder produces for the description alone.

    Raises:
        KeyError: If an element lacks one of the three keys.
    """
    with open(json_file, 'r', encoding='utf-8') as handle:
        jobs = json.load(handle)

    # Only the description is tokenized; title and url pass through untouched.
    return [
        (
            job['title'],
            job['description'],
            job['url'],
            len(encoder.encode(job['description'])),
        )
        for job in jobs
    ]
# Start of the current 60-second accounting window, as a time.monotonic()
# reading. Kept in a one-element list so it can be updated in place. It is None
# until the first request is accounted for, and must only be read or written
# while holding the lock that is passed to analyze_listing.
_rate_window_start = [None]


# Function to analyze job listings considering API limits
def analyze_listing(client, title, description, url, token_count, tokens_used, request_count, lock, thread_id):
    """Ask the chat model whether one job listing is suitable.

    Before sending, the request is booked against two shared per-minute
    budgets (TPM_LIMIT tokens, RPM_LIMIT requests). The budgets are tracked
    in fixed 60-second windows measured with a monotonic clock: a window that
    is older than 60 seconds is discarded, and when the current window has no
    room left the call sleeps for the remainder of that window.

    Args:
        client: Object exposing ``chat.completions.create(...)``.
        title: Job title, used in log lines and in the returned string.
        description: Job description text sent to the model.
        url: Job URL, used in the returned string.
        token_count: Token count supplied by the caller for this request.
            NOTE(review): in this file it covers the description only, not
            the fixed system/user prompt text, so usage is underestimated.
        tokens_used: One-element list holding the tokens booked in the
            current window; mutated in place.
        request_count: One-element list holding the requests booked in the
            current window; mutated in place.
        lock: Lock guarding tokens_used, request_count and the window start.
        thread_id: Identifier used only in log output.

    Returns:
        ``f"{title} - {url}\\n"`` when the model's reply, stripped and
        lower-cased, equals 'yes'; otherwise None.
    """
    messages = [
        {"role": "system",
         "content": "You help me find vacancies for C++ developer, Go developer, Rust developer, game developer, trainee Cloud positions. Respond with 'yes' if the job is suitable, otherwise respond with 'no'. Answer is limited to 3 characters."},
        {"role": "user",
         "content": f"Is this job opening suitable for C++ developer, Go developer, Rust developer, game developer, trainee Cloud?\n\n{description}"}
    ]
    request_tokens = token_count + 1  # Include 1 token for the completion

    with lock:
        now = time.monotonic()
        window_start = _rate_window_start[0]
        if window_start is None or now - window_start >= 60:
            # The previous window is over, so the usage booked in it no
            # longer counts against the per-minute budgets.
            _rate_window_start[0] = window_start = now
            tokens_used[0] = 0
            request_count[0] = 0

        total_tokens_needed = tokens_used[0] + request_tokens
        print(f"[Thread {thread_id}] Total tokens needed: {total_tokens_needed}, Tokens used: {tokens_used[0]}")

        exhausted = None
        if total_tokens_needed > TPM_LIMIT:
            exhausted = "token"
        elif request_count[0] >= RPM_LIMIT:
            exhausted = "request"

        if exhausted is not None:
            # Wait out the rest of this window. The check above guarantees
            # the window is younger than 60 seconds, so this is positive.
            time_to_wait = 60 - (now - window_start)
            print(f"[Thread {thread_id}] Reached {exhausted} limit. Waiting for {time_to_wait:.2f} seconds to reset {exhausted} counter.")
            # Sleeping while holding the lock is deliberate: the budgets are
            # shared, so the other threads have to wait for the same reset.
            time.sleep(time_to_wait)
            # One wait frees both budgets; open a new window for this request.
            _rate_window_start[0] = time.monotonic()
            tokens_used[0] = 0
            request_count[0] = 0

        tokens_used[0] += request_tokens  # Update token usage
        request_count[0] += 1  # Update request count

    print(f"[Thread {thread_id}] Sending request for job: {title}")
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        max_tokens=1
    )
    # The API schema allows message.content to be null; treat that as "not
    # suitable" instead of failing on None.strip().
    content = completion.choices[0].message.content
    response = (content or "").strip().lower()
    print(f"[Thread {thread_id}] Received response for job: {title}, response: {response}")
    if response == 'yes':
        return f"{title} - {url}\n"
    return None
# Function to process a chunk of job listings
def process_chunk(chunk, tokens_used, request_count, lock, thread_id):
    """Screen every listing in one chunk and collect the ones that match.

    Args:
        chunk: Iterable of (title, description, url, token_count) tuples.
        tokens_used: Shared token counter, forwarded to analyze_listing.
        request_count: Shared request counter, forwarded to analyze_listing.
        lock: Lock forwarded to analyze_listing.
        thread_id: Identifier used in log output and forwarded as well.

    Returns:
        A list of the truthy values analyze_listing returned, in chunk order.
    """
    api_client = OpenAI()  # One client per call, i.e. per worker thread
    matches = []
    for job_title, job_description, job_url, job_tokens in chunk:
        print(f"[Thread {thread_id}] Processing job: {job_title}")
        verdict = analyze_listing(
            api_client,
            job_title,
            job_description,
            job_url,
            job_tokens,
            tokens_used,
            request_count,
            lock,
            thread_id,
        )
        if not verdict:
            continue  # Nothing to record for this listing
        matches.append(verdict)
    return matches
# Main execution
json_file = 'job_listings.json'  # Use the path to the uploaded file
listings = extract_listings_with_tokens(json_file)

# Shared per-minute counters. Each is a one-element list so that the worker
# threads can update the value in place; `lock` guards both.
tokens_used = [0]
request_count = [0]
lock = Lock()

# Deal the listings out round-robin so that each worker gets one chunk
chunks = [listings[offset::MAX_WORKERS] for offset in range(MAX_WORKERS)]

start_time = time.time()
results = []

# One process_chunk task per chunk; results are gathered as tasks finish
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = [
        executor.submit(process_chunk, chunk, tokens_used, request_count, lock, i)
        for i, chunk in enumerate(chunks)
    ]
    for future in as_completed(futures):
        results.extend(future.result())

# Every result string already ends with a newline
with open('results.txt', 'w', encoding='utf-8') as file:
    file.writelines(results)

execution_time = time.time() - start_time
whole_minutes, seconds = divmod(execution_time, 60)
minutes = int(whole_minutes)
print(f"AI analysis was performed in {minutes} minutes and {seconds:.2f} seconds")
print(f"Total jobs processed: {len(listings)}")