
Commit 3042e6d

Add LLMRec/NLGraph/WalkLM as examples (#231)
1 parent 60b7658 commit 3042e6d

File tree

26 files changed: +5776 −0 lines changed

26 files changed

+5776
-0
lines changed

examples/llmrec/LLM_augmentation_construct_prompt/gpt_i_attribute_generate_aug.py

Lines changed: 547 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 305 additions & 0 deletions
@@ -0,0 +1,305 @@
import concurrent.futures  # kept for the threaded variant suggested by max_threads below
import csv
import os
import pickle
import threading
import time

import openai
import pandas as pd
import requests
import torch
import tqdm

# Root directory of the preprocessed dataset files; set this before running.
file_path = ""
max_threads = 5
cnt = 0

# MovieLens
def construct_prompting(item_attribute, item_list, candidate_list):
    # make history string
    history_string = "User history:\n"
    for index in item_list:
        title = item_attribute['title'][index]
        genre = item_attribute['genre'][index]
        history_string += "[" + str(index) + "] " + title + ", " + genre + "\n"
    # make candidates
    candidate_string = "Candidates:\n"
    for index in candidate_list:
        title = item_attribute['title'][index.item()]
        genre = item_attribute['genre'][index.item()]
        candidate_string += "[" + str(index.item()) + "] " + title + ", " + genre + "\n"
    # output format
    output_format = "Please output the index of the user's favorite and least favorite movie, chosen only from the candidates, not from the user history. Take the index from the beginning of each candidate line.\nOutput format:\nTwo numbers separated by '::'. Nothing else. Just give the candidate indices without the brackets (output only the digits); do not output anything else and do not give reasoning.\n\n"
    # make prompt
    prompt = "You are a movie recommendation system and required to recommend the user movies based on the user history, where each movie has a title (similar topic/director), year (similar years), and genre (similar genre).\n"
    prompt += history_string
    prompt += candidate_string
    prompt += output_format
    return prompt

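# For illustration, a prompt assembled by construct_prompting looks roughly like
# the following (indices, titles, and genres here are invented, not taken from
# the dataset):
#
#   You are a movie recommendation system and required to recommend ...
#   User history:
#   [12] Toy Story (1995), Animation|Children's|Comedy
#   [48] Heat (1995), Action|Crime|Thriller
#   Candidates:
#   [101] Casino (1995), Crime|Drama
#   [205] Sabrina (1995), Comedy|Romance
#   Please output the index of the user's favorite and least favorite movie, ...
#
# A well-formed model reply would then be, e.g., "101::205".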

# Netflix (alternative construct_prompting for the Netflix dataset, kept disabled)
# def construct_prompting(item_attribute, item_list, candidate_list):
#     # make history string
#     history_string = "User history:\n"
#     for index in item_list:
#         year = item_attribute['year'][index]
#         title = item_attribute['title'][index]
#         history_string += "[" + str(index) + "] " + str(year) + ", " + title + "\n"
#     # make candidates
#     candidate_string = "Candidates:\n"
#     for index in candidate_list:
#         year = item_attribute['year'][index.item()]
#         title = item_attribute['title'][index.item()]
#         candidate_string += "[" + str(index.item()) + "] " + str(year) + ", " + title + "\n"
#     # output format
#     output_format = "Please output the index of the user's favorite and least favorite movie, chosen only from the candidates, not from the user history. Take the index from the beginning of each candidate line.\nOutput format:\nTwo numbers separated by '::'. Nothing else. Just give the candidate indices without the brackets (output only the digits); do not output anything else and do not give reasoning.\n\n"
#     # make prompt (no system-style preamble in the Netflix variant)
#     prompt = ""
#     prompt += history_string
#     prompt += candidate_string
#     prompt += output_format
#     return prompt

### read candidate
candidate_indices = pickle.load(open(file_path + 'candidate_indices', 'rb'))
candidate_indices_dict = {}
for index in range(candidate_indices.shape[0]):
    candidate_indices_dict[index] = candidate_indices[index]

### read adjacency_list (per-user ids of interacted items from the training matrix)
adjacency_list_dict = {}
train_mat = pickle.load(open(file_path + 'train_mat', 'rb'))
for index in range(train_mat.shape[0]):
    data_x, data_y = train_mat[index].nonzero()
    adjacency_list_dict[index] = data_y

### read item_attribute
toy_item_attribute = pd.read_csv(file_path + 'item_attribute.csv', names=['id', 'title', 'genre'])

### write augmented dict (resume from an existing cache if one is on disk)
augmented_sample_dict = {}
if os.path.exists(file_path + "augmented_sample_dict"):
    print("The file augmented_sample_dict exists.")
    augmented_sample_dict = pickle.load(open(file_path + 'augmented_sample_dict', 'rb'))
else:
    print("The file augmented_sample_dict does not exist.")
    pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict', 'wb'))

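# Assumed on-disk formats, inferred from the calls above (the repo does not
# document them here):
# - candidate_indices: a 2-D torch tensor of per-user candidate item ids
#   (elements are later unwrapped with .item());
# - train_mat: a scipy.sparse user-item interaction matrix (rows support .nonzero());
# - item_attribute.csv: a headerless CSV with id, title, and genre columns.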

def file_reading():
    # Reload the augmented-sample cache from disk.
    augmented_attribute_dict = pickle.load(open(file_path + 'augmented_sample_dict', 'rb'))
    return augmented_attribute_dict

# baidu: request one (favorite, least favorite) pair for one user via an
# OpenAI-compatible proxy endpoint.
def LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict):
    try:
        augmented_sample_dict = file_reading()
    except pickle.UnpicklingError as e:
        print("Error occurred while unpickling:", e)
        # retry until the cache file is readable again
        return LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
    if index in augmented_sample_dict:
        # this user is already augmented; skip
        return 0
    try:
        print(f"{index}")
        prompt = construct_prompting(toy_item_attribute, adjacency_list_dict[index], candidate_indices_dict[index])
        url = "http://llms-se.baidu-int.com:8200/chat/completions"
        headers = {
            "Authorization": "Bearer your key"  # placeholder; supply a real key
        }
        params = {
            "model": model_type,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.6,
            "max_tokens": 1000,
            "stream": False,
            "top_p": 0.1
        }

        response = requests.post(url=url, headers=headers, json=params)
        message = response.json()

        # expected reply: "<positive index>::<negative index>"
        content = message['choices'][0]['message']['content']
        print(f"content: {content}, model_type: {model_type}")
        samples = content.split("::")
        pos_sample = int(samples[0])
        neg_sample = int(samples[1])
        augmented_sample_dict[index] = {}
        augmented_sample_dict[index][0] = pos_sample
        augmented_sample_dict[index][1] = neg_sample
        pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict', 'wb'))

    except requests.exceptions.RequestException as e:
        print("An HTTP error occurred:", str(e))
        time.sleep(10)
    except ValueError as ve:
        print("An error occurred while parsing the response:", str(ve))
        time.sleep(10)
        # retry with a pinned model snapshot
        return LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
    except KeyError as ke:
        print("An error occurred while accessing the response:", str(ke))
        time.sleep(10)
        return LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo-0613", augmented_sample_dict)
    except Exception as ex:
        print("An unknown error occurred:", str(ex))
        time.sleep(10)

    return 1

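# Note: the recursive retries above have no depth cap, so a persistently bad
# response can recurse indefinitely. The disabled `error_cnt == 5` checks in the
# chatgpt variant below hint at the intended fix: thread a counter through the
# recursion (sketch only; the error_cnt parameter is not in the active code):
#
#     def LLM_request(..., error_cnt=0):
#         ...
#         except ValueError as ve:
#             if error_cnt == 5:
#                 return 1
#             time.sleep(10)
#             return LLM_request(toy_item_attribute, adjacency_list_dict,
#                                candidate_indices_dict, index, "gpt-3.5-turbo-0613",
#                                augmented_sample_dict, error_cnt + 1)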
# chatgpt: the same flow against the official OpenAI chat completions endpoint,
# kept disabled.
# def LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict):
#     if index in augmented_sample_dict:
#         print(f"g:{index}")
#         return 0
#     try:
#         print(f"{index}")
#         prompt = construct_prompting(toy_item_attribute, adjacency_list_dict[index], candidate_indices_dict[index])
#         url = "https://api.openai.com/v1/chat/completions"
#         # legacy completions-API variant:
#         # url = "https://api.openai.com/v1/completions"
#         # params = {"model": model_type, "prompt": prompt, "max_tokens": 1024, "temperature": 0.6, "stream": False}
#         # content = message['choices'][0]['text']
#         headers = {
#             # "Authorization": "Bearer your key"
#         }
#         params = {
#             "model": "gpt-3.5-turbo",
#             "messages": [
#                 {"role": "system", "content": "You are a movie recommendation system and required to recommend the user movies based on the user history, where each movie has a title (similar topic/director), year (similar years), and genre (similar genre).\n"},
#                 {"role": "user", "content": prompt}
#             ]
#         }
#
#         response = requests.post(url=url, headers=headers, json=params)
#         message = response.json()
#
#         content = message['choices'][0]['message']['content']
#         print(f"content: {content}, model_type: {model_type}")
#         samples = content.split("::")
#         pos_sample = int(samples[0])
#         neg_sample = int(samples[1])
#         augmented_sample_dict[index] = {}
#         augmented_sample_dict[index][0] = pos_sample
#         augmented_sample_dict[index][1] = neg_sample
#         pickle.dump(augmented_sample_dict, open(file_path + 'augmented_sample_dict', 'wb'))
#
#     # each handler below sleeps and retries; a disabled retry cap
#     # (`error_cnt += 1; if error_cnt == 5: return 1`) is sketched for each.
#     except requests.exceptions.RequestException as e:
#         print("An HTTP error occurred:", str(e))
#         time.sleep(8)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     except ValueError as ve:
#         print("ValueError occurred while parsing the response:", str(ve))
#         time.sleep(10)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     except KeyError as ke:
#         print("KeyError occurred while accessing the response:", str(ke))
#         time.sleep(10)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     except IndexError as ie:
#         print("IndexError occurred while accessing the response:", str(ie))
#         time.sleep(10)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     except EOFError as ee:
#         print("EOFError (ran out of input) occurred while accessing the response:", str(ee))
#         time.sleep(10)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     except Exception as ex:
#         print("An unknown error occurred:", str(ex))
#         time.sleep(10)
#         LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, model_type, augmented_sample_dict)
#     return 1

# Sequentially request an augmented (favorite, least favorite) pair for every user.
for index in range(0, len(adjacency_list_dict)):
    ret = LLM_request(toy_item_attribute, adjacency_list_dict, candidate_indices_dict, index, "gpt-3.5-turbo", augmented_sample_dict)
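# The loop above is sequential even though max_threads and concurrent.futures are
# set up at the top of the file. A minimal threaded sketch under that reading
# (note the pickle writes inside LLM_request are not thread-safe, so a lock or
# per-worker output file would also be needed):
#
#     with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as pool:
#         futures = [
#             pool.submit(LLM_request, toy_item_attribute, adjacency_list_dict,
#                         candidate_indices_dict, index, "gpt-3.5-turbo",
#                         augmented_sample_dict)
#             for index in range(len(adjacency_list_dict))
#         ]
#         for f in concurrent.futures.as_completed(futures):
#             f.result()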
