forked from pranjallk1995/Smart-Recommendation-System
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathCreating_user_data.py
More file actions
47 lines (35 loc) · 2.15 KB
/
Creating_user_data.py
File metadata and controls
47 lines (35 loc) · 2.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 13 12:37:18 2019
@author: Pranjall
"""
"""
Intution: Few users were studied to identify their personality types. For the sake of demonstration, 6 personality types have been assumed.
These users were then asked five thoughtful questions (pertaining psychology analysis) and their responses were observed.
This dataset is then used to train an Artificial Neural Network to find a relationship between the personality types and the responses,
so that the personality types of the new users can be predicted based on their responses to those five questions.
"""
import numpy as np
import pandas as pd
# Build a synthetic per-user questionnaire dataset: one row per distinct UserId
# in the review data, five random answers per user, and a personality-type
# label derived from the (noisy) sum of those answers.

# Simulation parameters: five questions, each answered with a choice in 0..3,
# bucketed into six personality types (labels 0..5).
NUM_QUESTIONS = 5
NUM_CHOICES = 4
NUM_PERSONALITY_TYPES = 6

dataset = pd.read_csv('Amazon_Fine_Food_data.csv')

# One output row per distinct UserId found in the review data.
dataset_by_user = dataset.groupby('UserId')
list_of_users = list(dataset_by_user.groups.keys())
user_dataset = pd.DataFrame(list_of_users, columns=['UserId'])

# Random answer (0 .. NUM_CHOICES - 1) for every user and every question,
# drawn in a single call instead of one copy-pasted call per question.
Responses = np.random.randint(NUM_CHOICES, size=(len(list_of_users), NUM_QUESTIONS))

# Total score per user, perturbed by uniform noise in [-1, 1) so the label is
# not an exact function of the answers.
score_noise = np.random.uniform(low=-1, high=1, size=(len(Responses), 1))
Total_resp_score = Responses.sum(axis=1).reshape(len(Responses), 1) + score_noise

# Clamp BEFORE rounding. Rounding first turns values in (-0.5, 0) into -0.0,
# which is not "< 0", so it used to slip past the clamp and end up in the CSV
# as a "-0.0" personality label.
Total_resp_score[Total_resp_score < 0] = 0
Total_resp_score = np.around(Total_resp_score)

# The rounded total lies in 0 .. NUM_QUESTIONS * (NUM_CHOICES - 1) + 1 (= 16);
# dividing by 16 / 5 and rounding maps it onto the labels 0 .. 5.
max_total_score = NUM_QUESTIONS * (NUM_CHOICES - 1) + 1
Total_resp_score = np.around(Total_resp_score / (max_total_score / (NUM_PERSONALITY_TYPES - 1)))

# Concatenating with the float scores upcasts the answers to float as well, so
# every column is written in the same float format as before (e.g. "2.0").
Responses = np.concatenate((Responses, Total_resp_score), axis=1)
for question_index in range(NUM_QUESTIONS):
    user_dataset['ResponseQ{}'.format(question_index + 1)] = Responses[:, question_index]
user_dataset['PersonalityType'] = Responses[:, NUM_QUESTIONS]

user_dataset.to_csv('Amazon_user_data.csv', sep=',', index=False)