-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql:retail_sales_project.sql
More file actions
185 lines (140 loc) · 4.42 KB
/
sql:retail_sales_project.sql
File metadata and controls
185 lines (140 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
-- ---------------------------------------------------------------------------------------------
-- RETAIL SALES ANALYSIS PROJECT
-- ----------------------------------------------------------------------------------------------
-- Create the project schema only when it is missing, so the whole script can be
-- re-run without failing on an already-existing database, then make it the
-- default schema for every statement that follows.
CREATE DATABASE IF NOT EXISTS retail_sales_project;
USE retail_sales_project;
-- Creating a table
-- Rebuilds retail_sales from scratch: any existing copy is dropped first, so
-- re-running this section discards previously loaded rows.
DROP TABLE IF EXISTS retail_sales;
CREATE TABLE retail_sales (
    transactions_id INT PRIMARY KEY,
    sale_date       DATE,
    sale_time       TIME,
    customer_id     INT,
    gender          VARCHAR(15),
    age             INT,
    category        VARCHAR(25),
    -- NOTE(review): "quantiy" is a misspelling of "quantity". The name is kept
    -- unchanged because later statements in this script reference it as written.
    quantiy         INT,
    -- Monetary amounts are stored as exact DECIMAL instead of FLOAT so sums and
    -- comparisons are not affected by binary floating-point rounding.
    -- Assumes source amounts carry at most two decimal places - TODO confirm
    -- against the import file before loading.
    price_per_unit  DECIMAL(10, 2),
    cogs            DECIMAL(10, 2),
    total_sale      DECIMAL(10, 2)
);

-- Sanity check: list the table contents right after (re)creation.
SELECT *
FROM retail_sales;
-- --------------------------------------------------------------------------------
-- DATA CLEANING
-- --------------------------------------------------------------------------------
-- Identifying if we have any NULL VALUES
-- A row is listed when at least one of its columns is NULL, i.e. when it is
-- NOT the case that every column is populated.
SELECT rs.*
FROM retail_sales AS rs
WHERE NOT (
        rs.transactions_id IS NOT NULL
    AND rs.sale_date       IS NOT NULL
    AND rs.sale_time       IS NOT NULL
    AND rs.customer_id     IS NOT NULL
    AND rs.gender          IS NOT NULL
    AND rs.age             IS NOT NULL
    AND rs.category        IS NOT NULL
    AND rs.quantiy         IS NOT NULL
    AND rs.price_per_unit  IS NOT NULL
    AND rs.cogs            IS NOT NULL
    AND rs.total_sale      IS NOT NULL
);
-- Deleting the NULL VALUE ROWS
-- Removes every row that has a NULL in at least one column (a row survives
-- only when all of its columns are populated).
DELETE FROM retail_sales
WHERE NOT (
        transactions_id IS NOT NULL
    AND sale_date       IS NOT NULL
    AND sale_time       IS NOT NULL
    AND customer_id     IS NOT NULL
    AND gender          IS NOT NULL
    AND age             IS NOT NULL
    AND category        IS NOT NULL
    AND quantiy         IS NOT NULL
    AND price_per_unit  IS NOT NULL
    AND cogs            IS NOT NULL
    AND total_sale      IS NOT NULL
);

-- Number of rows left after the cleanup.
SELECT COUNT(*) FROM retail_sales;
-- --------------------------------------------------------------------------------
-- EXPLORATORY DATA ANALYSIS
-- --------------------------------------------------------------------------------
-- 1. How many sales do we have?
SELECT COUNT(*) AS total_sales
FROM retail_sales; -- recorded result: 1997

-- 2. How many unique customers do we have?
-- DISTINCT is a keyword modifying the aggregate, not a function, so it is
-- written without call-style parentheses.
SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM retail_sales; -- recorded result: 155

-- 3. How many unique product categories do we have?
SELECT COUNT(DISTINCT category) AS unique_categories
FROM retail_sales; -- recorded result: 3
-- --------------------------------------------------------------------------------
-- SOLVING BUSINESS PROBLEMS/DATA ANALYSIS
-- --------------------------------------------------------------------------------
-- 1. Retrieve all columns for sales made on 2022-11-05
-- The date is written as a single-quoted string literal: double quotes denote
-- identifiers in standard SQL (and in MySQL when ANSI_QUOTES is enabled), so a
-- double-quoted value would be read as a column name there.
SELECT *
FROM retail_sales
WHERE sale_date = '2022-11-05';
-- 2. Retrieve Clothing sales in Nov-2022 where the quantity sold is more than 10.
--    Quantities are summed per calendar day (one output row per sale_date) and
--    only days whose total is strictly greater than 10 units are kept.
SELECT
    category,
    SUM(quantiy) AS total_quantity,
    sale_date
FROM retail_sales
WHERE category = 'Clothing'
    -- Half-open range covering all of November 2022; comparing the bare column
    -- (instead of MONTH()/YEAR() of it) lets an index on sale_date be used.
    AND sale_date >= '2022-11-01'
    AND sale_date <  '2022-12-01'
GROUP BY
    category,
    sale_date
-- "More than 10" is a strict comparison, so a daily total of exactly 10 is excluded.
HAVING SUM(quantiy) > 10;
-- 3. Total sales amount (sum of total_sale) for each product category
SELECT
    rs.category,
    SUM(rs.total_sale) AS total_sales
FROM retail_sales AS rs
GROUP BY rs.category;
-- 4. Find the average age of the customers who purchased items from the 'Beauty' category
-- Grouped form: the category filter is a row filter, so it belongs in WHERE
-- (applied before aggregation) rather than in HAVING (applied after every
-- category has already been aggregated).
SELECT
    category,
    AVG(age) AS avg_age
FROM retail_sales
WHERE category = 'Beauty'
GROUP BY category;

-- Ungrouped form of the same question. MySQL accepts the bare category column
-- here because the WHERE equality restricts it to a single value; this form
-- always returns exactly one row, even when no row matches.
SELECT
    category,
    AVG(age) AS avg_age
FROM retail_sales
WHERE category = 'Beauty';
-- 5. Find all transactions where the total_sale is more than 1000
-- Strict comparison: a sale of exactly 1000 is not "more than 1000".
SELECT *
FROM retail_sales
WHERE total_sale > 1000;
-- 6. Number of transactions (transactions_id) made by each gender within every category
SELECT
    rs.gender,
    rs.category,
    COUNT(transactions_id)
FROM retail_sales AS rs
GROUP BY
    rs.gender,
    rs.category
ORDER BY
    rs.category,
    rs.gender;
-- 7. Calculate the average sale for each month and find the best-selling month
--    (the month with the highest average sale) in each year.
SELECT
    sale_year,
    sale_month,
    avg_sale,
    rnk
FROM (
    -- One row per (year, month) with its average sale, ranked within its year
    -- from the highest average downwards.
    SELECT
        YEAR(sale_date)  AS sale_year,
        MONTH(sale_date) AS sale_month,
        AVG(total_sale)  AS avg_sale,
        RANK() OVER (
            PARTITION BY YEAR(sale_date)
            ORDER BY AVG(total_sale) DESC
        ) AS rnk
    FROM retail_sales
    GROUP BY
        YEAR(sale_date),
        MONTH(sale_date)
) AS monthly_avg
-- RANK() gives tied months the same rank, so a year can yield several rows.
WHERE rnk = 1
-- Ordering is applied on the outer query: an ORDER BY inside the derived table
-- does not guarantee the order of the final result.
ORDER BY
    sale_year,
    sale_month;
-- 8. Find the top 5 customers based on the highest total sales
SELECT
    customer_id,
    SUM(total_sale) AS total_sales
FROM retail_sales
GROUP BY customer_id
-- Sort by the named aggregate rather than by column position; customer_id is a
-- tie-breaker so the five rows returned are the same on every run when several
-- customers share the same total.
ORDER BY
    total_sales DESC,
    customer_id
LIMIT 5;
-- 9. Number of distinct customers who purchased items from each category
SELECT
    rs.category,
    COUNT(DISTINCT(customer_id))
FROM retail_sales AS rs
GROUP BY rs.category;
-- 10. Assign each order to a shift and count the orders per shift
--     (Morning: hour < 12, Afternoon: hour 12 through 17, Evening: hour > 17)
SELECT
    CASE
        WHEN HOUR(sale_time) < 12 THEN 'Morning'
        WHEN HOUR(sale_time) BETWEEN 12 AND 17 THEN 'Afternoon'
        -- Everything else, including rows whose sale_time is NULL (neither
        -- comparison above is true for them), is counted as Evening.
        ELSE 'Evening'
    END AS shift_name,
    COUNT(transactions_id) AS total_orders
FROM retail_sales
-- Grouping by the select-list alias is a MySQL extension.
GROUP BY shift_name;