-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSQL DATA CLEANING PROJ.sql
More file actions
195 lines (126 loc) · 5.23 KB
/
SQL DATA CLEANING PROJ.sql
File metadata and controls
195 lines (126 loc) · 5.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*------------------------ Data Cleaning Portfolio Project ------------------------*/
/* Cleaning Data using SQL Queries */
/*-----------------------------------------------------------------------------------*/
SELECT TOP (1000) [UniqueID ]
,[ParcelID]
,[LandUse]
,[PropertyAddress]
,[SaleDate]
,[SalePrice]
,[LegalReference]
,[SoldAsVacant]
,[OwnerName]
,[OwnerAddress]
,[Acreage]
,[TaxDistrict]
,[LandValue]
,[BuildingValue]
,[TotalValue]
,[YearBuilt]
,[Bedrooms]
,[FullBath]
,[HalfBath]
FROM [PortfolioProject].[dbo].[Nashville_Housing]
/**************************************************/
/* Select Everything From Dataset */
SELECT * FROM PortfolioProject.dbo.Nashville_Housing
/*----- Change Salesdate Datatype From Datetime To Date -----*/
SELECT SaleDate , CONVERT(DATE,SaleDate) FROM PortfolioProject.dbo.Nashville_Housing ORDER BY 1;
UPDATE PortfolioProject.dbo.Nashville_Housing SET SaleDate = CONVERT(DATE, SaleDate)
/*IF THE ABOVE NOT WORKING */
ALTER TABLE Nashville_Housing ALTER COLUMN SaleDate DATE
SELECT SaleDate , CONVERT(DATE,SaleDate) FROM PortfolioProject.dbo.Nashville_Housing ORDER BY 1;
/*Done*/
--OR I CAN DO LIKE VIDEO
ALTER TABLE portfolioproject.dbo.nashville_housing ADD salesdate DATE;
UPDATE portfolioproject.dbo.nashville_housing SET salesdate = CONVERT(DATE,saledate)
SELECT salesdate FROM portfolioproject.dbo.nashville_housing
/*******************************************************/
SELECT * FROM portfolioproject.dbo.nashville_housing
ORDER BY 2
--WHERE PropertyAddress IS NULL
/********************************************************/
-- Populate Property Address data --> REMOVING NULL VALUES
SELECT A.ParcelID,A.PropertyAddress,B.ParcelID,B.PropertyAddress, ISNULL(A.PropertyAddress,B.PropertyAddress)
FROM portfolioproject.dbo.nashville_housing A
JOIN portfolioproject.dbo.nashville_housing B
ON A.ParcelID = B.ParcelID
AND A.[UniqueID ]<>B.[UniqueID ]
WHERE A.PropertyAddress IS NULL
--OR U CAN USING
/*
AND A.[PropertyAddress] <>B.[PropertyAddress]
*/
UPDATE A
SET A.PropertyAddress = ISNULL(A.PropertyAddress ,B.PropertyAddress)
FROM portfolioproject.dbo.nashville_housing A
JOIN portfolioproject.dbo.nashville_housing B
ON A.ParcelID = B.ParcelID
AND A.[UniqueID ] <> B.[UniqueID ]
--OR U CAN USING
/*
AND A.[PropertyAddress] <>B.[PropertyAddress]
*/
WHERE A.PropertyAddress IS NULL
/***************************************************************/
-- Breaking out Address into Individual Columns (Address, City, State)
SELECT * FROM Nashville_Housing
select PROPERTYADDRESS, SUBSTRING(Nashville_Housing.PropertyAddress, 1, CHARINDEX(',',Nashville_Housing.PropertyAddress)-1)
,SUBSTRING(Nashville_Housing.PropertyAddress, CHARINDEX(',',Nashville_Housing.PropertyAddress)+1, LEN(Nashville_Housing.PropertyAddress))
FROM Nashville_Housing
/*-- OR WE CAN DO THIS --*/
SELECT Nashville_Housing.PROPERTYADDRESS,PARSENAME(REPLACE(PROPERTYADDRESS,',','.'),2),PARSENAME(REPLACE(PROPERTYADDRESS,',','.'),1) FROM Nashville_Housing
ALTER TABLE Nashville_Housing
ADD PROPERTY_SPLIT_ADDRESS VARCHAR(255)
ALTER TABLE Nashville_Housing
ADD PROPERTY_SPLIT_CITY VARCHAR(255)
UPDATE Nashville_Housing
SET PROPERTY_SPLIT_ADDRESS = PARSENAME(REPLACE(PROPERTYADDRESS,',','.'),2)
UPDATE Nashville_Housing
SET PROPERTY_SPLIT_CITY = PARSENAME(REPLACE(PROPERTYADDRESS,',','.'),1)
SELECT * FROM Nashville_Housing
/*THE SAME IDEA WITH OWNERADDRESS TO MAKE IT MORE USEFUL */
Select OwnerAddress
From PortfolioProject.dbo.Nashville_Housing
Select
PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3)
,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2)
,PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1)
From PortfolioProject.dbo.Nashville_Housing
ALTER TABLE Nashville_Housing
Add OwnerSplitAddress Nvarchar(255);
Update Nashville_Housing
SET OwnerSplitAddress = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 3)
ALTER TABLE Nashville_Housing
Add OwnerSplitCity Nvarchar(255);
Update Nashville_Housing
SET OwnerSplitCity = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 2)
ALTER TABLE Nashville_Housing
Add OwnerSplitState Nvarchar(255);
Update Nashville_Housing
SET OwnerSplitState = PARSENAME(REPLACE(OwnerAddress, ',', '.') , 1)
Select *
From PortfolioProject.dbo.Nashville_Housing
/****************************************************************************/
-- REPLACING EACH N WITH NO AND Y WITH YES
SELECT * FROM Nashville_Housing
SELECT DISTINCT(SOLDASVACANT), COUNT(SoldAsVacant) FROM Nashville_Housing GROUP BY SoldAsVacant
SELECT
CASE SOLDASVACANT
WHEN 'N' THEN 'No'
WHEN 'Y' THEN 'Yes'
ELSE SOLDASVACANT
END SELLCASE
FROM Nashville_Housing
UPDATE Nashville_Housing
SET SoldAsVacant =
CASE
WHEN SoldAsVacant = 'N' THEN 'No'
WHEN SoldAsVacant='Y' THEN 'Yes'
ELSE SoldAsVacant
END
/*************************************************************************/
/****** DELETE SOME UNUSED COLUMNS AFTER EDITING IT *********/
ALTER TABLE NASHVILLE_HOUSING
DROP COLUMN OwnerAddress, TaxDistrict, PropertyAddress, SaleDate
SELECT * FROM Nashville_Housing