# Codigo-cap10.R
# Association and clustering exercises
## Apriori

# Install arulesViz only when it is missing, so re-running the script does
# not trigger a reinstall every time.
# NOTE(review): arulesViz is presumably what brings arules (transactions,
# apriori, inspect, discretize) onto the search path -- confirm.
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz")
}
library("arulesViz")

# Read the small classroom example (semicolon-separated, decimal comma).
tabela <- read.table("ExAssocAula.csv", sep = ";", dec = ",", header = TRUE)

# Drop only the first column; drop = FALSE keeps a data frame even if a
# single column remains.
tabela <- tabela[, -1, drop = FALSE]
names(tabela)

# Convert the item columns to logical before coercing to transactions.
item_cols <- c("TV", "Tablet", "Smartphone")
tabela[item_cols] <- lapply(tabela[item_cols], as.logical)
tabela <- as(tabela, "transactions")

# Mining parameters: minimum support 1/7, minimum confidence 2/7, rules of
# 2 to 3 items, with the extended quality measures requested (ext = TRUE).
param <- list(
  support = 1 / 7,
  confidence = 2 / 7,
  minlen = 2,
  maxlen = 3,
  ext = TRUE
)
ruleset <- apriori(tabela, parameter = param)
summary(ruleset)
inspect(ruleset)

# Re-order the rules by confidence, then support, then coverage.
ruleset <- sort(ruleset, by = c("confidence", "support", "coverage"))
inspect(ruleset)
# Apriori on the real data set (semicolon-separated, decimal comma).
# This section uses discretize(), apriori(), inspect() and the rule plot
# method, so arules/arulesViz must already be attached.
tabela <- read.table("ExRealAssoc.csv", sep = ";", dec = ",", header = TRUE)

# Frequency of each segment, most frequent first.
sort(table(tabela$Segmento), decreasing = TRUE)

# Illustration on a single column: split NK into three equal-width bins.
ClientesCat <- discretize(
  tabela$NK,
  method = "interval",
  breaks = 3,
  labels = c("Baixo", "Médio", "Alto")
)
# Number of elements in each category.
table(ClientesCat)
# Cut points of the bins.
discretize(tabela$NK, method = "interval", breaks = 3, onlycuts = TRUE)

# Discretize every volume column the same way instead of repeating the call.
# Legend: VB = low volume, VM = medium volume, VA = high volume.
volume_cols <- c(
  "FLO", "JY", "KIV", "LEV", "LIP", "NAR", "NK", "SPD", "SUM", "TRP"
)
volume_labels <- c("VB", "VM", "VA")
cat_cols <- lapply(
  tabela[volume_cols],
  discretize,
  method = "interval",
  breaks = 3,
  labels = volume_labels
)
# Same column names as before (CatFLO, CatJY, ...), so item labels in the
# transactions are unchanged.
names(cat_cols) <- paste0("Cat", volume_cols)

# Keep the segment as an explicit factor: since R 4.0 read.table() returns
# character columns, and the coercion to transactions works on factor or
# logical columns.
Seg <- factor(tabela$Segmento)
tabitens <- data.frame(Seg = Seg, cat_cols)
tabitens <- as(tabitens, "transactions")

# First pass: support >= 0.5, confidence >= 0.9, rules of 2 to 5 items.
param <- list(
  support = 0.5,
  confidence = 0.9,
  minlen = 2,
  maxlen = 5,
  ext = TRUE
)
ruleset <- apriori(tabitens, parameter = param)
summary(ruleset)

# Second pass with stricter thresholds and shorter rules.
param <- list(
  support = 0.9,
  confidence = 1,
  minlen = 2,
  maxlen = 3,
  ext = TRUE
)
ruleset <- apriori(tabitens, parameter = param)
summary(ruleset)

# Order by confidence, then support, then coverage, and list all rules.
ruleset <- sort(ruleset, by = c("confidence", "support", "coverage"))
inspect(ruleset)

# First ten rules under sort()'s default ordering criterion.
inspect(head(sort(ruleset), n = 10))

# Graph view of the ten rules with the highest confidence.
subruleset <- head(sort(ruleset, by = "confidence"), n = 10)
plot(
  subruleset,
  method = "graph",
  measure = c("confidence"),
  shading = "support"
)
## K-means
library(datasets)
library(ggplot2)

# Petal length vs. petal width, coloured by the true species.
ggplot(iris, aes(Petal.Length, Petal.Width, color = Species)) + geom_point()

# Cluster on the two petal measurements (columns 3 and 4) into 3 groups.
# The seed plus 20 random starts make the result reproducible.
set.seed(20)
irisCluster <- kmeans(iris[, 3:4], centers = 3, nstart = 20)
irisCluster

# Cross-tabulate cluster assignment against the true species.
table(irisCluster$cluster, iris$Species)

# Plot the clusters from a data frame that carries the assignment as a
# factor column, rather than overwriting irisCluster$cluster and reaching
# outside the plot data with `$` inside aes(). The kmeans object is left
# untouched.
iris_clustered <- data.frame(iris, cluster = factor(irisCluster$cluster))
ggplot(iris_clustered, aes(Petal.Length, Petal.Width, color = cluster)) +
  geom_point()
# Clear the console and the global environment, but only in an interactive
# session: when the script is run in batch, the form feed would just be
# written to the output and rm() would wipe objects the caller may still
# need. Nothing below depends on the workspace being empty.
if (interactive()) {
  # Form feed: clears the console in front ends that honour it.
  cat("\014")
  # Remove every object from the current (global) environment.
  rm(list = ls())
}
# Load a new data set.
mydata <- cars
mydata <- na.omit(mydata) # drop rows with missing values
mydata <- scale(mydata)   # standardize the variables (result is a matrix)

# Elbow curve: total within-cluster sum of squares for k = 1..15.
# For k = 1 this is the total sum of squares, computed in closed form.
# For k >= 2 it comes from kmeans(). The seed and multiple random starts
# make the curve reproducible.
max_k <- 15
set.seed(20)
wss <- vapply(
  seq_len(max_k),
  function(k) {
    if (k == 1) {
      (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
    } else {
      kmeans(mydata, centers = k, nstart = 20)$tot.withinss
    }
  },
  numeric(1)
)

plot(
  seq_len(max_k), wss,
  type = "b",
  xlab = "Number of Clusters",
  ylab = "Within groups sum of squares"
)
# Final k-means fit with 5 clusters on the standardized data in `mydata`.
# The seed and multiple random starts make the assignment reproducible.
set.seed(20)
fit <- kmeans(mydata, centers = 5, nstart = 20)

# Install fpc only when it is missing, so re-running the script does not
# trigger a reinstall every time.
if (!requireNamespace("fpc", quietly = TRUE)) {
  install.packages("fpc")
}
library(fpc)

# Plot the observations with their cluster assignment.
plotcluster(mydata, fit$cluster)

# Mean of each (standardized) variable within each cluster.
aggregate(mydata, by = list(fit$cluster), FUN = mean)

# Append the cluster assignment to the data; the new column is named
# `fit.cluster`, as before.
mydata <- data.frame(mydata, fit$cluster)
mydata