forked from WhyR2017/meetup-harvesting
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexplore.R
More file actions
84 lines (64 loc) · 2.84 KB
/
explore.R
File metadata and controls
84 lines (64 loc) · 2.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
library(moments)
library(tidyverse)
library(stringr)
library(lubridate)
# pro networks ----
# Interactive inspection only: both pipelines end in View().
# Groups that have a non-missing pro_network.name, showing the name, the city
# and every column whose name contains "pro".
d %>%
  filter(!is.na(pro_network.name)) %>%
  select(name, city, contains("pro")) %>%
  View()

# Full record of one specific group, looked up by its exact name.
d %>%
  filter(name == "Poznań Women in Machine Learning & Data Science") %>%
  View()
# map_bar message ----
# Per-city totals, listed from the city with the most groups to the one with
# the fewest (presumably the numbers quoted alongside the map/bar chart --
# confirm against wherever that chart is produced).
lon_lat_cities %>%
  select(city, members_sum, members_per_groups, groups_n) %>%
  arrange(desc(groups_n))
# members
# Distribution of group sizes (d$members), overall and split by d$city.
# NOTE: by(), boxplot() and hist() label their output with the literal
# argument expressions, so the calls are deliberately left as written.

# Overall summary statistics.
summary(d$members)
# Shape of the distribution. skewness()/kurtosis() are not base R; presumably
# they come from the moments package attached at the top of the script.
skewness(d$members)
kurtosis(d$members)
# summary() and skewness() computed separately for each value of d$city.
by(d$members, d$city, summary)
by(d$members, d$city, skewness)
# Side-by-side comparison of member counts across cities.
boxplot(d$members~d$city)
# Histograms: all groups, then only the groups in Warszawa and in Kraków.
hist(d$members)
hist(d$members[d$city == "Warszawa"])
hist(d$members[d$city == "Kraków"])
# Names of the groups with at least 3000 members.
d$name[d$members >= 3000]
# members vs population ----
# Source of city_pop.csv:
# http://swaid.stat.gov.pl/Dashboards/Miasta%20najwi%C4%99ksze%20pod%20wzgl%C4%99dem%20liczby%20ludno%C5%9Bci.aspx

# The first three lines of the file are skipped before the header is read.
city_pop <- read_csv("city_pop.csv", skip = 3)

# Column 7 becomes the join key "city"; its first entry is overwritten with
# "Warszawa" (presumably so it matches the spelling in lon_lat_cities --
# confirm against the raw CSV).
names(city_pop)[7] <- "city"
city_pop[1, 7] <- "Warszawa"

# `ogolem` holds numbers written with whitespace inside them; strip the
# whitespace and parse the result as an integer.
city_pop <- mutate(
  city_pop,
  ogolem_num = parse_integer(gsub("[[:space:]]", "", ogolem))
)

# Keep every city from lon_lat_cities and attach the CSV columns to it.
city_pop <- lon_lat_cities %>%
  left_join(city_pop, by = "city")

# Scatter plot of members_sum against ogolem_num, both axes on a log10 scale,
# with each point labelled and coloured by city.
ggplot(
  city_pop,
  aes(
    x = as.numeric(ogolem_num),
    y = as.numeric(members_sum),
    label = city,
    colour = city
  )
) +
  geom_point() +
  geom_text() +
  scale_x_log10() +
  scale_y_log10()

# Linear model of members_sum on ogolem_num (untransformed values).
summary(lm(members_sum ~ ogolem_num, data = city_pop))

# Rank correlation between total members and number of groups per city.
# Alternatives that were tried and left disabled:
# cor(city_pop$members_sum, city_pop$groups_n, method = "pearson")
# cor(city_pop$members_sum, city_pop$groups_n, method = "kendall")
with(city_pop, cor(members_sum, groups_n, method = "spearman"))
# TODO: check https://stackoverflow.com/questions/7549694/adding-regression-line-equation-and-r2-on-graph
# rsvp vs members ----
# Only groups with a non-missing next_event.id are considered. The filtered
# table is built once and reused instead of repeating the pipeline.
with_next_event <- d %>%
  filter(!is.na(next_event.id))

# Number of such groups (rows) and number of columns in `d`.
dim(with_next_event)

# Ratio of next_event.yes_rsvp_count to members for each group.
# (Column was previously misspelled "rspv_by_members".)
rsvp_ratio <- with_next_event %>%
  mutate(rsvp_by_members = next_event.yes_rsvp_count / members)

# Distribution of the ratio across groups.
rsvp_ratio %>%
  select(rsvp_by_members) %>%
  summary()

# Per-group listing, ordered from the smallest group to the largest.
rsvp_ratio %>%
  select(name, members, next_event.yes_rsvp_count, rsvp_by_members) %>%
  arrange(members)
# population vs time of group creation ----
# Source of city_pop.csv:
# http://swaid.stat.gov.pl/Dashboards/Miasta%20najwi%C4%99ksze%20pod%20wzgl%C4%99dem%20liczby%20ludno%C5%9Bci.aspx

# Re-read the raw table: the name city_pop is reused for joined results, so
# the CSV is loaded again here. The first three lines of the file are skipped.
city_pop <- read_csv("city_pop.csv", skip = 3)

# Column 7 becomes the join key "city"; its first entry is overwritten with
# "Warszawa" (presumably to match the spelling used in `d` -- confirm against
# the raw CSV).
names(city_pop)[7] <- "city"
city_pop[1, 7] <- "Warszawa"

# Strip whitespace from `ogolem` and parse the result as an integer.
city_pop <- mutate(
  city_pop,
  ogolem_num = parse_integer(gsub("[[:space:]]", "", ogolem))
)

# Earliest, median and latest created_date of the groups in each city.
city_time_created <- summarise(
  group_by(d, city),
  min_created = min(created_date),
  median_created = median(created_date),
  max_created = max(created_date)
)

# Keep every city that has groups and attach the CSV columns to it.
city_pop <- city_time_created %>%
  left_join(city_pop, by = "city")

# Median creation date against ogolem_num, labelled and coloured by city.
ggplot(
  city_pop,
  aes(
    x = as.numeric(ogolem_num),
    y = median_created,
    label = city,
    colour = city
  )
) +
  geom_point() +
  geom_text()