# ggplot2 provides ggplot(), the geoms and theme() used by every plot below.
library(ggplot2)

# Load the movie ratings data set.
movies <- read.csv("MovieRating.csv")

# 'Genre' and 'Year' are both meant to be categorical variables.
# Since R 4.0 read.csv() keeps text columns as character, and 'Year' is read
# as a number, so convert both to factors explicitly with factor().
movies$Genre <- factor(movies$Genre)
movies$Year <- factor(movies$Year)

# Inspect the structure of 'movies' using str(); 'Genre' and 'Year' should
# now both be reported as factors.
str(movies)
# Base plot: audience rating against critic rating.
# Colour and size are mapped to columns of 'movies', so they go inside aes().
q <- ggplot(
  data = movies,
  aes(
    x = CriticRatings,
    y = AudienceRatings,
    color = Genre,
    size = BudgetMillion
  )
)

# Semi-transparent points plus a smoothed trend line; the size/alpha values
# given here are fixed constants, and fill = NA hides the shaded band of the
# smoother.
h <- q +
  geom_point(size = 1.5, alpha = 0.5) +
  geom_smooth(fill = NA, size = 1.25)

# Add the title and theme, place the legend in the top-left corner, and zoom
# the y axis to 10-100 with coord_cartesian().
h +
  ggtitle("Audience Rating v/s Critic Rating") +
  theme(
    plot.title = element_text(
      size = 30,
      color = "DarkGreen",
      family = "Courier"
    ),
    plot.background = element_rect(fill = "LightCyan", color = "Black"),
    axis.title.x = element_text(size = 20, color = "Red"),
    axis.title.y = element_text(size = 20, color = "DarkBlue"),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  ) +
  coord_cartesian(ylim = c(10, 100))
# The purple line shows the Romance genre: even when a Romance movie is rated
# very low by the critics (around 25), it is still liked by the audience.
# For the Action genre, audience and critics show similar behaviour. You must
# have a good plot to impress the audience 😉
# For Horror and Thriller (blue and pink lines) at high critic ratings, the
# audience chooses Thriller over Horror. Horror movies are a risky business 😥
# Audience rating against budget; point colour follows Genre and point size
# follows BudgetMillion.
j <- ggplot(
  data = movies,
  aes(
    x = BudgetMillion,
    y = AudienceRatings,
    color = Genre,
    size = BudgetMillion
  )
) +
  ggtitle("Audience-Rating v/s Budget of the Film") +
  xlab("Budget Millions ($)") +
  theme(
    plot.title = element_text(size = 20, color = "DarkGreen"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue"),
    axis.text.x = element_text(size = 10, color = "Black"),
    axis.text.y = element_text(size = 10, color = "Black")
  )

# Draw the scatter layer on top of the prepared base plot.
j + geom_point()
# The audience's love for a movie is independent of the movie's budget.
# You need a good script, not a good set 😜 Bingo!
# Audience rating per genre: jittered raw points with a boxplot on top.
l <- ggplot(
  data = movies,
  aes(x = Genre, y = AudienceRatings, colour = Genre)
)

# The jitter layer already shows every movie, so the boxplot's own outlier
# points are hidden (outlier.shape = NA); otherwise each outlier would be
# drawn twice. alpha keeps the points underneath the boxes visible.
m <- l +
  geom_jitter() +
  geom_boxplot(size = 1.2, alpha = 0.70, outlier.shape = NA)

# Title, axis label and theme.
m +
  ggtitle("AudienceRating v/s Genre") +
  theme(
    axis.title.x = element_text(size = 20, color = "Blue"),
    axis.title.y = element_text(size = 20, color = "DarkGreen"),
    axis.text.x = element_text(size = 12, color = "Black"),
    axis.text.y = element_text(size = 10, color = "Black"),
    plot.background = element_rect(fill = "LightCyan", color = "Black"),
    plot.title = element_text(size = 25, color = "DarkBlue")
  ) +
  ylab("Audience Rating")
# The medians of Thriller, Drama and Romance are high. Thriller's box is
# narrow, which shows that the deviation in how much Thrillers are liked is
# the smallest; AudienceRating is likely to fall between 60 and 75. And the
# audience loves Drama, as the maximum value for Drama is close to 80 (the
# highest of all).
# Stacked histogram of movie budgets, with bars filled by genre.
l <- ggplot(
  data = movies,
  aes(x = BudgetMillion, fill = Genre)
)

# 10-wide bins with black outlines; the legend is anchored in the top-right
# corner of the plot.
l +
  geom_histogram(binwidth = 10, color = "Black") +
  ggtitle("Movies Budget distribution") +
  theme(
    plot.title = element_text(size = 20, color = "DarkGreen"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue"),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 15),
    legend.position = c(1, 1),
    legend.justification = c(1, 1)
  )
# It is not easy to read off a specific genre in the stacked histogram.
# Let's draw the movie budget distribution separately for each genre instead.
# This can be done using facets.
# Budget histogram again, this time split into one panel (row) per genre.
l <- ggplot(
  data = movies,
  aes(x = BudgetMillion, fill = Genre)
)

# facet_grid(Genre ~ .) stacks the per-genre panels vertically.
l +
  geom_histogram(binwidth = 10, color = "Black") +
  facet_grid(Genre ~ .) +
  ggtitle("Movies Budget distribution") +
  theme(
    plot.title = element_text(size = 20, color = "DarkGreen"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue"),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )
# The Action genre panel has the longest tail, implying that the high-budget
# movies are made in Action. And the Comedy genre has a high number of movies
# with a budget below 50 million dollars.
# Distribution of audience ratings ----
# Base plot with title, axis label and theme; the histogram layer is added in
# a separate statement below.
i <- ggplot(data = movies, aes(x = AudienceRatings)) +
  ggtitle("Audience-Rating Distribution") +
  theme(
    axis.text.x = element_text(size = 10, color = "Black"),
    axis.text.y = element_text(size = 10, color = "Black"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue"),
    plot.title = element_text(size = 20, color = "Red"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon")
  ) +
  xlab("Audience Rating")

# White bars with blue outlines, 10 rating points per bin.
i + geom_histogram(binwidth = 10, fill = "White", color = "Blue")
# Distribution of critic ratings ----
# Base plot with title, axis label and theme; the histogram layer is added in
# a separate statement below.
i <- ggplot(data = movies, aes(x = CriticRatings)) +
  ggtitle("Critic-Rating Distribution") +
  theme(
    axis.text.x = element_text(size = 10, color = "Black"),
    axis.text.y = element_text(size = 10, color = "Black"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue"),
    plot.title = element_text(size = 20, color = "Red"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon")
  ) +
  xlab("Critic Rating")

# White bars with blue outlines, 10 rating points per bin.
i + geom_histogram(binwidth = 10, fill = "White", color = "Blue")
# Critic-Rating is uniformly distributed, but Audience-Rating is not:
# the Audience-Rating distribution is biased.
# Audience vs. critic ratings, coloured by genre.
k <- ggplot(
  data = movies,
  aes(x = CriticRatings, y = AudienceRatings, color = Genre)
)

# One panel per Genre (rows) x Year (columns). Point size follows the budget,
# fill = NA hides the smoother's shaded band, and the y axis is zoomed to
# 0-100 with coord_cartesian().
k +
  geom_point(aes(size = BudgetMillion), alpha = 0.75) +
  geom_smooth(fill = NA) +
  ggtitle("Audience v/s Critic") +
  facet_grid(Genre ~ Year) +
  coord_cartesian(ylim = c(0, 100)) +
  theme(
    plot.title = element_text(size = 20, color = "DarkGreen"),
    plot.background = element_rect(fill = "LightCyan", color = "Maroon"),
    axis.title.x = element_text(size = 15, color = "DarkBlue"),
    axis.title.y = element_text(size = 15, color = "DarkBlue")
  )








