3  Examples using ggplot2

library(ggplot2)

# Boxplot of sepal length for each iris species. The open diamond
# (shape 5) marks each species' mean on top of its box.
ggplot(data = iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot(aes(fill = Species)) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0.
  stat_summary(fun = mean, geom = "point", shape = 5, size = 4) +
  ylab("Sepal Length") +
  ggtitle("Iris Boxplot") +
  theme_bw() +
  # hjust = 0.5 centres the plot title.
  theme(plot.title = element_text(hjust = 0.5))

# Overlaid density curves of sepal width, one per species.
# alpha = 0.3 makes the fills translucent so overlapping curves stay visible.
ggplot(data = iris, aes(x = Sepal.Width, fill = Species)) +
  geom_density(alpha = 0.3) +
  xlab("Sepal Width") +
  ylab("Density") +
  # The title names only what is drawn: density curves, no histogram layer.
  ggtitle("Density Curve of Sepal Width") +
  theme_classic() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# Violin plot of sepal length for each iris species.
# There is deliberately no geom_smooth() layer: x is discrete and there is
# one group per species, so every group has a single x value and a
# smoother has nothing to fit.
ggplot(data = iris, aes(x = Species, y = Sepal.Length, group = Species)) +
  geom_violin(aes(fill = Species)) +
  xlab("Species") +
  ylab("Sepal Length") +
  ggtitle("Violin Plot") +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )

# Sepal length against sepal width. Colour is mapped in the top-level
# aes(), so both the points and the LOESS smoothers are split by species;
# point shape is mapped per species on the point layer only.
ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) +
  geom_point(aes(shape = Species), size = 1.5) +
  geom_smooth(method = "loess") +
  labs(
    title = "Scatterplot with Smoothers",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank()
  )
`geom_smooth()` using formula = 'y ~ x'

Filter the “txhousing” dataset (bundled with ggplot2) to the years 2013, 2014, and 2015, and convert year to a factor


Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Keep only the 2013-2015 rows of txhousing and store year as a factor.
# The result is assigned back to the name txhousing.
txhousing <- txhousing %>%
  filter(year %in% 2013:2015) %>%
  mutate(year = as.factor(year))

3.0.1 Step 1: Set up the ggplot

# Base plot: data and aesthetic mappings only; no layers have been added yet.
ggplot(diamonds, aes(carat, price))

3.0.2 Step 2: Add layers to ggplot

# Same base plot, now with a point layer and a smoother layer on top.
# geom_smooth() is called without arguments, so ggplot2 picks the method.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth()
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

3.0.3 Step 3: Add labels to plot

# Points plus smoother, with a title and readable axis labels.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth() +
  ggtitle("Scatterplot of diamond carat weight and price") +
  xlab("Carat") +
  ylab("Price")
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Labelled scatterplot with custom text styling: a bold, centred 14-pt
# title, 14-pt axis titles and 11-pt axis tick labels.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth() +
  ggtitle("Scatterplot of diamond carat weight and price") +
  xlab("Carat") +
  ylab("Price") +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
  )
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Styled scatterplot split into one panel per diamond cut.
ggplot(diamonds, aes(carat, price)) +
  geom_point() +
  geom_smooth() +
  # One facet panel for each level of `cut`.
  facet_wrap(vars(cut)) +
  labs(
    x = "Carat",
    y = "Price",
    title = "Scatterplot of diamond carat weight and price"
  ) +
  theme(
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
  )
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Example plot showing grouping

# Grouping by colour: points are coloured by cut. The colour mapping lives
# on the point layer only, so the smoother is fitted to all diamonds at once.
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(color = cut)) +
  geom_smooth() +
  ggtitle("Scatterplot of diamond carat weight and price") +
  xlab("Carat") +
  ylab("Price") +
  labs(
    color = "Cut of Diamond",  # legend title
    caption = "Based on a sample of diamonds sold in the U.S."
  ) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11)
  )
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Example plot showing grouping and faceting

# Grouping and faceting combined: points coloured by cut, with one panel
# (and therefore one smoother) per clarity grade.
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(color = cut)) +
  geom_smooth() +
  facet_wrap(vars(clarity)) +
  labs(
    title = "Scatterplot of diamond carat weight and price",
    caption = "Based on a sample of diamonds sold in the U.S.",
    color = "Cut of Diamond",
    x = "Carat",
    y = "Price"
  ) +
  theme(
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
  )
`geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

3.1 Examples using dplyr

Using the diamonds dataset that ships with ggplot2

# Preview the first six rows of the diamonds data.
head(diamonds, n = 6)
# A tibble: 6 × 10
  carat cut       color clarity depth table price     x     y     z
  <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48

filter and select

# Keep rows with carat above 0.20, then retain only three columns.
new_df <- diamonds %>%
  filter(carat > 0.2) %>%
  select(c(carat, cut, price))

# Preview the first six rows of the result.
head(new_df, n = 6)
# A tibble: 6 × 3
  carat cut       price
  <dbl> <ord>     <int>
1  0.23 Ideal       326
2  0.21 Premium     326
3  0.23 Good        327
4  0.29 Premium     334
5  0.31 Good        335
6  0.24 Very Good   336

mutate and select

# Add a numeric 1/0 indicator that is 1 when the cut is "Ideal",
# then keep only the cut and the indicator.
new_df2 <- diamonds %>%
  mutate(ideal_boolean = as.numeric(cut == "Ideal")) %>%
  select(c(cut, ideal_boolean))

# Preview the first six rows of the result.
head(new_df2, n = 6)
# A tibble: 6 × 2
  cut       ideal_boolean
  <ord>             <dbl>
1 Ideal                 1
2 Premium               0
3 Good                  0
4 Premium               0
5 Good                  0
6 Very Good             0

group_by and summarise

# Mean price for each cut: one output row per level of `cut`.
by_cut <- group_by(diamonds, cut)
new_df3 <- summarise(by_cut, mean_price = mean(price))
rm(by_cut)  # drop the intermediate so only new_df3 remains

new_df3
# A tibble: 5 × 2
  cut       mean_price
  <ord>          <dbl>
1 Fair           4359.
2 Good           3929.
3 Very Good      3982.
4 Premium        4584.
5 Ideal          3458.