library(tidyverse)
# Read the plant/bee observation data from a CSV file on the local machine.
# NOTE(review): the path is absolute and machine-specific; confirm it before
# running this script on another computer.
bees <- read_csv(file = "C:/Users/Z/Downloads/plants_and_bees.csv")
Since we are focusing on which plants bees prefer, I removed all samples taken from the air.
# Keep only observations that were collected on a plant: rows whose
# plant_species is "None" are removed. Rows with a missing plant_species are
# removed as well, because filter() drops rows where the condition is NA.
bees_with_plant <- bees %>%
  filter(plant_species != "None")

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively.
if (interactive()) {
  View(bees_with_plant)
}
I then created various data frames to help focus on certain information.
First, I created tables that focused on either native or non-native bee species and graphed the results.
# Total bees counted per plant species, one table per bee origin. Rows are
# selected by nonnative_bee (0 for the "native" table, 1 for the "non" table).
# native_or_non is passed to count() as a key instead of being set through
# group_by(), so each result is a plain, ungrouped tibble with the columns
# native_or_non, plant_species and n, sorted by n in descending order.
plant_summary_native <- bees_with_plant %>%
  filter(nonnative_bee == 0) %>%
  count(native_or_non, plant_species, wt = bees_num, sort = TRUE)

plant_summary_non <- bees_with_plant %>%
  filter(nonnative_bee == 1) %>%
  count(native_or_non, plant_species, wt = bees_num, sort = TRUE)
# Fill colours keyed by the plant-origin labels used in native_or_non.
cols <- c("non-native" = "palevioletred2", "native" = "lightskyblue")

# Build a bar chart of total bees counted per plant species.
#
# summary_tbl: a table with the columns plant_species, n and native_or_non.
# plot_title:  the title shown above the chart.
#
# Returns the ggplot object. Bars are ordered from the highest to the lowest
# count and filled by plant origin using the colours in `cols`.
plot_bee_preferences <- function(summary_tbl, plot_title) {
  ggplot(
    summary_tbl,
    aes(
      x = reorder(plant_species, -n),
      y = n,
      fill = native_or_non
    )
  ) +
    # geom_col() takes bar heights directly from the y aesthetic.
    geom_col() +
    # Rotate the species names so long labels do not overlap.
    theme(axis.text.x = element_text(angle = 90)) +
    labs(
      x = "Plant Species",
      y = "Total Bees Counted",
      title = plot_title,
      fill = "Plant Origin"
    ) +
    scale_fill_manual(values = cols)
}

# One chart per bee origin; calling the helper at top level prints the plot.
plot_bee_preferences(plant_summary_native, "Native Bee Preferences")
plot_bee_preferences(plant_summary_non, "Non-native Bee Preferences")
Next I created tables that provided information about the timeframe of samples, focusing on the native bees. I then zoomed in on the 6 most popular species and graphed the results.
# Bees counted per season, plant origin and plant species, restricted to rows
# with nonnative_bee == 0. The rows are filtered first and the keys are given
# to count() directly, so the result is an ungrouped tibble with the columns
# season, native_or_non, plant_species and n, sorted by n in descending order.
native_season <- bees_with_plant %>%
  filter(nonnative_bee == 0) %>%
  count(season, native_or_non, plant_species, wt = bees_num, sort = TRUE)
# The six plant species singled out for the seasonal close-up.
pop_species <- c(
  "Leucanthemum vulgare",
  "Rudbeckia hirta",
  "Cichorium intybus",
  "Daucus carota",
  "Chamaecrista fasciculata",
  "Asclepias tuberosa"
)

# Season names paired with date ranges. The second string contains a literal
# line break. NOTE(review): presumably meant as facet labels, but this vector
# is not referenced anywhere in the code shown here.
season_lab <- c(
  "Early Season 04/18/2017 - 07/03/2017",
  "Late Season
07/18/2017 - 08/02/2017"
)
# Restrict the seasonal counts to the species listed in pop_species and give
# the season codes readable labels. Any grouping on the input is dropped
# first so the season column can be rewritten as an ordinary column; values
# other than the two known codes are left unchanged.
native_season_zoom <- native_season %>%
  ungroup() %>%
  filter(plant_species %in% pop_species) %>%
  mutate(
    season = case_when(
      season == "early.season" ~ "Early Season",
      season == "late.season" ~ "Late Season",
      TRUE ~ season
    )
  )
# Bar chart of bees counted per selected plant species, with one panel per
# season. Bars are ordered by descending count and coloured by species.
ggplot(
  data = native_season_zoom,
  mapping = aes(
    x = reorder(plant_species, -n),
    y = n,
    fill = plant_species
  )
) +
  geom_col() +
  facet_wrap(vars(season)) +
  # Rotate the species names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Native Bee Preferences",
    x = "Plant Species",
    y = "Total Bees Counted",
    fill = "Plant Species"
  )
When examining a single sample, I opted to focus on the sample with the most entries. I limited it to samples that had plants. I then graphed the distribution of that sample.
# Number of rows recorded for each sample_id among the plant observations,
# with the sample that has the most entries listed first.
diverse_sample <- bees_with_plant %>%
  count(sample_id, sort = TRUE)
# All rows belonging to sample 17473. No grouping is applied: nothing
# downstream summarises by group, so the data is kept as a plain tibble.
# NOTE(review): this reads from `bees` rather than `bees_with_plant`, so any
# rows of this sample with plant_species "None" would be included - confirm
# that this is intended.
sample_dist <- bees %>%
  filter(sample_id == 17473)
# Stacked bar chart for the single sample: one bar per plant species, with
# bar segments coloured by bee species (legend ordered by descending
# bees_num).
ggplot(
  data = sample_dist,
  mapping = aes(
    x = plant_species,
    y = bees_num,
    fill = reorder(bee_species, -bees_num)
  )
) +
  geom_col() +
  # Rotate the species names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "Distribution of a Sample",
    subtitle = "Sample ID 17473, 07/18/2017",
    x = "Plant Species",
    y = "Total Bees Counted",
    fill = "Bee Species"
  )