This tutorial shows how probability of activity curves can be extracted from the VHF signal data after classification of 1 min intervals into active or passive states. I’m using Hierarchical Generalized Additive Models following the Pedersen et al. 2021 article. The data needed for the execution of this tutorial can be downloaded here
Before you start, make sure that you have the following packages installed: tidyverse
, data.table
,lubridate
, hms
, mgcv
, gratia
, itsadug
, mgcViz
, ggthemes
, viridis
Additionally we will use functionalities from the tRackIT
R package that is not hosted on cran yet. Please check the README in the tRackIT GitHub repository for installation instructions
Load the packages
The tRackIT-package GitHub page shows the complete workflow to get from raw signals to activity classifications with a 1 Minute resolution. We will skip that part here and start with the classified data of bats that have been tracket from 2018 to 2021 (all_bats_aggregated
). There is one file per individual, that contains the timestamp, the activity class per timestamp and the ID. We will need some more info such as timing in relation to sunset and sunrise as well as species, sex etc. To do so we will use two functionalities of the tRackIT-package. The time_of_day() function claculates timedifference tu sunset and sunrise etc, the add.ID.info() function adds meta-info for the individual to the data. This section of the tutorial can only be executed if the ecological_case_study_bat_activity directory has been downloaded. It has >270Gb and can´t be directly downloaded via the user interface. Check the README of the repository for instructions. However, you can skip that part and continue with the next chunk of code using already processed data.
#set coordinates for sunset/sunrise calculation
Lat<-50.844868
Lon<-8.663649
#Loop through years
for(y in c(2018, 2019, 2020, 2021)){
#print(y)
#get projectfile per year
pr<-getProject(projroot=paste0("bats_for_activity/",y, "/"))
#remove ids with no data at all
pr$tags<-pr$tags[pr$tags$ID!="Mbec180518_2",]
pr$tags<-pr$tags[pr$tags$ID!="mbec150155_m",]
#loop through individuals
for(id in pr$tags$ID){
#print(id)
anml<-getAnimal(projList = pr, animalID = id)
#get activity classification aggregated to 1 Minute
activity_files<-list.files(anml$path$classification,pattern="agg", full.names = TRUE)
if(length(activity_files)>0){
activity_1Min<-data.table::fread(fls[1])
#calculate daytime infos
activity_1Min<-time_of_day(data = activity_1Min, Lat=Lat, Lon = Lon, tcol = "timestamp", tz = "CET", activity_period = "nocturnal")
#add id info
activity_1Min<-add.ID.Info( data=activity_1Min, animal=anml)
#number of tracking days
activity_1Min$n_days<-as.numeric(abs(difftime(as.Date(activity_1Min$start_datetime), as.Date(activity_1Min$stop_datetime))))+1
#binary activity classification
activity_1Min$activity<-ifelse(activity_1Min$prediction=="active", 1,0)
#correction of typo
activity_1Min$species[activity_1Min$species=="mbec"]<-"Mbec"
data.table::fwrite(activity_1Min, paste0("all_bats_aggregated/",anml$meta$animalID, "_",as.character(y), "_aggregated_1_Minute.csv"))
}
}}
#merge all data
all_aggregated_files<-list.files("all_bats_aggregated/", full.names = TRUE)
all_bats<-plyr::ldply(fls, function(x){data.table::fread(x)})
all_bats<-all_bats[all_bats$species=="Mbec" | all_bats$species=="Nlei",]
#get info which ids should be excluded
check_ids<-data.table::fread("bats_inspect_id.csv")
excld<-check_ids[check_ids$exclude_individual=="Y",]
#exclude ids from data
all_bats<-all_bats[!(all_bats$ID %in% excld$ID),]
#account for rep state transition
df_rep<-read.csv("df_rep_state_transition.csv")
for (id in unique(df_rep$ID)){
if (df_rep$rep2[df_rep$ID==id]=="pregnant"){
all_bats$rep.state[all_bats$ID==id & all_bats$year==df_rep$year[df_rep$ID==id] & all_bats$timestamp>=df_rep$start_rep1[df_rep$ID==id]]<-"lactating"}
if (df_rep$rep2[df_rep$ID==id]=="post-lactating"){
all_bats$rep.state[all_bats$ID==id & all_bats$year==df_rep$year[df_rep$ID==id] & all_bats$timestamp>=df_rep$start_rep2[df_rep$ID==id]]<-"post-lactating"}
}
#save data
data.table::fwrite(all_bats,"all_bats_aggregated.csv")
## Rows: 818,398
## Columns: 24
## $ timestamp <dttm> 2020-05-15 20:14:00, 2020-05-15 20:15:00, 2020-05-15 2~
## $ prediction <fct> active, active, active, active, active, active, active,~
## $ ID <fct> h146474, h146474, h146474, h146474, h146474, h146474, h~
## $ date <date> 2020-05-15, 2020-05-15, 2020-05-15, 2020-05-15, 2020-0~
## $ sunset_date <date> 2020-05-15, 2020-05-15, 2020-05-15, 2020-05-15, 2020-0~
## $ hour <int> 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,~
## $ sunrise_date <date> 2020-05-16, 2020-05-16, 2020-05-16, 2020-05-16, 2020-0~
## $ sunset <dttm> 2020-05-15 19:08:35, 2020-05-15 19:08:35, 2020-05-15 1~
## $ sunrise <dttm> 2020-05-16 03:34:14, 2020-05-16 03:34:14, 2020-05-16 0~
## $ time_to_rise <dbl> -7.337226, -7.320559, -7.303893, -7.287226, -7.270559, ~
## $ time_to_set <dbl> 1.090028, 1.106695, 1.123362, 1.140028, 1.156695, 1.173~
## $ start_datetime <dttm> 2020-05-15 20:14:00, 2020-05-15 20:14:00, 2020-05-15 2~
## $ stop_datetime <dttm> 2020-05-29 21:58:00, 2020-05-29 21:58:00, 2020-05-29 2~
## $ year <int> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2~
## $ ydate <int> 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, ~
## $ month <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5~
## $ t <dbl> 0, 60, 120, 180, 240, 300, 360, 420, 480, 540, 600, 660~
## $ species <fct> Mbec, Mbec, Mbec, Mbec, Mbec, Mbec, Mbec, Mbec, Mbec, M~
## $ sex <fct> w, w, w, w, w, w, w, w, w, w, w, w, w, w, w, w, w, w, w~
## $ age <fct> ad, ad, ad, ad, ad, ad, ad, ad, ad, ad, ad, ad, ad, ad,~
## $ weight <dbl> 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, 8.2, ~
## $ rep.state <fct> pregnant, pregnant, pregnant, pregnant, pregnant, pregn~
## $ n_days <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,~
## $ activity <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1~
df_1min$species<-as.character(df_1min$species)
df_1min$species[df_1min$species=="Mbec"]<-"M.bechsteinii"
df_1min$species[df_1min$species=="Nlei"]<-"N.leisleri"
df_1min$species<-as.factor(df_1min$species)
df_1min$month_f <- factor(month(df_1min$date))
df_1min$year_f <- factor(df_1min$year)
df_1min$date <- date(df_1min$date)
df_1min$hour <- hour(df_1min$timestamp)
df_1min$week <- week(df_1min$timestamp)
min_set <- min(df_1min$time_to_set)
max_set <- max(df_1min$time_to_set)
df_1min$start.event <- ifelse(df_1min$time_to_set==min_set,T,F) # Identify start of the time series
df_1min$ydate_f <- as.factor(df_1min$ydate)
df_1min$date_f <- as.factor(df_1min$date)
K.time_of_day <- length(unique(df_1min$time_to_rise))
df_1min <- df_1min %>% data.frame()
Exclude retagged individuals and extract sample sizes
df_1min<- df_1min[!(df_1min$ID =="Nlei20211" & df_1min$ydate >= 200),]
df_1min<- df_1min[!(df_1min$ID =="h146487" & df_1min$ydate >= 150),]
# Sample sizes
df_1min %>%
group_by(species) %>%
summarise(nID = n_distinct(ID),
nObs = n(),
meanDays = mean(n_days))
## # A tibble: 2 x 4
## species nID nObs meanDays
## <fct> <int> <int> <dbl>
## 1 M.bechsteinii 52 577977 18.4
## 2 N.leisleri 20 204443 21.3
## nID nObs meanDays
## 1 72 782420 19.15002
NOTE Some individuals were tagged twice within the same year. We want to avoid these situations and reduce the sampling period to the first tagging event. The full details of this analysis can be found here.
We can plot visually inspect the data by presenting the probability of activity over time of the day. Here, it is easier to calculate this probability by time intervals of 15 minutes for easier viuslization
df_15min <- df_1min %>%
#filter(species == "M.bechsteinii" | species == "N.leisleri") %>%
mutate(interval = as_hms(floor_date(timestamp, unit = "15minutes"))) %>%
group_by(ID, species, year, ydate, hour, interval) %>%
summarise(n_intervals = length(activity),
n_active = length(activity[activity == 1]),
n_passive = length(activity[activity == 0]),
time_to_set = mean(time_to_set)) # calculate average time to sunset for that 15 minute interval
df_15min %>%
ggplot(aes(x = time_to_set, y = n_active/n_intervals, color = species)) +
geom_point(alpha = 0.1) +
geom_smooth() + scale_color_wsj() + theme_bw(14) +
facet_wrap(~species) + geom_hline(yintercept = 0.5,linetype = "dashed") +
ylim(0, 1) + ylab("Activity probability (15 min interval)") +
xlab("Time to sunset (h)")
We can also inspect each tagged individual in the same way
df_15min %>%
filter(species == "N.leisleri") %>%
ggplot(aes(x = time_to_set, y = n_active/n_intervals)) +
geom_point(alpha = 0.2) +
geom_smooth() + scale_color_wsj() + theme_bw(14) +
facet_wrap(~ID) + geom_hline(yintercept = 0.5,linetype = "dashed") +
ylim(0, 1) + ylab("Activity probability (15 min interval)") +
xlab("Time to sunset (h)")
df_15min %>%
filter(species == "M.bechsteinii") %>%
ggplot(aes(x = time_to_set, y = n_active/n_intervals)) +
geom_point(alpha = 0.2) +
geom_smooth() + scale_color_wsj() + theme_bw(14) +
facet_wrap(~ID) + geom_hline(yintercept = 0.5,linetype = "dashed") +
ylim(0, 1) + ylab("Activity probability (15 min interval)") +
xlab("Time to sunset (h)")
We fit a Hierarchical Generalized Additive Model to compare whether Bechstein’s and Leisler’s bats differ significantly in their daily activity patterns. We assume that the probability of activity is a non-linear function of time of the day, here centered around time of sunset (t = 0). We use a binomial error term since our activity column is a string of 0 and 1 (i.e the bat is either marked as passive for that minute or active).
We use circular spline functions to constrain the activity probability to be equal at 00:00 and 23:59 (argument bs = "cc"
). We also need to account for the fact that individuals were measured repeatedely but in differnet years of monitoring. The simplest way to do that is to include individuals and date as random intercepts with the argument bs = "re"
. Note that there are many flavors for specifying random effects with HGAMs which apply more or less penalty to the random effects and allow them to deviate from population level trends. Here, we are mainly interested in species differences and assume that there is a general time of activity function that is species specific. Note that more complex random effect structures gave functionally similar results and did not affect conclusions. Pedersen et al. 2021 provides a great and exhasutive tutorials on the different ways to approach random effects within the GAM framework. There are two other arguments that require ou attention. First, we need to account for the fact that there observations are likely to be highly autocorrelated because they are taken at 1 min intervals. This value has to be set manually and we show our procedure for investigating autocorrelation in our analysis R script (see filename
, section 1.1). Next, we need to select the degree of complexity of our smoothing terms: k
. After inspection with the k.check
function, setting k to 120 was the highest complexity we could set without overfitting the data (see for details).
To test for species difference in activity patterns, we fit two models. Model M0 assumes that both species have the same global activity patterns while model M1 allows both species to follow their own trend (argment by = species
within the spline function)
Set autocorrelation (r1
) and basis dimension (k
) of the smooth term
Fit model M0
fit.gam.M0 <- bam(activity ~
s(time_to_set, bs = c("cc"), k = k) +
s(ID, bs = "re") + s(date_f, bs = "re"),
rho = r1, AR.start = start.event,
data = df_1min,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.M0)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ s(time_to_set, bs = c("cc"), k = k) + s(ID, bs = "re") +
## s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7383 0.1339 -12.98 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set) 51.84 118 4061286 <2e-16 ***
## s(ID) 67.07 71 1976322 <2e-16 ***
## s(date_f) 280.26 306 3084457 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.45 Deviance explained = 38.8%
## fREML = 8.5139e+05 Scale est. = 1 n = 782420
Fit model M1
fit.gam.M1 <- bam(activity ~
s(time_to_set, bs = c("cc"), k = k, by = species) +
s(ID, bs = "re") + s(date_f, bs = "re"),
rho = r1, AR.start = start.event,
data = df_1min,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.M1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ s(time_to_set, bs = c("cc"), k = k, by = species) +
## s(ID, bs = "re") + s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7399 0.1309 -13.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set):speciesM.bechsteinii 47.23 118 3571851 <2e-16 ***
## s(time_to_set):speciesN.leisleri 45.82 118 188033 <2e-16 ***
## s(ID) 66.32 71 1824189 <2e-16 ***
## s(date_f) 282.01 306 2421828 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.468 Deviance explained = 40.4%
## fREML = 8.4389e+05 Scale est. = 1 n = 782420
Compare M0 and M1
## Analysis of Deviance Table
##
## Model 1: activity ~ s(time_to_set, bs = c("cc"), k = k) + s(ID, bs = "re") +
## s(date_f, bs = "re")
## Model 2: activity ~ s(time_to_set, bs = c("cc"), k = k, by = species) +
## s(ID, bs = "re") + s(date_f, bs = "re")
## Resid. Df Resid. Dev Df Deviance
## 1 781994 578790
## 2 781942 563613 52.41 15177
## df AIC delta.AIC
## fit.gam.M1 443.2209 248309.1 0.00
## fit.gam.M0 400.6479 263401.1 15092.01
# Calculate hourly interval data
df_1h <- df_1min %>%
mutate(interval = as_hms(floor_date(timestamp, unit = "60minutes"))) %>%
group_by(ID, species, year, ydate, hour, interval) %>%
summarise(n_intervals = length(activity),
n_active = length(activity[activity == 1]),
n_passive = length(activity[activity == 0]),
time_to_set = mean(time_to_set)) # calculate average time to sunset for that 15 minute interval
df_1h$p_act <- df_1h$n_active/df_1h$n_intervals
fit.values <- evaluate_smooth(fit.gam.M1, "s(time_to_set)", n = 244,
overall_uncertainty = T,
unconditional = T)
draw(fit.values)
b0 <- coef(fit.gam.M1)[1]
Fig5 <- ggplot(data = fit.values,
aes(x = time_to_set, y = plogis(est+b0),
color = species, group = species))
Fig5a <- Fig5 +
geom_point(data = df_1h, alpha = .1,
aes(x = time_to_set,
y = p_act)) +
geom_ribbon(aes(ymin = plogis(est+b0 - 2 * se) ,
ymax = plogis(est+b0 + 2 * se)),
fill = "grey", color = "grey") +
geom_line(size = .5) +
geom_hline(yintercept = 0.5, linetype = "dashed") +
scale_color_wsj() + theme_bw(14) +
xlab("") +
ylab("Activity probability \n") +
ylim(0, 1) +
theme(legend.position = c(.1,.745))
Fig5a
fit.delta <- difference_smooths(fit.gam.M1, "s(time_to_set)", n = 244)
Fig5b <- ggplot(data = fit.delta,
aes(x = time_to_set, y = diff)) +
geom_ribbon(aes(ymin = lower,
ymax = upper), color = "grey",
alpha = 0.3) +
geom_line() + geom_hline(yintercept = 0, linetype = "dashed") +
theme_bw(14) + theme(legend.position = "none") +
xlab("Time since sunset (h)") +
ylab("Activity difference \n (M.bechsteinii - N.leisleri)")
Fig5 <- Fig5a / Fig5b
Fig5
The following section shows how different aspects of the dialy activity patterns can be extracted from the HGAM output. We focus here on determining the timinig for onset and end of the daily activity period, the timing of peak activity and the intensity of activity during the night.
# Onset of activity up time: When does p(activity) > 0.5 first
# End of activity time: When does p(activity) > 0.5 last
fit.values %>% group_by(species) %>%
filter(plogis(est+b0) > .5) %>%
summarise(a.onset = as_hms(min(time_to_set)*60),
a.end = as_hms(max(time_to_set)*60))
## # A tibble: 2 x 3
## species a.onset a.end
## <fct> <time> <time>
## 1 M.bechsteinii 00'30.879867" 07'10.972640"
## 2 N.leisleri 00'12.125519" 07'23.475539"
# Peak activity: what are the two highest values for p(activity)?
fit.values %>% group_by(species) %>%
filter(plogis(est+b0) == max(plogis(est+b0))) %>%
summarise(peak.a = max(plogis(est+b0)),
peak.a.low = plogis(est+b0-2*se),
peak.a.up = plogis(est+b0+2*se))
## # A tibble: 2 x 4
## species peak.a peak.a.low peak.a.up
## <fct> <dbl> <dbl> <dbl>
## 1 M.bechsteinii 0.770 0.751 0.788
## 2 N.leisleri 0.822 0.798 0.845
# Peak activity timing: time of day whith maximum p(activity)
fit.values %>% group_by(species) %>%
filter(plogis(est+b0) == max(plogis(est+b0))) %>%
group_by(species) %>%
summarise(t.peak = as_hms(time_to_set*60))
## # A tibble: 2 x 2
## species t.peak
## <fct> <time>
## 1 M.bechsteinii 01'33.394363"
## 2 N.leisleri 00'37.131317"
# Activity density: area under the curve when p(activity) > 0.5
fit.values %>% group_by(species) %>%
filter(plogis(est+b0) > .5) %>%
summarise(auc = bayestestR::auc(time_to_set, plogis(est+b0),
method = "spline"),
auc.low = bayestestR::auc(time_to_set, plogis(est+b0-2*se),
method = "spline"),
auc.up = bayestestR::auc(time_to_set, plogis(est+b0+2*se),
method = "spline"))
## # A tibble: 2 x 4
## species auc auc.low auc.up
## <fct> <dbl> <dbl> <dbl>
## 1 M.bechsteinii 4.70 4.56 4.84
## 2 N.leisleri 3.42 3.23 3.62
We can also use HGAM to compare the timing of daily activity depending on the reproductive status of individuals within each species. The models used here are very similar to those used in section 2. with the main difference that we are now comparing a model that assumes a common activity pattern for all statuses (model 0) and one that allows reproductive statuses to have differing smoothing functions.
Exclude individuals of unknown reproductive status
df_1min %>% filter(rep.state != "unknown") %>%
group_by(species) %>%
summarise(nID = n_distinct(ID),
nObs = n())
## # A tibble: 2 x 3
## species nID nObs
## <fct> <int> <int>
## 1 M.bechsteinii 35 384536
## 2 N.leisleri 19 203261
df_1min %>% filter(rep.state != "unknown") %>%
group_by(species, rep.state) %>%
summarise(nID = n_distinct(ID),
nObs = n())
## # A tibble: 8 x 4
## # Groups: species [2]
## species rep.state nID nObs
## <fct> <fct> <int> <int>
## 1 M.bechsteinii lactating 13 102076
## 2 M.bechsteinii non.repro 4 68685
## 3 M.bechsteinii post-lactating 12 135582
## 4 M.bechsteinii pregnant 13 78193
## 5 N.leisleri lactating 11 71560
## 6 N.leisleri non.repro 2 23296
## 7 N.leisleri post-lactating 7 58889
## 8 N.leisleri pregnant 6 49516
# Remove individuals of unknown status
df_1min.Mb <- df_1min %>% filter(species == "M.bechsteinii" & rep.state != "unknown") %>% droplevels()
df_1min.Nl <- df_1min %>% filter(species == "N.leisleri" & rep.state != "unknown") %>% droplevels()
fit.gam.Mb.0 <- bam(activity ~
s(time_to_set, bs = c("cc"), k = k) +
s(ID, bs = "re") + #, k = 25
s(date_f, bs = "re"), #k = 173
rho = r1, AR.start = start.event,
data = df_1min.Mb,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.Mb.0)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ s(time_to_set, bs = c("cc"), k = k) + s(ID, bs = "re") +
## s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.6686 0.1146 -14.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set) 45.50 118 573547 < 2e-16 ***
## s(ID) 28.39 35 41969 0.00503 **
## s(date_f) 226.32 252 63199 3.32e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.519 Deviance explained = 44.5%
## fREML = 4.0871e+05 Scale est. = 1 n = 384536
### M1: difference in reproductive status on average & in smooth ----
fit.gam.Mb.1 <- bam(activity ~ rep.state +
s(time_to_set, bs = c("cc"), k = k,
by = rep.state) +
s(ID, bs = "re") + s(date_f, bs = "re"),
rho = r1, AR.start = start.event,
data = df_1min.Mb,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.Mb.1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ rep.state + s(time_to_set, bs = c("cc"), k = k, by = rep.state) +
## s(ID, bs = "re") + s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2606 0.1477 -8.532 < 2e-16 ***
## rep.statenon.repro -0.7385 0.2799 -2.639 0.00832 **
## rep.statepost-lactating -0.8867 0.1325 -6.692 2.2e-11 ***
## rep.statepregnant -0.1065 0.1996 -0.534 0.59357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set):rep.statelactating 33.82 118 77911 < 2e-16 ***
## s(time_to_set):rep.statenon.repro 30.96 118 159447 < 2e-16 ***
## s(time_to_set):rep.statepost-lactating 38.59 118 159446 < 2e-16 ***
## s(time_to_set):rep.statepregnant 31.39 118 58687 < 2e-16 ***
## s(ID) 26.75 34 17596 0.0664 .
## s(date_f) 224.82 252 44573 5.29e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.526 Deviance explained = 45.3%
## fREML = 4.07e+05 Scale est. = 1 n = 384536
## df AIC delta.AIC
## fit.gam.Mb.1 392.2189 106210.7 0.000
## fit.gam.Mb.0 301.9440 109867.9 3657.224
fit.gam.Nl.0 <- bam(activity ~
s(time_to_set, bs = c("cc"), k = k) +
s(ID, bs = "re") + s(date_f, bs = "re"),
rho = r1, AR.start = start.event,
data = df_1min.Nl,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.Nl.0)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ s(time_to_set, bs = c("cc"), k = k) + s(ID, bs = "re") +
## s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7416 0.3772 -4.617 3.89e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set) 45.71 118 401373 <2e-16 ***
## s(ID) 16.94 18 2697157 <2e-16 ***
## s(date_f) 114.41 126 4172289 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.35 Deviance explained = 32.8%
## fREML = 2.1573e+05 Scale est. = 1 n = 203261
fit.gam.Nl.1 <- bam(activity ~ rep.state +
s(time_to_set, bs = c("cc"), k = k,
by = rep.state) +
s(ID, bs = "re") + s(date_f, bs = "re"),
rho = r1, AR.start = start.event,
data = df_1min.Nl,
method = "fREML", discrete=T, family = "binomial",
knots=list(time_to_rise=c(min_set, max_set)))
summary(fit.gam.Nl.1)
##
## Family: binomial
## Link function: logit
##
## Formula:
## activity ~ rep.state + s(time_to_set, bs = c("cc"), k = k, by = rep.state) +
## s(ID, bs = "re") + s(date_f, bs = "re")
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3364 0.5211 -2.564 0.010336 *
## rep.statenon.repro 1.0943 1.3571 0.806 0.420023
## rep.statepost-lactating 0.7711 0.2588 2.980 0.002887 **
## rep.statepregnant -2.6379 0.6794 -3.883 0.000103 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(time_to_set):rep.statelactating 36.84 118 146915 < 2e-16 ***
## s(time_to_set):rep.statenon.repro 28.29 118 17017 < 2e-16 ***
## s(time_to_set):rep.statepost-lactating 36.58 118 46074 < 2e-16 ***
## s(time_to_set):rep.statepregnant 36.46 118 58619 < 2e-16 ***
## s(ID) 15.94 17 904539 0.0185 *
## s(date_f) 112.07 126 764998 3.43e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.367 Deviance explained = 34.3%
## fREML = 2.1435e+05 Scale est. = 1 n = 203261
## df AIC delta.AIC
## fit.gam.Nl.1 272.7888 54313.97 0.000
## fit.gam.Nl.0 178.7082 57287.66 2973.691
fit.values.Mb <- evaluate_smooth(fit.gam.Mb.1, "s(time_to_set)", n = 244,
overall_uncertainty = T,
unconditional = T)
fit.values.Nl <- evaluate_smooth(fit.gam.Nl.1, "s(time_to_set)", n = 244,
overall_uncertainty = T,
unconditional = T)
b0 <- coef(fit.gam.Mb.1)[1]
Fig6a <- ggplot(data = fit.values.Mb,
aes(x = time_to_set, y = plogis(est+b0),
fill = rep.state, group = rep.state)) +
geom_ribbon(aes(ymin = plogis(est+b0 - 2 * se) ,
ymax = plogis(est+b0 + 2 * se)),
alpha = .5) +
geom_line(size = .5) +
geom_hline(yintercept = 0.5, linetype = "dashed") +
scale_fill_wsj() + theme_bw(14) +
#facet_wrap(~rep.state) +
xlab("time since sunset (h)") +
ylab("Activity probability \n") +
ylim(0, 1) +
#theme(legend.position = "none") +
theme(legend.position = c(.15,.745),
legend.title=element_blank()) +
ggtitle("M.bechsteinii")
b0 <- coef(fit.gam.Nl.1)[1]
Fig6b <- ggplot(data = fit.values.Nl,
aes(x = time_to_set, y = plogis(est+b0),
fill = rep.state, group = rep.state)) +
geom_ribbon(aes(ymin = plogis(est+b0 - 2 * se) ,
ymax = plogis(est+b0 + 2 * se)),
alpha = .5) +
geom_line(size = .5) +
geom_hline(yintercept = 0.5, linetype = "dashed") +
scale_fill_wsj() + theme_bw(14) +
#facet_wrap(~rep.state) +
xlab("time since sunset (h)") +
ylab("Activity probability \n") +
ylim(0, 1) +
#theme(legend.position = "none") +
theme(legend.position = "none") +
ggtitle("N.leisleri")
Fig6 <- Fig6a + Fig6b
Fig6