Currently I have a dataframe of bear detections that I want to convert into a binary detection history (14 columns of day1, day2, day3, etc. where:
actual_date_out = the date the camera was deployed, binary detection history should start on this date
start_date = every date there's a detection of a bear, this is used to determine which of the 14 days should get a 1 for the detection history (e.g. actual_date_out = 12/1, start_date values for 12/3, 12/5, would yield 00101...)
What I'm trying to do is start a 14 day detection history (e.g. 001010001...) for each plot, starting with "actual_date_out" and ending 14 days after that. The problem is that sometimes the first detection (start_time) is e.g. 3 days after the deployment date and some dates don't have data, so I would have to fill in a 0 for some plots starting at the first deployment date (actual_date_out) and then for every preceding date that doesn't have a "start_time" column value (only dates that have start time row/presence have a 1). Some plots have 1 detection the whole time, others multiple for each day. The idea is each day just gets 0 or 1. Some plots may have multiple start_time values but each plot will only have one actual_date_out (each camera is only deployed once.)
Currently I have code that kind of works except it's putting a 1 for the first column and only 0s for all the preceding instead of searching for a start_time and assigning a 1. Only dates that also have a start_date get a value of 1.
library(tidyverse)
d <- read_csv(file.choose())
#Change actual_date_out to date format
d$actual_date_out <- as.Date(d$actual_date_out, format = "%m/%d/%Y")
d$retrieval_date <- as.Date(d$retrieval_date, format = "%m/%d/%Y")
d$start_time <- as.Date(d$start_time, format = "%m/%d/%Y")
#For each plot, find the earliest date in actual_date_out and then add the next 13 days. For each plot, each of the remaining 13 days should be added with a pres_abs = 0
d1 <- d |>
group_by(plot) |>
mutate(start_time = min(actual_date_out)) |>
complete(actual_date_out = seq.Date(first(start_time), first(start_time) + 20, by = "day")) |>
mutate(pres_abs = if_else(is.na(pres_abs), 0, pres_abs)) |>
ungroup()
# Removes duplicate entries by taking max of pres_abs
d2 <- d1 |>
group_by(plot, actual_date_out) |>
slice_max(pres_abs, with_ties = FALSE) |>
ungroup()
# Create news days variable as a sequence
d2 <- d2 |>
group_by(plot) |>
arrange(actual_date_out) |>
mutate(day = paste0("day_", row_number())) |>
ungroup()
# For each plot, take the actual_lat and actual_long from the first entry and propogate it to all the other entries for that plot
# For each plot, record the very first day in the 14 day period as actual_date_out
d2 <- d2 |>
group_by(plot) |>
mutate(actual_lat = first(actual_lat),
actual_long = first(actual_long),
first_day = first(actual_date_out)) |>
ungroup()
# Pivots to wider so each of the 14 days is a binarized variable
d3 <- d2 |>
select(-c(retrieval_date,start_time, actual_date_out)) |>
pivot_wider(id_cols = c(plot, actual_lat, actual_long, first_day),
names_from = day,
values_from = pres_abs)
> dput(d)
structure(list(plot = c("d22142", "d22142", "d22489", "d22489",
"d23081", "d23081", "d23081", "d23302", "d23544", "d23544", "d23544",
"d23544", "d23544", "d23544", "d23569", "d23569", "d23579", "d23647"
), actual_date_out = structure(c(17158, 17158, 17229, 17229,
17273, 17273, 17273, 17272, 17326, 17326, 17326, 17326, 17326,
17326, 17303, 17303, 17303, 17309), class = "Date"), retrieval_date = structure(c(17178,
17178, 17250, 17250, 17293, 17293, 17293, 17291, 17349, 17349,
17349, 17349, 17349, 17349, 17324, 17324, 17327, 17324), class = "Date"),
actual_lat = c(35.5767, 35.5767, 35.5901, 35.5901, 35.2851,
35.2851, 35.2851, 35.3086, 35.9439, 35.9439, 35.9439, 35.9439,
35.9439, 35.9439, 35.0581, 35.0581, 35.1264, 35.3453), actual_long = c(-82.4956,
-82.4956, -82.5901, -82.5901, -83.1089, -83.1089, -83.1089,
-82.5258, -82.6275, -82.6275, -82.6275, -82.6275, -82.6275,
-82.6275, -83.4274, -83.4274, -83.0983, -82.781), start_time = structure(c(17161,
17161, 17248, 17248, 17281, 17283, 17281, 17273, 17336, 17336,
17347, 17349, 17336, 17336, 17309, 17315, 17316, 17311), class = "Date"),
pres_abs = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1)), row.names = c(NA, -18L), spec = structure(list(
cols = list(plot = structure(list(), class = c("collector_character",
"collector")), actual_date_out = structure(list(), class = c("collector_character",
"collector")), retrieval_date = structure(list(), class = c("collector_character",
"collector")), actual_lat = structure(list(), class = c("collector_double",
"collector")), actual_long = structure(list(), class = c("collector_double",
"collector")), start_time = structure(list(), class = c("collector_character",
"collector")), pres_abs = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
