This document provides code to solve the most common issues faced when transforming tracking data into the format required for the Seabird Tracking Database https://www.seabirdtracking.org/ using R.
The script uses an artificial example that can be downloaded here: https://www.seabirdtracking.org/wp-content/uploads/2025/01/GPS_stdb_bad_example.csv
R is a free open-source software environment https://www.r-project.org/ and we recommend running R using R Studio https://posit.co/products/open-source/rstudio/
library(dplyr) #for general data wrangling  ## 
## Attaching package: 'dplyr'## The following objects are masked from 'package:stats':
## 
##     filter, lag## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(leaflet) #for maps
#If you don't have a package installed use the following after removing "#"
#install.packages("tidyverse")
#install.packages("leaflet")
Read in your tracking data csv (see the section at the end if your data is not all in one csv).
#Load the example tracking data from csv into a data frame
#Change the filepath to the location of your csv
data <- utils::read.csv(file = "C:/Users/bethany.clark/OneDrive - BirdLife International/STDB/STDB_admin_shared_folder/GPS_stdb_bad_example.csv")
head(data) #check the format##              datetime latitude longitude bird_id sex breed_stage tag_type
## 1 2015.12.05 12:13:00 -100.005   6.86970   Bird1   M brood-guard      GPS
## 2 2015.12.05 13:13:00 -100.345   6.87014   Bird1   M brood-guard      GPS
## 3 2015.12.05 14:13:00 -100.685   6.86029   Bird1   M brood-guard      GPS
## 4 2015.12.05 15:13:00 -101.025   6.85092   Bird1   M brood-guard      GPS
## 5 2015.12.05 16:13:00 -101.365   6.86155   Bird1   M brood-guard      GPS
## 6 2015.12.05 17:13:00 -101.705   6.87218   Bird1   M brood-guard      GPS
#Remove NAs in the key variables
nrow(data)
## [1] 46
data <- data %>% tidyr::drop_na(latitude, longitude, datetime)
nrow(data) #Check the difference between the number of rows before and after, and investigate if needed
## [1] 44
Common issues include:
- Positions outside the boundaries e.g. lat >90 or < -90, lon
>180 or < -180
- Locations before or after deployment (e.g. of the institute, not the
bird!)
- Lat/lon reversed
summary(data$latitude);summary(data$longitude)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -109.53 -105.87 -102.22  -98.34  -98.07    0.00
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   6.870   6.965   6.842   7.102   7.448
plot(data$longitude,data$latitude)
#Most likely, the lat and lon are the wrong way around!
 
data <- data %>% dplyr::rename(longitude = latitude, latitude = longitude)
Use an interactive map to inspect data
#Check with an interactive plot
#Builds a leaflet map of every location (points plus a connecting line) and displays it
map.alldata <- leaflet() %>% ## start leaflet plot
  ## select background imagery
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery") %>%
  ## plot the points. Note: leaflet automatically finds the lon / lat columns
  addCircleMarkers(data = data,
                   radius = 3,
                   fillColor = "cyan",
                   fillOpacity = 0.5,
                   stroke = FALSE) %>% ## FALSE spelled out: F is an ordinary variable that can be reassigned
  ## connect the points with a thin line, in the order of the rows
  addPolylines(lng = data$longitude,
               lat = data$latitude, weight = 1,
               color = "cyan")
map.alldata
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
#Looks better, but there is an odd point at 0,0
#Remove the incorrect location
#Only drop rows where BOTH coordinates are exactly 0. Testing
#latitude != 0 & longitude != 0 would also drop genuine points that have just
#one coordinate equal to 0 (on the equator or the prime meridian)
data <- data %>% dplyr::filter(!(latitude == 0 & longitude == 0))
#Alternative code (specific to this example, where every other point is west of -5)
data <- data %>% dplyr::filter(longitude < -5)
#Check again with the interactive plot
#Rebuilds the same leaflet map (points plus a connecting line) from the cleaned data
map.alldata <- leaflet() %>% ## start leaflet plot
  ## select background imagery
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery") %>%
  ## plot the points. Note: leaflet automatically finds the lon / lat columns
  addCircleMarkers(data = data,
                   radius = 3,
                   fillColor = "cyan",
                   fillOpacity = 0.5,
                   stroke = FALSE) %>% ## FALSE spelled out: F is an ordinary variable that can be reassigned
  ## connect the points with a thin line, in the order of the rows
  addPolylines(lng = data$longitude,
               lat = data$latitude, weight = 1,
               color = "cyan")
map.alldata
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
#Fixed!The Seabird Tracking Database currently requires a particular timestamp format in the GMT time zone.
date_gmt dd/mm/yyyy
time_gmt hh:mm:ss
data$datetime[1]
## [1] "2015.12.05 12:13:00"
#Convert to datetime; tz is the time zone the raw timestamps are written in
date_time <- lubridate::ymd_hms(data$datetime, tz = "America/Mexico_City")
#OlsonNames() for list of accepted time zone codes
date_time[1]
## [1] "2015-12-05 12:13:00 CST"
#Convert to GMT
#with_tz() names this argument "tzone"; "tz" only worked through partial argument matching
date_time_gmt <- lubridate::with_tz(date_time, tzone = "GMT")
date_time_gmt[1]
## [1] "2015-12-05 18:13:00 GMT"
#Reformat to match template
data$date_gmt <- format(date_time_gmt, "%d/%m/%Y")
data$time_gmt <- format(date_time_gmt, "%H:%M:%S")
To facilitate analysis, the database only accepts certain categories with specific spelling and format.
head(data) ##              datetime longitude latitude bird_id sex breed_stage tag_type
## 1 2015.12.05 12:13:00  -100.005  6.86970   Bird1   M brood-guard      GPS
## 2 2015.12.05 13:13:00  -100.345  6.87014   Bird1   M brood-guard      GPS
## 3 2015.12.05 14:13:00  -100.685  6.86029   Bird1   M brood-guard      GPS
## 4 2015.12.05 15:13:00  -101.025  6.85092   Bird1   M brood-guard      GPS
## 5 2015.12.05 16:13:00  -101.365  6.86155   Bird1   M brood-guard      GPS
## 6 2015.12.05 17:13:00  -101.705  6.87218   Bird1   M brood-guard      GPS
##     date_gmt time_gmt
## 1 05/12/2015 18:13:00
## 2 05/12/2015 19:13:00
## 3 05/12/2015 20:13:00
## 4 05/12/2015 21:13:00
## 5 05/12/2015 22:13:00
## 6 05/12/2015 23:13:00#Sex and breed_stage are included
unique(data$sex)
## [1] "M" "F" NA
#If there is no sex information, use "unknown", otherwise "female" or "male"
#Recode in place: %in% never returns NA, so missing values are left untouched
#by the first two lines and are then filled in by the third
data$sex[data$sex %in% "M"] <- "male"
data$sex[data$sex %in% "F"] <- "female"
data$sex[is.na(data$sex)] <- "unknown"
unique(data$sex)
## [1] "male"    "female"  "unknown"
unique(data$breed_stage) #Check against the options
## [1] "brood-guard" "incubating"
#Options are nested
#If there is no info, use "unknown"
#If the bird is breeding, but unsure which stage, use "breeding"
#If more info is known, use "pre-egg", "incubation", "brood-guard", "post-guard", "chick-rearing" or "creche"
#If non-breeding, but unsure, use "non-breeding", "migration",  "winter",   "sabbatical",   "pre-moult", "breeding  fail (breeding season)" 
#"incubating" should be "incubation", so correct all rows with this value
data$breed_stage <- ifelse(data$breed_stage == "incubating", "incubation", data$breed_stage)
All the columns must be included even if you do not have the information.
#Add the remaining template columns, each filled with one constant value for every row
#Track ID: use 1 when the data are not split into tracks within birds
data[["track_id"]] <- 1
#Age: the birds in this example are breeding, so they must be "adult"
#(other options are "immature", "juvenile", "fledgling", "unknown")
data[["age"]] <- "adult"
#Equinox: this is a GPS dataset, so labeling the equinox period is not applicable - fill in with NA
#For GLS datasets, equinox can still be NA, or "yes" or "no" if the periods have been marked.
#In this case, it is helpful to include how these are marked in dataset notes
data[["equinox"]] <- NA
#Argos quality: there is none for a GPS dataset, so fill in with NA
data[["argos_quality"]] <- NA
#PTT dataset can have the following quality "G" "3" "2" "1" "0" "A" "B" "Z"
Select the columns with the correct names in the correct order. The column names and order must match the template.
#Keep only the template columns, listed in the order the template requires
data_stdb <- dplyr::select(
  data,
  bird_id, sex, age, breed_stage, track_id,
  date_gmt, time_gmt,
  latitude, longitude,
  equinox, argos_quality
)
#Check the output
head(data_stdb)##   bird_id  sex   age breed_stage track_id   date_gmt time_gmt latitude
## 1   Bird1 male adult brood-guard        1 05/12/2015 18:13:00  6.86970
## 2   Bird1 male adult brood-guard        1 05/12/2015 19:13:00  6.87014
## 3   Bird1 male adult brood-guard        1 05/12/2015 20:13:00  6.86029
## 4   Bird1 male adult brood-guard        1 05/12/2015 21:13:00  6.85092
## 5   Bird1 male adult brood-guard        1 05/12/2015 22:13:00  6.86155
## 6   Bird1 male adult brood-guard        1 05/12/2015 23:13:00  6.87218
##   longitude equinox argos_quality
## 1  -100.005      NA            NA
## 2  -100.345      NA            NA
## 3  -100.685      NA            NA
## 4  -101.025      NA            NA
## 5  -101.365      NA            NA
## 6  -101.705      NA            NA
See https://www.seabirdtracking.org/instructions/ for instructions on how to register and fill in the dataset upload metadata form.
#Export the formatted data to csv, leaving out the row-name column
#FALSE is spelled out because F is an ordinary variable that can be reassigned
write.csv(data_stdb,"C:/Users/bethany.clark/OneDrive - BirdLife International/STDB/STDB_admin_shared_folder/GPS_stdb_bad_example_corrected.csv",
          row.names = FALSE)
#Change the filepath to where you would like to export your csv
Bonus!
The section below shows the code but does not work with the example file.
The data may not all be in one csv (generally there is a separate csv for each deployment). This code will combine them, provided all the csvs are in the same folder and there are no other files in the folder.
#If you don't have a package installed use the following
#install.packages("data.table")
folder <- "filepath"
list.files(folder) #Check the correct files are in the folder
## character(0)
files <- list.files(path = folder, full.names = TRUE); files
## character(0)
#Read every file and stack them into one table
#sapply(simplify = FALSE) returns a list named by file path; rbindlist copies
#those names into the 'filename' column, and fill = TRUE pads columns missing from some files
data <- data.table::rbindlist(sapply(files, read.csv, simplify = FALSE), fill = TRUE, idcol = 'filename')
head(data)
## Null data.table (0 rows and 0 cols)
table(data$filename) #Shows how many rows come from each file
## < table of extent 0 >
#The filename column can then be converted to bird ID or however else the files are defined