dev-check-model-reproducibility-highres.Rmd
This notebook gathers the analyses performed to compare the
output of the original FishMap main.R
script between the Client
and ThinkR. We compare the results obtained with identical seeds
after executing the script
dev/run_main_and_save_output.R
.
Here, we use high resolution parameters :
k = 0.75
month_start <- 10
month_end <- 12
main.R
We generate the outputs on the ThinkR machine. Note: make sure your
.Renviron variables FISHMAP_UPDATE_OUTPUTS
and
FISHMAP_OUTPUT_DIR
are correctly set.
# Generate results (model files are compiled)
# Sources dev/run_main_and_save_output.R, located with here::here();
# presumably the script writes its outputs to FISHMAP_OUTPUT_DIR —
# TODO confirm against the script itself.
source(here::here("dev", "run_main_and_save_output.R"))
Executing this code results in four outputs to be compared.
# Read the output directory from the FISHMAP_OUTPUT_DIR environment variable
# (Sys.getenv() returns "" when the variable is unset) and list its content.
output_dir <- Sys.getenv("FISHMAP_OUTPUT_DIR")
list.files(path = file.path(output_dir))
## character(0)
To check whether the seed effectively makes the output
reproducible, we run main.R
a second time with the
same seed.
# Change output dir to avoid overriding previous run
# (Sys.setenv() only affects the current R session)
Sys.setenv(FISHMAP_OUTPUT_DIR = "~/shared/outputs_fishmap_highres_rerun")
# Run main.R a second time (model files are already compiled from first run)
source(here::here("dev", "run_main_and_save_output.R"))
## Error in file(filename, "r", encoding = encoding): cannot open the connection
Note: model fit is performed in 186 steps in this run.
We list the resulting output files.
# Directory of ThinkR's second run (the "rerun" folder); note that the
# `thinkr1_` prefix designates the second run here.
thinkr1_output_dir <- file.path("~","shared","outputs_fishmap_highres_rerun")
# Full paths of the files found there, one per line
thinkr1_output <- paste(
  list.files(thinkr1_output_dir, full.names = TRUE),
  collapse = "\n"
)
glue::glue("The paths to ThinkR's second run output files are :\n {thinkr1_output}")
## The paths to ThinkR's second run output files are :
# Directory of ThinkR's first run; note that the `thinkr2_` prefix
# designates the first run here.
thinkr2_output_dir <- file.path("~","shared","outputs_fishmap_highres")
# Full paths of the files found there, one per line
thinkr2_output <- paste(
  list.files(thinkr2_output_dir, full.names = TRUE),
  collapse = "\n"
)
glue::glue("The paths to ThinkR's first run output files are :\n {thinkr2_output}")
## The paths to ThinkR's first run output files are :
## /home/rstudio/shared/outputs_fishmap_highres/converge_output.rds
## /home/rstudio/shared/outputs_fishmap_highres/obj_input.rds
## /home/rstudio/shared/outputs_fishmap_highres/opt_output.rds
## /home/rstudio/shared/outputs_fishmap_highres/part1_output0.25.rds
## /home/rstudio/shared/outputs_fishmap_highres/part2_output0.25.rds
## /home/rstudio/shared/outputs_fishmap_highres/part3_output.rds
## /home/rstudio/shared/outputs_fishmap_highres/report_output.rds
In order to contrast output files from both ThinkR runs, we will use the package waldo.
# List of output files to contrast (taken from the second-run directory)
files_to_contrast <- list.files(path = file.path(thinkr1_output_dir))
# With no file to contrast, the comparison below returns `named list()`,
# which reads as "no difference" although nothing was compared: flag it.
if (length(files_to_contrast) == 0) {
  warning(
    "No output file found in ", thinkr1_output_dir,
    ": nothing to contrast between ThinkR runs",
    call. = FALSE
  )
}
# Run waldo on each file, comparing both ThinkR runs; the result is a list
# named after the contrasted files
purrr::map(
  .x = files_to_contrast,
  ~ waldo::compare(
    x = readRDS(
      file.path(thinkr1_output_dir, .x)
    ),
    y = readRDS(
      file.path(thinkr2_output_dir, .x)
    )
  )
) %>% setNames(files_to_contrast)
## named list()
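Both ThinkR outputs are identical. We will now use one of them to compare with the Client’s output. Caveat: in the rendering above, the comparison returned an empty named list() because no file was listed in the rerun directory, so this rendering does not itself demonstrate that the two runs are identical.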
We now load the outputs generated by the Clients (BA and JC) into a temporary folder.
# Temporary folder that will hold the Client outputs
tmp_folder <- tempfile(pattern = "fishmap_highres")
dir.create(tmp_folder)
# Fetch JC's high-resolution outputs archive from the Git repo and extract it
jc_zip_file_url <- "https://github.com/balglave/FishMap/files/10970908/outputs_fishmap_highres.zip"
download.file(
  jc_zip_file_url,
  destfile = file.path(tmp_folder, "jc_output.zip")
)
unzip(
  file.path(tmp_folder, "jc_output.zip"),
  exdir = file.path(tmp_folder, "jc_output")
)
# The archive is read from its top-level folder "outputs_fishmap_highres"
jc_output_dir <- file.path(tmp_folder, "jc_output", "outputs_fishmap_highres")
# Full paths of the extracted files, one per line
jc_output <- paste(
  list.files(jc_output_dir, full.names = TRUE),
  collapse = "\n"
)
glue::glue("The paths to Juliette's output files are :\n {jc_output}")
## The paths to Juliette's output files are :
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/jc_output/outputs_fishmap_highres/converge_output.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/jc_output/outputs_fishmap_highres/obj_input.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/jc_output/outputs_fishmap_highres/opt_output.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/jc_output/outputs_fishmap_highres/report_output.rds
# Fetch BA's high-resolution outputs archive from the Git repo and extract it
ba_zip_file_url <- "https://github.com/balglave/FishMap/files/10982408/outputs_fishmap_highres.zip"
download.file(
  ba_zip_file_url,
  destfile = file.path(tmp_folder, "ba_output.zip")
)
unzip(
  file.path(tmp_folder, "ba_output.zip"),
  exdir = file.path(tmp_folder, "ba_output")
)
# The archive is read from its top-level folder "outputs_fishmap_highres"
ba_output_dir <- file.path(tmp_folder, "ba_output","outputs_fishmap_highres")
# Full paths of the extracted files, one per line
ba_output <- paste(
  list.files(ba_output_dir, full.names = TRUE),
  collapse = "\n"
)
glue::glue("The paths to Baptiste's output files are :\n {ba_output}")
## The paths to Baptiste's output files are :
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/ba_output/outputs_fishmap_highres/converge_output.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/ba_output/outputs_fishmap_highres/obj_input.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/ba_output/outputs_fishmap_highres/opt_output.rds
## /tmp/RtmpMWf0bp/fishmap_highres5ded6e7c2666/ba_output/outputs_fishmap_highres/report_output.rds
Results between ThinkR and the Clients are not perfectly identical.
Important note: to compare numerical results, we set the tolerance on numerical differences to 10e-4 (i.e. 1e-3).
We find differences in the function code present in the outputs (see
the section on obj_input.rds
). This might indicate a
difference in package versions,
cf. the sessionInfo()
provided by the Clients.
We will use again waldo within a function to display the exact differences for each file.
# Create a function to explore waldo's output file by file between ThinkR and Baptiste + Juliette outputs
#' Contrast one output file between a Client and ThinkR
#'
#' Reads `file_name` from the Client directory selected by `author` and from
#' `thinkr1_output_dir`, then compares both objects with `waldo::compare()`
#' (at most 100 differences reported, numerical tolerance of 10e-4).
#'
#' @param file_name Name of the .rds file to read in both directories.
#' @param author Either "juliette" or "baptiste"; any other value is an error.
#' @return The object returned by `waldo::compare()`.
compare_output_file <- function(file_name, author) {
  # Pick the Client directory; the directories are globals of the notebook
  client_output_dir <- if (author == "juliette") {
    jc_output_dir
  } else if (author == "baptiste") {
    ba_output_dir
  } else {
    stop("author must be either juliette or baptiste")
  }
  message(glue::glue("contrasting output of {file_name} between thinkr and {author}"))
  # Client object is `x`, ThinkR object is `y`
  comparison <- waldo::compare(
    x = readRDS(file.path(client_output_dir, file_name)),
    y = readRDS(file.path(thinkr1_output_dir, file_name)),
    x_arg = author,
    y_arg = "thinkr",
    max_diffs = 100,
    tolerance = 10e-4
  )
  comparison
}
converge_output.rds
output
# Contrast converge_output.rds: Baptiste's output vs ThinkR's
compare_output_file(file_name = "converge_output.rds", author = "baptiste")
## contrasting output of converge_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/converge_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Contrast converge_output.rds: Juliette's output vs ThinkR's
compare_output_file(file_name = "converge_output.rds", author = "juliette")
## contrasting output of converge_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/converge_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
opt_output.rds
output
# Contrast opt_output.rds: Baptiste's output vs ThinkR's
compare_output_file(file_name = "opt_output.rds", author = "baptiste")
## contrasting output of opt_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/opt_output.rds', probable reason 'No such
## file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Contrast opt_output.rds: Juliette's output vs ThinkR's
compare_output_file(file_name = "opt_output.rds", author = "juliette")
## contrasting output of opt_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/opt_output.rds', probable reason 'No such
## file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
report_output.rds
output
# Contrast report_output.rds: Baptiste's output vs ThinkR's
compare_output_file(file_name = "report_output.rds", author = "baptiste")
## contrasting output of report_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/report_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Contrast report_output.rds: Juliette's output vs ThinkR's
compare_output_file(file_name = "report_output.rds", author = "juliette")
## contrasting output of report_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/report_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
obj_input.rds
output
# Contrast obj_input.rds: Juliette's output vs ThinkR's
compare_output_file(file_name = "obj_input.rds", author = "juliette")
## contrasting output of obj_input.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/obj_input.rds', probable reason 'No such
## file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
We encounter an error when contrasting the fn
and
env
elements of the obj_input.rds
object
between Baptiste’s version and ThinkR’s version. We therefore run
waldo element by element, skipping the objects of class
environment
, with a specific parameter
(ignore_function_env = TRUE
) that ignores function
environment comparison and allows the comparison to succeed. This error
might come from a difference in package versions (cf. the comment at the
beginning of the section).
# Manual comparison of obj_input.rds for Baptiste's output: set the file,
# the author label and the Client directory used by the statements below
file_name <- "obj_input.rds"
author <- "baptiste"
client_output_dir <- ba_output_dir
message(glue::glue("contrasting elemts of {file_name} element by element between thinkr and {author}"))
## contrasting elemts of obj_input.rds element by element between thinkr and baptiste
# Load the same file from the Client directory...
client_obj <- readRDS(
file.path(client_output_dir, file_name)
)
# ...and from the ThinkR directory (thinkr1_output_dir)
thinkr_obj <- readRDS(
file.path(thinkr1_output_dir, file_name)
)
## Warning in gzfile(file, "rb"): cannot open compressed file '/home/rstudio/shared/outputs_fishmap_highres_rerun/obj_input.rds', probable reason 'No such
## file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Flag the elements of the Client object that inherit from "environment",
# and keep the names of all the other ones
list_is_env <- purrr::map_lgl(
  client_obj,
  function(element) inherits(element, "environment")
)
list_names_not_env <- names(client_obj)[!list_is_env]
# Contrast each remaining element (Client as `x`, ThinkR as `y`) without
# comparing function environments; the result is named after the elements.
# NOTE(review): the tolerance here (10e-6) differs from the 10e-4 used in
# compare_output_file() — confirm this is intended.
purrr::map(
  list_names_not_env,
  function(element_name) {
    waldo::compare(
      x = client_obj[[element_name]],
      y = thinkr_obj[[element_name]],
      x_arg = author,
      y_arg = "thinkr",
      tolerance = 10e-6,
      max_diffs = 100,
      ignore_function_env = TRUE
    )
  }
) %>% setNames(list_names_not_env)
## Error in is_missing(y): object 'thinkr_obj' not found
# Delete the temporary folder and everything it contains.
# `recursive = TRUE` is required: without it unlink() does not delete
# directories, so the folder and the downloaded archives would be left behind.
unlink(tmp_folder, recursive = TRUE)