dev-check-model-reproducibility-lowres.Rmd
This notebook gathers the analyses performed in order to compare the
output of the original FishMap main.R
script between Client
and ThinkR. We will compare the results obtained with identical seeds
following the execution of the script
dev/run_main_and_save_output.R
.
Here, we use low resolution parameters :
k = 0.25
month_start <- 11
month_end <- 11
main.R
We generate the outputs on the ThinkR machine. Note: make sure your
.Renviron variables FISHMAP_UPDATE_OUTPUTS
and
FISHMAP_OUTPUT_DIR
are correctly set
# First ThinkR run: execute the dev script that generates the results
# (model files are compiled during this run)
source(
  file = here::here("dev", "run_main_and_save_output.R")
)
## Error in file(filename, "r", encoding = encoding): cannot open the connection
Note: model fit is performed in 118 steps in this run
Executing this code results in four outputs to be compared. Results are not shown here.
# Directory holding the generated outputs, configured through the
# FISHMAP_OUTPUT_DIR environment variable
output_dir <- Sys.getenv("FISHMAP_OUTPUT_DIR")
# Fail early with an explicit message: an unset variable yields "" and a
# missing directory lists as empty, which would otherwise only surface later
# as "cannot open the connection" errors when the outputs are read
if (!nzchar(output_dir) || !dir.exists(output_dir)) {
  stop(
    "FISHMAP_OUTPUT_DIR must point to an existing directory, got: '",
    output_dir, "'",
    call. = FALSE
  )
}
# Show the output files produced by the run
list.files(path = output_dir)
## character(0)
To check whether the seed is effectively making the output
reproducible, we will run a second time main.R
with the
same seed. Results are not shown here.
# Scratch directory that will hold every set of outputs to compare
tmp_folder <- tempfile(pattern = "fishmap_check")
dir.create(path = tmp_folder)
# Set aside the outputs of the first ThinkR run in their own sub-folder
dir.create(path = file.path(tmp_folder, "thinkr_output"))
fs::file_move(
  path = list.files(path = output_dir, full.names = TRUE),
  new_path = file.path(tmp_folder, "thinkr_output")
)
# Second ThinkR run of the same dev script
# (the model files were already compiled by the first run)
source(
  file = here::here("dev", "run_main_and_save_output.R")
)
## Error in file(filename, "r", encoding = encoding): cannot open the connection
# Set aside the outputs of the second ThinkR run in a dedicated sub-folder
# of the temporary directory
dir.create(path = file.path(tmp_folder, "thinkr_output_rerun"))
fs::file_move(
  path = list.files(path = output_dir, full.names = TRUE),
  new_path = file.path(tmp_folder, "thinkr_output_rerun")
)
We list the resulting output files.
# Folder holding the outputs of ThinkR's first run
thinkr1_output_dir <- file.path(tmp_folder, "thinkr_output")
# One path per line, ready to be interpolated in the message below
thinkr1_output <- paste0(
  list.files(
    path = thinkr1_output_dir,
    full.names = TRUE
  ),
  collapse = "\n"
)
# "thinkr_output" contains the FIRST run (the label used to say "second")
glue::glue("The paths to ThinkR's first run output files are :\n {thinkr1_output}")
## The paths to ThinkR's first run output files are :
# Folder holding the outputs of ThinkR's second run (the rerun)
thinkr2_output_dir <- file.path(tmp_folder, "thinkr_output_rerun")
thinkr2_output <- paste0(
  list.files(
    path = thinkr2_output_dir,
    full.names = TRUE
  ),
  collapse = "\n"
)
# "thinkr_output_rerun" contains the SECOND run (the label used to say "first")
glue::glue("The paths to ThinkR's second run output files are :\n {thinkr2_output}")
## The paths to ThinkR's second run output files are :
In order to contrast output files from both ThinkR runs, we will use the package waldo.
# Output files to contrast, as found in the first ThinkR run folder
files_to_contrast <- list.files(path = thinkr1_output_dir)
# Compare, file by file, the object saved by the first ThinkR run (x) with the
# one saved by the rerun (y); the resulting list is named after the files
setNames(
  purrr::map(
    .x = files_to_contrast,
    .f = function(.x) {
      waldo::compare(
        x = readRDS(file.path(thinkr1_output_dir, .x)),
        y = readRDS(file.path(thinkr2_output_dir, .x))
      )
    }
  ),
  files_to_contrast
)
## named list()
Both ThinkR outputs are identical. We will use one of them to now compare with the Client’s output.
We now load the outputs generated from Clients (BA and JC) in a temporary folder.
# Fetch the archive of JC's outputs attached to the Git repo ...
jc_zip_file_url <- "https://github.com/balglave/FishMap/files/10897028/outputs_fishmap.zip"
download.file(url = jc_zip_file_url, destfile = file.path(tmp_folder, "jc_output.zip"))
# ... and extract it next to the ThinkR outputs in the temporary folder
unzip(zipfile = file.path(tmp_folder, "jc_output.zip"), exdir = file.path(tmp_folder, "jc_output"))
# Folder containing the extracted files
jc_output_dir <- file.path(tmp_folder, "jc_output")
# Full paths of the extracted files, one per line
jc_output <- paste0(list.files(path = jc_output_dir, full.names = TRUE), collapse = "\n")
glue::glue("The paths to Juliette's output files are :\n {jc_output}")
## The paths to Juliette's output files are :
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/jc_output/converge_output.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/jc_output/obj_input.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/jc_output/opt_output.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/jc_output/report_output.rds
# Fetch the archive of BA's outputs attached to the Git repo ...
ba_zip_file_url <- "https://github.com/balglave/FishMap/files/10912068/shared.zip"
download.file(url = ba_zip_file_url, destfile = file.path(tmp_folder, "ba_output.zip"))
# ... and extract it in the temporary folder
unzip(zipfile = file.path(tmp_folder, "ba_output.zip"), exdir = file.path(tmp_folder, "ba_output"))
# The output files are read from the shared/outputs_fishmap sub-folder of the
# extraction directory
ba_output_dir <- file.path(tmp_folder, "ba_output", "shared", "outputs_fishmap")
# Full paths of the extracted files, one per line
ba_output <- paste0(list.files(path = ba_output_dir, full.names = TRUE), collapse = "\n")
glue::glue("The paths to Baptiste's output files are :\n {ba_output}")
## The paths to Baptiste's output files are :
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/ba_output/shared/outputs_fishmap/converge_output.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/ba_output/shared/outputs_fishmap/obj_input.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/ba_output/shared/outputs_fishmap/opt_output.rds
## /tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/ba_output/shared/outputs_fishmap/report_output.rds
Results between ThinkR and Client’s are not perfectly identical.
Important note: to compare numerical results, we set the tolerance on numerical differences to 10e-4 (i.e. 0.001).
We find differences in function code present in the outputs (see
section on obj_input.rds
). This might indicate a
difference in package version. Could you please
indicate the R
and TMB
package version you
used for generating the outputs ?
We will again use waldo, wrapped in a function, to display the exact differences for each file.
# Contrast one output file between a client (Baptiste or Juliette) and ThinkR.
#
# file_name: name of the .rds file, looked up in both output folders
# author:    "juliette" or "baptiste"; selects the client output folder,
#            any other value raises an error
#
# Relies on jc_output_dir, ba_output_dir and thinkr1_output_dir being defined
# in the enclosing environment. Returns the waldo::compare() result, with the
# client object as `x` and the ThinkR first-run object as `y`.
compare_output_file <- function(file_name, author) {
  # Pick the folder holding the requested client's outputs
  client_output_dir <- if (author == "juliette") {
    jc_output_dir
  } else if (author == "baptiste") {
    ba_output_dir
  } else {
    stop("author must be either juliette or baptiste")
  }

  # Announce which comparison is being run
  message(glue::glue("contrasting output of {file_name} between thinkr and {author}"))

  # Same file name on both sides, only the folder differs
  client_file <- file.path(client_output_dir, file_name)
  thinkr_file <- file.path(thinkr1_output_dir, file_name)

  # Numerical differences below the tolerance are ignored; up to 100
  # differences are reported
  waldo::compare(
    x = readRDS(client_file),
    y = readRDS(thinkr_file),
    x_arg = author,
    y_arg = "thinkr",
    max_diffs = 100,
    tolerance = 10e-4
  )
}
converge_output.rds
output
# Baptiste's converge_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "converge_output.rds",
  author = "baptiste"
)
## contrasting output of converge_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/converge_output.rds', probable reason
## 'No such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Juliette's converge_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "converge_output.rds",
  author = "juliette"
)
## contrasting output of converge_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/converge_output.rds', probable reason
## 'No such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
opt_output.rds
output
# Baptiste's opt_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "opt_output.rds",
  author = "baptiste"
)
## contrasting output of opt_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/opt_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Juliette's opt_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "opt_output.rds",
  author = "juliette"
)
## contrasting output of opt_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/opt_output.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
report_output.rds
output
# Baptiste's report_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "report_output.rds",
  author = "baptiste"
)
## contrasting output of report_output.rds between thinkr and baptiste
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/report_output.rds', probable reason
## 'No such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Juliette's report_output.rds against the ThinkR first-run output
compare_output_file(
  file_name = "report_output.rds",
  author = "juliette"
)
## contrasting output of report_output.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/report_output.rds', probable reason
## 'No such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
obj_input.rds
output
# Juliette's obj_input.rds against the ThinkR first-run output
compare_output_file(
  file_name = "obj_input.rds",
  author = "juliette"
)
## contrasting output of obj_input.rds between thinkr and juliette
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/obj_input.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
We encounter an error when contrasting the fn
and
env
elements of the obj_input.rds
object
between Baptiste’s version and ThinkR’s version. We run
waldo element by element except for the object of class
environment
, with a specific parameter
(ignore_function_env = TRUE
) that ignores function
environment comparison and allows the comparison to succeed. This error
might come from a difference in package version (cf. comment at the
beginning of section).
# Baptiste's obj_input.rds is contrasted element by element, so both objects
# are loaded here directly instead of going through compare_output_file()
file_name <- "obj_input.rds"
author <- "baptiste"
client_output_dir <- ba_output_dir
message(
  glue::glue("contrasting elemts of {file_name} element by element between thinkr and {author}")
)
## contrasting elemts of obj_input.rds element by element between thinkr and baptiste
# Client object on one side, ThinkR first-run object on the other
client_obj <- readRDS(file = file.path(client_output_dir, file_name))
thinkr_obj <- readRDS(file = file.path(thinkr1_output_dir, file_name))
## Warning in gzfile(file, "rb"): cannot open compressed file '/tmp/RtmpMWf0bp/fishmap_check5dedf4b1332/thinkr_output/obj_input.rds', probable reason 'No
## such file or directory'
## Error in gzfile(file, "rb"): cannot open the connection
# Flag the elements of the client object that are environments and keep the
# names of all the other elements
list_is_env <- purrr::map_lgl(
  client_obj,
  function(.x) inherits(.x, "environment")
)
list_names_not_env <- names(client_obj)[!list_is_env]
# Contrast each remaining element between the client and ThinkR, ignoring
# function environments; the resulting list is named after the elements.
# NOTE(review): the tolerance here (10e-6) is tighter than the 10e-4 used in
# compare_output_file() - confirm this is intended
setNames(
  purrr::map(
    .x = list_names_not_env,
    .f = function(.x) {
      waldo::compare(
        x = client_obj[[.x]],
        y = thinkr_obj[[.x]],
        x_arg = author,
        y_arg = "thinkr",
        tolerance = 10e-6,
        max_diffs = 100,
        ignore_function_env = TRUE
      )
    }
  ),
  list_names_not_env
)
## Error in is_missing(y): object 'thinkr_obj' not found
# Delete the temporary folder and everything it contains; unlink() leaves
# directories untouched unless recursive = TRUE
unlink(tmp_folder, recursive = TRUE)