Modelling the rainfall occurrence in France
Let us use the SB distribution to model rain occurrence across France. The data used is extracted from the ECAD database.
It consists in daily rainfall occurrence at D=37 locations in France.
using SpatialBernoulli, Optimization,OptimizationOptimJL, Optim
using CSV,DataFrames
using Random
import ForwardDiff
Random.seed!(1234)Random.TaskLocalRNG()Getting the data
station_50Q = CSV.read("data/transformedECAD_stations.csv",DataFrame);
Yobs=Matrix(CSV.read("data/transformedECAD_Yobs.csv",header=false,DataFrame));
my_distance =Matrix(CSV.read("data/transformedECAD_locsdistances.csv",header=false,DataFrame));37×37 Matrix{Float64}:
0.0 389.896 307.699 482.271 344.316 … 317.574 422.329 184.346
389.896 0.0 173.655 155.985 725.256 654.644 543.313 571.883
307.699 173.655 0.0 327.626 605.781 616.454 369.69 468.686
482.271 155.985 327.626 0.0 826.539 692.529 696.731 665.99
344.316 725.256 605.781 826.539 0.0 345.157 486.874 162.749
394.101 783.497 678.832 871.133 101.807 … 302.446 587.462 212.348
293.731 627.068 483.636 749.883 169.486 440.745 317.394 181.57
234.259 546.886 401.97 672.817 230.258 439.144 272.019 177.973
300.506 456.444 286.283 604.072 417.315 586.314 122.598 344.704
207.579 498.035 476.966 532.98 398.37 159.817 626.323 257.754
⋮ ⋱ ⋮
461.379 71.9253 221.477 146.955 794.846 724.88 585.041 642.74
406.969 208.141 345.24 128.604 747.013 577.301 695.839 584.3
765.765 633.85 780.438 495.006 1058.97 … 773.231 1121.29 906.242
309.531 348.062 174.423 501.18 514.518 622.706 195.552 412.007
476.645 688.338 516.76 836.328 410.028 692.304 171.331 429.33
434.226 724.252 563.543 860.259 273.911 588.111 286.226 333.402
317.574 654.644 616.454 692.529 345.157 0.0 698.055 263.061
422.329 543.313 369.69 696.731 486.874 … 698.055 0.0 445.693
184.346 571.883 468.686 665.99 162.749 263.061 445.693 0.0First, we split the data in months. Explicit seasonality in the sense of periodic parameterisation is not treated here, see here for using this type of model in a periodic setting, or here for other parameterisations.
my_locations = hcat(station_50Q.LON_idx, station_50Q.LAT_idx)
nlocs = length(my_locations[:, 1])
select_month = function (m::Int64, dates, Y::AbstractMatrix)
indicesm = findall(month.(dates) .== m)
return Y[:, indicesm]
end
using Dates
date_start = Date(1973)
date_end = Date(2024) - Day(1)
every_year = date_start:Day(1):date_end
dates = every_year
Ymonths = [select_month(m, every_year, Yobs) for m in 1:12];12-element Vector{Matrix{Bool}}:
[0 0 … 1 0; 0 0 … 0 0; … ; 0 0 … 1 1; 0 0 … 0 0]
[0 0 … 0 0; 0 0 … 0 0; … ; 1 0 … 0 1; 0 0 … 0 0]
[1 1 … 1 1; 0 1 … 0 0; … ; 0 0 … 1 1; 1 0 … 1 1]
[0 1 … 0 0; 0 1 … 1 1; … ; 0 1 … 1 0; 1 1 … 0 0]
[1 1 … 0 0; 1 0 … 0 1; … ; 1 0 … 0 0; 1 1 … 0 0]
[1 0 … 0 1; 1 0 … 1 1; … ; 0 1 … 0 1; 1 1 … 0 1]
[0 0 … 0 1; 0 0 … 0 0; … ; 0 0 … 0 1; 0 0 … 0 1]
[1 0 … 0 1; 1 0 … 0 0; … ; 0 1 … 1 1; 1 1 … 0 1]
[0 0 … 0 1; 0 0 … 0 0; … ; 0 0 … 0 0; 0 0 … 0 1]
[0 0 … 1 1; 1 1 … 1 0; … ; 0 0 … 1 1; 0 0 … 1 1]
[0 0 … 1 1; 0 0 … 1 1; … ; 0 0 … 1 1; 0 0 … 1 1]
[0 0 … 1 1; 1 0 … 0 1; … ; 1 0 … 1 1; 0 0 … 1 1]Inference for each month
We can now estimate model parameters for each month, using the fit_mle_vfast method. The results are saved to the data folder. This is easily multi-thread-able.
using JLD2
using LineSearches
for imonth in 1:12
y = Ymonths[imonth]
n = length(y[1, :])
init_range = 600
init_order = 0.5
init_lambda = fill(0.4, nlocs)
init_d = SB(init_range, 1.0, init_order, init_lambda, my_distance)
#pairwise solution
tdist = maximum(my_distance) / 3
wp = 1.0 .* (my_distance .< tdist)
@time sol_fixednu = fit_mle_vfast(init_d, y, wp;solver = Optim.LBFGS(
linesearch = LineSearches.BackTracking()
)
, order=1/2,maxiters = 100, return_sol=false)
# Save to JLD2 file
save("data/vfastfitted_month_QMC100" * string(imonth) * ".jld2", Dict("d" => sol_fixednu))
end 7.739438 seconds (18.82 M allocations: 911.995 MiB, 5.44% gc time, 98.23% compilation time: <1% of which was recompilation)
0.139211 seconds (150.93 k allocations: 7.317 MiB)
0.222412 seconds (183.58 k allocations: 9.617 MiB)
0.136025 seconds (130.64 k allocations: 6.443 MiB)
0.156769 seconds (134.27 k allocations: 6.698 MiB)
0.173542 seconds (161.81 k allocations: 8.083 MiB)
0.259515 seconds (198.07 k allocations: 10.637 MiB)
0.169386 seconds (141.52 k allocations: 7.209 MiB)
0.137833 seconds (123.39 k allocations: 5.932 MiB)
0.172875 seconds (154.56 k allocations: 7.573 MiB)
0.158121 seconds (154.56 k allocations: 7.573 MiB)
0.155842 seconds (150.93 k allocations: 7.317 MiB)Visualise the results : parameters
vec_models_vfast = Vector{SB}(undef, 12)
for imonth in 1:12
vec_models_vfast[imonth] = load("data/vfastfitted_month_QMC100" * string(imonth) * ".jld2")["d"]
end
ntot = length(Yobs[1,:])
using CairoMakie
function plot_parameters(models::Vector{SB})
months=1:12
month_labels = ["Jan","Feb","Mar","Apr","May","Jun",
"Jul","Aug","Sep","Oct","Nov","Dec"]
ρ = [m.range for m in models]
λ = hcat([m.λ for m in models]...) # (n_sites × n_months)
fig = Figure(size = (900, 500))
ax1 = Axis(fig[1, 1],
xlabel = "Month",
ylabel = L"\rho",
title = L"Estimated range parameter $\rho$",
xticks = (months, month_labels)
)
scatter!(ax1, months, ρ)
lines!(ax1, months, ρ)
ax2 = Axis(fig[1, 2],
xlabel = "Month",
ylabel = L"\lambda_s",
title = L"Estimated rain probabilities $\lambda_s$",
xticks = (months, month_labels)
)
for s in 1:size(λ, 1)
lines!(ax2, months, λ[s, :], linewidth = 1, alpha = 0.6)
end
fig
end
plot_parameters(vec_models_vfast)
Visualise the results : comparing simulations to observations
The goal of stochastic weather generators is to be able to simulate quickly many plausible sequences of the meteorological variables, sharing the statistical properties of the observations.
An objective of this secific model is to accurately reproduce large-scale spatial events, such as widespread dry or wet days. We define the ROR indicator as ROR(n) = \frac{1}{D}\sum_{s=1}^D Y_s^{(n)} A low (high) value of ROR(n) denotes a dry (wet) day for many locations at the same time.
The distribution in the observations is compared to the distribution evaluated from many simulations, as well as the autocorrelation of this indicator, for each season.
Unsuprisingly, there is a clear lack of temporal dependance when using a simple SpatialBernoulli, suggesting the need for a structured model (such as HMMs! ). However, the distribution of the indicator itself is not that far off.
Nb = 10
D = 37
using StatsBase
using LinearAlgebra, NaNMath
begin
Ysvf = zeros(Bool, nlocs, ntot, Nb)
@time "Simulations Y" for i in 1:Nb
for t in 1:ntot
m = month(every_year[t])
Ysvf[:, t, i] = rand(vec_models_vfast[m]);
end
end
end
RRmax = 0
RORo = [mean(r .> RRmax) for r in eachcol(Yobs)]
RORs = [[mean(r .> RRmax) for r in eachcol(rr)] for rr in eachslice(Ysvf, dims=3)]
maxlag = 10
include("assets/utilities.jl")
begin
# Makie ROR distribution and autocorrelation (2×4 grid)
JJA = [6, 7, 8]
MAM = [3, 4, 5]
SON = [9, 10, 11]
DJF = [12, 1, 2]
SEASONS = [DJF, MAM, JJA, SON]
seasonname = ["DJF", "MAM", "JJA", "SON"]
idx_seasons = [findall(month.(every_year) .∈ tuple(season)) for season in SEASONS]
fig_ROR = Figure(fontsize=19)
wwww = 200
hhhh = 150
# Row 1: Distribution plots
for m in eachindex(idx_seasons)
row = 1
col = m
# Distribution subplot
ax_dist = Axis(fig_ROR[row, col],
xlabel="ROR",
ylabel=col == 1 ? "Distribution" : "",
title=seasonname[m],
width=wwww,
height=hhhh)
xax = 0:(1/D):1.0
xaxbin = vcat(xax, [1.01])
errorlinehist!(ax_dist, [RORs[i][idx_seasons[m]] for i in 1:Nb];
label="",
color=:gray,
secondarycolor=:gray, normalization=:probability,
bins=xaxbin,
errortype=:percentile,
percentiles=[0, 100],
alpha=0.5,
secondaryalpha=0.2,
centertype=:median)
scatter!(ax_dist, xax, [mean(RORo[idx_seasons[m]] .== x) for x in xax], color=:blue, markersize=6, label=m == 1 ? "Observations" : nothing)
ylims!(ax_dist, 0, 0.06)
col > 1 && hideydecorations!(ax_dist, grid=false)
# Autocorrelation subplot
row = 2
ax_acf = Axis(fig_ROR[row, col],
xlabel="Lag",
ylabel=col == 1 ? "ACF" : "",
width=wwww,
height=hhhh,
)
rorsim = [RORs[i][idx_seasons[m]] for i in 1:Nb]
acf_sim = [autocor(rorsim[i], 0:maxlag) for i in 1:length(rorsim)]
errorline!(ax_acf, 0:maxlag, stack(acf_sim, dims=1)',
color=:gray,
secondarycolor=:gray,
errortype=:percentile,
percentiles=[0, 100],
secondaryalpha=0.2,
centertype=:median)
# Observations
acf_obs = autocor(RORo, 0:maxlag)
scatter!(ax_acf, 0:maxlag, acf_obs, color=:blue, markersize=7)
col > 1 && hideydecorations!(ax_acf, grid=false)
end
Legend(
fig_ROR[:, 5],
[
[LineElement(color=:gray), PolyElement(color=:gray, alpha=0.2)],
MarkerElement(color=:blue, marker=:circle, markersize=8)
],
[
"SB",
"Observations"
]
)
resize_to_layout!(fig_ROR)
fig_ROR
end