% Copyright 2010 Benedict Escoto
%
% This file is part of FAViR.
%
% FAViR is free software: you can redistribute it and/or modify it
% under the terms of the GNU General Public License as published by
% the Free Software Foundation, either version 2 of the License, or
% (at your option) any later version.
%
% FAViR is distributed in the hope that it will be useful, but WITHOUT
% ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
% or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
% License for more details.
%
% You should have received a copy of the GNU General Public License
% along with FAViR. If not, see .
<>=
library(favir)
InitPaper()
LookupDFMult <- function(df, ..., allow.NULL=FALSE) {
# Like LookupDF, but allow each argument to have multiple values
# The output will be a DF with one row for each element in the args
lookup.args <- data.frame(...)
result <- data.frame()
for (i in RowIndicies(lookup.args)) {
single.lookup.args <- c(list(df=df),
as.list(lookup.args[i,, drop=FALSE]),
list(allow.NULL=allow.NULL))
output <- do.call(LookupDF, single.lookup.args)
result <- rbind(result, output)
}
return(result)
}
TransposeDF <- function(list.of.lists) {
# Create data frame from a list of lists
#
# Each list should have the same names. If the large list has
# names, they will be used as the data frame row names.
num.rows <- length(list.of.lists)
if (identical(num.rows, 0))
return(data.frame())
first.list <- list.of.lists[[1]]
Assert(length(first.list) >= 1, "First element of list.of.lists is empty")
# Initialize df with right columns and row names
return.df <- list()
Assert(!any(is.null(names(first.list))), "Each list must have full names")
for (name in names(first.list))
return.df[[name]] <- rep(NA, num.rows)
return.df <- as.data.frame(return.df)
# Now populate row by row
for (i in 1:num.rows) {
cur.row <- list.of.lists[[i]]
for (name in names(cur.row))
return.df[i, name] <- cur.row[[name]]
}
# Add row names and return
rownames(return.df) <- names(list.of.lists)
return(return.df)
}
################### Example Input Data -- Replace this with your own
incurred.devel.df <- data.frame(origin = c(1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 2003, 2004,
2005, 1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 2003, 2004, 1995,
1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 1995, 1996, 1997, 1998,
1999, 2000, 2001, 2002, 1995, 1996,
1997, 1998, 1999, 2000, 2001, 1995,
1996, 1997, 1998, 1999, 2000, 1995,
1996, 1997, 1998, 1999, 1995, 1996,
1997, 1998, 1995, 1996, 1997, 1995,
1996, 1995),
dev = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 27, 27, 27, 27, 27,
27, 27, 27, 27, 27, 39, 39, 39, 39,
39, 39, 39, 39, 39, 51, 51, 51, 51,
51, 51, 51, 51, 63, 63, 63, 63, 63,
63, 63, 75, 75, 75, 75, 75, 75, 87,
87, 87, 87, 87, 99, 99, 99, 99, 111,
111, 111, 123, 123, 135),
value = c(44, 42, 17, 10, 13, 2, 4, 2,
3, 4, 21, 13, 1331, 1244, 1088, 781,
937, 751, 1286, 911, 1398, 1130,
915, 3319, 3508, 3438, 3135, 3506,
2639, 3570, 5023, 4021, 3981, 4020,
4603, 4169, 4085, 4828, 3622, 4915,
6617, 4825, 4232, 4842, 4371, 4442,
5447, 3931, 5377, 7194, 4252, 4970,
4482, 4777, 5790, 4077, 5546, 4334,
5059, 4626, 4914, 6112, 4244, 4369,
5083, 4734, 5110, 6295, 4386, 5155,
4794, 5176, 4395, 5205, 4804, 4401,
5205, 4399))
## Another possible incurred triangle
## incurred.devel.df <- data.frame(origin = c(1981, 1982, 1983, 1984,
## 1985, 1986, 1987, 1988, 1989, 1990,
## 1981, 1982, 1983, 1984, 1985, 1986,
## 1987, 1988, 1989, 1981, 1982, 1983,
## 1984, 1985, 1986, 1987, 1988, 1981,
## 1982, 1983, 1984, 1985, 1986, 1987,
## 1981, 1982, 1983, 1984, 1985, 1986,
## 1981, 1982, 1983, 1984, 1985, 1981,
## 1982, 1983, 1984, 1981, 1982, 1983,
## 1981, 1982, 1981),
## dev = c(9, 9, 9, 9, 9, 9, 9, 9, 9,
## 9, 21, 21, 21, 21, 21, 21, 21, 21,
## 21, 33, 33, 33, 33, 33, 33, 33, 33,
## 45, 45, 45, 45, 45, 45, 45, 57, 57,
## 57, 57, 57, 57, 69, 69, 69, 69, 69,
## 81, 81, 81, 81, 93, 93, 93, 105,
## 105, 117 ),
## value = c(5012L, 106L, 3410L, 5655L,
## 1092L, 1513L, 557L, 1351L, 3133L,
## 2063L, 8269L, 4285L, 8992L, 11555L,
## 9565L, 6445L, 4020L, 6947L, 5395L,
## 10907L, 5396L, 13873L, 15766L,
## 15836L, 11702L, 10946L, 13112L,
## 11805L, 10666L, 16141L, 21266L,
## 22169L, 12935L, 12314L, 13539L,
## 13782L, 18735L, 23425L, 25955L,
## 15852L, 16181L, 15599L, 22214L,
## 26083L, 26180L, 18009L, 15496L,
## 22863L, 27067L, 18608L, 16169L,
## 23466L, 18662L, 16704L, 18834L))
incurred.devel.tri <- as.triangle(incurred.devel.df)
incurred.devel.tri.df <- cbind(data.frame(origin=1995:2006),
as.data.frame(unclass(incurred.devel.tri)))
paid.devel.df <- data.frame(origin = c(1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 2003, 2004, 2005,
2006, 1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 2003, 2004, 2005,
1995, 1996, 1997, 1998, 1999, 2000,
2001, 2002, 2003, 2004, 1995, 1996,
1997, 1998, 1999, 2000, 2001, 2002,
2003, 1995, 1996, 1997, 1998, 1999,
2000, 2001, 2002, 1995, 1996, 1997,
1998, 1999, 2000, 2001, 1995, 1996,
1997, 1998, 1999, 2000, 1995, 1996,
1997, 1998, 1999, 1995, 1996, 1997,
1998, 1995, 1996, 1997, 1995, 1996,
1995),
dev = c(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 27, 27, 27, 27, 27, 27, 27, 27,
27, 27, 39, 39, 39, 39, 39, 39, 39, 39,
39, 51, 51, 51, 51, 51, 51, 51, 51, 63,
63, 63, 63, 63, 63, 63, 75, 75, 75, 75,
75, 75, 87, 87, 87, 87, 87, 99, 99, 99,
99, 111, 111, 111, 123, 123, 135),
value = c(3, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
1, 503, 465, 534, 329, 399, 328, 443,
401, 326, 524, 323, 2474, 2621, 2541,
2204, 2496, 1849, 2566, 3078, 2372,
2784, 3719, 4122, 3807, 3673, 4304,
3124, 4208, 5459, 4132, 4094, 4618,
4192, 4242, 5197, 3693, 5074, 6748,
4194, 4882, 4374, 4616, 5674, 3966,
5474, 4303, 4997, 4544, 4827, 6031,
4164, 4350, 5041, 4679, 5051, 6244,
4382, 5111, 4761, 5145, 4394, 5172,
4787, 4394, 5191, 4398))
paid.devel.tri <- as.triangle(paid.devel.df)
paid.devel.tri.df <- cbind(data.frame(origin=1995:2006),
as.data.frame(unclass(paid.devel.tri)))
premium.df <- data.frame(origin=1995:2006, premium=rep(6000, 12))
premium.df$apriori.loss <- 0.8*premium.df$premium
premium.df$apriori.ratio <- premium.df$apriori.loss / premium.df$premium
################### LDF Selection functions
LDF.GetAgeToAge <- function(devel.df) {
# Return dataframe of all age-to-age LDFs
#
# Arg: development dataframe with origin, dev, and value fields
#
# Output: Dataframe of LDFs with fields origin, start.dev, end.dev,
# and LDF.
sorted.df <- devel.df[order(devel.df$origin, devel.df$dev), ]
len <- nrow(sorted.df)
Assert(len >= 2, "devel.df has too few rows")
result <- data.frame(origin=c(), start.dev=c(), end.dev=c(), LDF=c())
for (i in 1:(len - 1)) {
cur.row <- sorted.df[i, ]
next.row <- sorted.df[i+1, ]
if (cur.row$origin != next.row$origin)
next
if (cur.row$value == 0 || is.na(cur.row$value) || is.na(next.row$value))
next
LDF <- next.row$value / cur.row$value
result <- rbind(result, list(origin=cur.row$origin, start.dev=cur.row$dev,
end.dev=next.row$dev, LDF=LDF))
}
return(result)
}
LDF.GetAgeToAgeMatrix <- function(age.df) {
# Return matrix of AgeToAge factors
#
# Arg: age-to-age data frame as produced by LDF.GetAgeToAge
# Output: "formatted" matrix of age-to-age factors
origins <- sort(unique(age.df$origin))
dev.df <- unique(data.frame(from=age.df$start.dev, to=age.df$end.dev))
def.df <- dev.df[order(dev.df$from, dev.df$to), ]
result <- matrix(nrow=length(origins), ncol=nrow(dev.df))
rownames(result) <- origins
colnames(result) <- paste(dev.df$from, "to", dev.df$to)
for (i in seq(along=origins)) {
origin <- origins[i]
for (j in seq(along=colnames(result))) {
LDF <- LookupDF(age.df, origin=origin, start.dev=dev.df$from[j],
end.dev=dev.df$to[j], allow.NULL=TRUE)$LDF
if (!is.null(LDF))
result[i, j] <- LDF
}
}
return(result)
}
LDF.getAverages <- function(triangle.df, LDF.df) {
# Return DF showing different ways of averaging the LDFs
#
# Args: Two data frames, one with raw losses, and the other of LDFs,
# like the kind produced by LDF.GetAgeToAge
#
# Output: A data frame with the fields method (char), start.dev
# (number), end.dev (number), and LDF (number)
GetLDFs <- function(start.dev, end.dev) {
# Get data frame of matching LDFs from LDF.df, sorted by origin
sub.df <- LDF.df[LDF.df$start.dev == start.dev &
LDF.df$end.dev == end.dev, ]
return(sub.df[order(sub.df$origin), ])
}
GetOrigins <- function(start.dev, end.dev) {
# Return DF of start.value, end.value matching given start and end devs
start.df <- triangle.df[triangle.df$dev == start.dev, ]
end.df <- triangle.df[triangle.df$dev == end.dev, ]
joined.df <- merge(start.df, end.df, by="origin")
pruned.df <- data.frame(origin=joined.df$origin,
start.value=joined.df$value.x,
end.value=joined.df$value.y)
return(pruned.df[order(pruned.df$origin), ])
}
AvgLast <- function(start.dev, end.dev, n) {
# Return straight average of last n LDFs
sub.df <- GetLDFs(start.dev, end.dev)
num.rows <- nrow(sub.df)
if (num.rows <= n)
return(mean(sub.df$LDF))
else return(mean(sub.df$LDF[(num.rows - n + 1):num.rows]))
}
AvgXHiLo <- function(start.dev, end.dev) {
# Return average, skipping the high and low LDFs (if at least 3)
LDFs <- sort(GetLDFs(start.dev, end.dev)$LDF)
if (length(LDFs) >= 3)
return(mean(LDFs[2:(length(LDFs) - 1)]))
return(mean(LDFs))
}
AvgWeighted <- function(start.dev, end.dev, n) {
# Return weighted average of last n LDFs
sub.df <- GetOrigins(start.dev, end.dev)
num.rows <- nrow(sub.df)
if (nrow(sub.df) > n)
sub.df <- sub.df[(num.rows - n + 1):num.rows, ]
if (sum(sub.df$start.value) == 0)
return(NA)
return(sum(sub.df$end.value) / sum(sub.df$start.value))
}
interval.df <- unique(data.frame(start.dev=LDF.df$start.dev,
end.dev=LDF.df$end.dev))
result.df <- data.frame(method=c(), start.dev=c(), end.dev=c(), LDF=c())
for (i in RowIndicies(interval.df)) {
start.dev <- interval.df$start.dev[i]
end.dev <- interval.df$end.dev[i]
avg <- data.frame(method="Average", start.dev=start.dev,
end.dev=end.dev, LDF=AvgLast(start.dev, end.dev, Inf))
avg.xhilo <- data.frame(method="Avg xHi,Lo", start.dev=start.dev,
end.dev=end.dev, LDF=AvgXHiLo(start.dev, end.dev))
avg.last5 <- data.frame(method="Avg Last 5", start.dev=start.dev,
end.dev=end.dev,
LDF=AvgLast(start.dev, end.dev, 5))
avg.weighted <- data.frame(method="Weighted Avg",
start.dev=start.dev, end.dev=end.dev,
LDF=AvgWeighted(start.dev, end.dev, Inf))
avg.wlast5 <- data.frame(method="Weighted Last 5",
start.dev=start.dev, end.dev=end.dev,
LDF=AvgWeighted(start.dev, end.dev, 5))
result.df <- rbind(result.df, avg, avg.xhilo, avg.last5, avg.weighted,
avg.wlast5)
}
return(result.df)
}
LDF.getAvgMatrix <- function(avg.df) {
# Return matrix of LDFs averaged in various ways
#
# Arg: a data frame of averages, as given by LDF.getAverages
#
# Output: a labeled matrix of averages
methods <- unique(avg.df$method)
dev.df <- unique(data.frame(from=avg.df$start.dev, to=avg.df$end.dev))
dev.df <- dev.df[order(dev.df$from, dev.df$to), ]
result <- matrix(nrow=length(methods), ncol=nrow(dev.df))
rownames(result) <- methods
colnames(result) <- paste(dev.df$from, "to", dev.df$to)
for (i in seq(along=methods)) {
method <- methods[i]
for (j in seq(along=colnames(result))) {
avg <- LookupDF(avg.df, method=method, start.dev=dev.df$from[j],
end.dev=dev.df$to[j], allow.NULL=TRUE)$LDF
if (!is.null(avg))
result[i, j] <- avg
}
}
return(result)
}
LDF.AgeToUlt <- function(age.to.age.df, tail.factor) {
# Return data frame of LDFs to ultimate
#
# Args: a data frame of age to age factors, as produced by
# LDF.getAgeToAge, and a tail factor to ultimate
#
# Output: a data frame with fields dev (age) and LDF (to ultimate)
Assert(length(unique(age.to.age.df$start.dev)) == nrow(age.to.age.df))
sorted.df <- age.to.age.df[order(-age.to.age.df$start.dev), ]
df <- data.frame(dev=c(sorted.df$end.dev[1], sorted.df$start.dev),
LDF=cumprod(c(tail.factor, sorted.df$LDF)))
return(df[order(df$dev), ])
}
LDF.AgeToUltFDF <- function(age.to.ult.df, label="LDFs to Ultimate",
textsize="normalsize",
caption="Selected LDFs to Ultimate") {
# Return printable display of final age to ultimate LDFs
l <- c(list("LDFs to Ultimate"), as.list(age.to.ult.df$LDF))
fdf <- FavirDF(as.data.frame(l), label=label, caption=caption)
names(fdf) <- c("X", as.character(age.to.ult.df$dev))
headings <- as.character(age.to.ult.df$dev)
names(headings) <- as.character(age.to.ult.df$dev)
FieldHeadings(fdf) <- c(list(X=""), headings)
FieldGroup(fdf, "LDF") <- 2:length(fdf)
GroupHeading(fdf, "LDF") <- "Development Age"
TextSize(fdf) <- textsize
return(fdf)
}
###################### Tail Methods
Tail.McClenahan <- function(LDFs) {
# Compute tail method according to McClenahan's method
#
# Args: LDFs, a vector of age to age devel factors
#
# Output: single age to ultimate tail factor computed using
# McClenahan's method
LDFs <- pmax(LDFs, 1.0)
if (all(LDFs <= 1.000001))
return(1.0)
# This method doesn't work well with LDFs near zero, so bump them up
LDFs <- ifelse(LDFs <= 1.000001, 1.000001, LDFs)
cumulative.loss <- cumprod(c(1.0, LDFs))
inc.loss <- (cumulative.loss[2:length(cumulative.loss)] -
cumulative.loss[1:(length(cumulative.loss) - 1)])
tail.model <- lm(log(inc.loss) ~ seq(along=inc.loss))
r <- exp(coef(tail.model)[2])
remaining.loss <- sum(inc.loss[length(inc.loss)]*r^(1:100))
return(1 + remaining.loss / Last(cumulative.loss))
}
Tail.ModMcClenahan <- function(devs, cumulative) {
# Get tail function according to modified McClenahan's method
#
# This is a modified version of McClenahan's function. Unlike some
# tail functions, it allows the date of loss developments to be
# given explicitly, and returns a function extrapolating future
# payments.
#
# Args: devs, a vector of ages (e.g. accident year age in months)
# cumulative, a vector of cumulative payments at each age
#
# Output: a function from development age to cumulative loss.
Assert(length(devs) >= 2 && length(devs) == length(cumulative))
len <- length(devs)
df <- data.frame(dev=devs, cumulative=cumulative)
start <- c(Last(df$cumulative), 1/Last(df$dev))
names(start) <- c("ult", "r")
cum.model <- nls(cumulative ~ ult * (1 - exp(-r*dev)), df, start=start)
# Fit to last payment
r <- coef(cum.model)["r"]
ult <- coef(cum.model)["ult"]
pred.last <- ult * (1 - exp(-r*Last(devs)))
actual.last <- Last(cumulative)
# Now construct tail function
tail.func <- function(dev) {
# Return cumulative paid to date given dev
return(ult * actual.last / pred.last * (1 - exp(-r*dev)))
}
return(tail.func)
}
Tail.ExponentialDecay <- function(LDFs) {
# Compute tail method by the exponential decay method, with Boor's
# "improvement 2"
#
# Args: LDFs, a vector of age to age devel factors
#
# Output: single age to ultimate tail factor computed by assuming
# exponential decay of LDFs.
LDFs <- pmax(LDFs, 1.0)
if (all(LDFs <= 1.000001))
return(1.0)
# This method doesn't work well with LDFs near zero, so bump them up
LDFs <- ifelse(LDFs <= 1.000001, 1.000001, LDFs)
tail.model <- lm(log(LDFs - 1) ~ seq(along=LDFs))
r <- exp(coef(tail.model)[2])
future.LDFs <- 1 + (Last(LDFs)-1)*r^(1:100)
return(prod(future.LDFs))
}
Tail.Sherman <- function(LDFs, future.links=15) {
# Compute tail method by Sherman's method
#
# Args: LDFs, a vector of age to age devel factors
# future.links, the number of years to extrapolate to
#
# Output: single age to ultimate tail factor computed by assuming
# power law governs LDF decay
LDFs <- pmax(LDFs, 1.0)
if (all(LDFs <= 1.000001))
return(1.0)
# This method doesn't work well with LDFs near zero, so bump them up
LDFs <- ifelse(LDFs <= 1.000001, 1.000001, LDFs)
log.year <- log(seq(along=LDFs))
tail.model <- lm(log(LDFs - 1) ~ log.year)
future.indep <- data.frame(log.year=log(Last(seq(along=LDFs)) +
0:future.links))
future.LDFs <- 1 + exp(predict(tail.model, newdata=future.indep))
# Now fit future.LDFs to last link and cap at 1.0
tail <- (prod(future.LDFs[2:length(future.LDFs)]) *
(Last(LDFs) / future.LDFs[1]))
return(max(tail, 1.0))
}
Tail.Summary <- function(LDF.age.to.age.df) {
# Return matrix summarizing tail results
#
# Args: data frame with fields start.dev, end.dev, and LDF
# Output: matrix with one column and named rows
LDFs <- LDF.age.to.age.df$LDF
mcclenahan <- Tail.McClenahan(LDFs)
ModMcc <- Tail.ModMcClenahan(LDF.age.to.age.df$end.dev, cumprod(LDFs))
mod.mcc.tail <- ModMcc(1e6) / ModMcc(Last(LDF.age.to.age.df$end.dev))
expon <- Tail.ExponentialDecay(LDFs)
sherman <- Tail.Sherman(LDFs)
result <- matrix(c(mcclenahan, mod.mcc.tail, expon, sherman), nrow=4)
colnames(result) <- "Tail Factor to Ultimate"
rownames(result) <- c("McClenahan Method (exponential)",
"Modified McClenahan Method",
"Exponential Decay of LDFs to 1.0",
"Sherman Method (inverse power law)")
return(result)
}
############################# LDF Interpolation
LDF.MakeInterpolate <- function(LDFs, devs, TailCumulative) {
# Use simple algorithm to interpolate LDFs
#
# Input: vectors of LDFs to ultimate and development ages (same length)
# Also takes a tail function that computes cumulative at dev.
#
# Output: Function from development age(s) to LDF to ultimate(s).
Assert(length(LDFs) >= 2 && length(LDFs) == length(devs))
# Make tail function
tail.scale <- Last(LDFs) * TailCumulative(Last(devs))
TailLDFs <- function(dev) tail.scale / TailCumulative(dev)
# Make head function
power.b <- log((LDFs[1] - 1) / (LDFs[2] - 1)) / log(devs[1] / devs[2])
power.a <- (LDFs[1] - 1) / devs[1] ^ power.b
head.func <- function(dev) (power.a * dev ^ power.b) + 1
# For body, add extra point at head and tail for smoothness
prefix <- min(devs) * .99
suffix <- max(devs) * 1.01
body.func <- splinefun(c(prefix, devs, suffix),
c(head.func(prefix), LDFs, TailLDFs(suffix)))
InterpolateLDF <- function(dev.vec) {
# Return interpolated LDF given development age
return(ifelse(dev.vec < min(devs), head.func(dev.vec),
ifelse(dev.vec > max(devs), TailLDFs(dev.vec),
body.func(dev.vec))))
}
return(InterpolateLDF)
}
LDF.getUlt <- function(LDF.selected) {
# Return matrix summarizing selected LDFs
LDF.ultimate <- LDF.selected
for (i in (length(LDF.ultimate)-1):1)
LDF.ultimate[i] <- LDF.ultimate[i+1] * LDF.selected[i]
pct.reported <- 100/LDF.ultimate
result <- rbind(LDF.selected, LDF.ultimate, pct.reported)
rownames(result) <- c('Selected LDFs', 'LDFs to Ult', '% Reported')
return (result)
}
Latest <- function(df.tri) {
# Return the latest diagonal in data frame format from a data frame triangle
helper <- function(origin) {
# Return dev and value as list for last eval given origin
sub.df <- df.tri[df.tri$origin==origin, ]
if (nrow(sub.df) < 1)
return(list(origin=NA, dev=NA, value=NA))
return(LookupDF(df.tri, origin=origin, dev=max(sub.df$dev)))
}
origins <- unique(df.tri$origin)
return(TransposeDF(Map(helper, origins)))
}
######################### Basic method functions
RemoveNARows <- function(df) {
# Remove any row with an NA from a data frame and return result
for (i in seq(along=df))
df <- df[!is.na(df[[i]]), ]
return(df)
}
ChainladderFDF <- function(latest.df, LDF.selected.ult) {
# Return a favir data frame indicating ultimate for each origin
# using Chain Ladder on
result <- latest.df
result$ult.ldf <- LookupDFMult(LDF.selected.ult, dev=latest.df$dev)$LDF
result$pct.devel <- 1 / result$ult.ldf
result$ult <- result$value * result$ult.ldf
result <- FavirDF(result)
FieldHeadings(result) <- list(dev="Development Age", value="Latest Diagonal",
ult.ldf="LDF to Ultimate",
pct.devel="Percent Developed",
ult="Ultimate Loss")
FieldGroup(result, "money") <- c("value", "ult")
FieldGroup(result, "origin") <- "origin"
FieldGroup(result, "dev") <- "dev"
FieldGroup(result, "LDF") <- "ult.ldf"
FieldGroup(result, "percent") <- "pct.devel"
return(result)
}
Bornhuetter <- function(LDFs, latest, apriori) {
# Return a vector of Bornhuetter-Ferguson developed ultimate losses
#
# Args: LDFs: a vector of LDFs to ultimate
# latest: a vector of latest loss valuations matching LDFs
# apriori: a vector of pre-data loss estimates
Assert(length(LDFs) == length(latest) && length(latest) == length(apriori))
return((1 - 1/LDFs) * apriori + latest)
}
BornhuetterFDF <- function(latest.df, LDF.selected.ult, apriori.byyear) {
# Return a favir data frame indicating ultimate for each origin
# using Bornhuetter-Ferguson
df <- latest.df
df$ult.ldf <- LookupDFMult(LDF.selected.ult, dev=latest.df$dev)$LDF
df$pct.devel <- 1 / df$ult.ldf
df$a.priori <- LookupDFMult(apriori.byyear,
origin=latest.df$origin)$apriori.loss
df$ult <- Bornhuetter(df$ult.ldf, df$value, df$a.priori)
fdf <- FavirDF(df)
FieldHeadings(fdf) <- list(dev="Development Age", value="Latest Diagonal",
ult.ldf="LDF to Ultimate",
pct.devel="Percent Developed",
a.priori="A Priori Loss",
ult="BF Ultimate Loss")
FieldGroup(fdf, "origin") <- "origin"
FieldGroup(fdf, "money") <- c("value", "a.priori", "ult")
FieldGroup(fdf, "dev") <- "dev"
FieldGroup(fdf, "percent") <- "pct.devel"
return(fdf)
}
CapeCod <- function(latest.df, LDF.selected.ult, premium.byyear) {
# Run CapeCod method to compute ultimate losses
#
# Args: Two data frames, one that has the latest evaluations by year,
# another with matching LDFs
#
# Output: a favir data frame with the fields
# premium - the premium used for the computation
# used.premium - the Cape Cod "used up premium" for each year
# exp.ratio - the ratio of loss to used premium
df <- data.frame(origin=latest.df$origin, value=latest.df$value)
df$LDF <- LookupDFMult(LDF.selected.ult, dev=latest.df$dev)$LDF
df$premium <- LookupDFMult(premium.byyear, origin=latest.df$origin)$premium
df$used.premium <- df$premium / df$LDF
df$exp.ratio <- df$value / df$used.premium
fdf <- FavirDF(df)
FieldHeadings(fdf) <- list(value="Latest Diagonal", LDF="LDF to Ultimate",
premium="Total Premium",
used.premium="Used-Up Premium",
exp.ratio="Expected Loss Ratio")
FieldGroup(fdf, "origin") <- "origin"
FieldGroup(fdf, "money") <- c("value", "premium", "used.premium")
FieldGroup(fdf, "percent") <- "exp.ratio"
FieldGroup(fdf, "LDF") <- "LDF"
return(fdf)
}
MatrixToFDF <- function(matrix, ..., rowname.col=NULL) {
# Convenience function: convert matrix in one step to favir data frame
#
# if row.name.col is not NULL, row names are added to the data frame
# under a separate name
df <- as.data.frame(matrix)
if (!is.null(rowname.col)) {
rowname.df <- data.frame(rownames(matrix))
names(rowname.df) <- rowname.col
df <- cbind(rowname.df, df)
}
return(FavirDF(df, ...))
}
PrintCombinedTable <- function(triangle.fdf, long.df, label="", caption="",
orientation="sideways", textsize=NULL) {
# Print the combined three-layer LDF table
#
# Args:
# triangle.fdf - a favir data frame
# long.df - the same triangle in long form
# label, caption, textsize, orientation - see arguments to FavirDF
#
# Output: A data frame of LDFs averaged in various ways
# It also prints the latex of the combined table.
combined.table.head <- triangle.fdf
Label(combined.table.head) <- label
Caption(combined.table.head) <- caption
if (!is.null(textsize))
TextSize(combined.table.head) <- textsize
LDF.age.to.age <- LDF.GetAgeToAge(long.df)
LDF.age.matrix <- LDF.GetAgeToAgeMatrix(LDF.age.to.age)
LDF.age.fdf <- MatrixToFDF(LDF.age.matrix, label="Simple LDFs",
rowname.col="origin")
FieldHeadings(LDF.age.fdf) <- list(origin=origin.field.heading)
FieldGroup(LDF.age.fdf, "LDF") <- 2:length(LDF.age.fdf)
GroupHeading(LDF.age.fdf, "LDF") <- "Age to Age Loss Development Factors"
LDF.avg.df <- LDF.getAverages(long.df, LDF.age.to.age)
LDF.avg.fdf <- MatrixToFDF(LDF.getAvgMatrix(LDF.avg.df), rowname.col="method",
orientation=orientation, label="Average LDFs")
FieldHeadings(LDF.avg.fdf) <- list(method="")
FieldGroup(LDF.avg.fdf, "LDF") <- 2:length(LDF.age.fdf)
GroupHeading(LDF.avg.fdf, "LDF") <- "Averaged Age-to-Age LDFs"
print(RBindPreLatex(PreLatexFDF(combined.table.head), PreLatexFDF(LDF.age.fdf),
PreLatexFDF(LDF.avg.fdf)))
return(LDF.avg.df)
}
MakeSummaryFDFs <- function(summary.df, origin.field.heading) {
# Return summary FDFs, possibly splitting them between multiple tables
maxcols <- 5
summary.origins <- unique(summary.df$origin)
summary.methods <- unique(summary.df$method)
origin.groups <- split(summary.origins,
(seq(along=summary.origins) - 1) %/% maxcols)
MakeFDF <- function(origins) {
# Return FDF with columns for the origins given
subdf <- summary.df[summary.df$origin %in% origins, ]
summary.wide <- reshape::recast(subdf, method ~ origin,
id.var=c("method", "origin"))
Assert(nrow(summary.wide) == length(summary.methods))
Assert(ncol(summary.wide) >= 2 && ncol(summary.wide) <= maxcols + 1)
fdf <- FavirDF(summary.wide, label="summary",
caption="Multi-method Development Summary")
FieldGroup(fdf, "money") <- 2:length(fdf)
FieldHeadings(fdf) <- list(method="Method")
if (Last(names(fdf)) == "Total" && length(fdf) > 2) {
FieldGroup(fdf, "origins") <- 2:(length(fdf) - 1)
GroupHeading(fdf, "origins") <- paste("Ultimate by", origin.field.heading)
} else if (Last(names(fdf)) != "Total") {
FieldGroup(fdf, "origins") <- 2:length(fdf)
GroupHeading(fdf, "origins") <- paste("Ultimate by", origin.field.heading)
}
return(fdf)
}
return(lapply(origin.groups, MakeFDF))
}
################################## Functions for regression reserving
MakeDevModels <- function(dev, mack.lm) {
# Given the mack lm model, produce models with intercepts
#
# Args:
# dev - development period name
# mack.lm - the lm object produced by the MackChainLadder function
#
# Output: A list with three elements
# link.lm - the default mack model
# intercept.lm - a lm object with just an intercept term
# both.lm - model with both intercept and link
df <- data.frame(increment=mack.lm$model$y - mack.lm$model$x,
start=mack.lm$model$x, weights=mack.lm$weights)
return(list(dev=dev,
link.lm=lm(increment ~ start + 0, weights = weights, data=df),
intercept.lm=lm(increment ~ 1, weights = weights, data=df),
both.lm=lm(increment ~ start, weights = weights, data=df)))
}
GetLinkDF <- function(dev, link.lm, intercept.lm, both.lm) {
# Return data frame about fitting a single development period
#
# Args:
# dev - development period as a constant
# link.lm - the lm object produced by the MackChainLadder function
# intercept.lm - lm object with just an intercept term
# both.lm - lm object with both link and intercept
#
# Output: a data frame with these fields:
# dev - the development period description
# start - the initial loss amount
# actual - the actual ending loss amount
# link.only - the fitted final loss amounts using the link-only model
# both - fitted losses from model with intercept and link
# intercept.only - fitted losses from model with just intercept
starting.loss <- link.lm$model$start
predict.df <- data.frame(start=c(0, starting.loss))
df <- data.frame(dev=dev, start=c(0, starting.loss),
actual=c(NA, link.lm$model$increment))
df$link.only <- predict(link.lm, predict.df)
df$intercept.only <- predict(intercept.lm, predict.df)
df$both <- predict(both.lm, predict.df)
return(df)
}
DevModelStats <- function(dev.model.list) {
# Return a list of statistics from a model list as produced by MakeDevModels
link.summary <- summary(dev.model.list$link.lm)
intercept.summary <- summary(dev.model.list$intercept.lm)
both.summary <- summary(dev.model.list$both.lm)
t.values <- both.summary$coefficients[, "t value"]
powers <- both.summary$coefficients[, "Pr(>|t|)"]
link.RS <- 1 - (link.summary$sigma / intercept.summary$sigma)^2
return(list(dev.period=dev.model.list$dev, link.RS=link.RS,
both.RS=both.summary$r.squared,
link.t=t.values[2], intercept.t=t.values[1],
link.power=powers[2], intercept.power=powers[1]))
}
FullTriangleDF <- function(half.tri, full.tri) {
# Return a full triangle data frame with 0's
#
# Args:
# half.tri - a half triangle in matrix form, with NAs where there are
# unobserved elements
#
# full.tri - a fully developed triangle. The "dev" column does not
# have to match the half.tri.
#
# Output: A data frame with the columns origin, dev, value, and
# actual. the dev column will be taken from half.tri. actual will
# be TRUE or FALSE depending on whether the value was in the half
# triangle. Additionally, the observed part will be offset
# backwards one period for plotting.
half.df <- as.data.frame(half.tri)
full.df <- as.data.frame(full.tri)
Assert("triangle" %in% class(half.tri) && "triangle" %in% class(full.tri),
"Must call FullTriangleDF with two triangle objects")
Assert(nrow(half.df) == nrow(full.df),
"Error in triangle manipulation---number of rows don't match")
Assert(all(names(full.df) == c("origin", "dev", "value")))
# Sort them both identically
half.df <- half.df[order(half.df$dev, half.df$origin), ]
full.df <- full.df[order(full.df$dev, full.df$origin), ]
full.df$dev <- half.df$dev # in case dev ages have been overwritten
full.df <- rbind(data.frame(origin=unique(full.df$origin), dev=0, value=0),
full.df)
dev.periods <- unique(full.df$dev)
GetObserved <- function(origin, dev) {
# Return TRUE or FALSE depending on whether the next development
# period after dev has been observed
if (dev == max(dev.periods))
return(FALSE)
next.dev <- dev.periods[min(which(dev.periods > dev))]
Assert(length(next.dev) == 1 && next.dev > dev)
next.value <- LookupDF(half.df, origin=origin, dev=next.dev)$value
return(!is.na(next.value))
}
# Now update with "observed" column
full.df$actual <- mapply(GetObserved, origin=full.df$origin, dev=full.df$dev)
return(full.df)
}
MackSummaryFDF <- function(mack.model, triangle.df, origin.field.heading) {
# Return an favir data frame summarizing the results of given mack model
mack.byorigin <- summary(mack.model)$ByOrigin
Assert(nrow(mack.byorigin) == nrow(triangle.df),
"Number of rows in Mack Output doesn't match")
new.col <- data.frame(origin=triangle.df$origin)
mack.fdf <- FavirDF(cbind(new.col, mack.byorigin))
FieldHeadings(mack.fdf) <- list(origin=origin.field.heading,
Latest="Latest Diagonal",
Dev.To.Date="Percent Developed",
Ultimate="Mack Ultimate",
IBNR="Bulk Reserve",
Mack.S.E="Mack Standard Error",
`CV(IBNR)`="CV of Bulk Reserves")
FieldGroup(mack.fdf, "money") <- c("Latest", "Ultimate", "IBNR",
"Mack.S.E")
FieldGroup(mack.fdf, "percent") <- "Dev.To.Date"
FieldGroup(mack.fdf, "origin") <- "origin"
mack.totals <- summary(mack.model)$Totals
SummaryRow(mack.fdf) <- list(origin="Total",
Latest=mack.totals["Latest:", ],
Ultimate=mack.totals["Ultimate:", ],
IBNR=mack.totals["IBNR:", ],
Mack.S.E=mack.totals["Mack S.E.:", ],
`CV(IBNR)`=mack.totals["CV(IBNR):", ])
return(mack.fdf)
}
MackResidualPlots <- function(mack.model, triangle.long) {
# Make Mack model residual plots
#
# Args:
# mack.model - the MackChainLadder object
# triangle.long - a triangle in long data frame format.
#
# Output:
# a list of models with names "loss", "origin", "cal", and "dev"
resid <- residuals(mack.model)
resid$dev.in.units <- sort(unique(triangle.long$dev))[resid$dev.period]
loss.plot <- (qplot(x=fitted.value, y=standard.residuals, data=resid)
+ geom_hline(aes(x=0), color=favir.colors["M2"])
+ geom_smooth(aes(x=fitted.value, y=standard.residuals), se=FALSE)
+ labs(x="Predicted Loss", y="Standardized Residual"))
origin.plot <- (ggplot(data=resid)
+ geom_point(aes(x=origin.period, y=standard.residuals))
+ geom_hline(aes(x=0), color=favir.colors["M2"])
+ geom_smooth(aes(x=origin.period, y=standard.residuals), se=FALSE)
+ labs(x=origin.field.heading, y="Standardized Residual")
+ xlim(range(resid$origin.period)))
cal.plot <- (qplot(x=cal.period, y=standard.residuals, data=resid)
+ geom_hline(aes(x=0), color=favir.colors["M2"])
+ geom_smooth(aes(x=cal.period, y=standard.residuals), se=FALSE)
+ labs(x="Calendar Year", y="Standardized Residual")
+ xlim(range(resid$origin.period)))
dev.plot <- (qplot(x=dev.in.units, y=standard.residuals, data=resid)
+ geom_hline(aes(x=0), color=favir.colors["M2"])
+ geom_smooth(aes(x=dev.in.units, y=standard.residuals), se=FALSE)
+ labs(x="Starting Development Age", y="Standardized Residual")
+ scale_x_continuous(breaks=resid$dev.in.units))
return(list(loss=loss.plot, origin=origin.plot, cal=cal.plot, dev=dev.plot))
}
RegressionModels <- function(triangle.df, premium.df) {
# Return a list of lists of alternate age-to-age models
#
# Args:
# triangle.df - a triangle in long data-frame form
# premium.df - the premium by year
#
# Output:
# A list with length the number of age-to-age development links.
# Each element in the list corresponds to the output of MakeDevModels
# Make a triangle of loss ratios, to adjust for changing premium amounts
triangle.df$value <- (triangle.df$value /
LookupDFMult(premium.df, origin=triangle.df$origin)$premium)
loss.ratio.tri <- as.triangle(triangle.df)
mack.lr.model <- ChainLadder::MackChainLadder(loss.ratio.tri)
Assert(length(mack.lr.model$Models) == ncol(loss.ratio.tri) - 1,
"Expected one model for each triangle column, except last")
model.names <- paste(head(colnames(loss.ratio.tri), -1), "to",
tail(colnames(loss.ratio.tri), -1))
alt.dev.models <- lapply(seq(along=mack.lr.model$Models),
function(i) MakeDevModels(model.names[i],
mack.lr.model$Models[[i]]))
names(alt.dev.models) <- model.names
return(alt.dev.models)
}
RegressionGraph <- function(alt.dev.models) {
# Return graph of regression results given list of models
#
# Args: list of models as produced by RegressionModels
#
# Output: Graph summarizing the regression
summary.regression.df <- do.call(rbind,
lapply(alt.dev.models,
function(model.list) do.call(GetLinkDF, model.list)))
color_manual_values <- c(favir.colors["A5"], favir.colors["M3"],
favir.colors["M4"])
names(color_manual_values) <- c("Link Only", "Intercept Only", "Both Terms")
reg.graph <- (ggplot(data=summary.regression.df) + facet_wrap(~ dev)
+ geom_line(aes(x=start, y=link.only, colour="Link Only"))
+ geom_line(aes(x=start, y=both, colour="Both Terms"))
+ geom_line(aes(x=start, y=intercept.only,
colour="Intercept Only"))
+ scale_colour_manual("Model", color_manual_values)
+ geom_point(aes(x=start, y=actual)))
ylim <- range(summary.regression.df$actual
[!is.na(summary.regression.df$actual)])
yrange <- ylim[2] - ylim[1]
reg.graph <- (reg.graph + labs(x="Starting Loss Ratio",
y="Increase in Loss Ratio")
+ coord_cartesian(ylim=c(ylim[1] - 0.05 * yrange,
ylim[2] + 0.05 * yrange))
+ scale_y_continuous(breaks=round(ggplot2::breaks(ylim,
"width", 4), 2)))
return(reg.graph)
}
RegressionStats <- function(alt.dev.models) {
# Return favir data frame of regression statistics
#
# Args: list of models as produced by RegressionModels
#
# Output: a favir data frame summarizing regression statistics
reg.stats.fdf <- TransposeDF(lapply(alt.dev.models, DevModelStats))
reg.stats.fdf <- FavirDF(reg.stats.fdf)
FieldHeadings(reg.stats.fdf) <- list(dev.period="Development Period",
link.RS="Link Only $R^2$ \\%",
both.RS="$R^2$ \\%",
intercept.t="Intercept $t$-Value",
link.t="Link $t$-Value",
intercept.power="Intercept $p$-val \\%",
link.power="Link Power $p$-val \\%")
FieldGroup(reg.stats.fdf, "both") <- 3:7
GroupHeading(reg.stats.fdf, "both") <- "Fit Results: Link and Intercept Model"
FieldGroup(reg.stats.fdf, "percent") <- c("link.RS", "both.RS",
"intercept.power", "link.power")
return(reg.stats.fdf)
}
@
<>=
# Produce the latex introduction
IncludePrelude("Basic Reserving Techniques", "Benedict Escoto")
@
\tableofcontents
\section{Introduction}
This paper is part of the FAViR series. The first part of the paper
presents various basic reserve development methods in R. These
methods include:
\begin{itemize}
\item Chain Ladder
\item Bornhuetter-Ferguson
\item Cape-Cod (Standard-Buhlmann)
\item Mack Chain Ladder
\item Munich Chain Ladder
\end{itemize}
\noindent The last two use code courtesy of Markus Gesmann and
estimate reserve uncertainty as well as the expected value. The
second part of the paper places these techniques in a popular
statistical evaluation \cite{brosius1993, venter1998, barnett2000}
framework and presents a couple of basic diagnostics which may
indicate which technique is more appropriate for the data in question.
Although the Chain Ladder and Bornhuetter-Ferguson family of reserving
methods are well-covered on the actuarial syllabus \cite{friedland2009},
this R implementation may be useful for several reasons. First, if R
is used for other methods, it may be convenient to use basic methods
in R as a check. Second, this paper may facilitate the production of
automated reserving reports. Third, basic reserving diagnostics and
uncertainty measurements can be time consuming to program and display.
\section{Original Data}
This chapter does not contain any techniques, but simply prints the
input data used for later methods. The reserving techniques in this
paper require only basic information:
\begin{enumerate}
\item Paid and case-incurred losses by development age and origin
\item Earned premium by origin
\item A priori loss by origin (for the Bornhuetter-Ferguson method)
\end{enumerate}
\noindent where ``origin'' can be accident year, policy year, etc.
<>=
origin.field.heading <- "Accident Year"
incurred.devel.tri.df <- FavirDF(incurred.devel.tri.df, orientation="sideways",
label="incurred tri",
caption="Incurred Loss Triangle")
FieldGroup(incurred.devel.tri.df, "origin") <- "origin"
FieldGroup(incurred.devel.tri.df, "money") <- 2:length(incurred.devel.tri.df)
FieldHeadings(incurred.devel.tri.df) <- list(origin=origin.field.heading)
GroupHeading(incurred.devel.tri.df,
"money") <- "Incurred Loss by Development Age"
print(incurred.devel.tri.df)
@
<>=
paid.devel.tri.df <- FavirDF(paid.devel.tri.df,
orientation="sideways", label="paid tri",
caption="Paid Loss Triangle")
FieldGroup(paid.devel.tri.df, "origin") <- "origin"
FieldGroup(paid.devel.tri.df, "money") <- 2:length(paid.devel.tri.df)
FieldHeadings(paid.devel.tri.df) <- list(origin=origin.field.heading)
GroupHeading(paid.devel.tri.df, "money") <- "Paid Loss by Development Age"
print(paid.devel.tri.df)
@
All the required data is shown in this section. Figure \ref{incurred
tri} is the input triangle showing incurred losses by accident year
and development month. Figure \ref{paid tri} is the corresponding
record of paid losses. Figure \ref{premium} shows the premium and a
priori loss estimates by accident year.
<>=
premium.field.heading <- "Earned Premium"
premium.fdf <- FavirDF(premium.df, label="premium",
caption="Premium and A Priori Loss")
FieldGroup(premium.fdf, "origin") <- "origin"
FieldGroup(premium.fdf, "money") <- c("premium", "apriori.loss")
FieldGroup(premium.fdf, "percent") <- "apriori.ratio"
FieldHeadings(premium.fdf) <- list(origin=origin.field.heading,
premium=premium.field.heading,
apriori.loss="A Priori Loss",
apriori.ratio="A Priori Loss Ratio")
SummaryRow(premium.fdf) <- list(origin="Avg",
premium=mean(premium.fdf$premium),
apriori.loss=mean(premium.fdf$apriori.loss),
apriori.ratio=mean(premium.fdf$apriori.ratio))
print(premium.fdf)
@
\clearpage
\section{Basic Methods}
This chapter includes the traditional Chain Ladder and
Bornhuetter-Ferguson methods. They are performed separately on paid
and case-incurred losses.
\subsection{LDF Selection}
Figure \ref{Combined Paid LDF Table} shows LDFs derived from paid loss
triangles in the traditional manner. Below we will use the weighted
average LDFs as our selected paid age-to-age factors. LDFs for
incurred loss are presented in figure \ref{Combined Incurred LDF Table}.
<>=
LDF.paid.avg.df <- PrintCombinedTable(paid.devel.tri.df, paid.devel.df,
textsize="tiny",
label="Combined Paid LDF Table",
caption="Traditional LDF Exhibit based on Paid Loss")
@
<>=
LDF.paid.selected.age.to.age.df <- LDF.paid.avg.df[LDF.paid.avg.df$method ==
"Weighted Avg", ]
@
<>=
LDF.inc.avg.df <- PrintCombinedTable(incurred.devel.tri.df, incurred.devel.df,
textsize="tiny",
label="Combined Incurred LDF Table",
caption="Traditional LDF Exhibit Based on Incurred Loss")
@
<>=
LDF.inc.selected.age.to.age.df <- LDF.inc.avg.df[LDF.inc.avg.df$method ==
"Weighted Avg", ]
@
\subsection{Tail Selection}
One family of methods estimates tail factors by fitting the age-to-age
factors for older years to various curves. The tail factor can be
found by extrapolating the curve to infinity. This section performs
this fitting separately for paid and incurred loss.
For paid loss, the factors in \ref{Combined Paid LDF Table} are used. The
trailing LDFs used for fitting are shown in figure \ref{Tail Paid Fit Factors}
and the results are shown in figure \ref{Tail Paid Results}.
For incurred loss, the factors are taken from \ref{Combined Incurred
LDF Table}. The trailing LDFs used for fitting are shown in figure
\ref{Tail Inc Fit Factors} and the results are shown in figure
\ref{Tail Inc Results}.
<>=
LDF.paid.tail.fitted.df <- Last(LDF.paid.selected.age.to.age.df, 4)
print(MatrixToFDF(LDF.getAvgMatrix(LDF.paid.tail.fitted.df),
label="Tail Paid Fit Factors",
caption="Tail Factors to Fit: Paid Loss"))
@
<>=
tail.paid.results.fdf <- MatrixToFDF(Tail.Summary(LDF.paid.tail.fitted.df),
rowname.col="Method",
label="Tail Paid Results",
caption="Results of Tail Fitting: Paid Loss")
print(tail.paid.results.fdf)
@
<>=
LDF.inc.tail.fitted.df <- Last(LDF.inc.selected.age.to.age.df, 4)
print(MatrixToFDF(LDF.getAvgMatrix(LDF.inc.tail.fitted.df),
label="Tail Inc Fit Factors",
caption="Tail Factors to Fit: Incurred Loss"))
@
<>=
tail.inc.results.fdf <- MatrixToFDF(Tail.Summary(LDF.inc.tail.fitted.df),
rowname.col="Method",
label="Tail Inc Results",
caption="Results of Tail Fitting: Incurred Loss")
print(tail.inc.results.fdf)
@
\subsection{Final LDF Selection}
Selecting the modified McClenahan tail factor, we arrive at the final
LDFs to ultimate. Paid LDFs are in figure \ref{LDFs to Ultimate
paid}; figure \ref{LDFs to Ultimate inc} has incurred LDFs to ultimate.
<>=
TailPaidCumulative <- Tail.ModMcClenahan(LDF.paid.tail.fitted.df$end.dev,
cumprod(LDF.paid.tail.fitted.df$LDF))
tail.paid.factor <- (TailPaidCumulative(1e6) /
TailPaidCumulative(Last(LDF.paid.tail.fitted.df$end.dev)))
LDF.paid.selected.ult <- LDF.AgeToUlt(LDF.paid.selected.age.to.age.df,
tail.paid.factor)
print(LDF.AgeToUltFDF(LDF.paid.selected.ult, textsize="tiny",
caption="Selected LDFs to Ultimate: Paid Loss",
label="LDFs to Ultimate paid"))
@
<>=
TailIncCumulative <- Tail.ModMcClenahan(LDF.inc.tail.fitted.df$end.dev,
cumprod(LDF.inc.tail.fitted.df$LDF))
tail.inc.factor <- (TailIncCumulative(1e6) /
TailIncCumulative(Last(LDF.inc.tail.fitted.df$end.dev)))
LDF.inc.selected.ult <- LDF.AgeToUlt(LDF.inc.selected.age.to.age.df,
tail.inc.factor)
print(LDF.AgeToUltFDF(LDF.paid.selected.ult, textsize="tiny",
caption="Selected LDFs to Ultimate: Incurred Loss",
label="LDFs to Ultimate inc"))
@
\subsection{Chain Ladder}
Figure \ref{chainladder paid} shows the results by accident year of
apply the basic chain ladder technique on paid losses. Figure
\ref{chainladder inc} shows the results by accident year of apply the
basic chain ladder technique on incurred losses.
<>=
chainladder.paid.fdf <- ChainladderFDF(Latest(paid.devel.df),
LDF.paid.selected.ult)
FieldHeadings(chainladder.paid.fdf)$origin <- origin.field.heading
Caption(chainladder.paid.fdf) <- "Results of Chain Ladder Method on Paid Loss"
Label(chainladder.paid.fdf) <- "chainladder paid"
print(chainladder.paid.fdf)
@
<>=
chainladder.inc.fdf <- ChainladderFDF(Latest(incurred.devel.df),
LDF.inc.selected.ult)
FieldHeadings(chainladder.inc.fdf)$origin <- origin.field.heading
Caption(chainladder.inc.fdf) <- "Results of Chain Ladder on Incurred Loss"
Label(chainladder.inc.fdf) <- "chainladder inc"
print(chainladder.inc.fdf)
@
\subsection{Bornhuetter-Ferguson}
Basic reserves by accident year according to the Bornhuetter-Ferguson
method applied to paid loss are shown in figure \ref{bf paid}. Figure
\ref{bf inc} is the corresponding incurred loss exhibit.
<>=
bf.paid.fdf <- BornhuetterFDF(Latest(paid.devel.df), LDF.paid.selected.ult,
premium.fdf)
FieldHeadings(bf.paid.fdf)$origin <- origin.field.heading
Caption(bf.paid.fdf) <- "Results of Bornhuetter-Ferguson Method on Paid Loss"
Label(bf.paid.fdf) <- "bf paid"
print(bf.paid.fdf)
@
<>=
bf.inc.fdf <- BornhuetterFDF(Latest(incurred.devel.df), LDF.inc.selected.ult,
premium.fdf)
FieldHeadings(bf.inc.fdf)$origin <- origin.field.heading
Caption(bf.inc.fdf) <- "Results of Bornhuetter-Ferguson Method on Incurred Loss"
Label(bf.inc.fdf) <- "bf inc"
print(bf.inc.fdf)
@
\subsection{Cape Cod (Stanard-Buhlmann)}
The Cape Cod technique has two stages. The first, picking a prior
loss ratio, is shown in figure \ref{capecod paid 1} for paid loss and
in figure \ref{capecod inc 1} for incurred loss. The resulting loss
ratio, as shown in the last row, is the ratio of the sum of latest
diagonals with the used-up premium.
This loss ratio is then used as the a priori loss ratio in the
Bornhuetter-Ferguson technique to determine the ultimate loss. Figure
\ref{capecod paid 2} demonstrates this for paid loss. Incurred loss
is shown in figure \ref{capecod inc 2}.
<>=
capecod.paid.fdf <- CapeCod(Latest(paid.devel.df), LDF.paid.selected.ult,
premium.fdf)
FieldHeadings(capecod.paid.fdf)$origin <- origin.field.heading
Caption(capecod.paid.fdf) <- "Cape Cod Loss Ratio Selection: Paid Loss"
Label(capecod.paid.fdf) <- "capecod paid 1"
capecod.paid.selected.lr <- (sum(capecod.paid.fdf[["value"]]) /
sum(capecod.paid.fdf[["used.premium"]]))
SummaryRow(capecod.paid.fdf) <- list(origin="Total",
value=sum(capecod.paid.fdf$value),
premium=sum(capecod.paid.fdf$premium),
used.premium=sum(capecod.paid.fdf$used.premium),
exp.ratio=capecod.paid.selected.lr)
print(capecod.paid.fdf)
@
This loss ratio is applied in figure \ref{capecod paid 2} on paid loss to
obtain the ultimate loss according to the Cape Cod method.
<>=
capecod.paid.apriori <- data.frame(origin=premium.df$origin,
apriori.loss=(premium.df$premium *
capecod.paid.selected.lr))
capecod.paid.ult.fdf <- BornhuetterFDF(Latest(paid.devel.df),
LDF.paid.selected.ult,
capecod.paid.apriori)
FieldHeadings(capecod.paid.ult.fdf)$origin <- origin.field.heading
FieldHeadings(capecod.paid.ult.fdf)$ult <- "Cape Cod Ultimate"
Caption(capecod.paid.ult.fdf) <- "Results of Cape Cod Method on Paid Loss"
Label(capecod.paid.ult.fdf) <- "capecod paid 2"
print(capecod.paid.ult.fdf)
@
<>=
capecod.inc.fdf <- CapeCod(Latest(incurred.devel.df), LDF.inc.selected.ult,
premium.fdf)
FieldHeadings(capecod.inc.fdf)$origin <- origin.field.heading
Caption(capecod.inc.fdf) <- "Cape Cod Loss Ratio Selection: Incurred Loss"
Label(capecod.inc.fdf) <- "capecod inc 1"
capecod.inc.selected.lr <- (sum(capecod.inc.fdf[["value"]]) /
sum(capecod.inc.fdf[["used.premium"]]))
SummaryRow(capecod.inc.fdf) <- list(origin="Total",
value=sum(capecod.inc.fdf$value),
premium=sum(capecod.inc.fdf$premium),
used.premium=sum(capecod.inc.fdf$used.premium),
exp.ratio=capecod.inc.selected.lr)
print(capecod.inc.fdf)
@
<>=
capecod.inc.apriori <- data.frame(origin=premium.df$origin,
apriori.loss=(premium.df$premium *
capecod.inc.selected.lr))
capecod.inc.ult.fdf <- BornhuetterFDF(Latest(incurred.devel.df),
LDF.inc.selected.ult,
capecod.inc.apriori)
FieldHeadings(capecod.inc.ult.fdf)$origin <- origin.field.heading
FieldHeadings(capecod.inc.ult.fdf)$ult <- "Cape Cod Ultimate"
Caption(capecod.inc.ult.fdf) <- "Results of Cape Cod Method on Incurred Loss"
Label(capecod.inc.ult.fdf) <- "capecod inc 2"
print(capecod.inc.ult.fdf)
@
\clearpage
\section{The ChainLadder Package}
This chapter uses the ChainLadder R package by Markus Gesmann. See\\
{\tt http://code.google.com/p/chainladder/} for more information on
this package.
\subsection{Mack Chain Ladder}
Thomas Mack derived in 1993 a very straightforward stochastic model
under which the traditional Chain Ladder method would be
reasonable.\cite{mack1993} Mack's model can be used to calculate the
standard deviation of bulk reserves.
\subsubsection{Paid Loss}
The results of Mack's Chain Ladder fitted model applied to paid loss
are summarized in figure \ref{mack paid summary}. For each origin
period, the expected ultimate should exactly match the simple chain
ladder results in figure \ref{chainladder paid}. The expected
development is graphed in figure \ref{mack paid plot}. Figure
\ref{mack paid residuals} shows standardized residuals with a
smoothing guide line. Because chain ladder methods choose different
factors for each development age, the development age factors should
be unbiased. However, if the other plots show any significant trends,
it may indicate that the assumptions behind the chain ladder method do
not hold. Barnett and Zehnwirth in \cite{barnett2000} discuss the
interpretation of residual plots.
<>=
mack.paid.model <- ChainLadder::MackChainLadder(
paid.devel.tri, est.sigma="Mack", tail=tail.paid.factor)
mack.paid.fdf <- MackSummaryFDF(mack.paid.model, incurred.devel.tri.df,
origin.field.heading)
Caption(mack.paid.fdf) <- paste("Mack Chain Ladder Results: Paid Loss")
Label(mack.paid.fdf) <- "mack paid summary"
print(mack.paid.fdf)
@
<>=
mack.full.paid.tri <- mack.paid.model$FullTriangle
if (Last(colnames(mack.full.paid.tri)) == "Inf")
mack.full.paid.tri <- as.triangle(mack.full.paid.tri[,
1:(ncol(mack.full.paid.tri) - 1)])
mack.paid.plot.df <- FullTriangleDF(paid.devel.tri, mack.full.paid.tri)
mack.paid.plot <- (ggplot(data=mack.paid.plot.df) +
geom_line(aes(x=dev, y=value, color=actual, group=origin)) +
scale_colour_manual("Status",
values=as.vector(favir.colors[c("M5", "M2")]),
labels=c("Actual", "Projected")) +
labs(x="Development Age", y="Loss") +
scale_x_continuous(breaks=unique(mack.paid.plot.df$dev)))
IncludeGraph(mack.paid.plot,
caption="Mack Actual and Predicted Development on Paid Loss",
label="mack paid plot", width=13, height=8)
@
<>=
mack.paid.resid.plots <- MackResidualPlots(mack.paid.model, paid.devel.df)
IncludeGrid(list("1.1"=mack.paid.resid.plots$loss,
"1.2"=mack.paid.resid.plots$origin,
"2.1"=mack.paid.resid.plots$cal,
"2.2"=mack.paid.resid.plots$dev),
width=16.5, height=14, label="mack paid residuals",
caption="Mack Model Residuals: Paid Loss")
@
\subsubsection{Incurred Loss}
The results of Mack's Chain Ladder fitted model applied to
case-incurred loss are summarized in figure \ref{mack inc summary}.
For each origin period, the expected ultimate should exactly match the
simple chain ladder results in figure \ref{chainladder inc}. As with
the paid residual plot, bias or trends in figure \ref{mack inc
residuals} may indicate a failure of model assumptions.
<>=
mack.inc.model <- ChainLadder::MackChainLadder(
incurred.devel.tri, est.sigma="Mack", tail=tail.inc.factor)
mack.inc.fdf <- MackSummaryFDF(mack.inc.model, incurred.devel.tri.df,
origin.field.heading)
Caption(mack.inc.fdf) <- paste("Mack Chain Ladder Results: Incurred Loss")
Label(mack.inc.fdf) <- "mack inc summary"
print(mack.inc.fdf)
@
<>=
mack.full.inc.tri <- mack.inc.model$FullTriangle
if (Last(colnames(mack.full.inc.tri)) == "Inf")
mack.full.inc.tri <- as.triangle(mack.full.inc.tri[,
1:(ncol(mack.full.inc.tri) - 1)])
mack.inc.plot.df <- FullTriangleDF(incurred.devel.tri, mack.full.inc.tri)
mack.inc.plot <- (ggplot(data=mack.inc.plot.df) +
geom_line(aes(x=dev, y=value, color=actual, group=origin)) +
scale_colour_manual("Status",
values=as.vector(favir.colors[c("M5", "M2")]),
labels=c("Actual", "Projected")) +
labs(x="Development Age", y="Loss") +
scale_x_continuous(breaks=unique(mack.inc.plot.df$dev)))
IncludeGraph(mack.inc.plot,
caption="Mack Actual and Predicted Development on Incurred Loss",
label="mack inc plot", width=13, height=8)
@
<>=
mack.inc.resid.plots <- MackResidualPlots(mack.inc.model, incurred.devel.df)
IncludeGrid(list("1.1"=mack.inc.resid.plots$loss,
"1.2"=mack.inc.resid.plots$origin,
"2.1"=mack.inc.resid.plots$cal,
"2.2"=mack.inc.resid.plots$dev),
width=16.5, height=14, label="mack inc residuals",
caption="Mack Model Residuals: Incurred Loss")
@
\subsection{Munich Chain Ladder}
The Munich Chain Ladder technique is also included in the ChainLadder
package by Markus Gesmann. Typically running chain ladder techniques
separately on paid and incurred triangles results in different
ultimate loss picks. The Munich Chain Ladder incorporates information
from both triangles when selecting LDFs. The results of the method
are shown in figure \ref{munich results}.
The central idea of the Munich Chain Ladder is that the paid/incurred
loss ratios at the beginning of each development period provide extra
information about the loss development in that period. For instance,
if the paid/incurred ratio is unusually low, greater than normal paid
development is more likely. Figure \ref{munich residuals} shows how
paid and incurred residuals depend on the previous ratios of paid to
incurred loss. Munich method adjusts the expected paid development
based on the slope of the line in the left graph. The expected
incurred development is adjusted by the right line's slope.
<>=
munich.model <- ChainLadder::MunichChainLadder(paid.devel.tri,
incurred.devel.tri,
tailP=tail.paid.factor, tailI=tail.inc.factor)
new.col <- data.frame(origin=incurred.devel.tri.df$origin)
munich.fdf <- FavirDF(cbind(new.col, summary(munich.model)$ByOrigin),
caption="Munich Chain Ladder Results",
label="munich results")
FieldGroup(munich.fdf, "origin") <- "origin"
FieldGroup(munich.fdf, "money") <- c("Latest Paid", "Latest Incurred",
"Ult. Paid", "Ult. Incurred")
FieldGroup(munich.fdf, "percent") <- c("Latest P/I Ratio", "Ult. P/I Ratio")
FieldHeadings(munich.fdf) <- list(origin=origin.field.heading,
"Ult. Paid"="Ultimate Paid",
"Ult. Incurred"="Ultimate Incurred",
"Latest P/I Ratio"="Latest P/I (\\%)",
"Ult. P/I Ratio"="Ultimate P/I (\\%)")
munich.totals <- summary(munich.model)$Totals
SummaryRow(munich.fdf) <- list(origin="Totals",
"Latest Paid"=munich.totals["Latest:", "Paid"],
"Latest Incurred"=munich.totals["Latest:", "Incurred"],
"Latest P/I Ratio"=munich.totals["Latest:", "P/I Ratio"],
"Ult. Paid"=munich.totals["Ultimate:", "Paid"],
"Ult. Incurred"=munich.totals["Ultimate:", "Incurred"],
"Ult. P/I Ratio"=munich.totals["Ultimate:", "P/I Ratio"])
print(munich.fdf)
@
<>=
munich.resid.df <- data.frame(paid=munich.model$PaidResiduals,
inc=munich.model$IncurredResiduals,
qinv=munich.model$QinverseResiduals,
q=munich.model$QResiduals)
munich.resid.df <- RemoveNARows(munich.resid.df)
munich.paid.plot <- (ggplot(data=munich.resid.df)
+ geom_hline(aes(x=0), color=favir.colors["M2"])
+ geom_vline(aes(x=0), color=favir.colors["M2"]))
munich.inc.plot <- (munich.paid.plot + geom_point(aes(x=q, y=inc))
+ geom_abline(intercept=0, slope=coef(munich.model$lambdaI))
+ labs(x="Paid/Incurred Residuals",
y="Incurred Loss Residuals"))
munich.paid.plot <- (munich.paid.plot + geom_point(aes(x=qinv, y=paid))
+ geom_abline(intercept=0,
slope=coef(munich.model$lambdaP))
+ labs(x="Incurred/Paid Residuals",
y="Paid Loss Residuals"))
IncludeGrid(list("1.1"=munich.paid.plot, "1.2"=munich.inc.plot),
width=16.5, height=8,
caption="Munich Chain Ladder Standardized Residuals",
label="munich residuals")
@
\clearpage
\section{Assumption Testing}
The choice of a development method and age-to-age factors can be
considered a special case of linear regression. Each development
period is a separate regression where loss development, the dependent
variable, depends on the starting loss, the independent variable.
Once reserving is construed as linear regression, we can use the
standard plots and measures of regression to test the assumptions of
our methods.
Figure \ref{regression paid graph} illustrates the results of running
three linear regressions on each age period's paid loss. Each
regression corresponds to a different reserving model. If the
Bornhuetter-Ferguson or Cape-Cod model is correct, the expected
development during each period is independent of the previous
development. Thus the regression line should be horizontal.
According to the Chain Ladder method, the development should be
proportional to the current total loss; thus the regression line is
sloped but should have no intercept term. Finally we can consider the
possibility that the expected development has both a slope and
intercept term.
Figure \ref{regression paid stats} shows common regression statistics
on paid loss by development period. The $R^2$ of the intercept-only
model will always be $0\%$ by definition. A positive $R^2$ for the
link-only (chain ladder) model means that it ``explains'' more of the
variation than the constant development model does. If we include
both an intercept and a link parameter, the $t$- and $p$- values of
each may indicate which fits the data better. The further the
$t$-value is away from 0 and the smaller the $p$-value, the more
important that parameter is to loss development.
Figures \ref{regression inc graph} and \ref{regression inc stats} are
the analogous exhibits covering regression on incurred loss.
<>=
dev.models.paid <- RegressionModels(paid.devel.df, premium.df)
regression.paid.graph <- RegressionGraph(dev.models.paid)
IncludeGraph(regression.paid.graph, width=16.5, height=14,
caption="Regression by Development Period: Paid Loss",
label="regression paid graph")
@
<>=
reg.paid.stats.fdf <- RegressionStats(dev.models.paid)
Caption(reg.paid.stats.fdf) <- "Regression Statistics: Paid Loss"
Label(reg.paid.stats.fdf) <- "regression paid stats"
print(reg.paid.stats.fdf)
@
<>=
dev.models.inc <- RegressionModels(incurred.devel.df, premium.df)
regression.inc.graph <- RegressionGraph(dev.models.inc)
IncludeGraph(regression.inc.graph, width=16.5, height=14,
caption="Regression by Development Period: Incurred Loss",
label="regression inc graph")
@
<>=
reg.inc.stats.fdf <- RegressionStats(dev.models.inc)
Caption(reg.inc.stats.fdf) <- "Regression Statistics: Incurred Loss"
Label(reg.inc.stats.fdf) <- "regression inc stats"
print(reg.inc.stats.fdf)
@
\clearpage
\section{Summary of Results}
This section simply compiles the results of the various methods
covered earlier. Figures \ref{summary} and following show the results
in tabular form, while figure \ref{summary plot} has the same
information in a bar graph.
<>=
# Build a data frame that has all ultimates by origin and method
summary.df <- rbind(data.frame(method="Paid: Chain Ladder",
origin=chainladder.paid.fdf$origin,
ult=chainladder.paid.fdf$ult),
data.frame(method="Incurred: Chain Ladder",
origin=chainladder.inc.fdf$origin,
ult=chainladder.inc.fdf$ult),
data.frame(method="Paid: Bornhuetter-Ferguson",
origin=bf.paid.fdf$origin,
ult=bf.paid.fdf$ult),
data.frame(method="Incurred: Bornhuetter-Ferguson",
origin=bf.inc.fdf$origin,
ult=bf.inc.fdf$ult),
data.frame(method="Paid: Cape-Cod",
origin=capecod.paid.ult.fdf$origin,
ult=capecod.paid.ult.fdf$ult),
data.frame(method="Incurred: Cape-Cod",
origin=capecod.inc.ult.fdf$origin,
ult=capecod.inc.ult.fdf$ult),
data.frame(method="Paid: Mack Chain Ladder",
origin=mack.paid.fdf$origin,
ult=mack.paid.fdf$Ultimate),
data.frame(method="Incurred: Mack Chain Ladder",
origin=mack.inc.fdf$origin,
ult=mack.inc.fdf$Ultimate),
data.frame(method="Paid: Munich Chain Ladder",
origin=munich.fdf$origin,
ult=munich.fdf[["Ult. Paid"]]),
data.frame(method="Incurred: Munich Chain Ladder",
origin=munich.fdf$origin,
ult=munich.fdf[["Ult. Incurred"]]))
totals.rows <- plyr::ddply(summary.df, .(method),
function(df) data.frame(method=df$method[[1]],
origin="Total",
ult=sum(df$ult)))
summary.df <- rbind(summary.df, totals.rows)
summary.fdf.list <- MakeSummaryFDFs(summary.df, origin.field.heading)
for (fdf in summary.fdf.list)
print(fdf)
@
<>=
summary.plot.df <- summary.df[summary.df$origin != "Total", ]
average.rows <- plyr::ddply(summary.plot.df, .(method),
function(df) data.frame(method=df$method[[1]],
origin="Average",
ult=mean(df$ult)))
summary.plot.df <- rbind(summary.plot.df, average.rows)
summary.plot <- (ggplot(aes(x=method, y=ult), data=summary.plot.df)
+ geom_bar(stat="identity")
+ coord_flip()
+ facet_wrap(~ origin, ncol=5)
+ labs(x="Development Method", y="Ultimate Loss"))
IncludeGraph(summary.plot, caption="Multi-Method Development Summary Plot",
label="summary plot", height=20, width=16.5)
@
\clearpage
\section{Legal}
<>=
IncludeLegal(author="Benedict Escoto", year=2010)
@
\bibliographystyle{plain}
\bibliography{BasicReserving}
\end{document}