Skip to content
Snippets Groups Projects
Commit 94752e4d authored by Reinhold Kainhofer's avatar Reinhold Kainhofer
Browse files

Describe table creation in vignette, build fixes

parent 83f3d949
No related branches found
No related tags found
No related merge requests found
......@@ -13,7 +13,8 @@ Depends:
methods,
scales,
utils,
pracma
pracma,
R (>= 2.10)
Suggests:
lifecontingencies,
MortalityLaws,
......@@ -29,6 +30,7 @@ Description: Classes to implement and plot cohort life tables
License: GPL (>= 2)
RoxygenNote: 7.1.1
Collate:
'DocumentData.R'
'mortalityTable.R'
'mortalityTable.period.R'
'mortalityTable.ageShift.R'
......
#' Austrian population count (exposure) and deaths in 2017
#'
#' @format A data frame holding female, male and total exposures as well as death
#' counts, indexed by age.
#'
#' @usage data(PopulationData.AT2017)
#' @source \url{http://www.mortality.org/}, \url{https://www.statistik.at/}
"PopulationData.AT2017"
......@@ -733,10 +733,10 @@ setMethod("mT.round", "mortalityTable.period",
setMethod("mT.round", "mortalityTable.trendProjection",
function(object, digits = 8) {
o = callNextMethod()
if (!is.null(o@trend) && !is.na(o@trend)) {
if (!is.null(o@trend) && !all(is.na(o@trend))) {
o@trend = round(o@trend, digits = digits)
}
if (!is.null(o@trend2) && !is.na(o@trend2)) {
if (!is.null(o@trend2) && !all(is.na(o@trend2))) {
o@trend2 = round(o@trend2, digits = digits)
}
o
......@@ -746,7 +746,7 @@ setMethod("mT.round", "mortalityTable.improvementFactors",
function(object, digits = 8) {
o = callNextMethod()
o@improvement = round(o@improvement, digits = digits)
if (!is.null(o@loading) && !is.na(o@loading)) {
if (!is.null(o@loading) && !all(is.na(o@loading))) {
o@loading = round(o@loading, digits = digits)
}
o
......
File added
library(usethis)
library(readxl)
library(here)
PopulationData.AT2017 = read_excel(here("data-raw", "Austria_Population2017_HMD_StatistikAustria.xlsx"), skip = 3)
usethis::use_data(PopulationData.AT2017)
File added
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DocumentData.R
\docType{data}
\name{PopulationData.AT2017}
\alias{PopulationData.AT2017}
\title{Austrian population count (exposure) and deaths in 2017}
\format{
A data frame holding female, male and total exposures as well as death
counts, indexed by age.
}
\source{
\url{http://www.mortality.org/}, \url{https://www.statistik.at/}
}
\usage{
data(PopulationData.AT2017)
}
\description{
Austrian population count (exposure) and deaths in 2017
}
\keyword{datasets}
## ----echo = FALSE-------------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tidyverse.quiet = TRUE)
## ----message=FALSE------------------------------------------------------------
library("MortalityTables")
......@@ -151,3 +152,123 @@ AVOe2005R.female.mod = setModification(AVOe2005R.female, modification = function
AVOe2005R.female.mod@name = "Modified table (lower bound of 3%)"
plot(AVOe2005R.female, AVOe2005R.female.mod, title = "Original and modified table")
## ----AustrianPopulation.RawData-----------------------------------------------
library(tidyverse)
data("PopulationData.AT2017", package = "MortalityTables")
PopulationData.AT2017.raw = PopulationData.AT2017 %>%
select(age, exposure.total, deaths.total) %>%
mutate(qraw = deaths.total / (exposure.total + deaths.total/2))
## ----AustrianPopulationTableRaw-----------------------------------------------
PopulationTable.AT2017 = mortalityTable.period(
name = "Austrian Population Mortality 2017 (raw)",
baseYear = 2017,
deathProbs = PopulationData.AT2017.raw$qraw,
ages = PopulationData.AT2017.raw$age,
exposures = PopulationData.AT2017.raw$exposure.total,
data = list(
deaths = PopulationData.AT2017.raw$deaths.total,
dim = list(sex = "u", collar = "Population", type = "raw", year = "2017")
)
)
plotMortalityTables(PopulationTable.AT2017, title = "Austrian population mortality (raw), 2017")
## ----AustrianPopulationTableSmooth--------------------------------------------
PopulationTable.AT2017.smooth = PopulationTable.AT2017 %>%
whittaker.mortalityTable(lambda = 1/10, d = 2, name.postfix = ", Whittaker") %>%
mT.setDimInfo(type = "smoothed")
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, title = "Austrian population mortality (raw and smoothed), 2017") +
aes(colour = type)
## ----AustrianPopulationTableCut100--------------------------------------------
PopulationData.AT2017.raw %>% filter(age > 90)
PopulationTable.AT2017.cut = PopulationTable.AT2017.smooth %>%
mT.fillAges(0:99) %>%
mT.setName("Austrian Population Mortality 2017, Whittaker-smoothed and cut at age 99")
## ----AustrianPopulationTableExtrapolated--------------------------------------
PopulationTable.AT2017.ex = PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "smoothed and extrapolated")
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, PopulationTable.AT2017.ex, title = "Austrian population mortality (raw and smoothed), 2017") +
aes(colour = type)
## ----AustrianPopulationTableFitComparison-------------------------------------
plotMortalityTables(
PopulationTable.AT2017,
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "Extrapolation: HP2, Fit 75--99"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:85, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "Extrapolation: HP, Fit 75--85"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 90:110, extrapolate = 80:120, fadeIn = 90:100) %>%
mT.setDimInfo(type = "Extrapolation: HP2, Fit 90--110"),
title = "Examples of different fitting ranges for extrapolation") +
aes(colour = type)
## ----AustrianPopulationTableFitFunctionComparison-----------------------------
plotMortalityTables(
PopulationTable.AT2017,
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "HP2"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "thiele", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "thiele"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "ggompertz", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "ggompertz"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "carriere1", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "carriere1"),
title = "Examples of different fitting functions for extrapolation (fit 75--99)",
ages = 75:120, legend.position = "bottom", legend.key.width = unit(15, "mm")) +
aes(colour = type) + labs(colour = "Mortality Law")
## ----AustrianPopulationTableTrendForecast-------------------------------------
mortalityTables.load("Austria_PopulationForecast")
plotMortalityTrend(mort.AT.forecast, title = "Forecast trend (medium scenario) by Statistik Austria")
## ----AustrianPopulationTableTrend---------------------------------------------
PopulationTable.AT2017.trend = PopulationTable.AT2017.ex %>%
mT.addTrend(mort.AT.forecast$m@trend, trendages = ages(mort.AT.forecast$m)) %>%
mT.setDimInfo(type = "smoothed, extrapolated, trend")
PopulationTable.AT2017.trend.ex = PopulationTable.AT2017.trend %>%
mT.extrapolateTrendExp(95) %>%
mT.setDimInfo(type = "smoothed, extrapolated, trend extrapolated")
plotMortalityTrend(PopulationTable.AT2017.trend, PopulationTable.AT2017.trend.ex,
title = "Extrapolating the trend via Exponential function") +
aes(color = type)
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, PopulationTable.AT2017.ex, PopulationTable.AT2017.trend.ex, YOB = 1980, title = "Austrian population mortality (Period 2017 vs. Generation 1980)", legend.position = c(0.01, 0.99), legend.justification = c(0,1)) +
aes(colour = type)
## ----AustrianPopulationTableFinal---------------------------------------------
# Lots of non-essential or support information is stored inside the table's data field:
PopulationTable.AT2017.trend.ex@data$whittaker
# Clean up the table (remove all non-essential data, but do not modify results)
PopulationTable.AT2017.Cohort.FINAL = PopulationTable.AT2017.trend.ex %>%
mT.cleanup() %>%
mT.round(digits = 6) %>%
mT.setName("Austrian Population Mortality, Period 2017 with trend projection")
## ----AustrianPopulationTableScaled--------------------------------------------
TableForProduct = PopulationTable.AT2017.Cohort.FINAL %>%
mT.scaleProbs(factor = 1.25, name.postfix = "10% security added")
plotMortalityTables(TableForProduct, PopulationTable.AT2017.Cohort.FINAL,
title = "Adding a security loading of 25%", Period = 2017, legend.position = "bottom")
......@@ -16,6 +16,7 @@ vignette: >
```{r echo = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
options(tidyverse.quiet = TRUE)
```
The MortalityTables package provides the `mortalityTable` base class and
......@@ -416,8 +417,240 @@ plot(AVOe2005R.female, AVOe2005R.female.mod, title = "Original and modified tabl
## Creating mortality tables from data and modifying them using various helper functions
The package \code{MortalityTables} not only provides the data structures and some
examples of mortality tables, it also provides several functions to modify
existing tables. In particular, the functions available provide
examples of mortality tables, it also provides several functions to create mortality
tables from raw data and modify them. The package provides several editing functions,
which all begin with the prefix \code{mT.}.
Let us take as an example the provided dataset \code{PopulationData.AT2017} of
Austrian population data (exposure and deaths counts for the year 2017).
For simplicity, we only look at the unisex data (i.e. male + female numbers,
which are already provided as total exposure and total deaths). The raw mortality
can then be calculated as
\equation{\hat{q}_x = \frac{d_x}{E_x+\frac{d_x}{2}}}
```{r AustrianPopulation.RawData}
library(tidyverse)
data("PopulationData.AT2017", package = "MortalityTables")
PopulationData.AT2017.raw = PopulationData.AT2017 %>%
select(age, exposure.total, deaths.total) %>%
mutate(qraw = deaths.total / (exposure.total + deaths.total/2))
```
We now have all data needed to put it into a [MortalityTable] object (some fields
like the exposre and the data list are not strictly needed, but can be useful
later on):
```{r AustrianPopulationTableRaw}
PopulationTable.AT2017 = mortalityTable.period(
name = "Austrian Population Mortality 2017 (raw)",
baseYear = 2017,
deathProbs = PopulationData.AT2017.raw$qraw,
ages = PopulationData.AT2017.raw$age,
exposures = PopulationData.AT2017.raw$exposure.total,
data = list(
deaths = PopulationData.AT2017.raw$deaths.total,
dim = list(sex = "u", collar = "Population", type = "raw", year = "2017")
)
)
plotMortalityTables(PopulationTable.AT2017, title = "Austrian population mortality (raw), 2017")
```
Of course, we sooner or later want to work with a smooth table rather than the
raw death probabilities. The most common approach to smoothing mortality tables
is the Whittaker-Henderson method of graduation, which is provided by the
function [whittaker.mortalityTable()]. The parameters are the $\ĺambda$ smoothing
parameter (determining how smooth the result shall be, which in turn means that
the result might be quite distant from the raw probabiliteis in some ages) and
the order of differences $d$ (the default 2 typically suffices). Since we have
the exposures available and stored inside the table, the [whittaker.mortalityTable()]
function will use the exposures as weight and so try to match age ranges with
high exposure much better than e.g. old ages with hardly any living.
```{r AustrianPopulationTableSmooth}
PopulationTable.AT2017.smooth = PopulationTable.AT2017 %>%
whittaker.mortalityTable(lambda = 1/10, d = 2, name.postfix = ", Whittaker") %>%
mT.setDimInfo(type = "smoothed")
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, title = "Austrian population mortality (raw and smoothed), 2017") +
aes(colour = type)
```
As a side note, this example also shows how the additional dimensional infos
set be either the constructor of the table or the [mT.setDimInfo()] function and
stored in the \code{table$data$dim} list can be used by ggplot as aesthetics.
Now, if we look at the exposures, we see that above age 95 we are below an
exposure of 5000 and at age 100 we are below exposure 500. So, for these old
ages we apparently do not have enough data to derive mortalities with sufficient
significance. So, let's cut the table at age 100:
```{r AustrianPopulationTableCut100}
PopulationData.AT2017.raw %>% filter(age > 90)
PopulationTable.AT2017.cut = PopulationTable.AT2017.smooth %>%
mT.fillAges(0:99) %>%
mT.setName("Austrian Population Mortality 2017, Whittaker-smoothed and cut at age 99")
```
Even though we don't have enough statistical data to derive significant mortalities
above 100, we still want to create a table that covers this age range by extrapolating
the significant table to higher ages. This is typically done by selecting a fitting
function and an appropriate age range, where the function is fit to the data.
The parameters of the function calibrated to match the mortalities in the fitting
range as good as possible are then used to extrapolate the mortalities with the
function to ages outside the existing table.
The function [mT.fitExtrapolationLaw] uses the package \code{MortalityLaws} and
the function [MortalityLaws::MortalityLaw()] to fit one of the mortality laws (
see [MortalityLaws::availableLaws()] for all available laws) to the data and use
that law to extrapolate to the desired ages, with a potential feding-in or fading-out
age range.
In this example, we fit a Heligman-Pollard-type law (HP2) to the raw data and use
it to extrapolate up to age 120. The age rante 80--95 is used to linearly
switch from the (smoothed) death probabilities of the input table to the
death probabilities calculated from the fitted law. So in this case, all observed
probabilities above age 95 are not used at all anyway.
```{r AustrianPopulationTableExtrapolated}
PopulationTable.AT2017.ex = PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "smoothed and extrapolated")
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, PopulationTable.AT2017.ex, title = "Austrian population mortality (raw and smoothed), 2017") +
aes(colour = type)
```
Using different laws and different fitting age ranges can result in quite different
results, so be carefull when extrapolating the table and always do a sanity-check
on the results!
```{r AustrianPopulationTableFitComparison}
plotMortalityTables(
PopulationTable.AT2017,
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "Extrapolation: HP2, Fit 75--99"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:85, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "Extrapolation: HP, Fit 75--85"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 90:110, extrapolate = 80:120, fadeIn = 90:100) %>%
mT.setDimInfo(type = "Extrapolation: HP2, Fit 90--110"),
title = "Examples of different fitting ranges for extrapolation") +
aes(colour = type)
```
```{r AustrianPopulationTableFitFunctionComparison}
plotMortalityTables(
PopulationTable.AT2017,
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "HP2", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "HP2"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "thiele", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "thiele"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "ggompertz", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "ggompertz"),
PopulationTable.AT2017.smooth %>%
mT.fitExtrapolationLaw(law = "carriere1", fit = 75:99, extrapolate = 80:120, fadeIn = 80:95) %>%
mT.setDimInfo(type = "carriere1"),
title = "Examples of different fitting functions for extrapolation (fit 75--99)",
ages = 75:120, legend.position = "bottom", legend.key.width = unit(15, "mm")) +
aes(colour = type) + labs(colour = "Mortality Law")
```
The Austrian population mortality table for the year 2017 derived above is a
period life table describing the observed mortality only in the year 2017.
To describe death probabilities for a given person, one needs to take into account
the mortality improvements and project the mortality into the future from the
observation year. This can be done with age-dependent yearly mortality improvements, also called
mortaltity trend $\labmda_x$.
For simplicity, we will use the trend $\labmda_x$ of the medium scenario of the
mortality forecast of the Statistik Austria (forecast from 2016 to roughly 2080).
These forecast tables are available as the mortality table \code{mort.AT.forecast}
for male and female separately. Even though we derived a table for unisex, we
will apply the male trends for simplicity. In practice, of course you would
derive proper unisex trends from the available data.
```{r AustrianPopulationTableTrendForecast}
mortalityTables.load("Austria_PopulationForecast")
plotMortalityTrend(mort.AT.forecast, title = "Forecast trend (medium scenario) by Statistik Austria")
```
As we can see, the trends appear to be derived from data until age 94 and then set to a constant value ("floor").
Let us first apply the male trend to the observed period life table of the year 2017, and then extrapolate the trend from age 94 to higher ages by an exponential function towards zero. The first can be done with the function [mT.addTrend()], while the second can be done with [mT.extrapolateTrendExp()]:
```{r AustrianPopulationTableTrend}
PopulationTable.AT2017.trend = PopulationTable.AT2017.ex %>%
mT.addTrend(mort.AT.forecast$m@trend, trendages = ages(mort.AT.forecast$m)) %>%
mT.setDimInfo(type = "smoothed, extrapolated, trend")
PopulationTable.AT2017.trend.ex = PopulationTable.AT2017.trend %>%
mT.extrapolateTrendExp(95) %>%
mT.setDimInfo(type = "smoothed, extrapolated, trend extrapolated")
plotMortalityTrend(PopulationTable.AT2017.trend, PopulationTable.AT2017.trend.ex,
title = "Extrapolating the trend via Exponential function") +
aes(color = type)
plotMortalityTables(PopulationTable.AT2017, PopulationTable.AT2017.smooth, PopulationTable.AT2017.ex, PopulationTable.AT2017.trend.ex, YOB = 1980, title = "Austrian population mortality (Period 2017 vs. Generation 1980)", legend.position = c(0.01, 0.99), legend.justification = c(0,1)) +
aes(colour = type)
```
So we have now started from raw data, calculated the death probabilities, smoothed
them using Whittaker-Henderson, extrapolated to very old ages and added a trend
to create a nice Cohort Life Table.
We could now store the \code{PopulationTable.AT2017.trend.ex} in an .RData file
and distribute it to the public. However, we might miss that all our modification
were also recorded inside the mortality table (to allow later introspection into
what was done and what was the result). For a published table, this might not
be desired, so we first need to clean this additional support data with the
[mT.cleanup()] function, which does not modify the table itself, but only
removes all non-essential supporting information from the table:
```{r AustrianPopulationTableFinal}
# Lots of non-essential or support information is stored inside the table's data field:
PopulationTable.AT2017.trend.ex@data$whittaker
# Clean up the table (remove all non-essential data, but do not modify results)
PopulationTable.AT2017.Cohort.FINAL = PopulationTable.AT2017.trend.ex %>%
mT.cleanup() %>%
mT.round(digits = 6) %>%
mT.setName("Austrian Population Mortality, Period 2017 with trend projection")
```
Other functions that might be useful before publishing a table are:
* [mT.translate()], which simply moves the base year of the internal representation of a cohort life table to a different year (by applying the trend according to the translation), but leaves cohort death probabilities unchanged.
* [mT.round()], which rounds the probabilities of the base table and the trend to the given number of digits.
When using a population mortality table like the one we just derived in
insurance contracts, the actuary often considers adding a certain security
loading (e.g. 25\% on all death probabilities) to ensure sufficient security
and ensure the legal requirement of a prudent person.
This can be done with the function [mT.scaleProbs()]:
```{r AustrianPopulationTableScaled}
TableForProduct = PopulationTable.AT2017.Cohort.FINAL %>%
mT.scaleProbs(factor = 1.25, name.postfix = "10% security added")
plotMortalityTables(TableForProduct, PopulationTable.AT2017.Cohort.FINAL,
title = "Adding a security loading of 25%", Period = 2017, legend.position = "bottom")
```
## Pension Tables
Pension tables generalize mortality tables in that the state space is increased
......@@ -431,22 +664,33 @@ Possible states are:
* incapacity: disablity pension (in most cases permanent), not working, early pension
* retirement: old age pension, usually starting with a fixed age
* dead
* Widow/widower pension (if a widow exists at the time of death)
* Widow/widower pension (if a widow exists at the time of death)
Correspondingly, the `pensionTable` class offers the following slots describing
transition probabilities for the corresponding state transitions (by a
`mortalityTable`-derived object):
* `qxaa`: death probability of actives (active -> dead)
* `ix`: invalidity probability (active -> incapacity)
* `qix`: death probability of invalid (invalid -> dead)
* `rx`: reactivation probability (incapacity -> active)
* `apx`: retirement probability (active -> retirement), typically 1 for a fixed age
* `apx`: retirement probability of invalids (invalid -> retirement), typically 0 or 1 for a fixed age
* `qpx`: death probability of retired (retired -> dead)
* `hx`: probability of a widow at moment of death (dead -> widow), y(x) age differene
* `qxw`: death probability of widows/widowers
* `qgx`: death probability of total group (irrespective of state)
* `qx`
: death probability of actives (active -> dead)}
* `ix`
: invalidity probability (active -> incapacity)}
* `qix`
: death probability of invalid (invalid -> dead)}
* `rx`
: reactivation probability (incapacity -> active)}
* `apx`
: retirement probability (active -> retirement), typically 1 for a fixed age}
* `qpx`
: death probability of retired (retired -> dead)}
* `hx`
: probability of a widow at moment of death (dead -> widow), y(x) age differene}
* `qxw`
: death probability of widows/widowers}
* `yx`
: age difference of widow(er) at moment of death}
* `qgx`
: death probability of total group (irrespective of state)}
All functions that handle `mortalityTable` object can be used with these slots.
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment