From b8a228d9cd138f1cf3a3cbcc9fade64b6b774060 Mon Sep 17 00:00:00 2001
From: Reinhold Kainhofer <reinhold@kainhofer.com>
Date: Mon, 26 Oct 2020 16:55:37 +0100
Subject: [PATCH] Allow data.frame / matrix as age-dependent selection factors

---
 R/baseProbabilities.R     | 57 +++++++++++++++++++++++++++++++++------
 R/mortalityTable.period.R |  6 ++---
 man/baseProbabilities.Rd  |  4 ++-
 man/getOmega.Rd           |  5 ++++
 4 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/R/baseProbabilities.R b/R/baseProbabilities.R
index c916186..95454ba 100644
--- a/R/baseProbabilities.R
+++ b/R/baseProbabilities.R
@@ -1,7 +1,9 @@
 #' @include mortalityTable.R mortalityTable.period.R mortalityTable.ageShift.R mortalityTable.trendProjection.R mortalityTable.improvementFactors.R mortalityTable.mixed.R fillAges.R
 NULL
 
-#' Return the base death probabilities of the life table
+#' Return the base death probabilities of the life table (with a possible
+#' selection effect applied, but without further processing like modifications,
+#' margins or trend extrapolation)
 #'
 #' @param object The life table object (class inherited from mortalityTable)
 #' @param ... Other parameters (currently unused)
@@ -42,13 +44,52 @@ setMethod("baseProbabilities", "mortalityTable.period",
                   probs = object@deathProbs[cbind(rws, cols)]
               }
               if (!is.null(object@selectionFactors)) {
-                  # Add a terminal 1 just in case, so that after the selection period, no modification occurs
-                  sel.factors = c(object@selectionFactors, 1)
-                  # For each age, determine the index into the selection factor
-                  # (ages below selection Age get index 1) and multiply the base
-                  # probability with the corresponding selection factor
-                  sel.offset = pmin(length(sel.factors), offsets)
-                  probs = sel.factors[sel.offset] * probs
+                  if (is.vector(object@selectionFactors)) {
+                      # Add a terminal 1 just in case, so that after the selection period, no modification occurs
+                      sel.factors = c(object@selectionFactors, 1)
+                      # For each age, determine the index into the selection factor
+                      # (ages below selection Age get index 1) and multiply the base
+                      # probability with the corresponding selection factor
+                    sel.offset = pmin(length(sel.factors), offsets)
+                    factors = sel.factors[sel.offset]
+                  } else {
+                      # Assume that the array / data.frame given as selectionFactors
+                      # has the same ages as the base table
+                      # Optionally, allow an "age" or "Age" column in the data.frame or array
+                      # (of course, remove that column before extracting the factors)
+                      sf = cbind(object@selectionFactors, Ultimate = 1)
+                      ages = ages(object)
+                      if ("age" %in% colnames(sf)) {
+                          ages = sf$age
+                          sf$age = NULL
+                      }
+                      if ("Age" %in% colnames(sf)) {
+                          ages = sf$Age
+                          sf$Age = NULL
+                      }
+                      # Extract the selection factors just like the base probabilities
+                      # were extracted from the array / data.frame
+                      offsets = pmax(1, ages - selectionAge + 1)
+                      cols = pmin(ncol(sf), offsets)
+                      rws = seq_along(cols)
+                      if (object@selectInitialAge) {
+                          # if data gives selection by initial age, make sure to
+                          # walk along thw same row until the ultimate table is reached
+                          rws = rws - cols + 1
+                          # For ages before the first attained age of the ultimate table,
+                          # use the select probabilities, even before the desired select age
+                          cols = cols + pmin(0, rws - 1)
+                          rws = pmax(1, rws)
+                      }
+                      # exctract the corresponding columns for each age:
+                      # See https://stackoverflow.com/questions/20036255/get-the-vector-of-values-from-different-columns-of-a-matrix
+                      # TODO: Check if any index is outside the existing dimensions!
+                      factors = as.matrix(sf)[cbind(rws, cols)]
+
+                    # TODO: handle age-specific selection factors (e.g. 1980 CSO 10-year selection factors)
+
+                  }
+                  probs = factors * probs
               }
               object@modification(probs * (1 + object@loading))
           })
diff --git a/R/mortalityTable.period.R b/R/mortalityTable.period.R
index 6abbfef..241ba65 100644
--- a/R/mortalityTable.period.R
+++ b/R/mortalityTable.period.R
@@ -1,8 +1,8 @@
 #' @include mortalityTable.R
 NULL
 
-setClassUnion("numericArray", c("numeric", "array", "matrix"))
-setClassUnion("numericArrayOrNULL", c("numeric", "array", "matrix", "NULL"))
+setClassUnion("numericArray", c("numeric", "array", "matrix", "data.frame"))
+setClassUnion("numericArrayOrNULL", c("numeric", "array", "matrix", "data.frame", "NULL"))
 setClassUnion("numericOrNULL", c("numeric", "NULL"))
 setClassUnion("mortalityTableOrNULL", c("mortalityTable", "NULL"))
 
@@ -47,7 +47,7 @@ mortalityTable.period = setClass(
         ages = "numeric",
         deathProbs = "numericArray",
         exposures = "numericArrayOrNULL",
-        selectionFactors = "numericOrNULL",
+        selectionFactors = "numericArrayOrNULL",
         selectInitialAge = "logical",
         tableBeforeSelection = "mortalityTableOrNULL"
     ),
diff --git a/man/baseProbabilities.Rd b/man/baseProbabilities.Rd
index bf3f69f..507efff 100644
--- a/man/baseProbabilities.Rd
+++ b/man/baseProbabilities.Rd
@@ -32,6 +32,8 @@ life table given the birth year (if needed)
 }}
 
 \examples{
-# TODO
+baseProbabilities(DAV2004R.male.selekt)
+baseProbabilities(DAV2004R.male.selekt, selectionAge = 60)
+baseProbabilities(DAV2004R.male.selekt, selectionAge = 65)
 
 }
diff --git a/man/getOmega.Rd b/man/getOmega.Rd
index 2ca633d..0fa3c94 100644
--- a/man/getOmega.Rd
+++ b/man/getOmega.Rd
@@ -4,6 +4,7 @@
 \name{getOmega}
 \alias{getOmega}
 \alias{getOmega,mortalityTable.period-method}
+\alias{getOmega,mortalityTable.mixed-method}
 \alias{getOmega,mortalityTable.jointLives-method}
 \alias{getOmega,mortalityTable.observed-method}
 \title{Return the maximum age of the life table}
@@ -12,6 +13,8 @@ getOmega(object)
 
 \S4method{getOmega}{mortalityTable.period}(object)
 
+\S4method{getOmega}{mortalityTable.mixed}(object)
+
 \S4method{getOmega}{mortalityTable.jointLives}(object)
 
 \S4method{getOmega}{mortalityTable.observed}(object)
@@ -26,6 +29,8 @@ Return the maximum age of the life table
 \itemize{
 \item \code{mortalityTable.period}: Return the maximum age of the period life table
 
+\item \code{mortalityTable.mixed}: Return the maximum age of the mixed life table
+
 \item \code{mortalityTable.jointLives}: Return the maximum age of the joint lives mortality table (returns the maximum age of the first table used for joint lives, as the ages of the joint lives are now known to the function)
 
 \item \code{mortalityTable.observed}: Return the maximum age of the life table
-- 
GitLab