From 00951503d81ebef0bd45b28f0ddca96693db2147 Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Thu, 14 May 2026 00:20:55 -0700 Subject: [PATCH 1/3] Add workflow to sync main into docs/great-docs-prototype On every push to main, GitHub Actions automatically merges main into docs/great-docs-prototype so the docs branch stays current with all code changes while keeping its doc-engine files intact. --- .github/workflows/sync-main-to-docs.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/sync-main-to-docs.yml diff --git a/.github/workflows/sync-main-to-docs.yml b/.github/workflows/sync-main-to-docs.yml new file mode 100644 index 00000000..ae11e38f --- /dev/null +++ b/.github/workflows/sync-main-to-docs.yml @@ -0,0 +1,25 @@ +name: Sync main to docs/great-docs-prototype + +on: + push: + branches: [main] + +permissions: + contents: write + +jobs: + sync: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Merge main into docs/great-docs-prototype + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git checkout docs/great-docs-prototype + git merge origin/main --no-edit -m "chore: sync main into docs/great-docs-prototype" + git push origin docs/great-docs-prototype From 1ac6ef10627f6757f5410c6f82dcd0d3418b6d20 Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Thu, 14 May 2026 21:11:32 -0700 Subject: [PATCH 2/3] docs: add doctest Examples for correlation, adjustments, and Munich family Add Sphinx .. testsetup:: / .. testcode:: / .. testoutput:: blocks showing parameter effects for DevelopmentCorrelation, ValuationCorrelation, DevelopmentConstant, MunichAdjustment, CaseOutstanding, BootstrapODPSample, BerquistSherman, and Trend (axis, piecewise trends, compound Trend with CapeCod on clrd). Use random_state=42 in BootstrapODPSample example. 
Parameters and Attributes sections are unchanged. Co-authored-by: Cursor --- chainladder/adjustments/berqsherm.py | 37 +++++++++++++ chainladder/adjustments/bootstrap.py | 35 +++++++++++++ chainladder/adjustments/trend.py | 72 ++++++++++++++++++++++++++ chainladder/core/correlation.py | 57 ++++++++++++++++++++ chainladder/development/constant.py | 28 ++++++++++ chainladder/development/munich.py | 30 +++++++++++ chainladder/development/outstanding.py | 30 +++++++++++ 7 files changed, 289 insertions(+) diff --git a/chainladder/adjustments/berqsherm.py b/chainladder/adjustments/berqsherm.py index a86bbb44..c5a23e17 100644 --- a/chainladder/adjustments/berqsherm.py +++ b/chainladder/adjustments/berqsherm.py @@ -42,6 +42,43 @@ class BerquistSherman(BaseEstimator, TransformerMixin, EstimatorIO): Two-period Exponential intercept parameters b_: Triangle Two-period Exponential slope parameters + + Examples + -------- + ``trend`` tilts the case-adequacy adjustment before ``Incurred`` is rebuilt; + on the ``MedMal`` slice the column totals move materially between ``0%`` + and ``15%`` annual drift. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. testcode:: + + tri = cl.load_sample("berqsherm").loc["MedMal"] + base = cl.BerquistSherman( + paid_amount="Paid", + incurred_amount="Incurred", + reported_count="Reported", + closed_count="Closed", + trend=0.0, + ).fit(tri) + tilted = cl.BerquistSherman( + paid_amount="Paid", + incurred_amount="Incurred", + reported_count="Reported", + closed_count="Closed", + trend=0.15, + ).fit(tri) + print(round(float(np.nansum(base.adjusted_triangle_["Incurred"].values)), 2)) + print(round(float(np.nansum(tilted.adjusted_triangle_["Incurred"].values)), 2)) + + .. 
testoutput:: + + 1407473237.41 + 1126985253.66 + """ def __init__( diff --git a/chainladder/adjustments/bootstrap.py b/chainladder/adjustments/bootstrap.py index 1ad6b096..7586a7cb 100644 --- a/chainladder/adjustments/bootstrap.py +++ b/chainladder/adjustments/bootstrap.py @@ -46,6 +46,41 @@ class BootstrapODPSample(DevelopmentBase): A set of triangles represented by each simulation scale_: The scale parameter to be used in generating process risk + + Examples + -------- + ``n_periods`` is forwarded to the internal ``Development`` fit, which + changes the Pearson scale, while ``hat_adj`` toggles the residual + standardization used before resampling. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("raa") + hat = cl.BootstrapODPSample( + n_sims=5, random_state=42, hat_adj=True + ).fit(tri) + nohat = cl.BootstrapODPSample( + n_sims=5, random_state=42, hat_adj=False + ).fit(tri) + short_hist = cl.BootstrapODPSample( + n_sims=5, random_state=42, n_periods=3 + ).fit(tri) + print(round(float(hat.scale_), 6)) + print(round(float(short_hist.scale_), 6)) + print(round(float(hat.resampled_triangles_.mean().values[0, 0, 0, 0]), 4)) + print(round(float(nohat.resampled_triangles_.mean().values[0, 0, 0, 0]), 4)) + + .. testoutput:: + + 983.635027 + 322.397502 + 1455.5201 + 1532.5693 + """ def __init__( diff --git a/chainladder/adjustments/trend.py b/chainladder/adjustments/trend.py index 6e69e36e..4e1c4225 100644 --- a/chainladder/adjustments/trend.py +++ b/chainladder/adjustments/trend.py @@ -30,6 +30,78 @@ class Trend(BaseEstimator, TransformerMixin, EstimatorIO): trend_: A triangle representation of the trend factors + Examples + -------- + The same annual decimal trend is applied along ``origin`` or + ``valuation`` axes, producing different factor surfaces. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. 
testcode:: + + tri = cl.load_sample("raa") + origin = cl.Trend(0.05, axis="origin").fit(tri) + val = cl.Trend(0.05, axis="valuation").fit(tri) + print(round(float(origin.trend_.values[0, 0, 2, 3]), 6)) + print(round(float(val.trend_.values[0, 0, 2, 3]), 6)) + + .. testoutput:: + + 1.4071 + 1.215506 + + Multiple ``trends`` with paired ``dates`` compound only across the + windows you specify, so the factors need not match a single flat trend. + + .. testcode:: + + tri = cl.load_sample("raa") + flat = cl.Trend(0.10, axis="origin").fit(tri) + piece = cl.Trend( + trends=[0.05, 0.05], + dates=[(None, "1985"), ("1985", None)], + axis="origin", + ).fit(tri) + print(round(float(flat.trend_.values[0, 0, 0, 0]), 6)) + print(round(float(piece.trend_.values[0, 0, 0, 0]), 6)) + + .. testoutput:: + + 2.357948 + 1.551328 + + ``trend_`` holds the compounded factor surface; ``transform`` applies it + so a downstream ``CapeCod`` can be run with ``trend=0`` while still + reflecting the staged annual assumptions. + + .. testcode:: + + tr = cl.load_sample("clrd")[["CumPaidLoss", "EarnedPremDIR"]].sum() + t_step = cl.Trend( + trends=[0.04, 0.02], + dates=[(None, "1995"), ("1995", None)], + axis="origin", + ).fit(tr["CumPaidLoss"]) + paid_leveled = t_step.transform(tr["CumPaidLoss"]) + ibnr = ( + cl.CapeCod() + .fit( + paid_leveled, + sample_weight=tr["EarnedPremDIR"].latest_diagonal, + ) + .ibnr_ + ) + print(round(float(t_step.trend_.values[0, 0, 2, 3]), 6)) + print(int(round(float(np.nansum(ibnr.values)), 0))) + + .. testoutput:: + + 1.21562 + 29278236 + """ def __init__(self, trends=0.0, dates=None, axis="origin"): diff --git a/chainladder/core/correlation.py b/chainladder/core/correlation.py index 942c0bb0..72f7f052 100644 --- a/chainladder/core/correlation.py +++ b/chainladder/core/correlation.py @@ -46,6 +46,34 @@ class DevelopmentCorrelation: confidence_interval: tuple Range within which ``t_expectation`` must fall for independence assumption to be significant. 
+ + Examples + -------- + ``p_critical`` sets how wide the acceptance band is for the Spearman + composite statistic; tightening it can flip ``t_critical`` even when the + point estimate is unchanged. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("raa") + loose = cl.DevelopmentCorrelation(tri, p_critical=0.5) + tight = cl.DevelopmentCorrelation(tri, p_critical=0.99) + print(bool(loose.t_critical.iloc[0, 0])) + print(bool(tight.t_critical.iloc[0, 0])) + print(round(float(loose.confidence_interval[0]), 6)) + print(round(float(tight.confidence_interval[0]), 6)) + + .. testoutput:: + + False + True + -0.127467 + -0.002369 + """ def __init__(self, triangle, p_critical: float = 0.5): @@ -171,6 +199,35 @@ class ValuationCorrelation: The expected value of Z. z_variance : Triangle or DataFrame The variance value of Z. + + Examples + -------- + ``total=True`` follows Mack (1993) and returns ``DataFrame`` summaries; + ``total=False`` follows Mack (1997) and keeps a ``Triangle`` of + valuation-year diagnostics. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. testcode:: + + tri = cl.load_sample("raa") + agg = cl.ValuationCorrelation(tri, p_critical=0.1, total=True) + yearly = cl.ValuationCorrelation(tri, p_critical=0.1, total=False) + print(type(agg.z_critical).__name__) + print(type(yearly.z_critical).__name__) + print(yearly.z_critical.shape) + print(int(np.nansum(yearly.z_critical.values))) + + .. 
testoutput:: + + DataFrame + Triangle + (1, 1, 1, 9) + 0 + """ def __init__(self, triangle: Triangle, p_critical: float = 0.1, total: bool = True): diff --git a/chainladder/development/constant.py b/chainladder/development/constant.py index 405655cd..041d333e 100644 --- a/chainladder/development/constant.py +++ b/chainladder/development/constant.py @@ -31,6 +31,34 @@ class DevelopmentConstant(DevelopmentBase): The estimated loss development patterns cdf_: Triangle The estimated cumulative development patterns + + Examples + -------- + ``patterns`` is interpreted as multiplicative link ratios when + ``style='ldf'``; swapping in a flat manual ladder changes the fitted + pattern immediately. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("raa") + dev = cl.Development().fit(tri) + n = dev.ldf_.shape[3] + fitted = {(i + 1) * 12: float(dev.ldf_.values[0, 0, 0, i]) for i in range(n)} + flat = {(i + 1) * 12: 1.2 for i in range(n)} + const_fitted = cl.DevelopmentConstant(patterns=fitted, style="ldf").fit(tri) + const_flat = cl.DevelopmentConstant(patterns=flat, style="ldf").fit(tri) + print(round(float(const_flat.ldf_.values[0, 0, 0, 0]), 4)) + print(round(float(const_fitted.ldf_.values[0, 0, 0, 0]), 6)) + + .. testoutput:: + + 1.2 + 2.999359 + """ def __init__(self, patterns=None, style="ldf", callable_axis=0, groupby=None): diff --git a/chainladder/development/munich.py b/chainladder/development/munich.py index fa1ad731..7210c822 100644 --- a/chainladder/development/munich.py +++ b/chainladder/development/munich.py @@ -47,6 +47,36 @@ class MunichAdjustment(DevelopmentBase): cdf_: Triangle The estimated bivariate cumulative development patterns + Examples + -------- + ``fillna=True`` imputes missing paid/incurred amounts with simple + chainladder expectations so the bivariate regression can still run. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. 
testcode:: + + mcl = cl.load_sample("mcl").copy() + arr = np.array(mcl.values, dtype=float, copy=True) + arr[0, 1, 0, 2] = np.nan + mcl.values = arr + dev = cl.Development().fit_transform(mcl) + try: + cl.MunichAdjustment(("paid", "incurred"), fillna=False).fit(dev) + print("no_error") + except ValueError: + print("ValueError") + filled = cl.MunichAdjustment(("paid", "incurred"), fillna=True).fit(dev) + print(round(float(filled.ldf_.values[0, 0, 0, 0]), 6)) + + .. testoutput:: + + ValueError + 2.151329 + + """ def __init__(self, paid_to_incurred=None, fillna=False): diff --git a/chainladder/development/outstanding.py b/chainladder/development/outstanding.py index ffaa5528..bc6ddbc5 100644 --- a/chainladder/development/outstanding.py +++ b/chainladder/development/outstanding.py @@ -49,6 +49,36 @@ class CaseOutstanding(DevelopmentBase): The paid to prior case ratios used for fitting the estimator paid_ldf_: The selected paid to prior case ratios of the fitted estimator + + Examples + -------- + ``paid_n_periods`` and ``case_n_periods`` control how many recent origin + years inform the ``Development`` weights that smooth the paid and case + patterns. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("usauto") + all_years = cl.CaseOutstanding( + paid_to_incurred=("paid", "incurred") + ).fit(tri) + three = cl.CaseOutstanding( + paid_to_incurred=("paid", "incurred"), + paid_n_periods=3, + case_n_periods=3, + ).fit(tri) + print(round(float(all_years.paid_ldf_.values[0, 0, 0, 0]), 6)) + print(round(float(three.paid_ldf_.values[0, 0, 0, 0]), 6)) + + .. 
testoutput:: + + 0.842814 + 0.833138 + """ def __init__( From ce197a615e822afd14383145d5dffff1c4580195 Mon Sep 17 00:00:00 2001 From: Ethan Kang Date: Fri, 15 May 2026 01:39:58 -0700 Subject: [PATCH 3/3] docs: add doctest Examples for workflow, tails, utils, and ParallelogramOLF Add Sphinx parameter-effect Examples for GridSearch, Pipeline, VotingChainladder (weights), read_pickle, minimum, maximum, read_json, concat, TailCurve, TailBondy, TailClark, and ParallelogramOLF. Replace PatsyFormula toy-DataFrame example with genins-based TweedieGLM and DevelopmentML pipeline demonstrations. Co-authored-by: Cursor --- chainladder/adjustments/parallelogram.py | 51 +++++++ chainladder/tails/bondy.py | 22 +++ chainladder/tails/clark.py | 22 +++ chainladder/tails/curve.py | 25 +++ chainladder/utils/utility_functions.py | 187 +++++++++++++++++++++++ chainladder/workflow/gridsearch.py | 75 ++++++++- chainladder/workflow/voting.py | 35 +++++ 7 files changed, 416 insertions(+), 1 deletion(-) diff --git a/chainladder/adjustments/parallelogram.py b/chainladder/adjustments/parallelogram.py index bb3e44cb..a663be4a 100644 --- a/chainladder/adjustments/parallelogram.py +++ b/chainladder/adjustments/parallelogram.py @@ -46,6 +46,57 @@ class ParallelogramOLF(BaseEstimator, TransformerMixin, EstimatorIO): olf_: A triangle representation of the on-level factors + + Examples + -------- + ``policy_length`` sets the earning window used in the parallelogram + geometry; a longer policy smooths rate changes over more months and + shifts the first on-level factor. + + .. testsetup:: + + import chainladder as cl + import pandas as pd + + .. 
testcode:: + + rate_history = pd.DataFrame( + {"EffDate": ["2010-07-01"], "RateChange": [0.20]} + ) + data = pd.DataFrame( + { + "Year": [2010, 2011, 2012, 2013, 2014], + "EarnedPremium": [10000] * 5, + } + ) + + def prem(): + return cl.Triangle( + data, origin="Year", columns="EarnedPremium", cumulative=True + ) + + olf_12 = cl.ParallelogramOLF( + rate_history, + change_col="RateChange", + date_col="EffDate", + policy_length=12, + approximation_grain="M", + ).fit_transform(prem()) + olf_24 = cl.ParallelogramOLF( + rate_history, + change_col="RateChange", + date_col="EffDate", + policy_length=24, + approximation_grain="M", + ).fit_transform(prem()) + print(round(float(olf_12.olf_.values[0, 0, 0, 0]), 6)) + print(round(float(olf_24.olf_.values[0, 0, 0, 0]), 6)) + + .. testoutput:: + + 1.170732 + 1.185185 + """ def __init__( diff --git a/chainladder/tails/bondy.py b/chainladder/tails/bondy.py index 11fa6fd4..50cc6e5a 100644 --- a/chainladder/tails/bondy.py +++ b/chainladder/tails/bondy.py @@ -44,6 +44,28 @@ class TailBondy(TailBase): -------- TailCurve + Examples + -------- + ``earliest_age`` controls which link ratios enter the Bondy exponent fit, + shifting the estimated ``b_`` parameter. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + dev = cl.Development().fit_transform(cl.load_sample("raa")) + b_def = cl.TailBondy().fit(dev) + b_12 = cl.TailBondy(earliest_age=12).fit(dev) + print(round(float(b_def.b_.iloc[0, 0]), 6)) + print(round(float(b_12.b_.iloc[0, 0]), 6)) + + .. testoutput:: + + 0.5 + 0.48451 + """ def __init__(self, earliest_age=None, attachment_age=None, projection_period=12): diff --git a/chainladder/tails/clark.py b/chainladder/tails/clark.py index 052c2e6c..617f1d7d 100644 --- a/chainladder/tails/clark.py +++ b/chainladder/tails/clark.py @@ -45,6 +45,28 @@ class TailClark(TailBase): norm_resid_: Triangle The "Normalized" Residuals of the model according to Clark. 
+ Examples + -------- + ``growth`` is passed through to the underlying ``ClarkLDF`` tail fit, so + the extrapolated tail LDF changes with the curve family. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + dev = cl.Development().fit_transform(cl.load_sample("raa")) + log = cl.TailClark(growth="loglogistic").fit(dev) + wei = cl.TailClark(growth="weibull").fit(dev) + print(round(float(log.ldf_.values[0, 0, 0, -1]), 6)) + print(round(float(wei.ldf_.values[0, 0, 0, -1]), 6)) + + .. testoutput:: + + 1.188919 + 1.013531 + """ def __init__(self, growth="loglogistic", truncation_age=None, diff --git a/chainladder/tails/curve.py b/chainladder/tails/curve.py index aa89a030..f58c2f09 100644 --- a/chainladder/tails/curve.py +++ b/chainladder/tails/curve.py @@ -55,6 +55,31 @@ class TailCurve(TailBase): Slope parameter of the curve fit. intercept : DataFrame Intercept parameter of the curve fit. + + Examples + -------- + ``curve`` selects the regression form used in the tail extrapolation; the + implied last-period LDF differs between ``exponential`` and + ``inverse_power`` on the same ``tail_sample`` triangle. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("tail_sample") + dev = cl.Development().fit_transform(tri) + exp = cl.TailCurve(curve="exponential", extrap_periods=5).fit(dev) + inv = cl.TailCurve(curve="inverse_power", extrap_periods=5).fit(dev) + print(round(float(exp.ldf_.values[0, 0, 0, -1]), 6)) + print(round(float(inv.ldf_.values[0, 0, 0, -1]), 6)) + + .. testoutput:: + + 1.0093 + 1.039271 + """ def __init__( diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py index cd435f97..324f6ce3 100644 --- a/chainladder/utils/utility_functions.py +++ b/chainladder/utils/utility_functions.py @@ -265,6 +265,43 @@ def load_sample(key: str, *args, **kwargs) -> Triangle: def read_pickle(path): + """Load an object serialized with ``to_pickle`` (``dill`` format). 
+ + Parameters + ---------- + path : str or path-like + Path to the pickle file. + + Returns + ------- + object + The deserialized triangle or estimator. + + Examples + -------- + + .. testsetup:: + + import tempfile + import os + + .. testcode:: + + import chainladder as cl + + tri = cl.load_sample("raa") + fd, p = tempfile.mkstemp(suffix=".pkl") + os.close(fd) + tri.to_pickle(p) + back = cl.read_pickle(p) + os.remove(p) + print(back == tri) + + .. testoutput:: + + True + + """ with open(path, "rb") as pkl: return dill.load(pkl) @@ -401,6 +438,26 @@ def read_csv( def read_json(json_str, array_backend=None): + """Deserialize JSON produced by ``to_json`` (triangle, estimator, or pipeline). + + Examples + -------- + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + dev = cl.Development(average="volume") + dev2 = cl.read_json(dev.to_json()) + print(dev2.get_params()["average"]) + + .. testoutput:: + + volume + + """ from chainladder import Triangle from chainladder.workflow import Pipeline @@ -596,6 +653,25 @@ def concat( Returns ------- Updated triangle + + Examples + -------- + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + clrd = cl.load_sample("clrd").groupby("LOB").sum().iloc[:2] + tri = clrd[["CumPaidLoss", "IncurLoss"]] + both = cl.concat([tri.iloc[:, 0:1], tri.iloc[:, 1:2]], axis=1) + print(both.shape[1]) + + .. testoutput:: + + 2 + """ if type(objs) not in (list, tuple): raise TypeError("objects to be concatenated must be in a list or tuple") @@ -700,10 +776,50 @@ def num_to_nan(arr: ArrayLike) -> ArrayLike: def minimum(x1, x2): + """Element-wise minimum of two triangles (delegates to ``Triangle.minimum``). + + Examples + -------- + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("raa") + lo = cl.minimum(tri, tri * 0.5) + print(round(float(lo.values[0, 0, 0, 0]), 4)) + + .. 
testoutput:: + + 2506.0 + + """ return x1.minimum(x2) def maximum(x1, x2): + """Element-wise maximum of two triangles (delegates to ``Triangle.maximum``). + + Examples + -------- + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + tri = cl.load_sample("raa") + hi = cl.maximum(tri, tri * 0.5) + print(round(float(hi.values[0, 0, 0, 0]), 4)) + + .. testoutput:: + + 5012.0 + + """ return x1.maximum(x2) @@ -735,6 +851,77 @@ class PatsyFormula(BaseEstimator, TransformerMixin): design_info_: The patsy instructions for generating the design_matrix, X. + Examples + -------- + ``TweedieGLM`` passes ``design_matrix`` through ``PatsyFormula`` when + building its internal ``DevelopmentML`` pipeline. Adding ``C(origin)`` + expands the GLM and changes the fitted ``ldf_``. + + .. testsetup:: + + import chainladder as cl + + .. testcode:: + + genins = cl.load_sample("genins") + by_dev = cl.TweedieGLM(design_matrix="C(development)").fit(genins) + by_both = cl.TweedieGLM( + design_matrix="C(development) + C(origin)" + ).fit(genins) + print(len(by_dev.coef_)) + print(len(by_both.coef_)) + print(round(float(by_dev.ldf_.values[0, 0, 0, 0]), 6)) + print(round(float(by_both.ldf_.values[0, 0, 0, 0]), 6)) + + .. testoutput:: + + 10 + 19 + 3.508469 + 3.491031 + + The same formula strings are used explicitly as a pipeline step in + ``DevelopmentML``. + + .. 
testcode:: + + from sklearn.linear_model import LinearRegression + from sklearn.pipeline import Pipeline + from chainladder.utils.utility_functions import PatsyFormula + + genins = cl.load_sample("genins") + col = genins.columns[0] + dev_only = cl.DevelopmentML( + Pipeline( + [ + ("design_matrix", PatsyFormula("C(development)")), + ("model", LinearRegression(fit_intercept=False)), + ] + ), + y_ml=col, + fit_incrementals=False, + ).fit(genins) + with_origin = cl.DevelopmentML( + Pipeline( + [ + ( + "design_matrix", + PatsyFormula("C(development) + C(origin)"), + ), + ("model", LinearRegression(fit_intercept=False)), + ] + ), + y_ml=col, + fit_incrementals=False, + ).fit(genins) + print(len(dev_only.estimator_ml.named_steps.model.coef_)) + print(len(with_origin.estimator_ml.named_steps.model.coef_)) + + .. testoutput:: + + 10 + 19 + """ def __init__(self, formula=None): diff --git a/chainladder/workflow/gridsearch.py b/chainladder/workflow/gridsearch.py index 227a84fb..28d74cda 100644 --- a/chainladder/workflow/gridsearch.py +++ b/chainladder/workflow/gridsearch.py @@ -54,6 +54,42 @@ class GridSearch(BaseEstimator): results_: DataFrame A DataFrame with each param_grid key as a column and the ``scoring`` score as the last column + + Examples + -------- + Each row of ``results_`` is one ``ParameterGrid`` draw; changing + ``param_grid`` changes how many fits run and the reported scores. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. 
testcode:: + + clrd = cl.load_sample("clrd") + medmal = clrd.groupby("LOB").sum().loc["medmal"]["CumPaidLoss"] + prem = clrd.groupby("LOB").sum().loc["medmal"]["EarnedPremDIR"].latest_diagonal + pipe = cl.Pipeline( + [("dev", cl.Development()), ("benk", cl.Benktander())] + ) + param_grid = {"benk__n_iters": [1, 4]} + scoring = { + "IBNR": lambda m: float(np.nansum(m.named_steps.benk.ibnr_.values)) + } + grid = cl.GridSearch( + pipe, param_grid, scoring=scoring, n_jobs=1 + ).fit(medmal, benk__sample_weight=prem) + print(len(grid.results_)) + print(int(round(grid.results_["IBNR"].iloc[0], 0))) + print(int(round(grid.results_["IBNR"].iloc[1], 0))) + + .. testoutput:: + + 2 + 1624377 + 1442665 + """ def __init__(self, estimator, param_grid, scoring, verbose=0, @@ -139,7 +175,44 @@ class Pipeline(PipelineSL, EstimatorIO): ---------- named_steps: bunch object, a dictionary with attribute access Read-only attribute to access any step parameter by user given name. - Keys are step names and values are steps parameters.""" + Keys are step names and values are steps parameters. + + Examples + -------- + Hyper-parameters are set with the ``step__param`` naming convention from + scikit-learn. Here ``Development`` averaging changes aggregate IBNR from + the same ``Chainladder`` final step. + + .. testsetup:: + + import chainladder as cl + import numpy as np + + .. testcode:: + + tri = cl.load_sample("raa") + pipe = cl.Pipeline( + [ + ("dev", cl.Development(average="simple")), + ("cl", cl.Chainladder()), + ] + ) + ib_simple = int( + round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) + ) + pipe.set_params(dev__average="volume") + ib_volume = int( + round(float(np.nansum(pipe.fit_predict(tri).ibnr_.values)), 0) + ) + print(ib_simple) + print(ib_volume) + + .. 
testoutput:: + + 93643 + 52135 + + """ def fit(self, X, y=None, sample_weight=None, **fit_params): if sample_weight: diff --git a/chainladder/workflow/voting.py b/chainladder/workflow/voting.py index b1bd8723..0035be99 100644 --- a/chainladder/workflow/voting.py +++ b/chainladder/workflow/voting.py @@ -239,6 +239,41 @@ class VotingChainladder(_BaseChainladderVoting, MethodBase): 1988 23106.943030 1989 20004.502125 1990 21605.832631 + + ``weights`` and ``default_weighting`` change how sub-model ultimates are + blended; skewing weights toward ``Chainladder`` pulls the ensemble away + from ``BornhuetterFerguson`` on late accident years. + + .. testcode:: + + import numpy as np + + raa = cl.load_sample("raa") + cl_ult = cl.Chainladder().fit(raa).ultimate_ + apriori = cl_ult * 0 + (float(cl_ult.sum()) / 10) + estimators = [ + ("bcl", cl.Chainladder()), + ("bf", cl.BornhuetterFerguson(apriori=1.0)), + ] + even = cl.VotingChainladder( + estimators=estimators, + weights=None, + default_weighting=(0.5, 0.5), + ).fit(raa, sample_weight=apriori) + w = np.ones((1, 1, raa.shape[2], 2)) + w[..., 0] = 0.9 + w[..., 1] = 0.1 + skewed = cl.VotingChainladder(estimators=estimators, weights=w).fit( + raa, sample_weight=apriori + ) + print(round(float(even.ultimate_.values[0, 0, -1, 0]), 2)) + print(round(float(skewed.ultimate_.values[0, 0, -1, 0]), 2)) + + .. testoutput:: + + 19694.23 + 18660.8 + """ @_deprecate_positional_args