{ "cells": [ { "cell_type": "markdown", "id": "d9de8716", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "# Gamma-Gamma Model\n", "\n", "In this notebook we show how to fit a Gamma-Gamma model in PyMC-Marketing. We compare the results with the [`lifetimes`](https://github.com/CamDavidsonPilon/lifetimes) package (no longer maintained; its last meaningful update was in July 2020). The model is presented in the paper: Fader, P. S., & Hardie, B. G. (2013). [The Gamma-Gamma model of monetary value](http://www.brucehardie.com/notes/025/gamma_gamma.pdf). February, 2, 1-9." ] }, { "cell_type": "markdown", "id": "a579696d", "metadata": {}, "source": [ "## Prepare Notebook" ] }, { "cell_type": "code", "execution_count": 1, "id": "813aa3e6", "metadata": {}, "outputs": [], "source": [ "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from lifetimes import GammaGammaFitter\n", "\n", "from pymc_marketing import clv\n", "\n", "# Plotting configuration\n", "az.style.use(\"arviz-darkgrid\")\n", "plt.rcParams[\"figure.figsize\"] = [10, 6]\n", "plt.rcParams[\"figure.dpi\"] = 100\n", "plt.rcParams[\"figure.facecolor\"] = \"white\"\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%config InlineBackend.figure_format = \"retina\"" ] }, { "cell_type": "markdown", "id": "b4e9df33", "metadata": {}, "source": [ "## Load Data\n", "\n", "We start by loading the `CDNOW` dataset." ] }, { "cell_type": "code", "execution_count": 2, "id": "4039ce96", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | frequency | \n", "recency | \n", "T | \n", "monetary_value | \n", "customer_id | \n", "
|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "30.43 | \n", "38.86 | \n", "22.35 | \n", "0 | \n", "
| 1 | \n", "1 | \n", "1.71 | \n", "38.86 | \n", "11.77 | \n", "1 | \n", "
| 2 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "2 | \n", "
| 3 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "3 | \n", "
| 4 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "4 | \n", "
| \n", " | frequency | \n", "recency | \n", "T | \n", "monetary_value | \n", "customer_id | \n", "
|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "30.43 | \n", "38.86 | \n", "22.35 | \n", "0 | \n", "
| 1 | \n", "1 | \n", "1.71 | \n", "38.86 | \n", "11.77 | \n", "1 | \n", "
| 5 | \n", "7 | \n", "29.43 | \n", "38.86 | \n", "73.74 | \n", "5 | \n", "
| 6 | \n", "1 | \n", "5.00 | \n", "38.86 | \n", "11.77 | \n", "6 | \n", "
| 8 | \n", "2 | \n", "35.71 | \n", "38.86 | \n", "25.55 | \n", "8 | \n", "
| \n", " | monetary_value | \n", "frequency | \n", "
|---|---|---|
| monetary_value | \n", "1.000000 | \n", "0.113884 | \n", "
| frequency | \n", "0.113884 | \n", "1.000000 | \n", "
| \n", " | coef | \n", "se(coef) | \n", "lower 95% bound | \n", "upper 95% bound | \n", "
|---|---|---|---|---|
| p | \n", "6.248802 | \n", "1.189687 | \n", "3.917016 | \n", "8.580589 | \n", "
| q | \n", "3.744588 | \n", "0.290166 | \n", "3.175864 | \n", "4.313313 | \n", "
| v | \n", "15.447748 | \n", "4.159994 | \n", "7.294160 | \n", "23.601336 | \n", "
\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Fit by maximum a posteriori (MAP) and keep only the posterior group.\n",
"# Despite the `idata_` prefix, `idata_map` holds the output of\n",
"# `.to_dataframe()`, not an InferenceData object.\n",
"idata_map = (\n",
"    model.fit(method=\"map\")\n",
"    .posterior\n",
"    .to_dataframe()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b8f11643",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"| \n", " | \n", " | p | \n", "q | \n", "v | \n", "
|---|---|---|---|---|
| chain | \n", "draw | \n", "\n", " | \n", " | \n", " |
| 0 | \n", "0 | \n", "6.248787 | \n", "3.744591 | \n", "15.447813 | \n", "
\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 449 seconds.\n"
]
}
],
"source": [
"# Settings forwarded to `model.fit` for the full MCMC run; the fixed\n",
"# `random_seed` keeps the draws reproducible across re-runs.\n",
"sampler_kwargs = dict(\n",
"    draws=2_000,\n",
"    target_accept=0.9,\n",
"    chains=4,\n",
"    random_seed=42,\n",
")\n",
"\n",
"idata_mcmc = model.fit(**sampler_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "52c3b00e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <xarray.Dataset> Size: 208kB\n",
"Dimensions: (chain: 4, draw: 2000)\n",
"Coordinates:\n",
" * chain (chain) int64 32B 0 1 2 3\n",
" * draw (draw) int64 16kB 0 1 2 3 4 5 6 ... 1994 1995 1996 1997 1998 1999\n",
"Data variables:\n",
" p (chain, draw) float64 64kB 4.713 4.829 7.353 ... 5.578 6.317 5.654\n",
" q (chain, draw) float64 64kB 4.289 4.288 3.402 ... 4.176 3.863 3.836\n",
" v (chain, draw) float64 64kB 24.09 23.53 11.76 ... 20.22 16.49 17.34\n",
"Attributes:\n",
" created_at: 2024-07-01T15:48:01.851699\n",
" arviz_version: 0.17.1\n",
" inference_library: pymc\n",
" inference_library_version: 5.14.0\n",
" sampling_time: 448.93818640708923\n",
" tuning_steps: 1000<xarray.Dataset> Size: 992kB\n",
"Dimensions: (chain: 4, draw: 2000)\n",
"Coordinates:\n",
" * chain (chain) int64 32B 0 1 2 3\n",
" * draw (draw) int64 16kB 0 1 2 3 4 ... 1996 1997 1998 1999\n",
"Data variables: (12/17)\n",
" n_steps (chain, draw) float64 64kB 47.0 11.0 ... 35.0 23.0\n",
" acceptance_rate (chain, draw) float64 64kB 0.9899 0.9385 ... 1.0\n",
" reached_max_treedepth (chain, draw) bool 8kB False False ... False False\n",
" lp (chain, draw) float64 64kB -4.051e+03 ... -4.051e+03\n",
" process_time_diff (chain, draw) float64 64kB 0.02783 ... 0.03698\n",
" diverging (chain, draw) bool 8kB False False ... False False\n",
" ... ...\n",
" tree_depth (chain, draw) int64 64kB 6 4 6 6 5 2 ... 3 5 5 2 6 5\n",
" energy (chain, draw) float64 64kB 4.052e+03 ... 4.052e+03\n",
" perf_counter_start (chain, draw) float64 64kB 7.433e+04 ... 7.471e+04\n",
" index_in_trajectory (chain, draw) int64 64kB -19 5 -50 10 ... -27 1 16 -9\n",
" step_size_bar (chain, draw) float64 64kB 0.05982 ... 0.06508\n",
" perf_counter_diff (chain, draw) float64 64kB 0.04164 0.00677 ... 0.0375\n",
"Attributes:\n",
" created_at: 2024-07-01T15:48:01.937737\n",
" arviz_version: 0.17.1\n",
" inference_library: pymc\n",
" inference_library_version: 5.14.0\n",
" sampling_time: 448.93818640708923\n",
" tuning_steps: 1000<xarray.Dataset> Size: 45kB\n",
"Dimensions: (index: 946)\n",
"Coordinates:\n",
" * index (index) int64 8kB 0 1 5 6 8 10 ... 2347 2348 2349 2353 2355\n",
"Data variables:\n",
" frequency (index) int64 8kB 2 1 7 1 2 5 10 1 3 2 ... 1 2 1 2 7 1 2 5 4\n",
" recency (index) float64 8kB 30.43 1.71 29.43 ... 21.86 24.29 26.57\n",
" T (index) float64 8kB 38.86 38.86 38.86 ... 27.0 27.0 27.0\n",
" monetary_value (index) float64 8kB 22.35 11.77 73.74 ... 18.56 44.93 33.32\n",
" customer_id (index) int64 8kB 0 1 5 6 8 10 ... 2347 2348 2349 2353 2355| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "mcse_mean | \n", "mcse_sd | \n", "ess_bulk | \n", "ess_tail | \n", "r_hat | \n", "
|---|---|---|---|---|---|---|---|---|---|
| p | \n", "6.403 | \n", "1.303 | \n", "4.217 | \n", "8.710 | \n", "0.032 | \n", "0.023 | \n", "1686.0 | \n", "1783.0 | \n", "1.0 | \n", "
| q | \n", "3.781 | \n", "0.290 | \n", "3.238 | \n", "4.338 | \n", "0.007 | \n", "0.005 | \n", "1753.0 | \n", "2187.0 | \n", "1.0 | \n", "
| v | \n", "16.051 | \n", "4.243 | \n", "8.050 | \n", "23.907 | \n", "0.107 | \n", "0.076 | \n", "1563.0 | \n", "1746.0 | \n", "1.0 | \n", "
| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "
|---|---|---|---|---|
| x[0] | \n", "24.706 | \n", "0.512 | \n", "23.839 | \n", "25.762 | \n", "
| x[1] | \n", "18.994 | \n", "1.311 | \n", "16.641 | \n", "21.547 | \n", "
| x[2] | \n", "35.195 | \n", "0.924 | \n", "33.447 | \n", "36.887 | \n", "
| x[3] | \n", "35.195 | \n", "0.924 | \n", "33.447 | \n", "36.887 | \n", "
| x[4] | \n", "35.195 | \n", "0.924 | \n", "33.447 | \n", "36.887 | \n", "
| x[5] | \n", "71.387 | \n", "0.599 | \n", "70.165 | \n", "72.409 | \n", "
| x[6] | \n", "18.994 | \n", "1.311 | \n", "16.641 | \n", "21.547 | \n", "
| x[7] | \n", "35.195 | \n", "0.924 | \n", "33.447 | \n", "36.887 | \n", "
| x[8] | \n", "27.318 | \n", "0.394 | \n", "26.627 | \n", "28.107 | \n", "
| x[9] | \n", "35.195 | \n", "0.924 | \n", "33.447 | \n", "36.887 | \n", "
| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "
|---|---|---|---|---|
| x | \n", "35.268 | \n", "0.629 | \n", "34.107 | \n", "36.442 | \n", "