# Source code for apode.plots

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of the
#   Apode Project (https://github.com/ngrion/apode).
# Copyright (c) 2020, Néstor Grión and Sofía Sappia
# License: MIT
#   Full Text: https://github.com/ngrion/apode/blob/master/LICENSE.txt

# =============================================================================
# DOCS
# =============================================================================

"""Plots for Apode."""

# =============================================================================
# IMPORTS
# =============================================================================
import attr

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd


# =============================================================================
# CONSTANTS
# =============================================================================
# Default figure size, in inches, applied when a plot method is called
# without an ``ax`` and therefore draws on the current figure.
DEFAULT_WIDTH = 5
DEFAULT_HEIGHT = 4


# =============================================================================
# CLASSES
# =============================================================================


@attr.s(frozen=True)
class PlotAccessor:
    """Plots for Apode.

    The following plots are implemented:

    - hist : Histogram (default)
    - lorenz : Lorenz curve (relative, generalized, absolute)
    - pen : Pen Parade
    - tip : Tip curve

    Any attribute that is not defined on this class (``hist`` included) is
    looked up on ``idf.data.plot``.

    Parameters
    ----------
    method : String
        Plot type.
    **kwargs
        Arbitrary keyword arguments.

    Attributes
    ----------
    idf : object
        Data holder. Must expose ``data`` (indexable by column name and
        providing a ``plot`` accessor; presumably a pandas DataFrame) and
        ``income_column`` (the name of the column to analyse).

    """

    idf = attr.ib()

    def __call__(self, method=None, **kwargs):
        """Draw the plot named ``method`` and return its result.

        Parameters
        ----------
        method : str, optional (default="hist")
            Name of the plot. It is resolved with ``getattr`` on this
            accessor, so names not defined here fall through to
            ``idf.data.plot``.
        **kwargs
            Forwarded unchanged to the selected plot method.

        Returns
        -------
        out
            Whatever the selected plot method returns.

        """
        method = "hist" if method is None else method
        method_func = getattr(self, method)
        return method_func(**kwargs)

    # NOTE: behaviour for n=0,1 is still to be checked (empty data divides
    # by zero below).
    def _lorenz_data(self, alpha="r"):
        """Lorenz Curve data.

        Parameters
        ----------
        alpha : str, optional (default="r")
            "r" relative, "g" generalized, "a" absolute. Any other value
            is not rejected here and yields the relative curve.

        Returns
        -------
        pandas.DataFrame
            ``n + 1`` rows with columns "population" (cumulative population
            share), "variable" (the curve, starting at 0) and "line" (the
            reference line drawn next to the curve).

        """
        y = self.idf.data[self.idf.income_column].values
        y = np.sort(y)
        n = len(y)
        z = np.cumsum(y) / y.sum()
        q = np.arange(0, n + 1) / n
        qd = q
        if alpha == "g":
            # Generalized: curve and reference line are scaled by the mean.
            mu = np.mean(y)
            z = z * mu
            qd = q * mu
        elif alpha == "a":
            # Absolute: cumulative deviation from the mean, against zero.
            mu = np.mean(y)
            qd = q * 0
            z = np.cumsum(y - mu)
        # Prepend the origin so the curve has one point per entry of ``q``.
        z = np.insert(z, 0, 0)
        return pd.DataFrame({"population": q, "variable": z, "line": qd})

    # NOTE: behaviour for n=0,1 is still to be checked.
    def _pen_data(self, pline=None):
        """Pen Parade Curve data.

        Parameters
        ----------
        pline : float, optional
            Accepted for signature compatibility; not used here.

        Returns
        -------
        df : pandas.DataFrame
            ``n + 1`` rows with columns "population" (cumulative population
            share), "variable" (sorted values divided by the median,
            starting at 0) and "line" (mean divided by the median).
        me : float
            Median of the variable, used for the normalisation.

        """
        y = self.idf.data[self.idf.income_column].values
        y = np.sort(y)
        n = len(y)
        me = np.median(y)
        q = np.arange(0, n + 1) / n
        mu = np.mean(y)
        qd = np.ones(n + 1) * mu / me
        z = np.copy(y) / me
        z = np.insert(z, 0, 0)
        return pd.DataFrame({"population": q, "variable": z, "line": qd}), me

    # NOTE: behaviour for n=0,1 is still to be checked.
    def _tip_data(self, pline):
        """TIP Curve data.

        Parameters
        ----------
        pline : float
            Poverty line. Must be >= 0.

        Returns
        -------
        pandas.DataFrame
            ``n + 1`` rows with columns "population" (cumulative population
            share) and "variable" (cumulated relative poverty gaps divided
            by ``n``, starting at 0).

        Raises
        ------
        ValueError
            If ``pline`` is negative.

        """
        if pline < 0:
            raise ValueError(f"'pline' must be >= 0. Found '{pline}'")
        y = self.idf.data[self.idf.income_column].values
        ys = np.sort(y)
        n = len(ys)
        # Number of observations strictly below the line; because ``ys`` is
        # sorted they are exactly the first ``q`` entries.
        q = np.count_nonzero(ys < pline)
        ygap = np.zeros(n)
        ygap[0:q] = (pline - ys[0:q]) / pline

        z = np.cumsum(ygap) / n
        z = np.insert(z, 0, 0)
        p = np.arange(0, n + 1) / n
        return pd.DataFrame({"population": p, "variable": z})

    def lorenz(self, alpha="r", ax=None, **kwargs):
        """Lorenz Curve.

        A Lorenz curve is a graphical representation of the distribution
        of income or wealth within a population. Lorenz curves graph
        percentiles of the population against cumulative income or wealth
        of people at or below that percentile. [13]_

        Parameters
        ----------
        alpha: string, optional(default='r')
            Options are 'r': relative, 'g': generalized, 'a': absolute.
        ax: axes object, optional
            Axes to draw on. If omitted, the current axes are used and the
            current figure is resized to the default size.
        **kwargs
            Forwarded to both ``ax.plot`` calls (curve and reference line).

        Returns
        -------
        out: plot
            Matplotlib plot

        Raises
        ------
        ValueError
            If ``alpha`` is not one of 'r', 'g' or 'a'.

        References
        ----------
        .. [13] Lorenz, M. O. (1905). Methods for measuring concentration
           of wealth. Journal of the American Statistical Association
           9, 209-219.

        """
        # Validate before touching the figure so that a bad ``alpha`` does
        # not leave a half-drawn plot behind.
        if alpha not in ("r", "g", "a"):
            raise ValueError(
                f"'alpha' must be either 'r', 'g' or 'a'. Found '{alpha}'"
            )
        df = self._lorenz_data(alpha)
        q = df.population
        z = df.variable
        qd = df.line
        if ax is None:
            ax = plt.gca()
            fig = plt.gcf()
            fig.set_size_inches(h=DEFAULT_HEIGHT, w=DEFAULT_WIDTH)
        ax.plot(q, z, **kwargs)
        ax.plot(q, qd, **kwargs)
        ax.set_xlabel("Cumulative % of population")
        if alpha == "r":
            ax.set_ylabel("Cumulative % of variable")
            ax.set_title("Lorenz Curve")
        elif alpha == "g":
            ax.set_ylabel("Scaled Cumulative % of variable")
            ax.set_title("Generalized Lorenz Curve")
        else:  # alpha == "a"
            ax.set_ylabel("Cumulative deviation")
            ax.set_title("Absolut Lorenz Curve")
        return ax

    def pen(self, pline=None, ax=None, **kwargs):
        """Pen Parade Curve.

        Pen's Parade or The Income Parade is a concept described in a 1971 book
        published by Dutch economist Jan Pen describing income distribution.
        The parade is defined as a succession of every person in the economy,
        with their height proportional to their income, and ordered from lowest
        to greatest. [14]_

        Parameters
        ----------
        pline: float, optional
            Poverty line. If given, it is drawn as an extra horizontal
            line, divided by the median like the rest of the plot.
        ax: axes object, optional
            Axes to draw on. If omitted, the current axes are used and the
            current figure is resized to the default size.
        **kwargs
            Forwarded to the ``ax.plot`` calls for the parade and the mean
            line (not to the poverty line).

        Returns
        -------
        out: plot
            Matplotlib plot

        References
        ----------
        .. [14] Pen, J. (1971). Income Distribution. London: Allen Lane,
           The Penguin Press.

        """
        df, me = self._pen_data(pline=pline)
        q = df.population
        z = df.variable
        qd = df.line
        if ax is None:
            ax = plt.gca()
            fig = plt.gcf()
            fig.set_size_inches(h=DEFAULT_HEIGHT, w=DEFAULT_WIDTH)
        ax.plot(q, z, **kwargs)
        ax.plot(q, qd, label="Mean", **kwargs)
        if pline is not None:
            qpl = np.ones(len(z)) * pline / me
            ax.plot(q, qpl, label="Poverty line")
        ax.set_xlabel("Cumulative % of population")
        ax.set_ylabel("Medianized variable")
        ax.set_title("Pen's Parade")
        ax.legend()
        return ax

    def tip(self, pline, ax=None, **kwargs):
        """TIP Curve.

        Three 'I's of Poverty (TIP) curves, based on distributions
        of poverty gaps, provide evocative graphical summaries of
        the incidence, intensity, and inequality dimensions of
        poverty, and a means for checking for unanimous poverty
        orderings according to a wide class of poverty indices. [15]_

        Parameters
        ----------
        pline: float
            Poverty line. Must be >= 0.
        ax: axes object, optional
            Axes to draw on. If omitted, the current axes are used and the
            current figure is resized to the default size.
        **kwargs
            Forwarded to ``ax.plot``.

        Returns
        -------
        out: plot
            Matplotlib plot

        Raises
        ------
        ValueError
            If ``pline`` is negative.

        References
        ----------
        .. [15] Jenkins S. P., Lambert P., 1997. Three “I’s of Poverty”
           Curves, with an Analysis of UK Poverty Trends, Oxford
           Economic Papers, 49, pp. 317-327.

        """
        df = self._tip_data(pline)
        p = df.population
        z = df.variable
        if ax is None:
            ax = plt.gca()
            fig = plt.gcf()
            fig.set_size_inches(h=DEFAULT_HEIGHT, w=DEFAULT_WIDTH)
        ax.plot(p, z, **kwargs)
        ax.set_title("TIP Curve")
        ax.set_ylabel("Cumulated poverty gaps")
        ax.set_xlabel("Cumulative % of population")
        return ax

    def __getattr__(self, aname):
        """Apply Plot method.

        Called only when normal attribute lookup fails; the name is then
        resolved on ``idf.data.plot``.

        Raises
        ------
        AttributeError
            If ``aname`` is ``"idf"`` (the instance has no data holder
            yet) or if ``idf.data.plot`` has no such attribute.

        """
        # ``idf`` can only reach this hook when it is not set on the
        # instance (e.g. an object created through ``__new__`` by copy or
        # pickle). Reading ``self.idf`` below would then re-enter this
        # method without end, so fail with a regular AttributeError.
        if aname == "idf":
            raise AttributeError(aname)
        return getattr(self.idf.data.plot, aname)