Source code for pastas.stats.sgi

"""This module contains methods to compute the Standardized Groundwater Index."""

from numpy import array, linspace
from pandas import DataFrame, Series
from scipy.stats import norm


[docs]def sgi(series: Series, timescale_months: int = 1) -> Series: """Method to compute the Standardized Groundwater Index (SGI) :cite:t:`bloomfield_analysis_2013`. Parameters ---------- series: pandas.Series or Pandas.DataFrame Pandas time series of the groundwater levels for which the SGI is to be determined timescale_months: integer, optional Length of the aggregation period in months (default: 1; allowed: 1, 2, 3) Returns ------- sgi_series: pandas.Series or Pandas.DataFrame Pandas time series of the groundwater levels. Time series index should be a pandas DatetimeIndex. Notes ----- The Standardized Groundwater Index (SGI) is a non-parametric method to standardize groundwater levels. The SGI is calculated for each aggregation period within the year separately. The data within that period in all years in the series are used to determine the reference for which the index is calculated for each value in that period. The SGI is a dimensionless index and is used to compare groundwater levels across different wells. It may be useful to resample the time series to a monthly interval before computing the SGI. """ if timescale_months not in (1, 2, 3): raise ValueError( "SGI can only be called with timescale_months = 1, 2, or 3; not" + str(timescale_months) ) if isinstance(series, DataFrame): series = series.apply(sgi, timescale_months=timescale_months) elif isinstance(series, Series): # Create a copy to ensure series is untouched. # Set dtype to avoid conflict when assigning SGI values series = series.copy().dropna().astype(float) # Loop over the months for month in range(1, 13, timescale_months): sel = array(range(timescale_months)) + month data = series[series.index.month.isin(sel)] n = data.size # Number of observations pmin = 1 / (2 * n) pmax = 1 - pmin sgi_values = norm.ppf(linspace(pmin, pmax, n)) series.loc[data.sort_values().index] = sgi_values return series