Methods

Every forecaster class exposes a `predict` method, and the method shares the same core parameterization across the forecaster classes.
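
The snippet below is a minimal sketch of that shared interface, based on the examples later on this page: instantiate a forecaster, then call `predict` with a `ds`/`y` dataframe.

```py
import tablespoon as tbsp

# The built-in forecasters are instantiated the same way and expose the same
# predict() call pattern (Mean does not take a lag argument).
mean = tbsp.Mean()
naive = tbsp.Naive()
snaive = tbsp.Snaive()
```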

tablespoon.forecasters.Mean.predict(df_historical, horizon=30, frequency=None, uncertainty_samples=5000, include_history=False)

Predict - forecast method

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| df_historical | pd.DataFrame | A date-sorted dataframe with the columns `ds` and `y` | required |
| horizon | int | Forecast horizon. Defaults to 30. | 30 |
| frequency | str | pandas date frequency alias (e.g. 'D', 'M'). Defaults to None. | None |
| uncertainty_samples | int | Number of uncertainty samples to draw. Defaults to 5000. | 5000 |
| include_history | bool | Include history. Defaults to False. | False |

Returns:

| Type | Description |
|------|-------------|
| pd.DataFrame | A dataframe of predictions as `y_sim` |

Example
import pandas as pd
import tablespoon as tbsp
from tablespoon.data import APPL
df_APPLE = APPL
df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
mean = tbsp.Mean()
df_f = (mean.predict(df_APPLE, horizon=7*4, frequency="D", uncertainty_samples=500).assign(model='mean'))
df_f.head(10)
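
The draws come back in long format (one row per date and replicate). A quick way to turn them into a point forecast and interval is to summarize `y_sim` by date; the snippet below is a rough sketch (not part of tablespoon) that assumes the `df_f` produced above.

```py
# Summarize simulated draws into a median and an 80% interval per date.
# Assumes df_f has the columns ds, rep, y_sim returned by predict().
df_summary = (
    df_f.groupby("ds")["y_sim"]
    .quantile([0.1, 0.5, 0.9])
    .unstack()
    .rename(columns={0.1: "lo_80", 0.5: "median", 0.9: "hi_80"})
    .reset_index()
)
print(df_summary.head())
```
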
Source code in tablespoon/forecasters.py
def predict(
    self,
    df_historical,
    horizon=30,
    frequency=None,
    uncertainty_samples=5000,
    include_history=False,
):
    """Predict - forecast method

    Args:
        df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
        horizon (int, optional): Forecast horizon. Defaults to 30.
        frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
        uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
        include_history (bool, optional): include history. Defaults to False.

    Returns:
        pd.DataFrame: A dataframe of predictions as `y_sim`

    Example:
        ```py
        import pandas as pd
        import tablespoon as tbsp
        from tablespoon.data import APPL
        df_APPLE = APPL
        df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
        mean = tbsp.Mean()
        df_f = (mean.predict(df_APPLE, horizon=7*4, frequency="D", uncertainty_samples=500).assign(model='mean'))
        df_f.head(10)
        ```
    """
    self.y = df_historical["y"]
    self.history_dates = get_sorted_dates(df_historical)
    last_date = self.history_dates.max()
    min_date = self.history_dates.min()
    check_historical_dates_are_contiguous(
        self.history_dates, min_date, last_date, frequency
    )
    dates = pd.date_range(
        start=last_date, periods=horizon + 1, freq=frequency
    )  # An extra in case we include start  # 'M','D', etc.
    dates = dates[dates > last_date]  # Drop start if equals last_date
    dates = dates[:horizon]  # Return correct number of periods
    if include_history:
        dates = np.concatenate((np.array(self.history_dates), dates))
    df_dates = pd.DataFrame({"ds": dates})
    df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
    df_cross = df_dates.merge(df_samples, how="cross")
    # fit
    y = self.y
    T = len(y)
    deg_freedom = T - 1
    mu, sigma = norm.fit(y)
    rng = np.random.default_rng()
    forecast = np.empty([uncertainty_samples, horizon])
    for h in range(0, horizon):
        forecast[:, h] = mu + sigma * np.sqrt(1 + (1 / T)) * rng.standard_t(
            df=deg_freedom, size=uncertainty_samples
        )
    np_predictions = forecast.transpose().reshape(
        uncertainty_samples * horizon, 1
    )
    df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
    df_result = pd.concat([df_cross, df_pred], axis=1)
    return df_result
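
As the source shows, each horizon's draws come from a location-scale Student-t centered at the historical mean; the block below is a sketch of the sampling model the loop implements.

```latex
% Sketch of the sampling model implemented by the loop above:
% \bar{y}, \hat{\sigma} are the fitted mean and standard deviation of the T
% historical values, and t_{T-1} is a standard Student-t draw with T-1 degrees
% of freedom.
\tilde{y}_{T+h} = \bar{y} + \hat{\sigma}\,\sqrt{1 + \tfrac{1}{T}}\; t_{T-1},
\qquad h = 1, \dots, H
```
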

tablespoon.forecasters.Naive.predict(df_historical, horizon=30, frequency=None, lag=1, uncertainty_samples=5000, include_history=False)

Predict - forecast method

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| df_historical | pd.DataFrame | A date-sorted dataframe with the columns `ds` and `y` | required |
| horizon | int | Forecast horizon. Defaults to 30. | 30 |
| frequency | str | pandas date frequency alias (e.g. 'D', 'M'). Defaults to None. | None |
| lag | int | Number of rows to lag. Defaults to 1. | 1 |
| uncertainty_samples | int | Number of uncertainty samples to draw. Defaults to 5000. | 5000 |
| include_history | bool | Include history. Defaults to False. | False |

Returns:

| Type | Description |
|------|-------------|
| pd.DataFrame | A dataframe of predictions as `y_sim` |

Example
import pandas as pd
import tablespoon as tbsp
from tablespoon.data import APPL
df_APPLE = APPL
df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
naive = tbsp.Naive()
df_f = (naive.predict(df_APPLE, horizon=7*4, frequency="D", lag = 1, uncertainty_samples = 500).assign(model = 'naive'))
df_f.head(10)
Source code in tablespoon/forecasters.py
def predict(
    self,
    df_historical,
    horizon=30,
    frequency=None,
    lag=1,
    uncertainty_samples=5000,
    include_history=False,
):
    """Predict - forecast method

    Args:
        df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
        horizon (int, optional): Forecast horizon. Defaults to 30.
        frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
        lag (int, optional): number of rows to lag. Defaults to 1.
        uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
        include_history (bool, optional): include history. Defaults to False.

    Returns:
        pd.DataFrame: A dataframe of predictions as `y_sim`

    Example:
        ```py
        import pandas as pd
        import tablespoon as tbsp
        from tablespoon.data import APPL
        df_APPLE = APPL
        df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
        naive = tbsp.Naive()
        df_f = (naive.predict(df_APPLE, horizon=7*4, frequency="D", lag = 1, uncertainty_samples = 500).assign(model = 'naive'))
        df_f.head(10)
        ```
    """
    if frequency is None:
        send_helpful_frequency_error()
    self.y = df_historical["y"]
    self.history_dates = get_sorted_dates(df_historical)
    last_date = self.history_dates.max()
    min_date = self.history_dates.min()
    check_historical_dates_are_contiguous(
        self.history_dates, min_date, last_date, frequency
    )
    dates = pd.date_range(
        start=last_date, periods=horizon + 1, freq=frequency
    )  # An extra in case we include start  # 'M','D', etc.
    dates = dates[dates > last_date]  # Drop start if equals last_date
    dates = dates[:horizon]  # Return correct number of periods
    if include_history:
        dates = np.concatenate((np.array(self.history_dates), dates))
    df_dates = pd.DataFrame({"ds": dates})
    df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
    df_cross = df_dates.merge(df_samples, how="cross")
    # fit
    t = lag + 1
    t_lag = t - lag
    y = self.y.to_numpy()
    end = len(y) - lag
    yt = y[t:]
    yt_lag = y[t_lag:end]
    y_last = y.take(-1)
    mod = sm.GLM(yt, yt_lag, family=sm.families.Gaussian())
    sigma = np.sqrt(mod.fit().scale)
    rng = np.random.default_rng()
    forecast = np.empty([uncertainty_samples, horizon])
    for h in range(0, horizon):
        forecast[:, h] = y_last + sigma * np.sqrt(h + 1) * rng.standard_normal(
            uncertainty_samples
        )
    np_predictions = forecast.transpose().reshape(
        uncertainty_samples * horizon, 1
    )
    df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
    df_result = pd.concat([df_cross, df_pred], axis=1)
    return df_result
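
The loop above centers every horizon on the last observation and lets the spread grow with the square root of the horizon; the block below is a sketch of the sampling model it implements.

```latex
% Sketch of the random-walk sampling model implemented by the loop above:
% y_T is the last observation, \hat{\sigma} the residual scale of the lag-1 fit,
% and Z a standard normal draw.
\tilde{y}_{T+h} = y_T + \hat{\sigma}\,\sqrt{h}\; Z, \qquad h = 1, \dots, H
```
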

tablespoon.forecasters.Snaive.predict(df_historical, horizon=30, frequency=None, lag=7, uncertainty_samples=5000, include_history=False)

Predict - forecast method

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| df_historical | pd.DataFrame | A date-sorted dataframe with the columns `ds` and `y` | required |
| horizon | int | Forecast horizon. Defaults to 30. | 30 |
| frequency | str | pandas date frequency alias (e.g. 'D', 'M'). Defaults to None. | None |
| lag | int | Number of rows that make a seasonal period. Defaults to 7 (one week of daily data). | 7 |
| uncertainty_samples | int | Number of uncertainty samples to draw. Defaults to 5000. | 5000 |
| include_history | bool | Include history. Defaults to False. | False |

Returns:

| Type | Description |
|------|-------------|
| pd.DataFrame | A dataframe of predictions as `y_sim` |

Example
import tablespoon as tbsp
from tablespoon.data import SEAS
sn = tbsp.Snaive()
df_f = sn.predict(SEAS, horizon=7 * 4, frequency="D", lag=7, uncertainty_samples=800).assign(model="snaive")
df_f.head(10)
Source code in tablespoon/forecasters.py
def predict(
    self,
    df_historical,
    horizon=30,
    frequency=None,
    lag=7,
    uncertainty_samples=5000,
    include_history=False,
):
    """Predict - forecast method

    Args:
        df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
        horizon (int, optional): Forecast horizon. Defaults to 30.
        frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
        lag (int, optional): number of rows that make a seasonal period. Defaults to 7 (one week of daily data).
        uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
        include_history (bool, optional): include history. Defaults to False.

    Returns:
        pd.DataFrame: A dataframe of predictions as `y_sim`

    Example:
        ```py
        import tablespoon as tbsp
        from tablespoon.data import SEAS
        sn = tbsp.Snaive()
        df_f = sn.predict(SEAS, horizon=7 * 4, frequency="D", lag=7, uncertainty_samples=800).assign(model="snaive")
        df_f.head(10)
        ```
    """
    self.y = df_historical["y"]
    self.history_dates = get_sorted_dates(df_historical)
    last_date = self.history_dates.max()
    min_date = self.history_dates.min()
    check_historical_dates_are_contiguous(
        self.history_dates, min_date, last_date, frequency
    )
    dates = pd.date_range(
        start=last_date, periods=horizon + 1, freq=frequency
    )  # An extra in case we include start  # 'M','D', etc.
    dates = dates[dates > last_date]  # Drop start if equals last_date
    dates = dates[:horizon]  # Return correct number of periods
    if include_history:
        dates = np.concatenate((np.array(self.history_dates), dates))
    df_dates = pd.DataFrame({"ds": dates})
    df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
    df_cross = df_dates.merge(df_samples, how="cross")
    # fit
    y = self.y.to_numpy()
    last_start = len(y) - lag
    last_end = len(y)
    yt = y[lag:last_end]
    yt_lag = y[0:last_start]
    mod = sm.GLM(yt, yt_lag, family=sm.families.Gaussian())
    sigma = np.sqrt(mod.fit().scale)
    rng = np.random.default_rng()
    forecast = np.empty([uncertainty_samples, horizon])
    for h in range(0, horizon):
        forecast[:, h] = y[len(y) - (lag - (h % lag))] + sigma * np.sqrt(
            np.trunc(h / lag) + 1
        ) * rng.standard_normal(uncertainty_samples)
    np_predictions = forecast.transpose().reshape(
        uncertainty_samples * horizon, 1
    )
    df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
    df_result = pd.concat([df_cross, df_pred], axis=1)
    return df_result
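
In effect the loop repeats the last observed seasonal cycle and widens the interval after each completed cycle; the block below is a sketch of the sampling model the indexing implements.

```latex
% Sketch of the seasonal naive sampling model implemented above:
% m is the seasonal lag, k = \lfloor (h - 1)/m \rfloor the number of completed
% cycles at horizon h, \hat{\sigma} the residual scale of the seasonal-lag fit,
% and Z a standard normal draw.
\tilde{y}_{T+h} = y_{T + h - m(k+1)} + \hat{\sigma}\,\sqrt{k + 1}\; Z
```
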

CV Class

tablespoon.model_selection.TimeSeriesInitialSplit

Time Series cross-validator with initial period

Provides time series splits for rolling-origin cross-validation. Users may set an initial training period, a gap size, and an increment size.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| initial | int | Size of the initial training period (the number of samples in the first training set). | 7 * 3 |
| increment_size | int | Size of the test set added at each iteration. | 7 |
| gap | int | Number of samples to exclude from the end of each train set before the test set. | 0 |

Examples:

import numpy as np
from tablespoon.model_selection import TimeSeriesInitialSplit
X = np.arange(0,50)
tscv = TimeSeriesInitialSplit()
for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]

> TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20] TEST: [21 22 23 24 25 26 27]
> TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27] TEST: [28 29 30 31 32 33 34]
> TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34] TEST: [35 36 37 38 39 40 41]
> TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41] TEST: [42 43 44 45 46 47 48]
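
The splitter yields positional indices, so with a `ds`/`y` dataframe the slices go through `.iloc`. The loop below is a rough backtesting sketch (not part of tablespoon) that reuses the `SEAS` data and `Snaive` forecaster from the earlier examples, assuming `SEAS` has enough daily history for the chosen initial window.

```py
import tablespoon as tbsp
from tablespoon.data import SEAS
from tablespoon.model_selection import TimeSeriesInitialSplit

# Hypothetical rolling-origin backtest: refit the seasonal naive forecaster on
# each expanding training window and forecast one increment (7 days) ahead.
sn = tbsp.Snaive()
tscv = TimeSeriesInitialSplit(initial=7 * 8, increment_size=7, gap=0)
for train_index, test_index in tscv.split(SEAS):
    df_train = SEAS.iloc[train_index]
    df_test = SEAS.iloc[test_index]
    df_f = sn.predict(df_train, horizon=len(test_index), frequency="D", lag=7,
                      uncertainty_samples=200)
    # Compare the simulated draws with the held-out actuals, e.g. by merging on ds.
    print(df_train.shape, df_test.shape, df_f.shape)
```
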
Source code in tablespoon/model_selection.py
class TimeSeriesInitialSplit():
    """Time Series cross-validator with initial period

    Provides time series splits for rolling-origin
    cross-validation. Users may set an initial training
    period, a gap size, and an increment size.

    Parameters:
        initial : int, default=21
            Size of the initial training period.
        increment_size : int, default=7
            Sets the size of the test set to be added at each iteration
        gap : int, default=0
            Number of samples to exclude from the end of each train set before
            the test set.
    Examples:
    ```py
    import numpy as np
    from tablespoon.model_selection import TimeSeriesInitialSplit
    X = np.arange(0,50)
    tscv = TimeSeriesInitialSplit()
    for train_index, test_index in tscv.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]

    > TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20] TEST: [21 22 23 24 25 26 27]
    > TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27] TEST: [28 29 30 31 32 33 34]
    > TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34] TEST: [35 36 37 38 39 40 41]
    > TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41] TEST: [42 43 44 45 46 47 48]
    ```
    """

    def __init__(self, initial=7 * 3, increment_size=7, gap=0):
        self.initial = initial
        self.increment_size = increment_size
        self.gap = gap

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        Yields
        ------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        """
        n_samples = len(X)
        initial = self.initial
        gap = self.gap
        increment_size = self.increment_size

        # Make sure we have enough samples for the given split parameters
        if initial > n_samples:
            raise ValueError(
                f"Cannot have number of initial_size={initial} greater"
                f" than the number of samples={n_samples}."
            )
        if n_samples - initial - increment_size - gap < 0:
            raise ValueError(
                f"Size of initial + increment_size + gap too large given sample"
                f"={n_samples} with initial={initial} increment_size={increment_size} and gap={gap}."
            )

        indices = arange(n_samples)
        test_starts = range(initial, n_samples, increment_size)
        for test_start in test_starts:
            test = indices[test_start + gap: test_start + increment_size + gap]
            if len(test) < increment_size:
                # break if the test set is smaller than a complete increment_size
                break
            else:
                yield (
                    indices[:test_start],
                    indices[test_start + gap: test_start + increment_size + gap],
                )

Forecaster Classes

tablespoon.forecasters.Naive

Bases: object

Naive Forecaster

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| object | None | instantiates a Naive Forecast object | required |
Source code in tablespoon/forecasters.py
class Naive(object):
    """Naive Forecaster

    Args:
        object (None): instantiates a Naive Forecast object
    """

    def __init__(
        self,
        history_dates=None,
        include_history=False,
        y=None,
    ):
        self.include_history = include_history
        self.history_dates = history_dates

    def predict(
        self,
        df_historical,
        horizon=30,
        frequency=None,
        lag=1,
        uncertainty_samples=5000,
        include_history=False,
    ):
        """Predict - forecast method

        Args:
            df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
            horizon (int, optional): Forecast horizon. Defaults to 30.
            frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
            lag (int, optional): number of rows to lag. Defaults to 1.
            uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
            include_history (bool, optional): include history. Defaults to False.

        Returns:
            pd.DataFrame: A dataframe of predictions as `y_sim`

        Example:
            ```py
            import pandas as pd
            import tablespoon as tbsp
            from tablespoon.data import APPL
            df_APPLE = APPL
            df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
            naive = tbsp.Naive()
            df_f = (naive.predict(df_APPLE, horizon=7*4, frequency="D", lag = 1, uncertainty_samples = 500).assign(model = 'naive'))
            df_f.head(10)
            ```
        """
        if frequency is None:
            send_helpful_frequency_error()
        self.y = df_historical["y"]
        self.history_dates = get_sorted_dates(df_historical)
        last_date = self.history_dates.max()
        min_date = self.history_dates.min()
        check_historical_dates_are_contiguous(
            self.history_dates, min_date, last_date, frequency
        )
        dates = pd.date_range(
            start=last_date, periods=horizon + 1, freq=frequency
        )  # An extra in case we include start  # 'M','D', etc.
        dates = dates[dates > last_date]  # Drop start if equals last_date
        dates = dates[:horizon]  # Return correct number of periods
        if include_history:
            dates = np.concatenate((np.array(self.history_dates), dates))
        df_dates = pd.DataFrame({"ds": dates})
        df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
        df_cross = df_dates.merge(df_samples, how="cross")
        # fit
        t = lag + 1
        t_lag = t - lag
        y = self.y.to_numpy()
        end = len(y) - lag
        yt = y[t:]
        yt_lag = y[t_lag:end]
        y_last = y.take(-1)
        mod = sm.GLM(yt, yt_lag, family=sm.families.Gaussian())
        sigma = np.sqrt(mod.fit().scale)
        rng = np.random.default_rng()
        forecast = np.empty([uncertainty_samples, horizon])
        for h in range(0, horizon):
            forecast[:, h] = y_last + sigma * np.sqrt(h + 1) * rng.standard_normal(
                uncertainty_samples
            )
        np_predictions = forecast.transpose().reshape(
            uncertainty_samples * horizon, 1
        )
        df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
        df_result = pd.concat([df_cross, df_pred], axis=1)
        return df_result

tablespoon.forecasters.Snaive

Bases: object

Seasonal Naive Forecaster

Source code in tablespoon/forecasters.py
class Snaive(object):
    """
    Seasonal Naive Forecaster
    """

    def __init__(
        self,
        history_dates=None,
        include_history=False,
        y=None,
    ):
        self.include_history = include_history
        self.history_dates = history_dates

    def predict(
        self,
        df_historical,
        horizon=30,
        frequency=None,
        lag=7,
        uncertainty_samples=5000,
        include_history=False,
    ):
        """Predict - forecast method

        Args:
            df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
            horizon (int, optional): Forecast horizon. Defaults to 30.
            frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
            lag (int, optional): number of rows that make a seasonal period. Defaults to 7 (one week of daily data).
            uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
            include_history (bool, optional): include history. Defaults to False.

        Returns:
            pd.DataFrame: A dataframe of predictions as `y_sim`

        Example:
            ```py
            import tablespoon as tbsp
            from tablespoon.data import SEAS
            sn = tbsp.Snaive()
            df_f = sn.predict(SEAS, horizon=7 * 4, frequency="D", lag=7, uncertainty_samples=800).assign(model="snaive")
            df_f.head(10)
            ```
        """
        self.y = df_historical["y"]
        self.history_dates = get_sorted_dates(df_historical)
        last_date = self.history_dates.max()
        min_date = self.history_dates.min()
        check_historical_dates_are_contiguous(
            self.history_dates, min_date, last_date, frequency
        )
        dates = pd.date_range(
            start=last_date, periods=horizon + 1, freq=frequency
        )  # An extra in case we include start  # 'M','D', etc.
        dates = dates[dates > last_date]  # Drop start if equals last_date
        dates = dates[:horizon]  # Return correct number of periods
        if include_history:
            dates = np.concatenate((np.array(self.history_dates), dates))
        df_dates = pd.DataFrame({"ds": dates})
        df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
        df_cross = df_dates.merge(df_samples, how="cross")
        # fit
        y = self.y.to_numpy()
        last_start = len(y) - lag
        last_end = len(y)
        yt = y[lag:last_end]
        yt_lag = y[0:last_start]
        mod = sm.GLM(yt, yt_lag, family=sm.families.Gaussian())
        sigma = np.sqrt(mod.fit().scale)
        rng = np.random.default_rng()
        forecast = np.empty([uncertainty_samples, horizon])
        for h in range(0, horizon):
            forecast[:, h] = y[len(y) - (lag - (h % lag))] + sigma * np.sqrt(
                np.trunc(h / lag) + 1
            ) * rng.standard_normal(uncertainty_samples)
        np_predictions = forecast.transpose().reshape(
            uncertainty_samples * horizon, 1
        )
        df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
        df_result = pd.concat([df_cross, df_pred], axis=1)
        return df_result

tablespoon.forecasters.Mean

Bases: object

Mean Forecaster

Source code in tablespoon/forecasters.py
class Mean(object):
    """
    Mean Forecaster
    """

    def __init__(
        self,
        history_dates=None,
        include_history=False,
        y=None,
    ):
        self.include_history = include_history
        self.history_dates = history_dates

    def predict(
        self,
        df_historical,
        horizon=30,
        frequency=None,
        uncertainty_samples=5000,
        include_history=False,
    ):
        """Predict - forecast method

        Args:
            df_historical (pd.DataFrame): A date sorted dataframe with the columns `ds` and `y`
            horizon (int, optional): Forecast horizon. Defaults to 30.
            frequency (str, optional): pandas date frequency alias (e.g. 'D', 'M'). Defaults to None.
            uncertainty_samples (int, optional): number of uncertainty samples to draw. Defaults to 5000.
            include_history (bool, optional): include history. Defaults to False.

        Returns:
            pd.DataFrame: A dataframe of predictions as `y_sim`

        Example:
            ```py
            import pandas as pd
            import tablespoon as tbsp
            from tablespoon.data import APPL
            df_APPLE = APPL
            df_APPLE = df_APPLE.assign(ds = lambda df: pd.to_datetime(df.ds))
            mean = tbsp.Mean()
            df_f = (mean.predict(df_APPLE, horizon=7*4, frequency="D", uncertainty_samples=500).assign(model='mean'))
            df_f.head(10)
            ```
        """
        self.y = df_historical["y"]
        self.history_dates = get_sorted_dates(df_historical)
        last_date = self.history_dates.max()
        min_date = self.history_dates.min()
        check_historical_dates_are_contiguous(
            self.history_dates, min_date, last_date, frequency
        )
        dates = pd.date_range(
            start=last_date, periods=horizon + 1, freq=frequency
        )  # An extra in case we include start  # 'M','D', etc.
        dates = dates[dates > last_date]  # Drop start if equals last_date
        dates = dates[:horizon]  # Return correct number of periods
        if include_history:
            dates = np.concatenate((np.array(self.history_dates), dates))
        df_dates = pd.DataFrame({"ds": dates})
        df_samples = pd.DataFrame({"rep": np.arange(uncertainty_samples)})
        df_cross = df_dates.merge(df_samples, how="cross")
        # fit
        y = self.y
        T = len(y)
        deg_freedom = T - 1
        mu, sigma = norm.fit(y)
        rng = np.random.default_rng()
        forecast = np.empty([uncertainty_samples, horizon])
        for h in range(0, horizon):
            forecast[:, h] = mu + sigma * np.sqrt(1 + (1 / T)) * rng.standard_t(
                df=deg_freedom, size=uncertainty_samples
            )
        np_predictions = forecast.transpose().reshape(
            uncertainty_samples * horizon, 1
        )
        df_pred = pd.DataFrame(np_predictions, columns=["y_sim"])
        df_result = pd.concat([df_cross, df_pred], axis=1)
        return df_result
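
Since all three forecasters return the same long-format draws, their outputs can be stacked for side-by-side comparison. The snippet below is a minimal sketch based on the examples above (the `model` column is user-assigned, as in those examples), and it assumes the `SEAS` dates are already parsed as datetimes.

```py
import pandas as pd
import tablespoon as tbsp
from tablespoon.data import SEAS

# Fit all three forecasters on the same seasonal series and stack the draws.
forecasters = {"mean": tbsp.Mean(), "naive": tbsp.Naive(), "snaive": tbsp.Snaive()}
frames = []
for name, fc in forecasters.items():
    # Mean.predict takes no lag argument; Naive uses lag=1 and Snaive lag=7.
    kwargs = {} if name == "mean" else {"lag": 7 if name == "snaive" else 1}
    df_f = fc.predict(SEAS, horizon=7 * 4, frequency="D",
                      uncertainty_samples=500, **kwargs).assign(model=name)
    frames.append(df_f)
df_all = pd.concat(frames)
print(df_all.groupby(["model", "ds"])["y_sim"].mean().head())
```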

Data

APPL

APPL stock price data. A time series data set with non-seasonal patterns.

SEAS

A seasonal time series.

WALMART

Walmart sales for California from M5. A time series data set with seasonal patterns.
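
APPL and SEAS are plain dataframes with `ds` and `y` columns, importable from `tablespoon.data`, as used in the examples above. The snippet below also imports `WALMART`, assuming it follows the same pattern; the inspection itself makes no assumption about its columns.

```py
import pandas as pd
from tablespoon.data import APPL, SEAS, WALMART  # WALMART import assumed to mirror APPL and SEAS

# Inspect the bundled example series; APPL dates are parsed as in the examples above.
df_apple = APPL.assign(ds=lambda df: pd.to_datetime(df.ds))
for name, df in {"APPL": df_apple, "SEAS": SEAS, "WALMART": WALMART}.items():
    print(name, df.shape, list(df.columns))
```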