Source code for FIBbootstrap.utils

# Copyright 2016 Joshua Taillon
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pandas as pd
import numpy as np
from scikits import bootstrap
from tqdm import tqdm
from scipy import mean as sci_mean
from math import sqrt

__all__ = ['calculate_errors']


def calculate_errors(df, samples):
    """
    Calculate the "error bars" of each column in a Pandas dataframe

    For every column, a bootstrapped confidence interval of the mean is
    computed (ignoring missing values) and stored together with the column's
    mean, standard deviation and standard error of the mean. A progress bar
    is shown while the columns are processed.

    Parameters
    ----------
    df: ~pandas.DataFrame
        dataframe on which to calculate
    samples: int
        number of bootstrap samples to use

    Returns
    -------
    result: ~pandas.DataFrame
        dataframe with ``-`` and ``+`` error values (and mean) for each
        column in df. The rows are ``'Neg. CI'``, ``'Pos. CI'``, ``'---'``
        (a visual separator), ``'Mean'``, ``'Std. Dev.'`` and ``'SEM'``.
        If ``df`` has no columns, the result has no columns either.
    """
    row_labels = ['Neg. CI', 'Pos. CI', '---', 'Mean', 'Std. Dev.', 'SEM']
    result = pd.DataFrame(index=row_labels, columns=df.columns)

    columns = list(df.columns)
    if not columns:
        # Nothing to bootstrap; avoid indexing into an empty column list.
        return result

    # str.format (rather than '+') so non-string column labels also work.
    desc = 'Bootstrapping confidence intervals ({})'
    bar = tqdm(columns, desc=desc.format(columns[0]))
    last = len(columns) - 1

    for i, col in enumerate(bar):
        series = df[col]

        # np.mean replaces the ``scipy.mean`` alias, which was only a
        # re-export of numpy.mean and is deprecated in newer SciPy releases.
        x = bootstrap.ci(data=series.dropna(axis=0),
                         n_samples=samples,
                         statfunction=np.mean,
                         output='errorbar')

        std = series.std()
        # Standard error of the mean: std / sqrt(number of non-missing values)
        summary = ['---', series.mean(), std, std / sqrt(series.count())]

        # The flattened interval must supply exactly the two leading rows
        # ('Neg. CI', 'Pos. CI') for the six-entry index to line up.
        # NOTE(review): appending the '---' separator alongside floats looks
        # like it makes numpy coerce every value to a string -- kept as-is to
        # preserve the returned values; confirm whether callers rely on it.
        # noinspection PyUnresolvedReferences
        result[col] = pd.Series(np.append(x.flatten(), summary),
                                index=row_labels)

        # Label the bar with the column about to be processed; after the
        # final column the label simply stays on that last column.
        next_col = columns[min(i + 1, last)]
        bar.set_description(desc=desc.format(next_col))

    return result