# Copyright 2016 Joshua Taillon
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd
import numpy as np
from scikits import bootstrap
from tqdm import tqdm
from scipy import mean as sci_mean
from math import sqrt
# Public API: the only name exported by ``from <module> import *``.
__all__ = ['calculate_errors']
def calculate_errors(df,
                     samples):
    """
    Calculate the "error bars" of each column in a Pandas dataframe

    For every column of ``df`` a bootstrapped confidence interval of the
    mean is computed on the non-missing values, and reported together
    with the column's mean, standard deviation, and standard error of
    the mean (SEM). A progress bar naming the column being processed is
    shown while the bootstrap runs.

    Parameters
    ----------
    df: ~pandas.DataFrame
        dataframe on which to calculate
    samples: int
        number of bootstrap samples to use

    Returns
    -------
    result: ~pandas.DataFrame
        dataframe with ``-`` and ``+`` error values (and mean) for
        each column in df. The rows are ``'Neg. CI'``, ``'Pos. CI'``,
        ``'---'`` (a visual separator), ``'Mean'``, ``'Std. Dev.'`` and
        ``'SEM'``. If ``df`` has no columns, the returned dataframe has
        these rows and no columns.

    Notes
    -----
    Each column is assembled with :func:`numpy.append` from the
    confidence interval and a list that contains the ``'---'`` separator
    string, so NumPy coerces the numeric entries to strings as well.
    """
    rows = ['Neg. CI', 'Pos. CI', '---', 'Mean', 'Std. Dev.', 'SEM']
    result = pd.DataFrame(index=rows, columns=df.columns)

    # Nothing to bootstrap: return the empty table instead of failing on
    # ``df.columns[0]`` below.
    if len(df.columns) == 0:
        return result

    # ``format`` (rather than ``+``) so that non-string column labels,
    # e.g. integers, do not raise a TypeError.
    desc = 'Bootstrapping confidence intervals ({})'
    last = len(df.columns) - 1

    bar = tqdm(df.columns, desc=desc.format(df.columns[0]))
    for i, col in enumerate(bar):
        column = df[col]
        # NOTE(review): ``sci_mean`` comes from the top-level ``scipy``
        # namespace; confirm it is still provided by the SciPy version in
        # use.
        ci = bootstrap.ci(data=column.dropna(axis=0),
                          n_samples=samples,
                          statfunction=sci_mean, output='errorbar')
        std = column.std()
        # noinspection PyUnresolvedReferences
        result[col] = pd.Series(np.append(ci.flatten(),
                                          ['---',
                                           column.mean(),
                                           std,
                                           std / sqrt(column.count())]),
                                index=rows)

        # Label the bar with the column that will be processed next; on
        # the final iteration the index is clamped to the last column.
        next_col = df.columns[min(i + 1, last)]
        bar.set_description(desc=desc.format(next_col))
    return result