# Source code for FIBbootstrap.tortuosity

# Copyright 2016 Joshua Taillon
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os

import pandas as pd

from .utils import calculate_errors

# Public API: the only name exported by a star-import of this module.
__all__ = ['bootstrap_tort_stats']

def _default_output_prefix(csv_pattern):
    """
    Return the prefix used to build default output filenames.

    The prefix is everything in ``csv_pattern`` before its last ``*``. If the
    pattern holds no ``*`` (a single file, or only ``?``/``[...]`` wildcards),
    the directory part of the pattern (with a trailing separator) is used
    instead, so the output lands next to the input files.
    """
    prefix, star, _ = csv_pattern.rpartition('*')
    if star:
        return prefix
    # os.path.join(..., '') appends the separator only when a directory part
    # exists, so a bare filename yields an empty prefix.
    return os.path.join(os.path.dirname(csv_pattern), '')


def _read_profile_tail(csv_path, thresh):
    """
    Read one tortuosity profile and return ``(column_name, tail_values)``.

    ``column_name`` is the dataset name taken from the first line of the file
    with ``' tort'`` appended; ``tail_values`` is a :class:`pandas.Series` of
    the tortuosity values whose euclidean distance is greater than ``thresh``
    times the last distance value in the file, re-indexed from zero.
    """
    with open(csv_path, 'r') as f:
        # The first line carries the dataset name; reading it also removes
        # it from the file object before pandas parses the rest.
        name = f.readline().strip('# ').strip(' \n')

        # dropna() removes rows that do not have a valid tortuosity value.
        # The space before 'nan' is necessary because of the way the data
        # was saved by fibtortuosity.
        df = pd.read_csv(f,
                         skiprows=0,
                         escapechar='#',
                         na_values=' nan').dropna(axis=0)

    df.columns = ['Euc_d', 'tort']  # Explicitly rename columns

    # Last distance value in the file; presumably the maximum, i.e. the
    # profile is assumed to be stored in ascending distance order.
    max_d = df['Euc_d'].iloc[-1]

    # Only keep tortuosity values in the last (1 - thresh) fraction of the
    # euclidean distance range, and drop the original row index so that the
    # profiles line up from row zero when placed side by side.
    tail = df.loc[df['Euc_d'] > thresh * max_d, 'tort']
    return name + ' tort', tail.reset_index(drop=True)


def bootstrap_tort_stats(csv_pattern=None,
                         n_bootstrap=100000,
                         thresh=0.75,
                         save_output=False,
                         data_output_fname=None,
                         err_output_fname=None):
    """
    Calculate errors for tortuosity profiles, as output by
    :py:mod:`FIBTortuosity <fibtortuosity>` module.

    Operates on many .csv files, each with a single tortuosity profile for
    a phase and direction (i.e. LSM-x, Pore-y, YSZ-z, etc.)

    Parameters
    ----------
    csv_pattern: str
        glob pattern to grab csv files to process from output of tortuosity
        calculations. Usually, this will be something like:
        ``os.path.join(<path holding files>, "*.csv")``
    n_bootstrap: int
        number of bootstrap samples to use when calculating confidence
        intervals
    thresh: float
        value between 0 and 1, defining from what portion of the profiles to
        calculate the errors. For example, for the default value of 0.75,
        the error in the tortuosity for the last 25% of euclidean distance
        values will be calculated. A ``thresh`` value of 0.0 would calculate
        the error on the whole profile. Usually, only a small value towards
        the end of the dataset is desired, so one can analyze how much the
        data was changing towards the end of the profile.
    save_output: bool
        switch to control whether or not the bootstrap data and error
        output are written to CSV files
    data_output_fname: :data:`None` or :class:`str`
        filename to use when saving the data output; if None, it is built
        from the part of ``csv_pattern`` before its last ``*`` (or from the
        pattern's directory if it has no ``*``) plus ``bootstrap_data.csv``
    err_output_fname: :data:`None` or :class:`str`
        filename to use when saving the error output; if None, it is built
        the same way with the suffix ``bootstrap_errors.csv``

    Returns
    -------
    data_df: :class:`pandas.DataFrame`
        Dataframe with one column per profile (named ``"<name> tort"``),
        holding the tortuosity values from the selected end portion of
        that profile
    error_df: :class:`~pandas.DataFrame`
        Result of ``calculate_errors(data_df, n_bootstrap)``; per the
        original documentation, the low and high errors calculated using
        n_bootstrap samples

    Raises
    ------
    TypeError
        If ``csv_pattern`` is not supplied.
    """
    if csv_pattern is None:
        # Fail early with a message naming the argument instead of the
        # opaque error glob would raise for None.
        raise TypeError("csv_pattern must be a glob pattern string, "
                        "not None")

    # glob returns matches in arbitrary order; sort so that the column
    # order of the output is reproducible across runs and platforms.
    csv_filelist = sorted(glob.glob(csv_pattern))

    # Automagically determine file names for output (if necessary)
    if save_output:
        if data_output_fname is None:
            data_output_fname = (_default_output_prefix(csv_pattern) +
                                 'bootstrap_data.csv')
        if err_output_fname is None:
            err_output_fname = (_default_output_prefix(csv_pattern) +
                                'bootstrap_errors.csv')

    # Collect the column name and tail values of every profile
    names = []
    columns = []
    for file_ in csv_filelist:
        name, tail = _read_profile_tail(file_, thresh)
        names.append(name)
        columns.append(tail)

    # Build the frame with a single concat rather than growing it inside the
    # loop; profiles of different length are padded with NaN. pd.concat
    # refuses an empty list, so fall back to an empty frame when no file
    # matched the pattern.
    if columns:
        total_df = pd.concat(columns, axis=1, ignore_index=True)
    else:
        total_df = pd.DataFrame()

    # Set names of data frame to include which profile is which
    total_df.columns = names

    # Calculate actual errors and save data
    data_df = total_df
    error_df = calculate_errors(total_df, n_bootstrap)

    if save_output:
        data_df.to_csv(path_or_buf=data_output_fname)
        print("Data output saved to {}".format(data_output_fname))
        error_df.to_csv(path_or_buf=err_output_fname)
        print("Error output saved to {}".format(err_output_fname))

    return data_df, error_df