# Copyright 2016 Joshua Taillon
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pandas as pd
from .utils import calculate_errors
import glob
__all__ = ['process_surface_stats_lsm_ysz']
def process_surface_stats_lsm_ysz(pattern,
                                  n_bootstrap=100000,
                                  save_output=False,
                                  output_fname="subvolume_errors.csv"):
    """
    Calculate errors for surface statistics, as output by the Avizo
    subsampling script. This version operates on LSM-YSZ material names
    (``PRE``, ``LSM`` and ``YSZ``).

    Each CSV file matched by ``pattern`` contributes one row (one
    subvolume) to the data set. For every subvolume the following
    quantities are derived per material and then handed to
    ``calculate_errors``:

    * ``<mat>_volNormSA``  -- ``A / V``
    * ``<mat>_BET_d``      -- ``6 * V / A``
    * ``<mat>_volFrac``    -- ``V / (PRE_V + LSM_V + YSZ_V)``
    * ``<mat>_solVolFrac`` -- ``V / (LSM_V + YSZ_V)`` (LSM and YSZ only)

    Parameters
    ----------
    pattern: str
        glob pattern to grab csv files to process from output of subvolume
        Avizo scripts. The first line of each file is skipped and the
        second is used as the header; the files must contain the columns
        ``Closedness``, ``Triangles`` and ``Material`` plus exactly two
        further columns, which are renamed to ``<mat>_A`` and ``<mat>_V``
        in that order (presumably surface area and volume -- confirm
        against the Avizo script output).
    n_bootstrap: int
        number of bootstrap samples to use when calculating confidence
        intervals (passed straight through to ``calculate_errors``)
    save_output: bool
        switch to control whether or not the output is written directly to
        a CSV file at ``output_fname``
    output_fname: str
        filename to use when saving the output

    Returns
    -------
    error_df: ~pandas.DataFrame
        Dataframe with low and high errors calculated using n_bootstrap
        samples

    Raises
    ------
    ValueError
        If ``pattern`` does not match any files.
    """
    filelist = glob.glob(pattern)
    if not filelist:
        raise ValueError("Did not find any .csv files to process!")

    materials = ('PRE', 'LSM', 'YSZ')

    # Collect one single-row dataframe per file and concatenate once at the
    # end. ``DataFrame.append`` was removed in pandas 2.0, and appending
    # inside the loop re-copied the accumulated frame on every iteration.
    subvol_rows = []
    for file_ in filelist:
        # Read data (first line is skipped, second line is the header):
        tmp_df = pd.read_csv(file_, header=0, skiprows=1)

        # Delete unwanted columns:
        tmp_df = tmp_df.drop(['Closedness', 'Triangles'], axis=1)

        # Filter out anything that's not one of our materials, and give the
        # two remaining columns useful, material-specific names:
        per_material = []
        for mat in materials:
            mat_df = tmp_df[tmp_df['Material'].isin([mat])] \
                .drop('Material', axis=1).reset_index(drop=True)
            mat_df.columns = [mat + '_A', mat + '_V']
            per_material.append(mat_df)

        # Place the materials side by side (one row per subvolume):
        subvol_rows.append(pd.concat(per_material,
                                     axis=1,
                                     ignore_index=False))

    data_df = pd.concat(subvol_rows, ignore_index=True)

    # Calculate various properties:
    data_df['Total_V'] = \
        data_df['PRE_V'] + data_df['LSM_V'] + data_df['YSZ_V']
    data_df['Total_solid_V'] = data_df['LSM_V'] + data_df['YSZ_V']

    for mat in materials:
        area = data_df[mat + '_A']
        volume = data_df[mat + '_V']
        data_df[mat + '_volNormSA'] = area.div(volume)
        data_df[mat + '_BET_d'] = 6 * volume.div(area)
        data_df[mat + '_volFrac'] = volume.div(data_df['Total_V'])

    # Solid-phase volume fractions exclude the PRE phase:
    for mat in ('LSM', 'YSZ'):
        data_df[mat + '_solVolFrac'] = \
            data_df[mat + '_V'].div(data_df['Total_solid_V'])

    # Only the derived quantities are passed on, in a fixed column order:
    output_df = data_df[['PRE_volNormSA',
                         'LSM_volNormSA',
                         'YSZ_volNormSA',
                         'PRE_BET_d',
                         'LSM_BET_d',
                         'YSZ_BET_d',
                         'PRE_volFrac',
                         'LSM_volFrac',
                         'YSZ_volFrac',
                         'LSM_solVolFrac',
                         'YSZ_solVolFrac']]

    error_df = calculate_errors(output_df, n_bootstrap)

    if save_output:
        error_df.to_csv(path_or_buf=output_fname)
        print("Output saved to {}".format(output_fname))

    return error_df