#!/usr/bin/env bash
#  findNodes ".mv3d skeleton file"
#
#  searches through a skeleton file (as outputted by Amira in .mv3d format)
#  and reports the number of times each point is found into nodes.txt
#
#  set -xv

# input and output files
INPUT=$1
OUTPUT=nodes2.txt
LOOPFILE=loops.txt
TEMP1=`mktemp -p . --suffix ".mv3d"`
TEMP2=`mktemp -p . --suffix ".mv3d"`
TMP_OUTPUT=`mktemp -p . --suffix ".txt"`


# check to make sure skeleton file exists
if [ -f "$1" ]; then

# check if this input has had loops removed
# if so, change output files accordingly
if [[ "$1" =~ .*loopsRemoved.* ]]; then
    OUTPUT=nodes_afterLoopRemoval.txt
    LOOPFILE=loops_afterLoopRemoval.txt
fi

# make sure that we are using the right output file
while :
do
  if [ -f "$OUTPUT" ]; then
      echo ""
      echo "Default output file \"$OUTPUT\" already exists. Delete (d) or rename (r)?"
      read x
      case $x in
       d)
        rm $OUTPUT
	echo "Deleted $OUTPUT"
	break
	;;
       r)
        echo "New output file name?"
	read OUTPUT
	;;
       *)
        echo "Invalid response \"$x\""
	;;
      esac
  else
      break
  fi
done

# make sure that we are using the right loop file
while :
do
  if [ -f "$LOOPFILE" ]; then
      echo ""
      echo "Default output file \"$LOOPFILE\" already exists. Delete (d) or rename (r)?"
      read x
      case $x in
       d)
        rm $LOOPFILE
	echo "Deleted $LOOPFILE"
	break
	;;
       r)
        echo "New output file name?"
	read LOOPFILE
	;;
       *)
        echo "Invalid response \"$x\""
	;;
      esac
  else
      break
  fi
done

# remove header information from skeleton file
sed '/^\#/d' "$INPUT" > $TEMP1

## add blank line to top of file
(echo ""; cat $TEMP1) > $TEMP2

## grep for each blank line and return context
grep -C 1 "^$" $TEMP2 > $TEMP1

## remove blank lines and "--" from grep
sed '/^$/d;/--/d' $TEMP1 > $TEMP2
# this is a list of each node (endpoint of edge) preceeded by
# the edge number (should be 2 for each edge)

## find number of edges (number of unique lines in first
## column of skeleton data)
noEdges=$(awk '{print $1}' $TEMP2 | uniq | wc -l)

noLoops=$(uniq -c $TEMP2 | grep "2 " | wc -l)

## setup loopfile header
echo "##" `date` > $LOOPFILE
echo "##" >> $LOOPFILE
echo "## List of nodes in input skeleton: \"$1\"" >> $LOOPFILE
echo "## Script called from  working directory " `pwd` >> $LOOPFILE
echo "##" >> $LOOPFILE
echo "## $noEdges total edges in skeleton" >> $LOOPFILE
echo "## $noLoops loops in skeleton" >> $LOOPFILE
echo "##" >> $LOOPFILE
echo -e "## of" >> $LOOPFILE
echo -e "##edges\tedge #\tx\t\ty\t\tz\t\tThickness" >> $LOOPFILE
echo "##-------------------------------------------------------------------" >> $LOOPFILE

## find segments with identical nodes (loops) and output to loopfile
uniq -c $TEMP2 | grep "2 " >> $LOOPFILE

# remove edge number, leaving only x, y, z coordinates and thickness
cat $TEMP2 > $TEMP1
cat $TEMP1 | awk '{print $2" "$3" "$4" "$5}' > $TEMP2

# sort data (human numbering order) and
# report number of times each line is repeated
# e.g. a point repeated three times appears in 3 edges,
# and is therefore a node with connectivity = 3
sort -n $TEMP2 | uniq -c > $TEMP1

# remove each line beginning with "      1 "
# this means it only occurred once, and is therefore not a node
#egrep -v '^(      1 )' $TEMP1 > $TEMP2
cat $TEMP1 > $TEMP2

# sort resulting data in reverse order (big to small)
# and print to tmp output file
sort -nr $TEMP2 > $TMP_OUTPUT

# remove leading and trailing whitespace from output
sed -i 's/^[ \t]*//;s/[ \t]*$//' $TMP_OUTPUT

# find out how many edges we have and delete first line of tmp output file
# (number of blank lines is equal to the number of edges,
# so the first line is our noEdges)
#noEdges=$(head -n 1 $TMP_OUTPUT)
sed -i '1d' $TMP_OUTPUT

# replace spaces with tabs (for formatting)
sed -i 's/ /\t/g' $TMP_OUTPUT

# find out how many nodes we have
# (noNodes = number of lines in file)
noNodes=`wc -l $TMP_OUTPUT | awk '{print $1}'`

# setup header for output file
echo "##" `date` > $OUTPUT
echo "##" >> $OUTPUT
echo "## List of nodes in input skeleton: \"$1\"" >> $OUTPUT
echo "## Script called from  working directory " `pwd` >> $OUTPUT
echo "##" >> $OUTPUT
echo "## $noEdges total edges in skeleton" >> $OUTPUT
echo "## $noNodes total nodes in skeleton" >> $OUTPUT
echo "##" >> $OUTPUT
echo -e "## of" >> $OUTPUT
echo -e "##edges\tx\t\ty\t\tz\t\tThickness" >> $OUTPUT
echo "##-------------------------------------------------------------------" >> $OUTPUT

# transfer data to output file
cat $TMP_OUTPUT >> $OUTPUT

# clean up our mess
rm $TEMP1 $TEMP2 $TMP_OUTPUT


exit 0
fi

# couldn't find input file. clean up and quit.
echo "Could not find input file. Check command line parameters."
rm $TEMP1 $TEMP2 $TMP_OUTPUT
exit 1