#!/usr/bin/sh
#
# Revision: 1.1
#
# History:
#
# 1.1 - added HTML Tidy integration
#     - added Perl detection
# 1.0 - initial release


### CONFIGURE

# Name pattern of the files to format when INPUT is a directory.
include='*.htm?'

# Backup copies (FILE.bak) made in directory mode: "no" removes them after
# formatting, any other value keeps them.
backup='no'

# Set to "yes" to run each file through HTML Tidy before formatting.
tidy='no'

# Configuration file handed to HTML Tidy (-config).
tidycfg='/home/site/tidy/config.txt'

# File handed to HTML Tidy for its error output (-f).
tidyerr='tidy_errors.txt'


### PREPARE

# Word splitting back to the shell default (space, tab, newline).
unset IFS

# Fixed search path for the external commands run by this script.
PATH=/usr/bin:/usr/local/bin

# Path of the Perl interpreter, looked up on the PATH set above.
# When Perl is not found the value falls back to the bare word "perl": it has
# no directory part, so `dirname` prints "." for it, which is the "Perl not
# found" condition tested in MAIN LOGIC.
PERL_BINARY=$(command -v perl) || PERL_BINARY=perl

# HTML Tidy executable.
TIDY_BINARY=~/bin/tidy

# Exit status of "tidy -v": 0 when Tidy can be run, non-zero otherwise
# (e.g. 127 when the binary is missing). Both stdout and stderr are discarded
# so that the value is the bare status only; a version banner printed on
# stdout would otherwise be captured too and the value could never equal 0.
TIDY_EXISTS=$("$TIDY_BINARY" -v > /dev/null 2>&1; echo $?)


### MAIN LOGIC

# A PERL_BINARY value without a directory part (dirname prints ".") means
# Perl was not located. The expansion is quoted so that an empty value is
# caught as well: dirname prints "." for an empty string too.
if [ "$(dirname "$PERL_BINARY")" = "." ]; then
  echo "Perl not found." 1>&2
  exit 1
fi

# Called without arguments: show the usage text and stop.
if [ $# -eq 0 ]; then
  # printf instead of echo "...\n": whether echo expands \n differs between
  # shells, so the blank separator lines are produced explicitly.
  printf '%s\n\n' "This script is used to format HTML code."
  printf 'Usage: %s [-r] INPUT [OUTPUT]\n\n' "$(basename "$0")"
  printf '%s\n' \
    "INPUT can either be a source file or directory. If INPUT is a" \
    "file, then an OUTPUT file is required. If INPUT is a directory," \
    "the script will format all HTML files in that directory. The -r" \
    "switch will make the script parse recursively."
  exit 1
fi

# recurse holds the find arguments for recursive mode (start directory and
# -name pattern); a non-empty value also marks that -r was given. It is
# cleared first so a variable of the same name inherited from the
# environment cannot switch recursive mode on.
recurse=
if [ "$1" = "-r" ]; then
  recurse="$2 -name '$include'"
  shift  # drop -r so that INPUT is $1 and OUTPUT is $2 below
fi

input=$1
output=$2

# formatHTML INPUT OUTPUT
#
# Re-indent the table markup (<table>, <tr>, <td>) of the file INPUT and write
# the result to the file OUTPUT.
#
# Globals read: tidy, TIDY_EXISTS, TIDY_BINARY, tidycfg, tidyerr, PERL_BINARY.
# When tidy is "yes" and TIDY_EXISTS is 0, INPUT is first run through HTML
# Tidy into the temporary file INPUT.tidy, which is then used as the source
# and removed afterwards.
#
# Returns the exit status of the Perl formatter.
formatHTML() {
  fh_source=$1
  fh_tidied=
  if [ "$tidy" = "yes" ] && [ "$TIDY_EXISTS" = 0 ]; then
    "$TIDY_BINARY" -config "$tidycfg" -f "$tidyerr" "$1" > "$1.tidy"
    fh_source=$1.tidy
    fh_tidied=$1.tidy
  fi
  # The file is opened by Perl itself rather than through a shell command, so
  # names containing spaces or shell metacharacters are read correctly.
  # "--" keeps a file name starting with "-" from being taken as a Perl switch.
  "$PERL_BINARY" -e '
    open(IN, "<", $ARGV[0]) or die "cannot read $ARGV[0]: $!\n";
    undef $/;                                       # read the whole file at once
    $html = <IN>;
    close(IN);
    $html =~ s/(\r\n?|\n)+/\n/g;                    # normalize EOLs
    $html =~ s/\s*<tr/\n  <tr/gi;                   # format <tr>
    $html =~ s/\s*<\/tr>/\n  <\/tr>/gi;             # format </tr>
    $html =~ s/\s*<td([^>]*>)\s*/\n    <td$1/gi;    # format <td>
    $html =~ s/\s*<\/td>/<\/td>/gi;                 # format </td>
    $html =~ s/\s*<table/\n\n<table/gi;             # pad table with double-space
    $html =~ s/\s*<\/table>\s*/\n<\/table>\n\n/gi;  # pad table with double-space
    $html =~ s/(<\/table>\n+)(<\/td>)/$1    $2/gi;  # indent </td> after nested table
    $html =~ s/> +\n/>\n/g;                         # strip trailing spaces
    print $html;
  ' -- "$fh_source" > "$2"
  fh_status=$?
  # Remove the Tidy output only if this call actually created it.
  if [ -n "$fh_tidied" ]; then
    rm -f -- "$fh_tidied"
  fi
  return "$fh_status"
}

# formatInPlace FILE
#
# Format FILE in place: copy it to FILE.bak, format the copy back into FILE
# and remove the copy when backup is "no". Anything that is not a regular
# file is skipped. If the copy cannot be made, FILE is left untouched,
# because formatHTML would otherwise overwrite it with nothing to read from.
formatInPlace() {
  [ -f "$1" ] || return 0
  cp -- "$1" "$1.bak" || return 1
  formatHTML "$1.bak" "$1"
  if [ "$backup" = "no" ]; then
    rm -- "$1.bak"
  fi
}

# Start with an empty Tidy error file.
if [ "$tidy" = "yes" ]; then
  : > "$tidyerr"
fi

if [ -f "$input" ] && [ -z "$recurse" ]; then
  # Single file: INPUT is formatted into OUTPUT.
  if [ -z "$output" ]; then
    echo "$(basename "$0"): output file required" 1>&2
    exit 1
  fi
  formatHTML "$input" "$output"
elif [ -d "$input" ]; then
  if [ -n "$recurse" ]; then
    # -r was given: every matching file below INPUT. Names are read line by
    # line so that spaces survive (names containing a newline are not
    # supported). The loop body gets /dev/null as stdin so that no command
    # in it can consume the list of file names.
    find "$input" -name "$include" -print |
      while IFS= read -r i; do
        formatInPlace "$i" < /dev/null
      done
  else
    # Matching files directly inside INPUT. $include is left unquoted on
    # purpose so that the pattern is expanded; an unmatched pattern stays
    # literal and is skipped by formatInPlace because it is not a file.
    for i in "$input"/$include; do
      formatInPlace "$i"
    done
  fi
else
  echo "$(basename "$0"): input file/directory does not exist" 1>&2
  exit 1
fi