#! /usr/bin/perl

# Copyright (c) 1996, 1997	Russell Quong.
#
# In the following, the "author" refers to "Russell Quong."
#
# Permission to use, copy, modify, distribute, and sell this software and
# its documentation for any purpose is hereby granted without fee, provided
# that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#      This product includes software developed by Russell Quong.
# 3. All HTML generated by ltoh must retain a visible notice that it
#    was generated by ltoh and contain a link to the ltoh web page
#
# Any or all of these provisions can be waived if you have specific,
# prior permission from the author.
#
# THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
# EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
# WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
#
# IN NO EVENT SHALL RUSSELL QUONG BE LIABLE FOR ANY SPECIAL,
# INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY
# THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

# ltoh.pl = a LaTeX to HTML converter
# Russell Quong		1996, 1997
# Version 97e.

# enable readable "English names" for variables, like $MATCH or $PERL_VERSION
# instead of " punctuation names" for variables, like $&     or $]
use English;

$false = 0;
$true = 1;

$author = "Russell W. Quong";
$version = "Version 97e.";
$reldate = "Mar 1997";
$status = "Experimental";
$dirsep = "/";			# directory separator character

$qval{"today"} = strip_hms_and_day( scalar(localtime()) );
$qval{"title"} = "";
$qval{"author"} = "Unknown document author";
$qval{"email"} = "Unknown email";
$qval{"url"} = "Unknown Web URL";
$qval{"keep_comments"} = 0;

$warnlevel = 3;

sub bad_command {
    my($err_msg, $descrip) = @_;
    return "<font color=maroon><em> $err_msg </em> $descrip </font>";
}


# lookup(varname) ==> get the value of variable VARNAME.
sub lookup {
    my($idx) = @_;
    if ( ! defined $qval{$idx} ) {
	return "$idx undefined";
    } else {
	return $qval{$idx};
    }
}

sub print_html_header {
    my($filename) = @_;
    print '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">', "\n";
    print "<HTML>\n";
    if ($qval{"title"} eq "") {
	$qval{"title"} = "$filename : [LaTeX --> HTML]";
    }
    print "<HEAD>\n<TITLE>\n", $qval{title} , "\n</TITLE>\n</HEAD>\n";
    print "<BODY>\n";
}

sub print_html_trailer {
    my($filename) = @_;
    # need to declare ctime separately from next line, ugh.  (scalar vs. array)
    print "<HR>\n";
    print "<font size=+0><EM>[Converted LaTeX --> HTML by";
    print ' <a href="http://www.best.com/~quong/ltoh.html">ltoh</a>]';
    print "</EM></font><br>\n";

    print "<ADDRESS>\n";
    if ($qval{"url"} =~ /Unknown/) {
	if (! ($qval{"author"} =~ /Unknown/)) {
	    print "$qval{author}\n" ;
	}
    } else {
	print "<A href=\"$qval{url}\">$qval{author}</A>\n";
    }
    if (! ($qval{"email"} =~ /Unknown/)) {
	print "(<A href=\"mailto:$qval{email}\"><SAMP>$qval{email}";
	print "</SAMP></A>)\n";
    }

    my($ctime);
    $ctime = localtime;
    $ctime = strip_hms_and_day($ctime);
    print "Last modified: <font color=maroon><samp>$ctime</samp></font>\n";

    my($mtime) = get_file_mtime($filename);
    if ($mtime ne $ctime) {
	print "(LaTeX doc modified: <font color=maroon><samp>";
	print $mtime;
	print "</samp></font>)<br>\n";
    }	
    print "</ADDRESS>\n";
    print "</BODY>\n</HTML>\n";
}

sub strip_hms_and_day {
    my($str_time) = @_;

    # The next two lines strip off
    # the hours:min:secs and the leading day of the week, giving
    #	Jul 29 1996
    $str_time =~ s/^[A-Z][a-z][a-z][ \t]*//;
    $str_time =~ s/\d+:\d+:\d+[ \t]*([A-Z][A-Z]T[ \t]*)?//;
    return $str_time;
}

#
# return the time (as a string) that the file XXX was last modified.
# called via:
#    $string = get_file_mtime (XXX);
#
sub get_file_mtime {
    my($filename) = @_;
    my($str_time) = '<Unable to determine time>';	# default value

    my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
	$atime,$mtime,$ctime,$blksize,$blocks)
	= stat($filename);
    if ($dev != null) {
	$str_time = localtime($mtime);
	# We now have string like:
	#	Mon Jul 29 19:53:49 PDT 1996
	$str_time = strip_hms_and_day($str_time);	

	warning(9, "File $filename last modified on $str_time");
    }
    return $str_time;
}

sub logmessage {
    my($prefix, @params) = @_;
    my($level) = 0;    
    if (@params > 1) {
	if ($params[0] =~ /\d/) {
	    $level = shift(@params);
	} else {
	    print ("No level given: @_");
	}
    }
    if ($level <= $warnlevel) {
	print "$prefix :: @params\n";
    } else {
#	print "level $level > warnlevel $warnleve\n";
    }
}

sub errmsg {
    logmessage(" * Error: ", @_);
}    
    
sub warning {
    logmessage(" + Warning: ", @_);
}

sub fyi {
    logmessage(" - fyi: ", @_);
}

#
# Try to figure out where the perl script is actually located.
# (1) We can follow symbolic links to ABSOLUTE PATHS correctly,
# (2) But we can't handle relative symbolic links to another directory,
#   as we need to keep track of where the link is relative to.
# Case (1) handles the common case of a link:  xyz --> /whereever/bin/ltoh
# Also, we can handle the last link referring to a file in the same dir.

sub get_scriptdir {
    fyi("PROGRAM_NAME = $PROGRAM_NAME");
    my($scriptfile) = $PROGRAM_NAME;	# maybe a link.  We trace it down.
    my($scriptdir, $fulldir) = ("." , "");

    while (-l $scriptfile) {
        fyi(9, "$scriptfile is link to -->");
	if ($scriptfile =~ /(.*) $dirsep ( [^$dirsep]+ )/x ) {
	    $scriptdir = $1;
#	    if ($scriptdir =~ /^$dirsep/) {
#		$fulldir = $scriptdir;
#	    } else {
#		$fulldir .= $scriptdir;
#	    }
#	    fyi(9, "scriptdir = $scriptdir");
#	    fyi(9, "fulldir = $fulldir");
	}
	$scriptfile = readlink $scriptfile;
        fyi(9, "   $scriptfile.");
        fyi(7, "scriptdir = $scriptdir");
    }
    return $scriptdir;
}

  #
  # handle all the arguments, in the @ARGV array.
  #
sub main {
    fyi("PROGRAM_VERSION = $version");
    fyi("  by $author.  1996, 1997.");

    my($nextline, $f);
    my($nspecfiles) = 0;
    my($scriptdir) = get_scriptdir();	# attempt to trace down a sym link.

    # list of spec files
    my(@specfiles) = (
        "$scriptdir$dirsep" . "ltoh.specs" , 
	"~$dirsep.ltoh.specs" ,
	".$dirsep.ltoh.specs"
    );

    foreach $f (@specfiles) {	
	$nspecfiles += read_specfile($f);
    }
    if ($nspecfiles == 0) {
	$nspecfiles += read_specfile("/usr/local/bin/ltoh.specs");
    }
    if ($nspecfiles == 0) {
	$nspecfiles += read_specfile("/usr/bin/ltoh.specs");
    }
    if ($nspecfiles == 0) {
	warning("Tried to read spec files: @specfiles");
	errmsg("No specification files found.  Bye-bye.");
	exit(1);
    }

    if (@ARGV == 0) {
        handle_file('-');	
    } 
    foreach $i (@ARGV) {
	my($texfile) = $i;
	if ($i =~ /^(.+)\.([ltex]+)$/ ) {
	    warning(7, "filebase = $1, suffix = $2, $input file = $texfile");
	} else {
	    $texfile = $i . ".tex";
	}
	if (! -r $texfile) {
	    print "Cannot read file: $texfile\n";
	    return -1;
	}
        handle_file($texfile);
    }
}

  #
  # process a specific file.  Called via:  handle_file($filename);
  #
sub handle_file {
    my($texfilename) = @_;
    my($INFILE) = $texfilename;
    if ($texfilename eq '-') {
	$INFILE = STDIN;
    } else {
	open($INFILE, $texfilename);
    }
    $qval{"title"} = "Unnamed Web page";
    fyi(5, "+ Reading LaTeX input ... handle_file($texfilename)");
    @orig = <$INFILE>;
    print_arr('Orig $lineno:', \@orig)		if ($warnlevel >= 8);
    do_comment_verbatim(\@orig);
    print_arr('Verb $lineno:', \@orig)		if ($warnlevel >= 8);
    do_tables(\@orig);
    print_arr('Tabl $lineno:', \@orig)		if ($warnlevel >= 8);
    do_begin_end(\@orig);
    print_arr('BegE $lineno:', \@orig)		if ($warnlevel >= 8);    
    do_tex_comms(\@orig);
    print_arr('Comm  $lineno:', \@orig)		if ($warnlevel >= 8);    
    mark_delims(\@orig);
    print_arr('Mark $lineno:\n', \@delim_lines)	if ($warnlevel >= 8);    
    do_simple_latex_defs(\@delim_lines);
    print_arr('{}  $lineno:\n', \@delim_lines)	if ($warnlevel >= 8);
    do_complicated_latex_defs(\@delim_lines);
    print_arr('{N} $lineno:\n', \@delim_lines)	if ($warnlevel >= 8);
    my($i);
    final_cleanup(\@delim_lines);
    print_arr('[<$lineno>]', \@delim_lines)	if ($warnlevel >= 8);
    print_arr("", \@delim_lines)		if ($warnlevel >= 8);
    print_html_file($texfilename);
}

# generate the HTML file.  Pass in the name of the .tex file
# print_html_file( "filename.tex" );
#
sub print_html_file {
    my($texfile) = @_;
    my($base, $suffix) = ($texfile, "");
    my($outfile) = $qval{"htmlfile_spec"};
    fyi(8,"texfile = $texfile, outfile = $outfile");

    if ($texfile =~ /^(.+)\.([^.]*)$/ ) {
	$base = $1;
	$suffix = $2;
    }
    fyi(8, "base = $base, suffix = $suffix, $input file = $texfile");

    $outfile =~ s/\$BASE/$base/e;
    $outfile =~ s/\$SUFFIX/$suffix/e;
    fyi(3, "+ texbase = $base  ;  HTML output file => $outfile  .");
    if (-e $outfile) {
	# 
	# comment out the following warning if desired.
	# 
#       print "Warn, file $outfile already exists\n";
    }
    open(OUTF , ">"."$outfile");

    #
    # print to OUTF by default, by selecting it.
    #
    select OUTF;

    print_html_header($texfile);
    #
    # print the generated HTML.
    # @delim_lines  is the final HTML after processing
    #
    print_arr("", \@delim_lines);	
    print_html_trailer($texfile);

    select STDOUT;
}

$re_fields = 4;

#
#   DO NOT EDIT THEM MANUALLY.
# associative arrays indexed by the LaTeX command
# containing the corresponding the HTML
# E.g, we might have something like:
# 	%html_start{'textbf'} = '<strong>';
# 	%html_end{'textbf'}   = '</strong>';
# The values are read in from a specification file.
#   DO NOT EDIT THEM MANUALLY.

sub do_spec_pseudo_op {
    my($line) = @_;
    if ($line =~ /^=\*\*\*=[ \t]+([^ \t\n]*)/ ) {
	chomp($line);
	my($op) = $1;
	fyi(7, "Psuedo-op $op: ($line)");   
	if ($line =~ /endfile/) {
	    return $op;
	}
	if ($line =~ /=[ \t]*echo./) {
	    print "$POSTMATCH\n";
	}
	if ($line =~ /=[ \t]*exit/) {
	    fyi(6, "Exit request, line $INPUT_LINE_NUMBER, $spec_fname\n");
	    exit(1);
	}
	if ($line =~ /=[ \t]*warnlevel\s*:=\s*(\d+)/i) {
	    $warnlevel = $1;
	}
	return "pseudo-op";
    } elsif ($line =~ /^([a-zA-Z_][\w]*)\s*:=(.*)$/ ) {
	my($name, $val, $oldval) = ($1, $2, "");
	$val =~ s/^[\s]*["]?//;
	$val =~ s/["]?[\s]*$//;
	$oldval = $qval{$name};
	$qval{$name} = $val;
	fyi(6, "set var: $name := ($val),  oldval = $oldval");
	return "set-var";
    } else {
	return "";
    }
}

# Example
# For the specification
# 	':b/e	:\begin{rqitemize}(\{[^\}]*\})*:<ul>:</>:'
# We get:
# 	$html_start{'\begin{rqitemize}'} = '<ul>';
# 	$html_end{'\begin{rqitemize}'} = '</ul>';
# 	$comm_regex{'\begin{rqitemize}'} = '\begin{rqitemize}(\{[^\}]*\})*';
# 	$comm_attr{'\begin{rqitemize}'} = 'b/e';
#
sub read_specfile {
    my($globbed_name) = @_;
    my($spec_fname) = glob($globbed_name);

    fyi(7, "globbed_name = $globbed_name, spec_fname = $spec_fname");
    
    if (! -r $spec_fname) {
	warning(7, "Unable to read ltoh spec file $spec_fname.");
	return 0;
    }
    open(SPECFILE, $spec_fname);

    my($line, $status);
    my($lineno) = 0;
    while ($line = <SPECFILE>) {
	$lineno++;
	# skip comments
	if ($line =~ /^[ \t]*[#;]/ || $line =~ /^[ \t]*$/) {
	    fyi(9, "Skipping comment: $line");
	    next;
	}
	$status = handle_specification($line);
	if ($status eq "endfile") {
	    last;
	}
    }
    fyi(3, "+ Read spec file ($spec_fname).");
    if ($lineno == 0) {
	warning(3, "+ Empty spec file ($spec_fname).");
    }
    return 1;
}

sub handle_specification {
    my($line) = @_;

    my(@arr);
    my($junk, $from, $to, $rest, $status);
    if ( ($status = do_spec_pseudo_op($line)) ne "") {
	return $status;
    }
    $splitter = substr($line, 0, 1);
    if ($splitter eq '[ \t]') {
	$splitter = "[ \t]+";
    } else {
	$splitter = "[" . quotemeta("$splitter") . "]";
    }
    fyi(8, "splitter = $splitter");
    @arr = split($splitter, $line, $re_fields+3);
    if (@arr < $re_fields+2) {
	warning("Malformed spec, line $lineno, too few fields" . "$line");
	next;
    } elsif (@arr > $re_fields+2) {
	warning("Malformed spec, line $lineno, too many fields" . "$line");
	next;
    } else {
	# everything is OK.  do nothing.
    }
    ($raw_spec, $from, $to_start, $to_end) = @arr[1 .. $re_fields ];

    # (a) if $from is "\begin{xyz}MORE-STUFF", strip off MORE-STUFF
    # (b) if $from is "\xyz{MORE-STUFF}", strip off MORE-STUFF
    # must check if $from looks like "\begin{...}" as must not apply (b)
    # to a \begin{...} construct
    if ($from =~ /\\begin\{.+\}/ ) {
	if ($from =~ s/(\\begin\{[^\}]+\})(.+)/$1/ ) {
	    fyi(7, "Stripped begin\{...\} args: $arr[2] --> $from");
	}
    } elsif ($from =~ /(\\[A-Za-z_]+)([^A-Za-z_].*)/ ) {
	$from =~ s/(\\[A-Za-z_]+)([^A-Za-z_].*)/$1/;
	fyi(7, "Stripped args: $arr[2] --> $from");
    }

    # if $to_end is "...</>...",
    # generate the corresponding closing tags, automatically.  Cool.
    #   Have to reverse the order, hence the while loop.
    #
    my($scratch) = "";
    if ($to_end =~ '</>') {
	$junk = $to_start;
	$scratch = "";
	while ($junk =~ s:<([^ \t>]+)([^>]*)>:\@$1\@:o ) {
	    $scratch = "</$1>" . $scratch;
	}
	$to_end =~ s:</>:$scratch:;
	fyi(7, "Gen to_end: to_start = $to_start, to_end = $to_end");
    }

    fyi(7, "from = $from, to_start = $to_start, to_end = $to_end");
    fyi(8, "raw_spec = $raw_spec");

    $comm_regex{$from} = $arr[2];
    $comm_attr{$from} = $raw_spec;
    $html_start{$from} = $to_start;
    $html_end{$from} = $to_end;

    if ($raw_spec =~ /re/i && $to_end ne "") {
	warning(5, "Unused specification: $to_end");
    }
}



  # Simple protection of special character sequences on a line
  # (1) group backslashes by pairs, 
  # (2) '@' to @AT@, and
  # (3) protect \{ and \}.
sub escape_line {
    my($line) = @_;
    $line =~ s:\@:\@AT\@:g;

    # handle latex math characters/macros.  Strip off math mode $...$ for now
    $line =~ s:\$ ([<>]) \$:\1:ogx;
    $line =~ s:\$ (&[gl]t;) \$:\1:ogx;
    $line =~ s:\$ (\\[a-zA-Z]+) \$:\1:ogx;

    # grab pairs of backslash characters and escape them.
    # as we need to distinguish between
    #  \{   ==> preserve '{' (escape it)
    #  \\{  ==> preserve '\' (escape it), but process '{' normally.
    #  \\\{ ==> preserve both '\' and '{' alone, follows from above
    $line =~ s:\\\\:\@BS\@\@BS\@:og;
    $line =~ s:\\\{:\@<<\@:og;
    $line =~ s:\\\}:\@>>\@:og;

    # unescape pairs of backslack characters.
    $line =~ s:\@BS\@\@BS\@:\\\\:og;

# not needed, yet.  (We run this function early in the processing,
# and this change unnecessarily mucks things up at this early stage).
# escape newlines				     
#    $line =~ s:\@AT\@NL\@AT\@:\@NL\@:og;


    return $line;
}

  #
  # print_arr(prefix_string, reference_to_an_array);
  # In prefix_string the string '$lineno' is converted to the array index.
  # If prefix_string ends with a '\n' (newline), append a newline to the line.
  # Ex:
  #   @lines = <>;			# read all input into an array
  #   print_arr('$lineno:', \@lines);	# pass in a reference to the array
sub print_arr {
    my($prefix, $arr) = @_;
    my($i);
    my($ndigits) = int( (log($#{$arr}) / exp(1) + .999) ) ;
    my($newline) = "";
    if ($prefix =~ s/(\\n|\n)$//o) {
	$newline = "\n";
    }
    foreach $i (0 .. $#{$arr}) {
	$s = $prefix;
	if ($prefix =~ /\$lineno/o) {
	    $s =~ s/\$lineno/sprintf("%$ndigits" . "d", $i)/ge;
	}
	print "$s$arr->[$i]$newline";
    }
}

$orig_inh = "";

  #
  # I've hard-wired the following "early actions"
  #   1) handle \begin{verbatim} for now.
  #   2) You can set HTML/Perl variables via lines that look like:
  # %-ltoh-	   pseudo-op-line
  # %-tex2html-	   pseudo-op-line	## backward compatibility for me [RQ]
  #
  #   2) I've hard coded a simplistic table hander too.  Needs updating.
  #      It handles \multicolumn{ncols}{.}{contents} quite crudely
  #      It handles \multirow{nrows}{.}{contents} quite crudely
  #   NOT any more, see
sub do_comment_verbatim {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr); 
    vec($orig_inh, $arrlen-1, 0) = 0;		# pre-extend
    my($inh) = $false;
    for ($i=0; $i < $arrlen; $i++) {
        vec($orig_inh, $i, 1) = $inh;
	my($line) = $arr->[$i];

        if ($line =~ /^[%]+-tex2html-\s*(.*)/ ) {
	    warning(3, "Use of -tex2html- is deprecated.  Change to -ltoh-");  
	    handle_specification($1);
	} elsif ($line =~ /^[%]+-ltoh-\s*(.*)/ ) {
	    fyi(6, "Processing psuedo-op in LaTeX source ($1)");   
	    handle_specification($1);
	}
	# preserve special chars in html, always
	$line =~ s/\\&/&amp;/xg;
	$line =~ s/ < /&lt;/xg;
	$line =~ s/ > /&gt;/xg;
	if ($line =~ s:\\begin{verbatim}:<pre>: ) {
	    vec($orig_inh, $i, 1) = $true;	# inhibit further processing
            $inh = $true;
	} elsif ($line =~ s:\\end{verbatim}:</pre>: ) {
	    vec($orig_inh, $i, 1) = $true;	# inhibit further processing
            $inh = $false;		# allow processing on next line
	} elsif ($inh == $false && $line =~ s:^([ \t]*%.*):<!-- $1 -->: ) {
	    if (! lookup("keep_comments") ) {
		$line = "";
	    }
	    vec($orig_inh, $i, 1) = $true;	# inhibit further processing
	} elsif ($inh == $false && $line =~ /^\\end{document}/ ) {
	    $#$arr = $i;
	    fyi(8, "Truncating down to $i lines");
	    last;
	}
	$arr->[$i] = $line;
    }
}

# handle nested braces/parens/brackets.
# convert { ==> {:level:		level starts at 0
#
# E.g. a{b{c}d{e{f}}g} ==> a{:0:b{:1:c:1}d{:1:e{:2:f:2:}:1:}g:0:}
# 
sub mark_braces_one_line {
    my($ldelim, $rdelim, $line) = @_;
    my($newline, $level) = ("", 0);
    my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]";
    while ( $line =~ /(.*[^\\])($lookfor)(.*)/) {
        my($prev, $del, $after) = ($1, $2, $3);
	my($repl);
	if ($del eq $ldelim) {
	    $repl = "$del:$level:";
	    $level++;
	} elsif ($del eq $rdelim) {
	    $repl = ":$level:$del";
	    $level--;
        }
	$newline .= "$prev$repl";
	$line = $after;
    }
    return $newline;
}

# handle nested braces/parens/brackets.
# convert "{" ==> " {:level "	where level starts at 0
#
# E.g. a{b{c}d{e{f}}g}h ==> a {:0 b {:1 c 1:} d {:1 e {:2 f 2:}  1:} g 0:} h
# 
sub mark_braces_one_line {
    my($ldelim, $rdelim, $origline) = @_;
    my($newline, $level, $line) = ("", 0, $origline);
    my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]";
    my($prev, $del, $after);		# need after to survive the loop.

    $line =~ s/\@/ \@AT\@_/g;
    $line =~ s/\\\\/ \@2BS\@_/g;
    my($qldel) = quotemeta($ldelim);
    $line =~ s/\\$qldel/ \@L\@_/g;
    my($qrdel) = quotemeta($rdelim);
    $line =~ s/\\$qrdel/ \@R\@_/g;

    # need ".../s"  in the following while() to preserve newlines.  Ugh.
    while ( $line =~ /(.*?)($lookfor)(.*)/s) {	
        ($prev, $del, $after) = ($1, $2, $3);
	fyi(9,"mark_braces_one_line:prev=($prev),del=($del),after=($after)\n");
	my($repl);
	if ($del eq $ldelim) {
	    $repl = " $del:$level ";
	    $level++;
	} elsif ($del eq $rdelim) {
	    $level--;
	    $repl = " $level:$del ";
        }
	$newline .= "$prev$repl";
	$line = $after;
    }
    $newline .= $after;

    $newline =~ s/ \@R\@_/\\$qrdel/g;
    $newline =~ s/ \@L\@_/\\$qldel/g;
    $newline =~ s/ \@2BS\@_/\\\\/g;
    $newline =~ s/ \@AT\@_/\@/g;
    return $newline;
}

$talign{"l"} = "left";
$talign{"r"} = "right";
$talign{"c"} = "center";
$talign{"p"} = "left";
$talign{"X"} = "left";

sub do_tables {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($tlev, $endrow, $colnum) = (0, undef, 0);
    my(@cols) = ();
    for ($i=0; $i < $arrlen; $i++) {
	my($origline) = $arr->[$i];
	my($line) = $origline;		# 
	fyi(9, "Tables orig-line $i: $origline");    
        if ($origline =~ /\\begin\{[A-Za-z]*tabular[A-Za-z]*\}/) {
	    $line =~ s/(\\\\)? \s* \\[ch]line.*//x;
	    $line = mark_braces_one_line("{", "}", $line);
	    fyi(9, "Tables delim-line $i: $line");
	    $line =~ s/ \{:1 .*? 1:\} //g;	# remove nested brace stuff
	    $line =~ s/ \{:0 /\{/g;
	    $line =~ s/ 0:\} /\}/g;
	    fyi(9, "Tables squashed-line $i: $line");
	    my(@fragments) = split(/[{}]+\s*/,$line);
	    my($aligns) = $fragments[$#fragments];
	    my($tabletag);
	    if ( $aligns =~ /[|]/) {
		$tabletag = "<table border>";
	    } else {
		$tabletag = "<table>";
	    }
	    while ( $aligns =~ /([lcrpX])(.*)/s ) {
	    	my($lcr) = $1;
		my($rest) = $2;
		fyi(9, "col-align lcr = $lcr, rest = $rest");
		$aligns = $rest;
		push (@cols, $talign{$lcr});
	    }
	    $tlev ++;
	    $endrow = 1;
	    $colnum = 0;
	    $line =~ s/\\begin\{tabular[A-Za-z]*\}\{.*\}/$tabletag/;
	    fyi(9, "line =-> $line");
	    fyi(7, "Table with $#cols+1 columns.  Aligns = @cols");
	} elsif ($line =~ s:\\end{tabular[A-Za-z]*}:</table>: ) {
	    $tlev --;
	    @cols = ();
	} elsif ($tlev > 0) {
	    if ($endrow) {
		$line = "<TR> <TD> $line";
		$endrow = 0;
	    }
	    $line =~ s:\& (?![ampltg]+;):</TD> <TD>:gx;

	    # "&" ==> "</TD><TD>", but skip "&gt;" "&lt;" and "&amp;".
	    $line =~ s:\& (?![ampltg]+;):</TD> <TD>:gx;

	    # convert \multicolumn --> \mc
	    if ( $line =~ s:\\multicolumn{:\\mc\{:xg ) {
		fyi(9, "mc line: $line");
	    }

	    # add individual column alignments.
	    while (
	$line =~ m/(.*?) (<TD>) (\s* \\mc\{\d+\}\{.+?\}\{)? (.*)/sx ) {
		my($before, $after) = ($1, $4);
		fyi(9, "cols[colnum=$colnum] = $cols[$colnum]");
		my($mcspec) = $3;
		fyi(9, "mc: before=($before),mcspec=$mcspec,after=($after)");

		if ($mcspec eq "") {
		    if ($colnum > $#cols) {
			warning("Too many columns in table, line $i:\n + ($origline)");
			$line = "$before<TD align=left" . ">" . $after;
		    } else {
			$line = "$before<TD align=$cols[$colnum]>" . $after;
		    }
		    $colnum++;
		} else {
		    if ($mcspec =~ m:\\mc \{(\d+)\} \{([^}]+)\}:x ) {
			my($colspan) = $1;
			$colnum += $colspan;
			$2 =~ / ([lcrpX]) /x;
		$line = "$before<TD align=$talign{$1} colspan=$colspan>{" . $after;
		    }
		}
	    }
	    # strip off trailing \\ and/or \hline.  Each might be on sep line.
	    if ( $line =~ s:\\\\:</TD></TR>:xg ) {
		$endrow = 1;
		$colnum = 0;
	    }
	    $line =~ s/\s* \\hline//g;
	    $line =~ s/\\cline\{[0-9-]+\}//;
	}
	$arr->[$i] = $line;
    }
}

$re_tex_arg_0 = "(\[[^\]*\]|{[^}]*})*";		   # no nesting
$re_tex_arg_1 = "({(([^{}]*{[^{}]*}[^{}]*)*)})*";  # nesting up to level 1.

sub do_begin_end {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr); 
    my($i, $level, $inh);
    my($comm, $commregex, $fromx, $tox, $latexcomm);
    for ($i=0; $i < $arrlen; $i++) {
	if ( vec($orig_inh, $i, 1) > 0) {	# skip if processing inhibited
	    next;
	}
	$line = $arr->[$i];
	$line = escape_line($line);
	if ( $line =~ m/\\((begin|end)\{[a-zA-Z_]+\})/ ) {
	    # Example:
	    # $latexcomm = \begin{rqitemize}[12pt]{article}
	    #      $comm = \begin{rqitemize}
	    #     $fromx = \begin{rqitemize}
	    #       $tox = <ul>
	    #	    
	    $latexcomm = $comm = $MATCH;
	    $comm =~ s/end/begin/;	# convert \end{comm} to \begin{comm}
	    if ( defined $html_start{$comm} ) {
		if ($latexcomm =~ /^\\end/) {
		    $fromx = $latexcomm;
		    $tox = $html_end{$comm};
		    $commregex = $latexcomm;
		} else {
		    $fromx = $comm_regex{$comm};
		    $tox = $html_start{$comm};
		    $commregex = $comm_regex{$comm};

# Flag an attempt to allow any \begin{xxx} to take further args, as
# error, as \begin{center} should not take any further args
#
		    if ($commregex eq $comm) {
		    	$commregex .= $re_tex_arg_0;
			fyi(7, "  B/E Spruce: $comm --> $commregex");
		    }
		}
		$commregex =~ s/^\\/\\\\/g;
		fyi(7, "  B/E Apply: $commregex ==> $tox, on: ($line)");
		$line =~ s/$commregex/$tox/;
	    } else {
		warning(6, "UNDEFINED COMMAND: $comm, ignoring.");
		$html_start{$comm} = "";
		$html_end{$comm} = "";
	    }
	}
	$arr->[$i] = $line;
    }
}

#
# handle straight-forward substitutions:
#	\xxx ---> yyy				     
#
sub do_tex_comms {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr); 
    my($i, $comm, $comm_re, $fromx, $tox);
    for ($i=0; $i < $arrlen; $i++) {
	if ( vec($orig_inh, $i, 1) > 0) {	# skip if processing inhibited
	    next;
	}
	$line = $arr->[$i];

	# Find the latex command \xxx
	# We mark each command, incase there are multiple commands on a line.
	# First convert all \xxx --> @BS@xxx,
	# Next, one-by-one convert @BS@xxx --> \xxx and process it.
	# Thus, we are left with the original \xxx (not breaking old code).
	# 9/16/96.
	#
	$line =~ s/\\([A-Za-z]+)/\@BS\@$1/g;
	while ( $line =~ s/\@BS\@([A-Za-z]+)/\\$1/ ) {
	    $comm = '\\' . $1;
	    if (defined $html_start{$comm} && ($comm_attr{$comm} =~ /comm/)) {
		$tox = $html_start{$comm};
		  # might have:
		  #   $comm    = '\documentclass'
		  #   $comm_re = '\documentclass[^ \t]+'
		  # apply $comm_re --> $tox
		  # 
		$comm_re = $comm_regex{$comm};
		$comm_re =~ s/^\\/\\\\/g;	# escape the darn backslashes
		warning(7, "  TeXComm Apply: $comm_re ==> $tox on: ($line)");
		$line =~ s/$comm_re/$tox/g;
		my($a, $b, $c, $d, $e) = ($1, $2, $3, $4, $5);
		$line =~ s/\$1/$a/g;
		$line =~ s/\$2/$b/g;
		$line =~ s/\$3/$c/g;
		$line =~ s/\$4/$d/g;
		$line =~ s/\$5/$e/g;
	    }
	    if ( ! defined $html_start{$comm} ) {
		warning(3, "  Unknown \\comm: $comm, on: ($line)");
	    }
	}

	# 
	# Hardwired.  \today --> current date.
	# 
	my($today) = $qval{"today"};
	$line =~ s/\\today/$today/g;

	if ($line ne $arr->[$i]) {
	    warning(8, "regex: $arr->[$i] ==> $line");
	}
	$arr->[$i] = $line;
    }
}

# dstack = delimiter stack
# dsp = delimiter stack pointer
$dstack[40] = ( "" );
$dsp = 0;
$dcount = 1;			# we reserve dcount=0 as an error.
$argspan[100] = undef;		# argspan[i] = range of lines between brace i
$lstack[40] = ( "" );		# last id at this depth (upto depth 40).
$llinestack[40] = ( "" );	# last line containing id at this depth 

  #
  # the following apply only to complex commands (those processing their arg).
  #
$commstack[40] = ( "" );	# the command to 
$argsofar[40] = ( "" );		# number of args seen at level i so-far
$argwant[40] = ( "" );		# number of args wanted at level i
$arglist[40][20] = ( "" );	# the actual args for the level i command

$argrem_stack[40] = ( "" );	# num of args remaining at this depth.
$nextarg[100] = undef;		# nextarg[i] = id of next arg at same level

  #
  # label all { and } with id's. 
  # Split strings via braces namely up after '{' and before '}'
  #
  # Also link up arguments for "complicated" latex commands, which take
  # arguments.  We must defer processing of these commands
  # until we have seen the closing brace for the last argument,
  # in case the arguments (i) undergo processing and (ii) are nested.  Ugh.
  #
sub mark_delims {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr); 
    my($i, $j, $id, $level, $line, $frag, $inh);
    $#delim_lines = $arrlen;		# pre extend array delim_lines
    $ndelim_lines = 0;
    for ($i=0; $i < $arrlen; $i++) {
	$inh = vec($orig_inh, $i, 1);
	$line = $arr->[$i];
	#
	# Also, handle $line == "", as subsequent code would mishandle it.
	# (The split yields zero fragments, and the line is not passed on).
	if ($inh > 0 || $line eq "") {
	    warning(8, "AFTER  $ndelim_lines: $line\n");
	    vec($delim_inh, $ndelim_lines, 1) = $inh;
	    $delim_lines[$ndelim_lines] = $line;
	    $ndelim_lines ++;
	    next;
	}
	warning(8, "mark_delims($line)");

	my($splitstr) = ' @x@ ';
	$line =~ s/\\[a-zA-Z_]+\{/$splitstr$MATCH/g;
	$line =~ s/{/\{$splitstr/g;
	$line =~ s/\}/$splitstr\}/g;
	my(@fragments) = split(/$splitstr/,$line);
	for ($j=0; $j < scalar(@fragments); $j++) {
	    $frag = $fragments[$j];
	    warning(9, "Frag  $j: $frag");
	    if ( $frag =~ /^\}/ ) {
		$dsp --;
		if ($dsp < 0 || ! defined($dstack[$dsp]) ) {
		    my($lineno) = "Line: $INPUT_LINE_NUMBER";
		    warning(
			"$lineno, IMBALANCED BRACES: Too many closing braces");
		    $id = -1;
		}
		$id = $dstack[$dsp];
		$dstack[$dsp] = undef;
		$lstack[$dsp] = $id;
		$lstack[$dsp+1] = undef;
		$llinestack[$dsp] = $ndelim_lines;

		my($tmp) = $ndelim_lines - 1;
		$argspan{$id} .= "$tmp";
		warning(7, "    mark: argspan{$id} = $argspan{$id}");
		
		$x = ":$id:\]";
		$frag =~ s/^\}/$x/;
	    }
	    if ( $frag =~ /\{$/ ) {
		$id = $dstack[$dsp] = $dcount;
		my($lastid) = $lstack[$dsp];
		  #
		  # link args only if we find things like:   xxx}{yyy
		  # Whoops, no, as a line break may split the  }{  pair
		  # In either case the prev arg must be on the prev line.
		$nextarg[$lastid] = undef;
		if ($lastid != undef && $llinestack[$dsp] >= $ndelim_lines-1) {
		    if ($lastid <= 0 || $lastid > $dcount) {
			warning("ERROR linking id's.  Contact author.");
		    }
		    $nextarg[$lastid] = $id;
		    warning(7, "  mark: nextarg[$lastid] = $nextarg[$lastid]");
		    warning(8, "  mark: id2comm{$lastid} = $id2comm{$lastid}");
		}
		$lstack[$dsp] = $id;

		$dsp ++;
		$dcount ++;

		if ( $frag =~ /(\\[\w]+)\{$/) {
		    warning(8, "mark:  id2comm{$id} = $1");
		    $id2comm{$id} = $1;
		} else {
		    $id2comm{$id} = "_:NULL_COMMAND:_";
		}
		my($tmp) = $ndelim_lines + 1;
		$argspan{$id} = "$tmp:";

		$x = "\[:$id:";
		$frag =~ s/\{$/$x/;
	    }
	    $frag =~ s/\n/\@NL\@/g;
	    warning(8, "END-MARK  $ndelim_lines: $frag");

	    $delim_lines[$ndelim_lines] = $frag;
	    vec($delim_inh, $ndelim_lines, 1) = $inh;
	    $ndelim_lines ++;
	}
    }
}

  #
  # handle "simple" latex commands not dependent on the data, namely:
  #    \command{ XXX } ===> <html_start_sequence> XXX <html_end_sequence>
  # In particular, the conversion does not depend on XXX.
  # Of course, we allow arbitrary nesting of braces in XXX,
  #  which was the hard part of all this.
  # At this point, all braces have been munged and tagged with an unique ID.
  #
sub do_simple_latex_defs {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($comm, $fromx, $tox, $id, $inh, $i, $j, $k, $l, $nargs, $argidx);
    for ($i=0; $i < $arrlen; $i++) {
	$inh = vec($delim_inh, $i, 1);
	$line = $arr->[$i];
	if ($inh > 0) {
	    next;
	}
	warning(8, "do_simple_brace_comms($line)");
	
	if ($line =~ /\[:([0-9]+):/) {
	    $id = $1;
	    $comm = $id2comm{$id};
	    warning(8, "    id2comm{$id} is $id2comm{$id}");
	    if ($comm ne "_:NULL_COMMAND:_") {
		if ($comm_attr{$comm} =~ /\{\d+\}/) {
		    next;		# next line.  We don't handle args.
		}
		$fromx = $comm . $MATCH;
		$fromx = quotemeta($fromx);
		if ( defined $html_start{$comm} ) {
		    $tox = $html_start{$comm};
		} else {
		    $tox = bad_command("Unknown LaTeX command", "$comm\{");
		    if ($nextarg[$id] != undef) {
			# it's tnot clear what error we should report
		    }
		}
		warning(8, "  {} Apply: $fromx ==> $tox on: $line");
		if (! ($line =~ s/$fromx/$tox/)) {
		    warning(3, "  {} FAILED: $fromx ==> $tox on: $line");
		}
	    }
	}
	if ($line =~ /:([0-9]+):\]/) {
	    $id = $1;
	    $comm = $id2comm{$id};
	    if ($comm ne "_:NULL_COMMAND:_") {
		if ($comm_attr{$comm} =~ /\{\d+\}/) {
		    next;		# next line.  We don't handle args.
		}
		$fromx = $MATCH;
		$fromx =~ s/:\]/:\\]/;		
		if ( defined $html_end{$comm} ) {
		    $tox = $html_end{$comm};
		} else {
		    $tox = bad_command("", "\}");
		}
		warning(8, "  {} Apply: $fromx ==> $tox on: $line");
		$line =~ s/$fromx/$tox/;
	    }
	}
	$arr->[$i] = $line;
    }
}

sub getspan {
    my($arr, $span) = @_;
    my($start,$end, $z) = split(/:/, $span, 3);
    my($k, $result) = (0, "");
    for ($k = $start; $k <= $end; $k++) {
	$result .= $arr->[$k];
    }
    return $result;
}
    
  #
  # Mark the "end arg" via ID of the arguments for multi-arg commands.
  # We process the complicated command when we reach the last brace.
  #
sub do_complicated_latex_defs {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($comm, $fromx, $tox, $id, $inh, $i, $j, $k, $l, $nwant, $argidx);
    for ($i=0; $i < $arrlen; $i++) {
	$inh = vec($delim_inh, $i, 1);
	if ($inh > 0) {
	    next;
	}
	$line = $arr->[$i];
	warning(8, "do_complicated_brace_comms($line)");
	if ($line =~ /\[:([0-9]+):/) {
	    $id = $1;
	    $from = $comm = $id2comm{$id};
	    my($multarg) = scalar($comm_attr{$comm} =~ /\{(\d+)\}/);
	    warning(9, "  multarg = $multarg");
	    if (! $multarg) {
		next;
	    }
	    $nwant = $1;
	    $argidx = $id;
	      #
	      # start from 2, as the first arg will always exist.
	      #
	    $argarr[$id][1] = $id;
	    for ($j=2; $j <= $nwant; $j++) {
		$argidx = $nextarg[$argidx];
		if ($argidx == undef) {
		    warning("Not enough arguments for $comm\n");
		    last;
		}
    	        $argarr[$id][$j] = $argidx;
	    }
	    $comm_trigger{$argidx} = $id;
	    warning(9, "    comm_trigger{$argidx} is $comm_trigger{$argidx}");
	}
	if ($line =~ /:([0-9]+):\]/) {
	    my($trigger_id) = $1;
	    if ( defined $comm_trigger{$trigger_id} ) {
		$id = $comm_trigger{$trigger_id};
		$comm = $id2comm{$id};
		if ($comm_attr{$comm} =~ /\{(\d+)\}/) {
		    my(@arglist);
		    $nwant = $1;
		    $tox = $html_start{$comm};
		    for ($j=1; $j <= $nwant; $j++) {
			$k = $argarr[$id][$j];
			my($currarg) = getspan($arr, $argspan{$k});
			warning(9, "  comp_def: arg[$id][$j] = $currarg");
			$tox =~ s/#$j/$currarg/g;
			$arglist[$j] = $currarg;
		    }
		    warning(7, "CPX  $comm --> tox = $tox");
		    my($a, $b) = split(/:/, $argspan{$id}, 2);
		    $arr->[$a-1] = $tox;
		    my($c, $d, $e) = split(/:/, $argspan{$argarr[$id][$nwant]}, 3);
		    warning(9, "CPX  erasing lines $a to $d");
		    for ($j=$a; $j <= $d; $j++) {
			my($o) = $arr->[$j];
			if ($o ne "") {
			    warning(8, "  Wiping out line[$j] --> nothing");
			}
			$arr->[$j] = "";
		    }
		}
		$line =~ s/:$trigger_id:\]/$html_end{$comm}/;
	    }
	}
	$arr->[$i] = $line;
    }
}

sub final_cleanup {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($inh);
    my($lastline);
    for ($i=0; $i < $arrlen; $i++) {
	$inh = vec($delim_inh, $i, 1);
	if ($inh > 0) {
	    next;
	}
	$line = $arr->[$i];

	  # restore newlines
	$line =~ s/(\@NL\@)+/\n/g;

	  # restore backslashes
	$line =~ s/\@BS\@/\\/g;

	  # restore newlines
	$line =~ s/(\@NL\@)+/\n/g;

	  # convert \\ (force line breaks) to HTML line breaks
	$line =~ s/\\\\/<br>/;

	# restore '@'
	$line =~ s:\@AT\@:\@:g;

	  # convert any remaining unprocessed braces back to their orig form
	$line =~ s/\[:\d+:/\{/;
	$line =~ s/:\d+:\]/\}/;

	# restore real '{' and '}', after removing extraneous '{' '}'
	$line =~ s:\{::g;
	$line =~ s:\}::g;
	$line =~ s:\@<<\@:{:g;
	$line =~ s:\@>>\@:}:g;
	$line =~ s:\@<<\@:\\\{:g;
	$line =~ s:\@>>\@:\\\}:g;

          # hard-wired.  Conversions.  Ugh.  No, double ugh.
	  # convert protected latex chars into single chars
	$line =~ s/\\([\{\}\_\&\$\#\%])/$1/og;
	  # drop tildes (forced spaces in LaTeX).
	$line =~ s/\.[~]+/\. /og;
	  # convert multiple dashes to a single dash.
	$line =~ s/[-][-]+/-/og;

#	if (($i > 0) && ($lastline =~ /^\s*\n$/) && ($line =~ /[^\s]+/)) {
	if (($i>=2) && ($lastline =~ /^\s*\n$/) && ($arr->[$i-2] =~ /\n$/)) {
	    $line = "<p>$line";
	}

	$lastline = $line;
	$arr->[$i] = $line;
    }
}

#
# start executing at main()
# 

main();

