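# [extraction residue, not part of the program; duplicates a fragment of the title print in print_html_header]: \n", $qval{title} , "\n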

#! /usr/bin/perl
# Copyright (c) 1996, 1997 Russell Quong.
#
# In the following, the "author" refers to "Russell Quong."
#
# Permission to use, copy, modify, distribute, and sell this software and
# its documentation for any purpose is hereby granted without fee, provided
# that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#      This product includes software developed by Russell Quong.
# 3. All HTML generated by ltoh must retain a visible notice that it
#    was generated by ltoh and contain a link to the ltoh web page
#
# Any or all of these provisions can be waived if you have specific,
# prior permission from the author.
#
# THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND,
# EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY
# WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
#
# IN NO EVENT SHALL RUSSELL QUONG BE LIABLE FOR ANY SPECIAL,
# INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY
# THEORY OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.

# ltoh.pl = a LaTeX to HTML converter
# Russell Quong 1996, 1997
# Version 97e.

# enable readable "English names" for variables, like $MATCH or $PERL_VERSION
# instead of " punctuation names" for variables, like $& or $]
use English;

# NOTE: the variables below are deliberately package globals (no "my"):
# subs elsewhere in this file read and write them by name.
$false = 0;
$true = 1;
$author = "Russell W. Quong";
$version = "Version 97e.";
$reldate = "Mar 1997";
$status = "Experimental";
$dirsep = "/";                  # directory separator character

# %qval holds the document variables; spec files and the LaTeX source
# can override them (see do_spec_pseudo_op).
$qval{"today"} = strip_hms_and_day( scalar(localtime()) );
$qval{"title"} = "";
$qval{"author"} = "Unknown document author";
$qval{"email"} = "Unknown email";
$qval{"url"} = "Unknown Web URL";
$qval{"keep_comments"} = 0;

# messages whose level is greater than $warnlevel are suppressed.
$warnlevel = 3;

#
# bad_command(err_msg, descrip) ==> the text emitted in place of a LaTeX
# command that could not be converted.
# NOTE(review): this file appears to have had its HTML markup stripped;
# the original probably wrapped the message in tags -- confirm.
#
sub bad_command {
    my($err_msg, $descrip) = @_;
    return " $err_msg $descrip ";
}

# lookup(varname) ==> get the value of variable VARNAME.
# Returns the string "VARNAME undefined" when the variable is not set.
sub lookup {
    my($idx) = @_;
    return "$idx undefined" if (! defined $qval{$idx});
    return $qval{$idx};
}

#
# print_html_header(filename) ==> print the start of the HTML document to
# the currently selected file handle.  Sets a default title if none is set.
# NOTE(review): the literals below consist only of newlines because the
# markup they contained (doctype, head, title, body ...) was lost from this
# copy of the file.  They are reproduced as found; restore from a pristine
# copy of ltoh.
#
sub print_html_header {
    my($filename) = @_;
    print '', "\n";
    print "\n";
    if ($qval{"title"} eq "") {
        $qval{"title"} = "$filename : [LaTeX --> HTML]";
    }
    print "\n\n", $qval{title} , "\n\n\n";
    print "\n";
}

#
# print_html_trailer(filename) ==> print the end of the HTML document
# (ltoh notice, author, email, modification dates) to the currently
# selected file handle.
# NOTE(review): as in print_html_header, the markup inside these literals
# (including the link to the ltoh web page that licence condition 3
# requires) is missing from this copy.  The strings are byte-for-byte what
# the damaged source prints; restore from a pristine copy of ltoh.
#
sub print_html_trailer {
    my($filename) = @_;

    print "\n\n\n";
    print "[Converted LaTeX --> HTML by";
    print ' ltoh]';
    print "\n\n";
    print "\n\n\n";

    if ($qval{"url"} =~ /Unknown/) {
        # no URL known: only mention the author if we know who it is.
        if (! ($qval{"author"} =~ /Unknown/)) {
            print "$qval{author}\n" ;
        }
    } else {
        print "$qval{author}\n";
    }
    if (! ($qval{"email"} =~ /Unknown/)) {
        print "($qval{email}";
        print ")\n";
    }

    # localtime must be called in scalar context to get the date string.
    my($ctime) = strip_hms_and_day(scalar(localtime));
    print "Last modified: $ctime\n";

    my($mtime) = get_file_mtime($filename);
    if ($mtime ne $ctime) {
        print "(LaTeX doc modified: ";
        print $mtime;
        print ")\n\n";
    }
    print "\n\n\n";
    print "\n\n";
}

#
# strip_hms_and_day(time_string) ==> the date without the leading day of
# the week, the hours:min:secs and an optional three letter time zone.
# E.g. "Mon Jul 29 19:53:49 PDT 1996" ==> "Jul 29 1996"
#
sub strip_hms_and_day {
    my($str_time) = @_;
    $str_time =~ s/^[A-Z][a-z][a-z][ \t]*//;
    $str_time =~ s/\d+:\d+:\d+[ \t]*([A-Z][A-Z]T[ \t]*)?//;
    return $str_time;
}

#
# return the time (as a string) that the file XXX was last modified.
# called via:
#     $string = get_file_mtime (XXX);
# Returns '' if the file cannot be stat'ed.
#
sub get_file_mtime {
    my($filename) = @_;
    my($str_time) = '';         # default value

    # stat returns an empty list on failure.  (The old test compared the
    # device number against the bareword "null", i.e. against 0, which
    # also rejected files that live on device 0.)
    my(@info) = stat($filename);
    if (@info) {
        # element 9 is the modification time; scalar localtime gives e.g.
        #     Mon Jul 29 19:53:49 1996
        $str_time = strip_hms_and_day(scalar(localtime($info[9])));
        warning(9, "File $filename last modified on $str_time");
    }
    return $str_time;
}

#
# logmessage(prefix, [level,] message...) ==> print a message if its level
# is <= $warnlevel.  The level is optional and defaults to 0 (always
# printed when $warnlevel >= 0).
# Messages go to STDOUT explicitly: print_html_file select()s the HTML
# output file, and diagnostics must not end up inside the generated page.
#
sub logmessage {
    my($prefix, @params) = @_;
    my($level) = 0;
    if (@params > 1) {
        # only an all-digit first parameter is a level; a message that
        # merely contains a digit is not.
        if ($params[0] =~ /^\d+$/) {
            $level = shift(@params);
        } else {
            print STDOUT ("No level given: @_");
        }
    }
    if ($level <= $warnlevel) {
        print STDOUT "$prefix :: @params\n";
    } else {
        # print "level $level > warnlevel $warnleve\n";
    }
}

# the three flavours of log message; all take ([level,] message...).
sub errmsg {
    logmessage(" * Error: ", @_);
}

sub warning {
    logmessage(" + Warning: ", @_);
}

sub fyi {
    logmessage(" - fyi: ", @_);
}

#
# Try to figure out where the perl script is actually located.
# (1) We can follow symbolic links to ABSOLUTE PATHS correctly,
# (2) But we can't handle relative symbolic links to another directory,
#     as we need to keep track of where the link is relative to.
# Case (1) handles the common case of a link: xyz --> /whereever/bin/ltoh
# Also, we can handle the last link referring to a file in the same dir.
#
# NOTE(review): the directory is only taken from a name while that name is
# still a symbolic link, so for a script that is not a link (or for the
# final link target) the result stays at its previous value, initially ".".
# Behavior is left as found; confirm whether the directory of the final
# target was intended.
#
sub get_scriptdir {
    fyi("PROGRAM_NAME = $PROGRAM_NAME");
    my($scriptfile) = $PROGRAM_NAME;    # maybe a link.  We trace it down.
    my($scriptdir) = ".";
    while (-l $scriptfile) {
        fyi(9, "$scriptfile is link to -->");
        if ($scriptfile =~ /(.*) $dirsep ( [^$dirsep]+ )/x ) {
            $scriptdir = $1;
        }
        $scriptfile = readlink $scriptfile;
        fyi(9, " $scriptfile.");
        fyi(7, "scriptdir = $scriptdir");
    }
    return $scriptdir;
}

#
# handle all the arguments, in the @ARGV array.
# Reads the specification files, then converts each file named on the
# command line (or standard input if there are none).  Exits with status 1
# if no specification file could be read; returns -1 at the first input
# file that is not readable.
#
sub main {
    fyi("PROGRAM_VERSION = $version");
    fyi(" by $author. 1996, 1997.");
    my($nspecfiles) = 0;
    my($scriptdir) = get_scriptdir();   # attempt to trace down a sym link.

    # list of spec files
    # NOTE(review): perl does not expand "~"; unless read_specfile does so
    # itself the second entry can never be found -- confirm.
    my(@specfiles) = ( "$scriptdir$dirsep" . "ltoh.specs" ,
                       "~$dirsep.ltoh.specs" ,
                       ".$dirsep.ltoh.specs" );
    foreach my $specfile (@specfiles) {
        $nspecfiles += read_specfile($specfile);
    }
    # fall back to the system-wide locations only if nothing was found.
    if ($nspecfiles == 0) {
        $nspecfiles += read_specfile("/usr/local/bin/ltoh.specs");
    }
    if ($nspecfiles == 0) {
        $nspecfiles += read_specfile("/usr/bin/ltoh.specs");
    }
    if ($nspecfiles == 0) {
        warning("Tried to read spec files: @specfiles");
        errmsg("No specification files found. Bye-bye.");
        exit(1);
    }

    if (@ARGV == 0) {
        handle_file('-');
    }
    # A lexical loop variable is used on purpose: the conversion passes use
    # the package variable $i as a counter, and a "foreach $i (@ARGV)" loop
    # would alias that variable to (and so overwrite) the @ARGV elements.
    foreach my $arg (@ARGV) {
        my($texfile) = $arg;
        if ($arg =~ /^(.+)\.([ltex]+)$/ ) {
            warning(7, "filebase = $1, suffix = $2, input file = $texfile");
        } else {
            $texfile = $arg . ".tex";
        }
        if (! -r $texfile) {
            print "Cannot read file: $texfile\n";
            return -1;
        }
        handle_file($texfile);
    }
}

#
# process a specific file.  Called via: handle_file($filename);
# A filename of '-' means standard input.  The lines are read into @orig,
# run through the conversion passes in order, and the result (which the
# passes leave in @delim_lines) is written by print_html_file.
#
sub handle_file {
    my($texfilename) = @_;
    my($in);
    if ($texfilename eq '-') {
        $in = \*STDIN;
    } elsif (! open($in, '<', $texfilename)) {
        errmsg("Cannot open $texfilename: $!");
        return;
    }
    $qval{"title"} = "Unnamed Web page";
    fyi(5, "+ Reading LaTeX input ... handle_file($texfilename)");
    @orig = <$in>;
    close($in) if ($texfilename ne '-');

    # each pass is followed by a dump of the lines at warnlevel >= 8.
    print_arr('Orig $lineno:', \@orig) if ($warnlevel >= 8);
    do_comment_verbatim(\@orig);
    print_arr('Verb $lineno:', \@orig) if ($warnlevel >= 8);
    do_tables(\@orig);
    print_arr('Tabl $lineno:', \@orig) if ($warnlevel >= 8);
    do_begin_end(\@orig);
    print_arr('BegE $lineno:', \@orig) if ($warnlevel >= 8);
    do_tex_comms(\@orig);
    print_arr('Comm $lineno:', \@orig) if ($warnlevel >= 8);
    mark_delims(\@orig);
    print_arr('Mark $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
    do_simple_latex_defs(\@delim_lines);
    print_arr('{} $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
    do_complicated_latex_defs(\@delim_lines);
    print_arr('{N} $lineno:\n', \@delim_lines) if ($warnlevel >= 8);
    final_cleanup(\@delim_lines);
    print_arr('[<$lineno>]', \@delim_lines) if ($warnlevel >= 8);
    print_arr("", \@delim_lines) if ($warnlevel >= 8);

    print_html_file($texfilename);
}

# generate the HTML file.  Pass in the name of the .tex file
#     print_html_file( "filename.tex" );
# The output name comes from the variable htmlfile_spec, in which $BASE
# and $SUFFIX are replaced by the base name and suffix of the .tex file.
#
sub print_html_file {
    my($texfile) = @_;
    my($base, $suffix) = ($texfile, "");
    my($outfile) = $qval{"htmlfile_spec"};
    fyi(8,"texfile = $texfile, outfile = $outfile");
    if ($texfile =~ /^(.+)\.([^.]*)$/ ) {
        $base = $1;
        $suffix = $2;
    }
    fyi(8, "base = $base, suffix = $suffix, input file = $texfile");
    if (! defined $outfile || $outfile eq "") {
        errmsg("Variable htmlfile_spec is not set; no output for $texfile");
        return;
    }
    $outfile =~ s/\$BASE/$base/;
    $outfile =~ s/\$SUFFIX/$suffix/;
    fyi(3, "+ texbase = $base ; HTML output file => $outfile .");
    if (-e $outfile) {
        #
        # comment out the following warning if desired.
        #
        # print "Warn, file $outfile already exists\n";
    }

    my($out);
    if (! open($out, '>', $outfile)) {
        errmsg("Cannot write $outfile: $!");
        return;
    }
    #
    # print to the output file by default, by selecting it.
    #
    my($previous) = select($out);
    print_html_header($texfile);
    #
    # print the generated HTML.
    # @delim_lines is the final HTML after processing
    #
    print_arr("", \@delim_lines);
    print_html_trailer($texfile);
    select($previous);
    # buffered write errors only show up when the file is closed.
    close($out) or errmsg("Error writing $outfile: $!");
}

# number of fields in a specification line (see handle_specification).
$re_fields = 4;

#
# DO NOT EDIT THEM MANUALLY.
# associative arrays indexed by the LaTeX command
# containing the corresponding HTML
# E.g, we might have something like:
#     %html_start{'textbf'} = '';
#     %html_end{'textbf'} = '';
# The values are read in from a specification file.
# DO NOT EDIT THEM MANUALLY.

#
# do_spec_pseudo_op(line) ==> handle LINE if it is a pseudo-op or a
# variable assignment.  Returns
#     "endfile"    for the pseudo-op   =***= endfile
#     "pseudo-op"  for any other line starting with   =***= op
#                  (echo, exit and warnlevel := N are acted upon)
#     "set-var"    for   name := value   (stored in %qval, with optional
#                  surrounding double quotes and blanks removed)
#     ""           for anything else, which the caller treats as an
#                  ordinary specification.
#
sub do_spec_pseudo_op {
    my($line) = @_;

    if ($line =~ /^=\*\*\*=[ \t]+([^ \t\n]*)/ ) {
        my($op) = $1;
        chomp($line);
        fyi(7, "Psuedo-op $op: ($line)");

        # Test the operator itself.  Looking for "endfile" anywhere on the
        # line silently dropped e.g. an echo whose text mentions endfile.
        if ($op eq "endfile") {
            return $op;
        }
        # echo: print everything after "echo" and one separator character.
        if ($line =~ /=[ \t]*echo[^\n](.*)\z/s) {
            print "$1\n";
        }
        if ($line =~ /=[ \t]*exit/) {
            # NOTE(review): $spec_fname is a package variable, presumably
            # set by the spec file reader -- not visible from here.
            fyi(6, "Exit request, line $INPUT_LINE_NUMBER, $spec_fname\n");
            exit(1);
        }
        if ($line =~ /=[ \t]*warnlevel\s*:=\s*(\d+)/i) {
            $warnlevel = $1;
        }
        return "pseudo-op";
    }

    if ($line =~ /^([a-zA-Z_][\w]*)\s*:=(.*)$/ ) {
        my($name, $val) = ($1, $2);
        # strip leading blanks + opening quote, closing quote + trailing blanks
        $val =~ s/^[\s]*["]?//;
        $val =~ s/["]?[\s]*$//;
        my($oldval) = defined $qval{$name} ? $qval{$name} : "";
        $qval{$name} = $val;
        fyi(6, "set var: $name := ($val), oldval = $oldval");
        return "set-var";
    }

    return "";
}

# Example
# For the specification
#     ':b/e :\begin{rqitemize}(\{[^\}]*\})*:

'; # $html_end{'\begin{rqitemize}'} = '

) { $lineno++; # skip comments if ($line =~ /^[ \t]*[#;]/ || $line =~ /^[ \t]*$/) { fyi(9, "Skipping comment: $line"); next; } $status = handle_specification($line); if ($status eq "endfile") { last; } } fyi(3, "+ Read spec file ($spec_fname)."); if ($lineno == 0) { warning(3, "+ Empty spec file ($spec_fname)."); } return 1; } sub handle_specification { my($line) = @_; my(@arr); my($junk, $from, $to, $rest, $status); if ( ($status = do_spec_pseudo_op($line)) ne "") { return $status; } $splitter = substr($line, 0, 1); if ($splitter eq '[ \t]') { $splitter = "[ \t]+"; } else { $splitter = "[" . quotemeta("$splitter") . "]"; } fyi(8, "splitter = $splitter"); @arr = split($splitter, $line, $re_fields+3); if (@arr < $re_fields+2) { warning("Malformed spec, line $lineno, too few fields" . "$line"); next; } elsif (@arr > $re_fields+2) { warning("Malformed spec, line $lineno, too many fields" . "$line"); next; } else { # everything is OK. do nothing. } ($raw_spec, $from, $to_start, $to_end) = @arr[1 .. $re_fields ]; # (a) if $from is "\begin{xyz}MORE-STUFF", strip off MORE-STUFF # (b) if $from is "\xyz{MORE-STUFF}", strip off MORE-STUFF # must check if $from looks like "\begin{...}" as must not apply (b) # to a \begin{...} construct if ($from =~ /\\begin\{.+\}/ ) { if ($from =~ s/(\\begin\{[^\}]+\})(.+)/$1/ ) { fyi(7, "Stripped begin\{...\} args: $arr[2] --> $from"); } } elsif ($from =~ /(\\[A-Za-z_]+)([^A-Za-z_].*)/ ) { $from =~ s/(\\[A-Za-z_]+)([^A-Za-z_].*)/$1/; fyi(7, "Stripped args: $arr[2] --> $from"); } # if $to_end is "......", # generate the corresponding closing tags, automatically. Cool. # Have to reverse the order, hence the while loop. # my($scratch) = ""; if ($to_end =~ '') { $junk = $to_start; $scratch = ""; while ($junk =~ s:<([^ \t>]+)([^>]*)>:\@$1\@:o ) { $scratch = "" . 
$scratch; } $to_end =~ s::$scratch:; fyi(7, "Gen to_end: to_start = $to_start, to_end = $to_end"); } fyi(7, "from = $from, to_start = $to_start, to_end = $to_end"); fyi(8, "raw_spec = $raw_spec"); $comm_regex{$from} = $arr[2]; $comm_attr{$from} = $raw_spec; $html_start{$from} = $to_start; $html_end{$from} = $to_end; if ($raw_spec =~ /re/i && $to_end ne "") { warning(5, "Unused specification: $to_end"); } } # Simple protection of special character sequences on a line # (1) group backslashes by pairs, # (2) '@' to @AT@, and # (3) protect \{ and \}. sub escape_line { my($line) = @_; $line =~ s:\@:\@AT\@:g; # handle latex math characters/macros. Strip off math mode $...$ for now $line =~ s:\$ ([<>]) \$:\1:ogx; $line =~ s:\$ (&[gl]t;) \$:\1:ogx; $line =~ s:\$ (\\[a-zA-Z]+) \$:\1:ogx; # grab pairs of backslash characters and escape them. # as we need to distinguish between # \{ ==> preserve '{' (escape it) # \\{ ==> preserve '\' (escape it), but process '{' normally. # \\\{ ==> preserve both '\' and '{' alone, follows from above $line =~ s:\\\\:\@BS\@\@BS\@:og; $line =~ s:\\\{:\@<<\@:og; $line =~ s:\\\}:\@>>\@:og; # unescape pairs of backslack characters. $line =~ s:\@BS\@\@BS\@:\\\\:og; # not needed, yet. (We run this function early in the processing, # and this change unnecessarily mucks things up at this early stage). # escape newlines # $line =~ s:\@AT\@NL\@AT\@:\@NL\@:og; return $line; } # # print_arr(prefix_string, reference_to_an_array); # In prefix_string the string '$lineno' is converted to the array index. # If prefix_string ends with a '\n' (newline), append a newline to the line. # Ex: # @lines = <>; # read all input into an array # print_arr('$lineno:', \@lines); # pass in a reference to the array sub print_arr { my($prefix, $arr) = @_; my($i); my($ndigits) = int( (log($#{$arr}) / exp(1) + .999) ) ; my($newline) = ""; if ($prefix =~ s/(\\n|\n)$//o) { $newline = "\n"; } foreach $i (0 .. 
$#{$arr}) { $s = $prefix; if ($prefix =~ /\$lineno/o) { $s =~ s/\$lineno/sprintf("%$ndigits" . "d", $i)/ge; } print "$s$arr->[$i]$newline"; } } $orig_inh = ""; # # I've hard-wired the following "early actions" # 1) handle \begin{verbatim} for now. # 2) You can set HTML/Perl variables via lines that look like: # %-ltoh- pseudo-op-line # %-tex2html- pseudo-op-line ## backward compatibility for me [RQ] # # 2) I've hard coded a simplistic table hander too. Needs updating. # It handles \multicolumn{ncols}{.}{contents} quite crudely # It handles \multirow{nrows}{.}{contents} quite crudely # NOT any more, see sub do_comment_verbatim { my($arr) = @_; my($arrlen) = scalar(@$arr); vec($orig_inh, $arrlen-1, 0) = 0; # pre-extend my($inh) = $false; for ($i=0; $i < $arrlen; $i++) { vec($orig_inh, $i, 1) = $inh; my($line) = $arr->[$i]; if ($line =~ /^[%]+-tex2html-\s*(.*)/ ) { warning(3, "Use of -tex2html- is deprecated. Change to -ltoh-"); handle_specification($1); } elsif ($line =~ /^[%]+-ltoh-\s*(.*)/ ) { fyi(6, "Processing psuedo-op in LaTeX source ($1)"); handle_specification($1); } # preserve special chars in html, always $line =~ s/\\&/&/xg; $line =~ s/ < /</xg; $line =~ s/ > />/xg; if ($line =~ s:\\begin{verbatim}:

: ) {
	    vec($orig_inh, $i, 1) = $true;	# inhibit further processing
            $inh = $true;
	} elsif ($line =~ s:\\end{verbatim}:

: ) { vec($orig_inh, $i, 1) = $true; # inhibit further processing $inh = $false; # allow processing on next line } elsif ($inh == $false && $line =~ s:^([ \t]*%.*):: ) { if (! lookup("keep_comments") ) { $line = ""; } vec($orig_inh, $i, 1) = $true; # inhibit further processing } elsif ($inh == $false && $line =~ /^\\end{document}/ ) { $#$arr = $i; fyi(8, "Truncating down to $i lines"); last; } $arr->[$i] = $line; } } # handle nested braces/parens/brackets. # convert { ==> {:level: level starts at 0 # # E.g. a{b{c}d{e{f}}g} ==> a{:0:b{:1:c:1}d{:1:e{:2:f:2:}:1:}g:0:} # sub mark_braces_one_line { my($ldelim, $rdelim, $line) = @_; my($newline, $level) = ("", 0); my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]"; while ( $line =~ /(.*[^\\])($lookfor)(.*)/) { my($prev, $del, $after) = ($1, $2, $3); my($repl); if ($del eq $ldelim) { $repl = "$del:$level:"; $level++; } elsif ($del eq $rdelim) { $repl = ":$level:$del"; $level--; } $newline .= "$prev$repl"; $line = $after; } return $newline; } # handle nested braces/parens/brackets. # convert "{" ==> " {:level " where level starts at 0 # # E.g. a{b{c}d{e{f}}g}h ==> a {:0 b {:1 c 1:} d {:1 e {:2 f 2:} 1:} g 0:} h # sub mark_braces_one_line { my($ldelim, $rdelim, $origline) = @_; my($newline, $level, $line) = ("", 0, $origline); my($lookfor) = ($ldelim eq $rdelim) ? "[$ldelim]" : "[$ldelim$rdelim]"; my($prev, $del, $after); # need after to survive the loop. $line =~ s/\@/ \@AT\@_/g; $line =~ s/\\\\/ \@2BS\@_/g; my($qldel) = quotemeta($ldelim); $line =~ s/\\$qldel/ \@L\@_/g; my($qrdel) = quotemeta($rdelim); $line =~ s/\\$qrdel/ \@R\@_/g; # need ".../s" in the following while() to preserve newlines. Ugh. 
while ( $line =~ /(.*?)($lookfor)(.*)/s) { ($prev, $del, $after) = ($1, $2, $3); fyi(9,"mark_braces_one_line:prev=($prev),del=($del),after=($after)\n"); my($repl); if ($del eq $ldelim) { $repl = " $del:$level "; $level++; } elsif ($del eq $rdelim) { $level--; $repl = " $level:$del "; } $newline .= "$prev$repl"; $line = $after; } $newline .= $after; $newline =~ s/ \@R\@_/\\$qrdel/g; $newline =~ s/ \@L\@_/\\$qldel/g; $newline =~ s/ \@2BS\@_/\\\\/g; $newline =~ s/ \@AT\@_/\@/g; return $newline; } $talign{"l"} = "left"; $talign{"r"} = "right"; $talign{"c"} = "center"; $talign{"p"} = "left"; $talign{"X"} = "left"; sub do_tables { my($arr) = @_; my($arrlen) = scalar(@$arr); my($tlev, $endrow, $colnum) = (0, undef, 0); my(@cols) = (); for ($i=0; $i < $arrlen; $i++) { my($origline) = $arr->[$i]; my($line) = $origline; # fyi(9, "Tables orig-line $i: $origline"); if ($origline =~ /\\begin\{[A-Za-z]*tabular[A-Za-z]*\}/) { $line =~ s/(\\\\)? \s* \\[ch]line.*//x; $line = mark_braces_one_line("{", "}", $line); fyi(9, "Tables delim-line $i: $line"); $line =~ s/ \{:1 .*? 1:\} //g; # remove nested brace stuff $line =~ s/ \{:0 /\{/g; $line =~ s/ 0:\} /\}/g; fyi(9, "Tables squashed-line $i: $line"); my(@fragments) = split(/[{}]+\s*/,$line); my($aligns) = $fragments[$#fragments]; my($tabletag); if ( $aligns =~ /[|]/) { $tabletag = ""; } else { $tabletag = "

"; } while ( $aligns =~ /([lcrpX])(.*)/s ) { my($lcr) = $1; my($rest) = $2; fyi(9, "col-align lcr = $lcr, rest = $rest"); $aligns = $rest; push (@cols, $talign{$lcr}); } $tlev ++; $endrow = 1; $colnum = 0; $line =~ s/\\begin\{tabular[A-Za-z]*\}\{.*\}/$tabletag/; fyi(9, "line =-> $line"); fyi(7, "Table with $#cols+1 columns. Aligns = @cols"); } elsif ($line =~ s:\\end{tabular[A-Za-z]*}:

: ) { $tlev --; @cols = (); } elsif ($tlev > 0) { if ($endrow) { $line = " $line"; $endrow = 0; } $line =~ s:\& (?![ampltg]+;): :gx; # "&" ==> "", but skip ">" "<" and "&". $line =~ s:\& (?![ampltg]+;): :gx; # convert \multicolumn --> \mc if ( $line =~ s:\\multicolumn{:\\mc\{:xg ) { fyi(9, "mc line: $line"); } # add individual column alignments. while ( $line =~ m/(.*?) () (\s* \\mc\{\d+\}\{.+?\}\{)? (.*)/sx ) { my($before, $after) = ($1, $4); fyi(9, "cols[colnum=$colnum] = $cols[$colnum]"); my($mcspec) = $3; fyi(9, "mc: before=($before),mcspec=$mcspec,after=($after)"); if ($mcspec eq "") { if ($colnum > $#cols) { warning("Too many columns in table, line $i:\n + ($origline)"); $line = "$before" . $after; } else { $line = "$before" . $after; } $colnum++; } else { if ($mcspec =~ m:\\mc \{(\d+)\} \{([^}]+)\}:x ) { my($colspan) = $1; $colnum += $colspan; $2 =~ / ([lcrpX]) /x; $line = "$before{" . $after; } } } # strip off trailing \\ and/or \hline. Each might be on sep line. if ( $line =~ s:\\\\::xg ) { $endrow = 1; $colnum = 0; } $line =~ s/\s* \\hline//g; $line =~ s/\\cline\{[0-9-]+\}//; } $arr->[$i] = $line; } } $re_tex_arg_0 = "(\[[^\]*\]|{[^}]*})*"; # no nesting $re_tex_arg_1 = "({(([^{}]*{[^{}]*}[^{}]*)*)})*"; # nesting up to level 1. sub do_begin_end { my($arr) = @_; my($arrlen) = scalar(@$arr); my($i, $level, $inh); my($comm, $commregex, $fromx, $tox, $latexcomm); for ($i=0; $i < $arrlen; $i++) { if ( vec($orig_inh, $i, 1) > 0) { # skip if processing inhibited next; } $line = $arr->[$i]; $line = escape_line($line); if ( $line =~ m/\\((begin|end)\{[a-zA-Z_]+\})/ ) { # Example: # $latexcomm = \begin{rqitemize}[12pt]{article} # $comm = \begin{rqitemize} # $fromx = \begin{rqitemize} # $tox =

# ... XXX
# In particular, the conversion does not depend on XXX.
# Of course, we allow arbitrary nesting of braces in XXX,
# which was the hard part of all this.
# At this point, all braces have been munged and tagged with an unique ID.
#
# do_simple_latex_defs(\@lines) ==> convert, in place, the commands that
# take no counted arguments.  An opening marker "[:ID:" together with the
# command in front of it is replaced by $html_start{command}; a closing
# marker ":ID:]" is replaced by $html_end{command}.  Commands whose
# attribute contains {N} are left for do_complicated_latex_defs.
# Lines flagged in $delim_inh are skipped.
#
sub do_simple_latex_defs {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($comm, $fromx, $tox, $id, $i);

    for ($i = 0; $i < $arrlen; $i++) {
        next if (vec($delim_inh, $i, 1) > 0);

        my($line) = $arr->[$i];
        warning(8, "do_simple_brace_comms($line)");

        # --- opening marker ---
        # The marker text is captured here rather than read from $MATCH
        # later, so that it cannot be disturbed by intervening matches.
        if ($line =~ /(\[:([0-9]+):)/) {
            my($marker) = $1;
            $id = $2;
            $comm = $id2comm{$id};
            warning(8, " id2comm{$id} is $id2comm{$id}");
            if ($comm ne "_:NULL_COMMAND:_") {
                if ($comm_attr{$comm} =~ /\{\d+\}/) {
                    next;       # next line.  We don't handle args.
                }
                $fromx = quotemeta($comm . $marker);
                if ( defined $html_start{$comm} ) {
                    $tox = $html_start{$comm};
                } else {
                    # it's not clear what error we should report when the
                    # unknown command also has further arguments.
                    $tox = bad_command("Unknown LaTeX command", "$comm\{");
                }
                warning(8, " {} Apply: $fromx ==> $tox on: $line");
                if (! ($line =~ s/$fromx/$tox/)) {
                    warning(3, " {} FAILED: $fromx ==> $tox on: $line");
                }
            }
        }

        # --- closing marker ---
        if ($line =~ /(:([0-9]+):\])/) {
            my($marker) = $1;
            $id = $2;
            $comm = $id2comm{$id};
            if ($comm ne "_:NULL_COMMAND:_") {
                if ($comm_attr{$comm} =~ /\{\d+\}/) {
                    # We don't handle args, but keep whatever the opening
                    # marker branch above already did to this line.
                    $arr->[$i] = $line;
                    next;
                }
                $fromx = $marker;
                $fromx =~ s/:\]/:\\]/;  # escape the ']' for use as a regex
                if ( defined $html_end{$comm} ) {
                    $tox = $html_end{$comm};
                } else {
                    $tox = bad_command("", "\}");
                }
                warning(8, " {} Apply: $fromx ==> $tox on: $line");
                $line =~ s/$fromx/$tox/;
            }
        }

        $arr->[$i] = $line;
    }
}

#
# getspan(\@lines, "START:END[:...]") ==> the concatenation of
# $lines[START] .. $lines[END].  Returns "" if the span is undefined or
# does not start with two numbers (previously such a span silently
# yielded $lines[0]).
#
sub getspan {
    my($arr, $span) = @_;
    return "" unless (defined $span && $span =~ /^(\d+):(\d+)/);
    my($start, $end) = ($1, $2);
    my($result) = "";
    foreach my $k ($start .. $end) {
        $result .= $arr->[$k] if (defined $arr->[$k]);
    }
    return $result;
}

#
# Mark the "end arg" via ID of the arguments for multi-arg commands.
# We process the complicated command when we reach the last brace.
#
# do_complicated_latex_defs(\@lines) ==> convert, in place, the commands
# whose attribute contains {N}, i.e. that take N brace arguments.
#  - At the opening marker "[:ID:" of the first argument we collect the
#    IDs of all N arguments (following @nextarg) and register the last one
#    in %comm_trigger.
#  - At the closing marker ":ID:]" of that last argument we substitute the
#    arguments for #1 .. #N in $html_start{command}, store the result in
#    the line just before the first argument, blank out the argument lines
#    and replace the closing marker by $html_end{command}.
# Lines flagged in $delim_inh are skipped.
#
sub do_complicated_latex_defs {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($comm, $tox, $id, $i, $j, $nwant, $argidx);

    for ($i = 0; $i < $arrlen; $i++) {
        next if (vec($delim_inh, $i, 1) > 0);

        my($line) = $arr->[$i];
        warning(8, "do_complicated_brace_comms($line)");

        if ($line =~ /\[:([0-9]+):/) {
            $id = $1;
            # NOTE(review): $from is a package variable that is not read
            # in this sub; the assignment is kept in case it is used
            # elsewhere.
            $from = $comm = $id2comm{$id};
            my($multarg) = scalar($comm_attr{$comm} =~ /\{(\d+)\}/);
            # grab the count right away, while $1 belongs to this match.
            $nwant = $1 if ($multarg);
            warning(9, " multarg = $multarg");
            if (! $multarg) {
                next;
            }
            $argidx = $id;
            #
            # start from 2, as the first arg will always exist.
            #
            $argarr[$id][1] = $id;
            my($complete) = 1;
            for ($j = 2; $j <= $nwant; $j++) {
                $argidx = $nextarg[$argidx];
                # the old test was "== undef", i.e. numerically zero;
                # spelled out here with the same meaning.
                if (! defined $argidx || $argidx == 0) {
                    warning("Not enough arguments for $comm\n");
                    $complete = 0;
                    last;
                }
                $argarr[$id][$j] = $argidx;
            }
            # only a command with all its arguments can be triggered.
            if ($complete) {
                $comm_trigger{$argidx} = $id;
                warning(9, " comm_trigger{$argidx} is $comm_trigger{$argidx}");
            }
        }

        if ($line =~ /:([0-9]+):\]/) {
            my($trigger_id) = $1;
            if ( defined $comm_trigger{$trigger_id} ) {
                $id = $comm_trigger{$trigger_id};
                $comm = $id2comm{$id};
                if ($comm_attr{$comm} =~ /\{(\d+)\}/) {
                    $nwant = $1;
                    my(@arglist);
                    for ($j = 1; $j <= $nwant; $j++) {
                        my($currarg) =
                            getspan($arr, $argspan{ $argarr[$id][$j] });
                        warning(9, " comp_def: arg[$id][$j] = $currarg");
                        $arglist[$j] = $currarg;
                    }
                    # Substitute all placeholders in one pass.  Replacing
                    # #1, #2 ... one after the other turned "#10" into
                    # <arg 1>0 and expanded "#2" inside the text of arg 1.
                    $tox = $html_start{$comm};
                    $tox =~ s/#([1-9]\d*)/($1 <= $nwant) ? $arglist[$1] : "#$1"/ge;
                    warning(7, "CPX $comm --> tox = $tox");

                    # spans look like "START:END[:...]"
                    my($first) = split(/:/, $argspan{$id}, 2);
                    my($skip, $last) =
                        split(/:/, $argspan{ $argarr[$id][$nwant] }, 3);
                    if (defined $first && $first =~ /^\d+$/ && $first >= 1
                        && defined $last && $last =~ /^\d+$/) {
                        $arr->[$first - 1] = $tox;
                        warning(9, "CPX erasing lines $first to $last");
                        for ($j = $first; $j <= $last; $j++) {
                            my($old) = $arr->[$j];
                            if ($old ne "") {
                                warning(8, " Wiping out line[$j] --> nothing");
                            }
                            $arr->[$j] = "";
                        }
                    } else {
                        # without a usable span we would overwrite the
                        # last line and erase line 0; leave the text alone.
                        warning(3, "CPX $comm: missing argument span, not converted");
                    }
                }
                $line =~ s/:$trigger_id:\]/$html_end{$comm}/;
            }
        }

        $arr->[$i] = $line;
    }
}

#
# final_cleanup(\@lines) ==> undo, in place, the protective escapes
# (@NL@, @BS@, @AT@, @<<@, @>>@) applied by the earlier passes, drop any
# brace markers that are still left, and apply a few hard-wired LaTeX to
# text conversions.  Lines flagged in $delim_inh are skipped.
#
sub final_cleanup {
    my($arr) = @_;
    my($arrlen) = scalar(@$arr);
    my($lastline);
    # $i must be lexical here: main() may be iterating with the package
    # variable of the same name.
    my($i);

    for ($i = 0; $i < $arrlen; $i++) {
        next if (vec($delim_inh, $i, 1) > 0);

        my($line) = $arr->[$i];

        # restore newlines
        $line =~ s/(\@NL\@)+/\n/g;

        # restore backslashes
        $line =~ s/\@BS\@/\\/g;

        # convert \\ (force line breaks) to HTML line breaks
        # NOTE(review): this copy of the file had a bare newline as the
        # replacement, apparently because the tag was stripped; <br> is
        # restored here -- confirm against a pristine copy of ltoh.
        $line =~ s/\\\\/<br>/g;

        # restore '@'
        $line =~ s:\@AT\@:\@:g;

        # convert any remaining unprocessed braces back to their orig form
        $line =~ s/\[:\d+:/\{/g;
        $line =~ s/:\d+:\]/\}/g;

        # restore real '{' and '}', after removing extraneous '{' '}'
        $line =~ s:\{::g;
        $line =~ s:\}::g;
        $line =~ s:\@<<\@:{:g;
        $line =~ s:\@>>\@:}:g;

        # hard-wired.  Conversions.  Ugh.  No, double ugh.
        # convert protected latex chars into single chars
        $line =~ s/\\([\{\}\_\&\$\#\%])/$1/g;

        # drop tildes (forced spaces in LaTeX).
        $line =~ s/\.[~]+/\. /g;

        # convert multiple dashes to a single dash.
        $line =~ s/[-][-]+/-/g;

        # a blank line that follows a complete line starts a new paragraph.
        # NOTE(review): the prefix below is two newlines in this copy of
        # the file; presumably a paragraph tag was stripped from it --
        # confirm against a pristine copy of ltoh.
        # if (($i > 0) && ($lastline =~ /^\s*\n$/) && ($line =~ /[^\s]+/)) {
        if (($i >= 2) && ($lastline =~ /^\s*\n$/)
            && ($arr->[$i-2] =~ /\n$/)) {
            $line = "\n\n$line";
        }
        $lastline = $line;
        $arr->[$i] = $line;
    }
}

#
# start executing at main()
#
main();