1 # This file is part of reLyX
2 # Copyright (c) 1998-9 Amir Karger karger@post.harvard.edu
3 # You are free to use and modify this code under the terms of
4 # the GNU General Public Licence version 2 or later.
7 # This package prepares a LaTeX file for translation to LyX
8 # - Translates some local commands (e.g., {\em blah} to {\emph{blah}})
9 # - Prepares math mode stuff for LyX. LyX reads LaTeX math mode directly,
10 # so reLyX can basically copy all math mode exactly, but LyX is a
11 # bit stricter than LaTeX. E.g., translate 'x^2' -> 'x^{2}'
12 # - Removes optional arguments if LyX doesn't understand them, e.g. \\
20 my $last_eaten; # last token we ate
22 # List of commands for which LyX doesn't support the optional argument
23 my @DeleteOptArg = map {"\\$_"} qw(\\ \\*
24 chapter section subsection subsubsection paragraph subparagraph
27 my $debug_on; # was -d option given?
29 ######################### PARSER INVOCATION ################################
31 # This subroutine opens the TeX parser and processes the file.
32 # Arg0 is the name of the input TeX file
33 # Arg1 is the name of the output "clean" file
35 my ($InFileName, $OutFileName) = (shift,shift);
37 $debug_on = (defined($main::opt_d) && $main::opt_d);
38 my $zzz=$debug_on ? " TeX file ($InFileName --> $OutFileName)\n" :"... ";
39 print STDERR "Cleaning$zzz";
40 open (OUTFILE, ">$OutFileName") or die "problem opening $OutFileName: $!\n";
42 # Create the list of tokens for the parser
43 # Parts of the token list are swiped from TeX.pm
44 my %MyTokens = ( '{' => $Text::TeX::Tokens{'{'},
45 '}' => $Text::TeX::Tokens{'}'},
46 '$' => $Text::TeX::Tokens{'$'},
47 '$$' => $Text::TeX::Tokens{'$$'},
48 '\begin' => $Text::TeX::Tokens{'\begin'},
49 '\end' => $Text::TeX::Tokens{'\end'},
52 # Put local tokens, like \em, into %MyTokens
53 #Note: \cal is "local", although it's found in math mode
54 # (The "map" just puts a backslash in front of each word in the list)
55 my @LocalTokens = qw (em rm bf tt sf sc sl it
56 rmfamily ttfamily sffamily mdseries bfseries
57 upshape itshape slshape scshape cal
59 foreach (@LocalTokens) {
60 $MyTokens{"\\$_"} = $Text::TeX::Tokens{'\em'}
62 # Now add any commands
63 &ReadCommands::Merge(\%MyTokens);
65 # Create the fileobject
66 my $file = new Text::TeX::OpenFile
68 'defaultact' => \&clean_tex,
69 'tokens' => \%MyTokens;
71 # Now actually process the file
74 #warn "Done cleaning TeX file\n";
75 } # end sub call_parser
78 ####################### MAIN TRANSLATING SUBROUTINE ########################
79 # Routine called by the TeX-parser to perform token-processing.
81 my($eaten,$txt) = (shift,shift);
84 # Sub translate is given a string and one of the translation tables below.
85 # It returns the translation, or just the string if there's no translation
86 # Translation table for TT::Begin::Group tokens
88 '$' => '\(', # LyX math mode doesn't
89 '$$' => '\[', # understand \$ or $$
92 # Translation table for TT::End::Group tokens
98 # Translation table for TT::Token tokens whose translations should
99 # NOT have whitespace after them! See sub translate...
100 # Note that tokens of type TT::EndLocal are always translated to '}'. So,
101 # any token defined as a local token *must* be translated to something
102 # with a '{' (e.g., '\em' -> '\emph{') or we'll have mismatched braces
103 my %no_ws_transtbl = (
112 '\rmfamily' => '\textrm{',
113 '\ttfamily' => '\texttt{',
114 '\sffamily' => '\textsf{',
115 '\mdseries' => '\textmd{',
116 '\bfseries' => '\textbf{',
117 '\upshape' => '\textup{',
118 '\itshape' => '\textit{',
119 '\slshape' => '\textsl{',
120 '\scshape' => '\textsc{',
121 '\cal' => '\mathcal{',
125 # a faux "switch" statement. sets $_ for later use in pattern
128 $type =~ s/^Text::TeX::// or die "Non-Text::TeX object";
129 my $printstr = ""; # default for undefined printstrs etc.
130 SWITCH: for ($type) {
131 # Handle blank lines.
136 # Handle the end of a local font command - insert a '}'
138 # we could just say $printstr='}'
139 $printstr = &translate('}', \%endtranstbl);
143 # $eaten->exact_print is undefined for previous environments
144 $outstr = $eaten->exact_print;
145 if (! defined $outstr) { # comment at end of paragraph
146 warn "Weird undefined token $eaten!" unless $eaten->comment;
150 # Handle LaTeX tokens
152 my $realtok = $eaten->print; # w/out whitespace
153 # If a comment is its own paragraph, print nothing
154 last SWITCH unless defined($realtok);
155 # Special handling for \verb and \verb*
156 if ($realtok =~ /^\\verb\*?/) {
157 $printstr = &Verbatim::copy_verb($txt,$eaten);
161 # Translate token if necessary, or just print it
162 # "no_ws" is HACK to remove whitespace, so '\em ' -> '\emph{'
163 $printstr = &translate($outstr, \%no_ws_transtbl, "no_ws");
165 # Ignore optional argument(s) if necessary
166 $printstr .= &handle_opt_args($eaten,$txt);
171 # Tokens taking arguments, like '^'
172 # ADD '{' if there isn't one before the argument!
173 # TODO can we check whether the command is \label, \include
174 # and not add the braces in that case?
175 if (/^BegArgsToken$/) {
178 # Ignore optional argument(s) if necessary
179 $printstr .= &handle_opt_args($eaten,$txt);
181 # Add beginning brace before the 1st argument if there isn't one
182 my $tok = $txt->lookAheadToken;
183 $printstr .= '{' unless ($tok =~ /\{/);
187 # End of one argument, beginning of next
188 # Note: by default ArgToken,EndArgsToken print nothing
189 # ADD '}' if there isn't one after the last argument
190 # Then read and print any optional arguments which may exist
191 # between this argument and the next (we must do this here or we would
192 # add a '{' before an optional argument!)
193 # ADD '{' if there isn't one before the next argument!
194 # (just like we do in BegArgsToken and EndArgsToken)
196 $printstr = $outstr; # = ''
198 # Add '}' after the argument that ended if necessary
199 $printstr .= '}' unless $last_eaten->print eq "\}";
201 # Eat and print any optional arguments
202 $printstr .= &handle_opt_args($eaten,$txt);
204 # Add '{' before the next argument if necessary
205 my $tok = $txt->lookAheadToken;
206 $printstr .= '{' unless ($tok =~ /\{/);
210 # End of tokens taking arguments, like '^'
211 # ADD '}' if there isn't one after the last argument, i.e.,
212 # if the previous token *wasn't* a '}'
213 # Kludge: for TeX style \input command ("\input foo" with no
214 # braces) we need to read the whole filename, but parser will have
215 # read only one char. So read in the rest of the filename before
217 if (/^EndArgsToken$/) {
218 $printstr = $outstr; # = ''
220 unless ($last_eaten->print eq "\}") {
221 my $s = $eaten->base_token;
222 if ($s->print eq "\\input") {
223 my $t = $txt->lookAheadToken;
224 # For one-char filename (a.tex) do nothing
225 if ($t =~ /^[\w.\-]/) {
226 my $u = $txt->eatMultiToken;
231 # TeX \input always adds .tex ending
238 # Don't bother eating optional args coming after the last
239 # required arg: they'll just be copied as text
243 # Handle opening groups, like '{' and '$'.
244 if (/Begin::Group$/) {
245 $printstr = &translate($outstr,\%begtranstbl);
249 # Handle closing groups, like '}' and '$'.
251 $printstr = &translate($outstr, \%endtranstbl);
255 if (/Begin::Group::Args/) {
256 my $env = $eaten->environment;
258 if ($env eq "verbatim" || $env eq "reLyXskip") {
259 # copy everything up to "\end{foo}"
260 $printstr .= &Verbatim::copy_verbatim($txt, $eaten);
265 if (/End::Group::Args/) {
275 # The default action - print the string.
277 } # end SWITCH:for ($type)
279 # Actually print the string
280 if (defined $printstr) {
281 print OUTFILE $printstr;
282 $last_eaten = $eaten; #save for next time
283 } else {warn "Undefined printstr";}
285 } # end sub clean_tex
287 #################### TRANSLATOR SUBROUTINES ###############################
289 # Replace a string (possibly with whitespace around it) with another
290 # Arg0 is a string, Arg1 is a reference to a hash containing translations
291 # If a token not in the table is passed in, do nothing
292 # If Arg2 is defined AND the token is known, then remove whitespace from
293 # the end of the translated token. This is a HACK to do '\em ' -> '\emph{'
294 # Return the string, possibly modified
295 my ($tokstr, $transref) = (shift, shift);
296 my $remove_ws = shift;
297 my %transtable = %$transref;
299 # remove whitespace from the string (since transtable doesn't have it)
300 my $stripstr = $tokstr;
301 $stripstr =~ s/^\s*(\S+)\s*$/$1/ or warn "couldn't strip token";
302 if ( exists $transtable{$stripstr} ) {
303 # use \Q or \, (, $, and [ will be misinterpreted
304 $tokstr =~ s/\Q$stripstr\E/$transtable{$stripstr}/;
307 if (defined $remove_ws) {
315 sub handle_opt_args {
316 # read and concatenate OR IGNORE optional arguments
317 # Arg0 is a BegArgsToken or ArgToken
# Arg1 is the parser file object; this sub calls lookAheadToken and
# eatOptionalArgument on it and passes it to Arg0's next_args method.
# NOTE(review): the declarations of $outstr and $foo and the final return
# statement are not visible in this view -- presumably the accumulated
# $outstr is returned; confirm against the full file.
318 my ($eaten,$fileobject) = (shift,shift);
321 # If at end of paragraph, don't bother looking for optArgs
322 return "" unless $fileobject->lookAheadToken;
324 # Get the next argument(s) expected for this token == /^o*[rR]?$/
325 # If there are no args expected, just return
326 my $curr_args = $eaten->next_args($fileobject) or return "";
328 # Now print or ignore any optional arguments
329 # If there's an 'r' in curr_args, we're done for now
331 my $token_name = $eaten->token_name; # (needed for EndArgsToken, e.g.)
# Each pass strips one leading 'o' from $curr_args and eats one optional
# argument from the input, so the loop ends at the first non-'o' character.
332 while ($curr_args =~ s/^o//) {
333 my $opt = $fileobject->eatOptionalArgument;
334 # Print any initial space before the optional argument
335 if ($foo = $opt->exact_print) {
336 if ($foo =~ /^(\s+)/) {
341 # Print the argument or ignore it
# The token name is matched literally (\Q..\E, anchored) against the
# entries of @DeleteOptArg.
343 if (grep /^\Q$token_name\E$/, @DeleteOptArg) {
# NOTE(review): this print names no filehandle, so the message goes to the
# currently selected handle rather than explicitly to STDERR -- confirm
# that is intended.
344 print "Optional argument '",$opt->print,
345 "' to macro $token_name ignored\n";
# Presumably the else branch (its opening line is not visible here): the
# optional argument is kept and re-wrapped in square brackets.
347 $outstr .= "[" . $opt->print . "]";
349 } # Was an optional argument found?
353 } # end sub handle_opt_args
355 1; # return true value to calling program