3 # This file is copyright (c) 1997-8 Ilya Zakharevich
4 # Modifications for reLyX by Amir Karger
5 # You are free to use and modify this code under the terms of
6 # the GNU General Public Licence version 2 or later.
10 #use vars qw($VERSION @ISA @EXPORT);
13 #require # AutoLoader; # To quiet AutoSplit.
16 # (Exporter AutoLoader);
17 # Items to export into callers namespace by default. Note: do not export
18 # names by default without a very good reason. Use EXPORT_OK instead.
19 # Do not simply export all your public functions/methods/constants.
26 # Preloaded methods go here.
28 # Does not deal with verbatims
29 # Spaces are treated badly.
31 ##################### GENERAL NOTES ##################################
32 # Each package describes a different sort of token.
34 # Chunk - default, just used as an ISA
35 # Text - plain text, made up of TT::$usualtokenclass stuff
36 # Paragraph - new paragraph starting (because you got \n\n in a LaTeX file)
37 # Token - simple token, like ~ or \blah
38 # EndLocal - pseudotoken meaning that the scope of a local command (like
40 # BegArgsToken - pseudotoken which takes one or more arguments, like \section
41 # ArgToken - pseudotoken returned in between arguments to a BegArgsToken
42 # EndArgsToken - pseudotoken returned after we finish getting arguments
44 # LookAhead - a special kind of EndArgsToken when you want to look ahead
45 # BegArgsTokenLookedAhead - special kind of BegArgsToken (see man page)
46 # Begin::Group - Beginning of a group, i.e., '{'
47 # End::Group - End of a group, i.e., '}'
48 # Begin::Group::Args - begin group but get args first, i.e., '\begin'
49 # End::Group::Args - end group but get args first, i.e., '\end'
50 # SelfMatch - e.g., '$'. Matches itself, but otherwise like a Begin::Group
51 # Separator - e.g., '&' (not used in reLyX)
52 # Comment - (not used in reLyX)
54 # The main package is TT::OpenFile. It contains the subroutines that do
55 # most of the parsing work. TT::GetParagraph does some stuff too, but
56 # it's not a token you'd expect the code to return
58 # Package subroutines (other than 'new'):
59 # refine - takes a token to a more specific kind of token type
60 # e.g., '{' goes from TT::Token to TT::Begin::Group
61 # digest - extra actions to do once you've eaten the token.
62 # e.g., eating arguments of \begin, or popping various
63 # stacks when you get to an End::Group
64 # print - how to print the token (e.g., the text making up the token)
65 # exact_print - print the token exactly as it appeared in the file.
66 # Usually involves adding whitespace
68 # Token and pseudotokens have some more subs:
69 # base_token - the token this token is created from. It's the token
70 # itself for a Token, but not for pseudotokens
71 # token_name - the name of the base_token
74 # $tok->[0] will usually be the word (e.g., '\blah') the parser read
75 # For pseudotokens, it's something more complicated
76 # (some tokens, like Paragraph have nothing there, though)
77 # $tok->[1] will be any comment (usually ignored)
78 # $tok->[2] will be the exact thing the parser read (usu. [0] plus whitespace)
79 # $tok->[3] stores arguments for Begin::Group::Args and End::Group::Args
80 # $tok->[4] stores pointer to beginning token for End::Group::Args
81 # A TT::Group is a reference to an array of tokens. Often (but not always),
82 # the first and last elements are Begin::Group and End::Group tokens respectively.
84 # Pseudotokens are objects, one of whose fields is a reference to the token
85 # that created the pseudotoken
86 # BegArgsToken, ArgToken, EndArgsToken pseudotokens:
87 # $tok->[0][0] - token (e.g. a TT::Token) that begins this group
88 # $tok->[0][1] - number of arguments that that token takes
89 # $tok->[0][2] - (found only in ArgToken) number of arguments to the token
90 # that have been read so far
91 ################################################################################
93 ######################## GLOBAL VARIABLES ##################################
94 # Sorts of text you find in a LaTeX file. For matching
95 $notusualtoks = "\\\\" . '\${}^_~&@%'; # Why \\\\? double interpretation!
96 $notusualtokenclass = "[$notusualtoks]";
97 $usualtokenclass = "[^$notusualtoks]";
99 # Original $macro wouldn't recognize, e.g., '\section*'. Added '\*?' - Ak
100 # (Had to add it for \section and \\ separately.)
101 # \" or \frac, e.g. Note that it eats whitespace AFTER the token. This is
102 # correct LaTeX behavior, but if text follows such a macro, and you just
103 # print out the macro & then the text, they will run together.
104 $macro = '\\\\(?:[^a-zA-Z]\*?|([a-zA-Z]+\*?)\s*)'; # Has one level of grouping
105 #$macro = '\\\\(?:[^a-zA-Z]|([a-zA-Z]+)\s*)'; # Contains one level of grouping
107 # active is a backslashed macro or $$ (same as \[) or ^^ followed by a char
108 # (^^A means ASCII(1), e.g. See the TeXbook) or a special character like ~
109 $active = "$macro|\\\$\\\$|\\^\\^.|$notusualtokenclass"; # 1 level of grouping
111 # In TeX, ^joe is equivalent to ^{j}oe, so sometimes we use tokenpattern
112 # instead of multitokenpattern to get just one character
113 $tokenpattern = "($usualtokenclass)|$active"; # Two levels of grouping
114 $multitokenpattern = "($usualtokenclass+)|$active"; # Two levels of grouping
116 # Note: In original (CPAN) version, $commentpattern had "". It needs ''
117 # or otherwise '\s' gets translated to 's'
118 $commentpattern = '(?:%.*\n\s*)+'; #one or more comment lines
119 $whitespaceAndComment = '\s*(%.*\n[ \t]*)+';
121 # matches either nothing OR an argument in brackets ($1 doesn't include [])
122 $optionalArgument = "(?:\\[([^]]*)\\])?"; # Contains one level of grouping
124 # These tokens are built from other tokens, so they're pseudotokens
125 # (except BegArgsToken actually does have text!?)
126 for (qw(Text::TeX::ArgToken Text::TeX::BegArgsToken Text::TeX::EndArgsToken )) {
130 # More global variables can be found at the end of the file
131 # E.g., the main Tokens hash
133 ####################### Token Packages #####################################
135 package Text::TeX::Comment;
140 package Text::TeX::Chunk;
143 sub collect {$_[0]->[0]}
148 sub print {$_[0]->[0]}
149 # exact_print prints the *exact* text read, including whitespace
150 # (but not including comments...)
151 sub exact_print {$_[0]->[2]}
152 # print the comment that came before a token
153 sub comment {$_[0]->[1]}
158 package Text::TeX::Token;
159 @ISA = ('Text::TeX::Chunk');
163 return undef unless defined $self->[0];
166 if (defined ($tok = $txt->{tokens}->{$self->[0]})
167 and defined $tok->{class}) {
168 bless $self, $tok->{class};
172 # Name of the token. Same as print for Token, but ArgToken and
173 # EndArgsToken, e.g., print nothing!
175 my $tok = shift->base_token;
180 # For pseudotokens, this sub is more complicated, but a token is just a token.
184 # return the syntax argument created by reLyX
185 # Return "" if relyx_args is empty, i.e., if the token takes no args
186 # Return undef if relyx_args doesn't exist, i.e., if the token is unknown
188 warn "not enough args to Text::TeX::relyx_args" unless @_==2;
189 my ($tok,$object) = (shift, shift);
192 # Test copied from TT::OpenFile::eat
193 if (defined ($name = $tok->token_name)) {
194 #print "$name is defined\n";
195 if (defined ($entry = $object->{"tokens"}->{$name})) {
196 #print "Entry in MyTokens is defined\n";
197 if (exists ($entry->{"relyx_args"})) { # even if it's empty...
198 #print "the args are '",$entry->{"relyx_args"},"'\n";
199 return $entry->{"relyx_args"}
205 #print "did not exist";
207 } # end sub relyx_args
210 # Return the next argument(s) expected by this token.
211 # For regular Tokens: /^o*$/.
212 # For BegArgsTokens and ArgTokens: /^o*[rR]$/
213 # For EndArgsTokens: /^o*/. (in case opt args come after last required arg)
214 my ($eaten,$fileobject) = (shift,shift);
216 # Get the number & type of arguments of this token == /^[or]*$/
217 # If it takes no args, just return
218 # Will also return if curr_args is called for plain Text for some reason
219 my $syntax = $eaten->relyx_args($fileobject) or return "";
221 # If it takes just optional args, return them (it's a plain Token)
222 return $syntax if $syntax =~ /^o+$/;
224 # Number of arguments we've already read (== 0 for BegArgsToken)
225 # Note that we only get here for Beg/EndArgsToken or ArgToken
226 my $arg_num = $eaten->args_done;
228 # Split args into single "argument sets", each of which is 0 or more
229 # optional arguments followed by 0 or 1 required argument.
230 @args = ($syntax =~ /o*[rR]?/g);
231 push (@args,""); # necessary for EndArgsToken if $syntax ends with "r"
233 # Now return the n'th argument set
234 # e.g., if 0 args have been eaten, return the 0th element of @args,
235 # which is the first argument
236 return $args[$arg_num];
237 } # end sub curr_args
238 } # end package Text::TeX::Token
241 package Text::TeX::BegArgsToken;
242 @ISA = ('Text::TeX::Token');
244 my $tok = shift->base_token; # Token this pseudotoken was made from
249 my $tok = shift->base_token;
250 return $tok->exact_print;
253 # How many arguments we've read already.
254 # Obviously zero before we've begun to read the arguments
255 sub args_done {return 0}
257 sub base_token { return shift->[0]->[0] }
258 sub comment { return shift->base_token->comment }
262 package Text::TeX::ArgToken;
263 @ISA = ('Text::TeX::Token');
264 # This token isn't made from actual text, so it prints nothing
265 sub print {return ''}
266 sub exact_print {return ''}
268 # How many arguments we've read already.
269 # Luckily, this number is stored in the ArgToken token
270 sub args_done { return shift->[0]->[2] }
272 sub base_token { return shift->[0]->[0] }
276 package Text::TeX::EndArgsToken;
277 @ISA = ('Text::TeX::Token');
278 # This token isn't made because of real text, so it prints nothing
279 sub print {return ''}
280 sub exact_print {return ''}
282 # How many arguments we've read already.
283 # Obviously the total number of arguments, since we're done
284 sub args_done {return shift->[0]->[1]}
285 sub base_token { return shift->[0]->[0] }
289 package Text::TeX::EndLocal;
290 @ISA = ('Text::TeX::Token');
291 # No text in this token
292 sub print {return ''}
293 sub exact_print {return ''}
294 sub base_token { return shift->[0] }
298 package Text::TeX::Group;
299 sub new {shift; my $in = shift; bless $in}
301 local @arr; #arr becomes global for called subroutines
302 foreach (@{ $_[0] }) {
303 push(@arr, $_->print);
305 "`" . join("',`", @arr) . "'";
308 # exact_print prints w/out the quotes
310 local @arr; #arr becomes global for called subroutines
311 foreach (@{ $_[0] }) {
312 push(@arr, $_->exact_print);
314 join("", @arr); # ... and return it
317 # Not created straight from LaTeX, so it'll never have a comment
318 # (although comments can be in the subtokens in the group)
321 # Return what's in the group, i.e. strip out the '{' and '}' tokens
322 # if they exist. Return an array of tokens or just one token
324 #strip off TT::Begin::Group and TT::End::Group from beginning and end
325 # if they exist. eatBalanced will return Tokens, so don't worry about
326 # stripping too much from a group like {{foo} bar}. And eatGroup
327 # will return Begin::Group, Group, End::Group, so after stripping one,
328 # don't have to worry about stripping another.
330 if (ref($group->[0] ) eq "Text::TeX::Begin::Group" and
331 ref($group->[-1]) eq "Text::TeX::End::Group")
339 } elsif (!@$group) { # group was '{}'
340 return new Text::TeX::Token '','',''; # send back an empty token
342 warn "Text::TeX -- more than one token in group!" if $#$group > 1;
349 package Text::TeX::End::Group;
350 @ISA = ('Text::TeX::Chunk');
351 sub new {shift; my $in = shift; bless \$in}
352 sub digest { # 0: the token, 1: text object
353 # If there are any EndLocal tokens in $txt->{presynthetic}, do them first
354 # See TT::OpenFile::check_presynthetic for details
355 return if $_[1]->check_presynthetic($_[0]); # May change $_[0]
356 my $wa = $_[1]->curwaitforaction;
357 my $w = $_[1]->popwait;
358 warn "Expecting `$w', got `$_[0][0]'=`$_[0][0][0]' in `$ {$_[1]->{paragraph}}'"
360 &$wa if defined $wa; # i.e., do $txt->{waitforactions}[-1] if it exists
365 package Text::TeX::End::Group::Args;
366 @ISA = ('Text::TeX::End::Group');
368 sub digest { # 0: the token, 1: text object
369 # If there are any EndLocal tokens in $txt->{presynthetic}, do them first
370 # (Lamport p. 27 says \em is ended by '\end{blah}', not just '}')
371 # check_presynthetic will put the End::Group::Args token into pending_in
372 # so it'll be read on the next pass through eat. Since sub digest will
373 # be called again on this token, don't read the argument to \end{}
374 # on the first call to sub digest
375 # See TT::OpenFile::check_presynthetic for details
376 return if $_[1]->check_presynthetic($_[0]); # May change $_[0]
378 my $Token = $_[1]->{tokens}->{$_[0]->[0]};
379 my $count = $Token->{eatargs};
381 # Read environment you're ending (just like in Begin::Group::Args)
383 $tok = $_[1]->eatGroup(1);
384 if (@$tok == 3 and $tok->[0]->[0] eq '{') { # Special case for {\a}
389 #$_[0]->[0] .= ' ' . join ' ', map $_->[0], @arr;
391 my $s = $_[1]->starttoken;
393 # like TT::End::Group
394 my $wa = $_[1]->curwaitforaction;
395 my $w = $_[1]->popwait;
396 # If you got '}' when you wanted '\end'
397 warn "Expecting `$w', got $_[0]->[0] in `$ {$_[1]->{paragraph}}'"
399 # If you got \end{foo} when you wanted \end{bar}
400 if ($Token->{selfmatch} and $s->environment ne $_[0]->environment) {
401 warn "Expecting `$w" , "{", $s->environment,"}', got $_[0]->[0]",
402 "{", $_[0]->environment , "} in `$ {$_[1]->{paragraph}}'";
405 # If there was a waitforaction then do it now
407 $_[0]->[4] = $s; # Put the start data into the token
410 sub print { # need special print to print name of environment
412 my $env = $obj->environment; # assume we've already digested it
413 # Use the method for printing a regular old token, but append env. name
414 return $obj->SUPER::print . "{$env}";
419 my $env = $obj->environment; # assume we've already digested it
420 # Use the method for printing a regular old token, but append env. name
421 return $obj->SUPER::exact_print . "{$env}";
425 # this group's environment
426 return $_[0]->[3]->[0]->[0];
428 } # end package TT::End::Group::Args
431 package Text::TeX::Begin::Group::Args;
432 @ISA = ('Text::TeX::Begin::Group');
434 sub digest { # 0: the token, 1: text object
435 my $Token = $_[1]->{tokens}->{$_[0]->[0]};
436 my $count = $Token->{eatargs};
438 # Read the arguments, e.g., read "{blah}" for "\begin{blah}"
440 $tok = $_[1]->eatGroup(1);
441 if (@$tok == 3 and $tok->[0]->[0] eq '{') { # Special case for {\a}
446 # $_[0]->[0] .= ' ' . join ' ', map $_->[0], @arr;
448 $_[0]->SUPER::digest($_[1]); # i.e. do Begin::Group stuff (pushwait)
451 sub print { # need special print to print name of environment
453 my $env = $obj->environment; # assume we've already digested it
454 # Use the method for printing a regular old token, but append env. name
455 return $obj->SUPER::print . "{$env}";
460 my $env = $obj->environment; # assume we've already digested it
461 # Use the method for printing a regular old token, but append env. name
462 return $obj->SUPER::exact_print . "{$env}";
466 # this group's environment
467 return $_[0]->[3]->[0]->[0];
469 } # end package TT::Begin::Group::Args
472 package Text::TeX::Begin::Group;
473 @ISA = ('Text::TeX::Chunk');
474 # 0: the token, 1: text object
476 my ($tok, $txt) = (shift, shift);
477 # $dummy = the anonymous hash associated with this token in the %Tokens
478 my $dummy = $txt->{tokens}->{$tok->[0]};
480 # see if this group requires different actions
481 my $newaction; # action to do while parsing this group
482 my $waitaction; # action to do when you hit the matching End::Group
483 undef $waitaction; undef $newaction;
484 if (defined $dummy) {
485 if (exists $dummy->{newaction}) {
486 $newaction = $dummy->{newaction};
488 if (exists $dummy->{waitaction}) {
489 $waitaction = $dummy->{waitaction};
493 # push stuff onto stacks for this group
494 $txt->pushwait($tok, $newaction, $waitaction);
499 package Text::TeX::SelfMatch;
500 @ISA = ('Text::TeX::Chunk');
502 # This subroutine is never used. See sub digest below
503 if ($_[1]->curwait eq $_[0]->[0]) { #if you match what you're waiting for
504 bless $_[0], Text::TeX::End::Group;
505 } else { #you need to BE matched
506 bless $_[0], Text::TeX::Begin::Group;
509 # 0: the token, 1: text object
510 # Unfortunately, this sub IS necessary, because originally, a '$' (e.g.)
511 # is type TT::Token. Calling refine calls Chunk::refine, which blesses
512 # it to SelfMatch, but then SelfMatch::refine is never called! -Ak
513 sub digest { # XXXX Should not be needed?
514 # curwait returns undefined if not waiting for anything
515 if (defined ($cwt = $_[1]->curwait) && $cwt eq $_[0]->[0]) {
516 bless $_[0], Text::TeX::End::Group;
517 $_[0]->Text::TeX::End::Group::digest($_[1]);
519 bless $_[0], Text::TeX::Begin::Group;
520 $_[1]->pushwait($_[0]);
525 @Text::TeX::Text::ISA = ('Text::TeX::Chunk');
526 @Text::TeX::Paragraph::ISA = ('Text::TeX::Chunk');
527 @Text::TeX::BegArgsTokenLookedAhead::ISA = ('Text::TeX::BegArgsToken');
528 @Text::TeX::LookAhead::ISA = ('Text::TeX::EndArgsToken');
529 @Text::TeX::Separator::ISA = ('Text::TeX::Chunk');
531 ######################## MAIN CODE #########################################
533 package Text::TeX::GetParagraph;
534 # Get a new paragraph from the LaTeX file
535 # Get stuff until a non-empty line which follows an empty line
540 $fh = $ {$file->{fhs}}[-1] if @{$file->{fhs}};
541 return undef if (not defined $fh or eof($fh)) and $file->{readahead} eq "";
543 # See below: every time we call GetParagraph, we read one extra (non-empty)
544 # line, which we store in readahead for next time
545 my $string = $file->{readahead};
546 $file->{readahead} = ""; #default in case eof($fh) or !defined($fh)
548 if (defined $fh) { # i.e., if eof($fh) just return readahead from last time
549 # Read until an empty line (or eof)
550 while (defined ($in = <$fh>) && ($in =~ /\S/)) { # $in undefined at eof
553 # $in has the empty line we just read in. Add it for verbatim copying
554 $string .= $in if defined $in; # add whitespace
556 # Now read until NON-empty line (or eof)
557 while (defined ($in = <$fh>) && ($in !~ /\S/)) {
561 # Next time, the paragraph will begin with the non-empty line we just read
562 $file->{readahead} = $in if defined $in; # readahead stays "" at eof
565 bless \$string; # ... and return it
571 package Text::TeX::OpenFile;
573 $refgen = "TeXOpenFile0000";
576 # Description of OpenFile object:
577 # readahead - every time we read a paragraph we read one extra (non-empty) line. This
578 # line goes into 'readahead' and is prepended to the next paragraph
580 # paragraph - stores the paragraph we're currently parsing
581 # actions - what to do. TT::OpenFile->process calls the function pointed
582 # to by actions on each token it eats
583 # tokens - reference to a hash describing all tokens that the parser
585 # presynthetic - holds pseudotokens to deliver before a block ends.
586 # Specifically, it holds EndLocal tokens, so that we know to end
587 # a command like \em just before the '}' which ends a group
588 # synthetic - holds pseudotokens to deliver after block ends - specifically,
589 # it holds ArgToken (and EndArgsToken) tokens, which it returns
590 # in between arguments (and after all arguments) to a command.
591 # (also holds LookAhead tokens, which are like EndArgsTokens)
592 # pending_in - pseudotokens for input. Stuff is put here from presynthetic or
593 # from pending_out, and if there's something in pending_in, sub
594 # eat doesn't bother eating a new token
595 # pending_out - pseudotokens for output -- stuff put here from synthetic
596 # If there's anything in pending_out it gets returned or put into
597 # pending_in, and sub eat doesn't bother eating a new token
598 shift; my $file = shift; my %opt = @_;
601 open("::$refgen",$file) || die "Cannot open $file: $!";
602 die "End of file `$file' during opening" if eof("::$refgen");
604 my $fhs = defined $file ? ["::$refgen"] : [];
606 readahead => ($opt{string} || ""),
608 "paragraph" => undef,
609 "tokens" => ($opt{tokens} || \%Text::TeX::Tokens),
610 waitfors => [], options => \%opt,
611 waitforactions => [],
612 defaultacts => [$opt{defaultact}], # The last element is
616 actions => [defined $opt{action} ?
619 waitargcounts => [0],
623 presynthetic => [[]],
627 my $in = shift; my $i = 0;
628 for (@{$in->{fhs}}) {
630 || die "Cannot close $ {$in->{files}}[$i]: $!";
635 # Return the paragraph we're currently reading
636 # If called with an argument, get a new paragraph at end of par, otherwise
637 # don't. (Useful for looking ahead without affecting the file we're reading)
638 # Either way, return nothing at end of par.
641 my $get_paragraph = defined(shift);
642 #print "ep.in=$in\n";
644 # Return something if not at end of par
645 if ($in->{"paragraph"} and $ {$in->{"paragraph"}} ne "") {
647 # Done with all files and readahead?
648 } elsif (@{$in->{fhs}} and eof($ {$in->{fhs}}[-1]) and !$in->{readahead}) {
650 # No files and done with readahead?
651 } elsif (!@{$in->{fhs}} and $in->{readahead} eq '') {
654 if ($get_paragraph) {
655 #warn "getting new\n";
656 $in->{"paragraph"} = new Text::TeX::GetParagraph $in;
662 # pushwait means don't do stuff you've got waiting (like EndLocal tokens)
663 # until you're done with something else
664 # If Arg2 exists, then the upcoming group will have it as its action
665 # If Arg3 exists, then we'll do it when we get to the end of the upcoming group
666 sub pushwait { # 0: text object, 1: token, 2: ????
667 push(@{ $_[0]->{starttoken} }, $_[1]);
668 push(@{ $_[0]->{waitfors} }, $_[0]->{tokens}{$_[1]->[0]}{waitfor});
669 push(@{ $_[0]->{actions} },
670 defined $_[2] ? $_[2] : $_[0]->{defaultacts}[-1]);
671 push(@{ $_[0]->{waitforactions} }, $_[3]);
672 push(@{ $_[0]->{synthetic} }, []);
673 push(@{ $_[0]->{presynthetic} }, []); # so that a local argument won't
674 # finish at end of the nested group
677 # You've finished a group, so pop all the stuff pushwait pushed on
679 if ($#{ $_[0]->{waitfors} } < 0) {
680 warn "Got negative depth"; return;
682 my $rest = pop(@{ $_[0]->{synthetic} });
683 warn "Not enough arguments" if @$rest;
684 $rest = pop(@{ $_[0]->{presynthetic} });
685 warn "Presynthetic events remaining" if @$rest;
686 pop(@{ $_[0]->{starttoken} });
687 pop(@{ $_[0]->{actions} });
688 pop(@{ $_[0]->{waitforactions} });
689 pop(@{ $_[0]->{waitfors} });
692 # If there's anything in synthetic, pop it, reverse it, push it onto pending_out
694 my $rest = $ { $_[0]->{synthetic} }[-1];
696 push @{ $_[0]->{pending_out} }, reverse @{ pop @$rest };
700 sub pushsynthetic { # Add new list of events to do *after* the
702 my $rest = $ { shift->{synthetic} }[-1];
706 sub addpresynthetic { # Add to the list of events to do *before*
707 # the next end of group $uplevel above.
709 my $rest = $ { $txt->{presynthetic} }[-1];
712 # push @{ @$rest->[-1] }, @_;
718 # If anything exists in presynthetic[-1], pop it and CHANGE $_[1] to that.
719 # Push $_[1] AND (reverse of) anything else in presynthetic[-1] onto
720 # pending_in so that we do it before any more tokens are read.
721 # Otherwise, just return false.
722 # BUG?! I don't understand why we do reverse. It makes stuff come out FIFO!
723 sub check_presynthetic { # 0: text, 1: end token. Returns true on success
724 if (@{ $_[0]->{presynthetic}[-1] }) {
725 my $rest = $_[0]->{presynthetic}[-1];
726 my $next = pop @$rest;
727 push @{ $_[0]->{pending_in} }, $_[1], (reverse @$rest);
728 $#$rest = -1; # Delete them
736 # return what we're currently waiting for. Returns undef if not waiting
737 my $ref = $_[0]->{waitfors}; $$ref[-1];
740 sub curwaitforaction {
741 my $ref = $_[0]->{waitforactions}; $$ref[-1];
745 my $ref = $_[0]->{starttoken}; $$ref[-1];
748 # These are default bindings. You probably should override it.
750 # Eat '[blah]' or nothing. Brackets aren't returned in token's [0]
751 # but they are returned in [2], so exact_print will print them.
752 sub eatOptionalArgument {
753 # Call with no arg. Don't get new paragraph if at end of par
754 my $in = shift->paragraph;
755 return undef unless defined $in;
756 my $comment = ( $$in =~ s/^\s*($Text::TeX::commentpattern)//o );
757 if ($$in =~ s/^\s*$Text::TeX::optionalArgument//o) {
758 new Text::TeX::Token $1, $comment, $&;
760 warn "No optional argument found";
761 if ($comment) {new Text::TeX::Token undef, $comment}
766 # eat {blah} when it's an argument to a BegArgsToken.
767 # Returns a TT::Group of refined tokens
768 # This sub calls popsynthetic, so an ArgToken or EndArgsToken will be
769 # popped from synthetic into pending_out. This means that the ArgToken or
770 # EndArgsToken will be the next token returned by sub eat!
771 sub eatRequiredArgument {
773 my $group = $txt->eatGroup(@_);
779 # Call with no arg. Don't get new paragraph if at end of par
780 my $in = shift->paragraph;
781 return undef unless defined $in;
783 my ($comment) = ( $$in =~ s/^\s*($Text::TeX::commentpattern)//o );
784 if ($$in =~ s/^\s*$str//) {new Text::TeX::Token $&, $comment, $&}
786 warn "String `$str' expected, not found";
787 if ($comment) {new Text::TeX::Token undef, $comment}
792 # Eat '{blah}'. Braces aren't returned. Stuff is returned as a Group,
793 # where each member is an (unrefined) TT::Text or Token
797 warn "Did not get `{' when expected", return undef
798 unless defined ($in = $txt->eatFixedString('{')) && defined ($in->[0]);
799 $txt->eatBalancedRest;
803 sub eatBalancedRest {
805 my ($count,$in,@in) = (1);
808 warn "Unfinished balanced next", last EAT
809 unless defined ($in = $txt->eatMultiToken) && defined $in->[0];
811 $count++,redo if $in->[0] eq '{';
812 $count-- if $in->[0] eq '}';
813 # if !$count, remove '}' you just read and exit, else keep going
814 pop(@in), last EAT unless $count;
817 bless \@in, 'Text::TeX::Group';
820 # Eat stuff, either a token or a group (within {})
821 # Tokens will be refined.
822 # Braces ARE in the group
823 sub eatGroup { # If arg2==1 will eat exactly one
824 # group, otherwise a group or a
827 local ($in,$r,@in); #Note, this is a stupid way to name variables -Ak
828 if (defined ($in[0] = $txt->eatMultiToken(shift)) and defined $in[0]->[0]) {
829 $in[0]->refine($txt);
830 if (ref $in[0] ne 'Text::TeX::Begin::Group') {
832 } else { #it is the beginning of a group. So recurse until End::Group
833 while (defined ($r=ref($in = $txt->eatGroup)) # Eat many groups
834 && $r ne 'Text::TeX::End::Group') {
837 if (defined $r) {push(@in,$in)}
838 else {warn "Uncompleted group"}
839 } # end if Begin::Group
841 warn "Got nothing when argument expected";
844 bless \@in, 'Text::TeX::Group';
847 sub eatUntil { # We suppose that the text to match
848 # fits in a paragraph
852 while ( (!defined $txt->{'paragraph'} || $ {$txt->{'paragraph'}} !~ /$m/)
853 && defined ($in = $txt->eatGroup(1))) {
856 ($ {$txt->{'paragraph'}} =~ s/$m//) || warn "Delimiter `$m' not found";
857 bless \@in, 'Text::TeX::Group';
860 # return next token without eating it. Return '' if end of paragraph
861 sub lookAheadToken { # If arg2, will eat one token - WHY!? -Ak
863 # Call paragraph with no argument to say we're "just looking"
864 my $in = $txt->paragraph;
865 return '' unless $in; # To be able to match without warnings
868 /^(?:\s*)(?:$Text::TeX::commentpattern)?($Text::TeX::tokenpattern)/o) {
869 if (defined $2) {return $1} #if 1 usualtokenclass char, return it ($1==$2)
870 elsif (defined $3) {return "\\$3"} # Multiletter (\[a-zA-Z]+)
871 elsif (defined $1) {return $1} # \" or notusualtokenclass
876 # This is the main subroutine for eating a token.
877 # It returns a token as either TT::Text or TT::Token.
878 # Or it returns TT::Paragraph if it had to read a new paragraph in the TeX file.
879 sub eatMultiToken { # If arg2, will eat one token
881 # call paragraph with an arg so it gets new paragraph if necessary
882 my $in = $txt->paragraph(1);
883 return undef unless defined $in;
884 return new Text::TeX::Paragraph unless $in; #i.e., if it's a new paragraph
886 # eat a comment that comes before the token we're about to read
887 $comment = $2 if $$in =~ s/^(\s*)($Text::TeX::commentpattern)/$1/o;
888 my $nomulti = shift; #if arg2, eat one token
889 # Eat text or a token
890 # Cannot use if () BLOCK, because $& is local.
891 $got = $$in =~ s/^\s*($Text::TeX::tokenpattern)//o if $nomulti;
892 $got = $$in =~ s/^\s*($Text::TeX::multitokenpattern)//o unless $nomulti;
893 # $1 = \[^a-zA-Z] or special char like ~
894 # $2 = regular text. Return $& to include leading space!
895 # $3 = [a-zA-Z]+ which followed a backslash, i.e., a 'multiletter' command
896 if ($got and defined $2) {new Text::TeX::Text $&, $comment, $&}
897 elsif ($got and defined $3) {new Text::TeX::Token "\\$3", $comment, $&}
898 elsif ($got and defined $1) {new Text::TeX::Token $1, $comment, $&}
899 elsif ($comment) {new Text::TeX::Token undef, $comment, ""}
903 # This is the main subroutine for eating the file.
904 # It eats tokens and returns them. Sometimes it also returns pseudotokens.
906 # - if there's stuff in pending_out, return it
907 # - otherwise get stuff from pending_in OR eat a new token
908 # - refine the token, then digest it
909 # (- pop stuff from synthetic into pending_out for next time UNLESS
910 # you read a new command that takes arguments. E.g. x^\sqrt)
911 # - return the token unless it's special & has a 'type'
912 # - based on the type, set up one or more tokens to be handled later
913 # so that, e.g., type 'report_args' returns BegArgsToken, followed
914 # later by some number of ArgToken's, followed by an EndArgsToken
916 # LookAhead tokens can be used for _^. If you have x^a_b, the EndArgsToken
917 # for the ^ will be changed to a LookAhead, which notes that a _ is next.
918 # The _ has a BegArgsTokenLookedAhead token instead of BegArgsToken. If anything
919 # other than _ or ^ follows the argument to the LookAhead token (for example,
920 # x^2+b), a regular old EndArgsToken is returned for the ^. reLyX doesn't use
921 # the LookAhead functionality. (phew!)
924 if ( @{ $txt->{pending_out} } ) {
925 my $out = pop @{ $txt->{pending_out} };
926 # E.g., if you have x^\sqrt2 -- when you pop and return the \sqrt
927 # EndArgsToken, you need to make sure the ^ EndArgsToken falls out next.
928 # But if pending_out is an ArgToken, *don't* pop the next thing
929 # (next ArgToken or EndArgsToken) out of synthetic yet
930 # Most often, synthetic will be empty, so popsynthetic will do nothing
931 $txt->popsynthetic if ref($out) eq 'Text::TeX::EndArgsToken';
932 if (ref $out eq 'Text::TeX::LookAhead') {
933 my $in = $txt->lookAheadToken;
934 if (defined ($res = $out->[0][2]{$in})) {
935 push @{$out->[0]}, $in, $res;
936 # actually eat what you looked ahead
937 $in = $txt->eatMultiToken(1); # XXXX may be wrong if next
938 # token needs to be eaten in
939 # the style `multi', like \left.
940 # Put it at beginning of pending_in so we do E.g., EndLocals first
941 splice @{ $txt->{pending_in} },
942 0, 0, (bless \$in, 'Text::TeX::LookedAhead');
945 return bless $out, 'Text::TeX::EndArgsToken';
950 } # end if pending_out
952 # We didn't get & return stuff from pending_out. So try to get stuff
953 # from pending_in. If there's nothing there, eat a new token.
954 my $in = pop @{ $txt->{pending_in} };
957 # after_lookahead is true if we popped a LookedAhead token off pending_in;
958 # it was put there when we looked ahead for a LookAhead token from pending_out
959 $in = $$in, $after_lookahead = 1
960 if ref $in eq 'Text::TeX::LookedAhead';
963 # This will happen if we did pushsynthetic on the last token.
964 # That happened for report_args tokens, i.e., things that require
965 # arguments. \frac, e.g., will read either a character or
966 # a token *or* the '{' that begins a group, then popsynthetic below.
967 # \frac puts *two* tokens in {synthetic} so $one will be set TWICE
968 $one = 1 if @{ $txt->{synthetic}[-1] }; # Need to eat a group.
969 $in = $txt->eatMultiToken($one);
971 return undef unless defined $in;
974 my ($Token, $type, @arr);
977 && $in->[0] =~ /$Text::TeX::active/o
978 && defined ( $Token = $txt->{tokens}->{$in->[0]} )
979 && exists ($Token->{"Type"})
984 $type = $Token->{Type};
985 $txt->popsynthetic unless $type eq 'report_args';
987 # If the token is special enough that it's got a 'type', do more stuff
989 if ($type eq 'action') {
990 # return &{$Token->{sub}}($in);
991 return &{$Token->{'sub'}}($in); #Without 's it breaks strict refs -Ak
992 } elsif ($type eq 'argmask') {
993 # eatWithMask; # ????
994 } elsif ($type eq 'args') {
996 } elsif ($type eq 'local') {
997 $txt->addpresynthetic(new Text::TeX::EndLocal $in);
998 } elsif ($type eq 'report_args') {
999 my $count = $Token->{count};
1000 my $ordinal = $count;
1002 if ($res = $Token->{lookahead}) {
1003 $txt->pushsynthetic(new Text::TeX::LookAhead [$in, $count, $res]);
1005 # This will fall out after we read all the args this token needs
1006 $txt->pushsynthetic(new Text::TeX::EndArgsToken [$in, $count]);
1008 # One of these tokens will fall out after we finish each arg (except last)
1009 # Push on 3,2,1, so that when we *popsynthetic*, 1 will come off first
1011 # ArgToken->[0][2] will then be the number of args read so far for
1012 # the token held in ArgToken->[0][0]
1013 while (--$ordinal) {
1014 $txt->pushsynthetic(new Text::TeX::ArgToken [$in, $count, $ordinal]);
1016 if ($after_lookahead) {
1017 $out = new Text::TeX::BegArgsTokenLookedAhead [$in, $count];
1019 $out = new Text::TeX::BegArgsToken [$in, $count];
1022 warn "Format of token data unknown for `", $in->[0], "'";
1031 my $lastact = shift;
1042 warn "No `{' found after defin", return undef
1043 unless $args = $txt->eatUntil('{');
1044 warn "Argument list @$args too complicated", return undef
1045 unless @$args == 1 && $$args[0] =~ /^(\ \#\d)*$/;
1046 warn "No `}' found after defin", return undef
1047 unless $body = $txt->eatBalancedRest;
1048 #my @args=split(/(\#[\d\#])/,$$); # lipa
1051 # This is the main subroutine called by parsing programs. Basically, it
1052 # keeps eating tokens, then calling $txt->actions on that token
1054 my ($txt, $eaten, $act) = (shift);
1055 while (defined ($eaten = $txt->eat)) {
1056 if (defined ($act = $txt->{actions}[-1])) {
1061 } #END Text::TeX::OpenFile
1063 ##################### MORE GLOBAL STUFF ##################################
1064 %super_sub_lookahead = qw( ^ 1 _ 0 \\sb 0 \\sp 1 \\Sp 1 \\Sb 0 );
1066 # class => 'where to bless to', Type => how to process
1067 # eatargs => how many args to swallow before digesting
1070 '{' => {'class' => 'Text::TeX::Begin::Group', 'waitfor' => '}'},
1071 '}' => {'class' => 'Text::TeX::End::Group'},
1072 "\$" => {'class' => 'Text::TeX::SelfMatch', waitfor => "\$"},
1073 '$$' => {'class' => 'Text::TeX::SelfMatch', waitfor => '$$'},
1074 '\begin' => {class => 'Text::TeX::Begin::Group::Args',
1075 eatargs => 1, 'waitfor' => '\end', selfmatch => 1},
1076 '\end' => {class => 'Text::TeX::End::Group::Args', eatargs => 1, selfmatch => 1},
1077 '\left' => {class => 'Text::TeX::Begin::Group::Args',
1078 eatargs => 1, 'waitfor' => '\right'},
1079 '\right' => {class => 'Text::TeX::End::Group::Args', eatargs => 1},
1080 '\frac' => {Type => 'report_args', count => 2},
1081 '\sqrt' => {Type => 'report_args', count => 1},
1082 '\text' => {Type => 'report_args', count => 1},
1083 '\operatorname' => {Type => 'report_args', count => 1},
1084 '\operatornamewithlimits' => {Type => 'report_args', count => 1},
1085 '^' => {Type => 'report_args', count => 1,
1086 lookahead => \%super_sub_lookahead },
1087 '_' => {Type => 'report_args', count => 1,
1088 lookahead => \%super_sub_lookahead },
1089 '\em' => {Type => 'local'},
1090 '\bold' => {Type => 'local'},
1091 '\it' => {Type => 'local'},
1092 '\rm' => {Type => 'local'},
1093 '\mathcal' => {Type => 'local'},
1094 '\mathfrak' => {Type => 'local'},
1095 '\mathbb' => {Type => 'local'},
1096 '\\\\' => {'class' => 'Text::TeX::Separator'},
1097 '&' => {'class' => 'Text::TeX::Separator'},
1100 ############## I NEVER USE ANYTHING BELOW THIS LINE!! -Ak ##################
1104 (undef) x 8, # 1st row
1106 (undef) x 8, # 2nd row
1108 undef, undef, '\forall', undef, '\exists', undef, undef, '\???', # 3rd: symbols
1110 (undef) x 8, # 4th: numbers and symbols
1112 '\???', ( map {"\\$_"}
1113 qw(Alpha Beta Chi Delta Epsilon Phi Gamma
1114 Eta Iota vartheta Kappa Lambda Mu Nu Omicron
1115 Pi Theta Rho Sigma Tau Ypsilon varsigma Omega
1116 Xi Psi Zeta)), undef, '\therefore', undef, '\perp', undef,
1117 undef, ( map {"\\$_"}
1118 qw(alpha beta chi delta varepsilon phi gamma
1119 eta iota varphi kappa lambda mu nu omicron
1120 pi theta rho sigma tau ypsilon varpi omega
1121 xi psi zeta)), undef, undef, undef, undef, undef,
1122 (undef) x 8, # 9th row
1124 (undef) x 8, # 10th row
1126 undef, undef, undef, '\leq', undef, '\infty', undef, undef, # 11th row
1127 undef, undef, undef, undef, '\from', undef, '\to', undef,
1128 '\circ', '\pm', undef, '\geq', '\times', undef, '\partial', '\bullet', # 12th row
1129 undef, '\neq', '\equiv', '\approx', '\dots', '\mid', '\hline', undef,
1130 '\Aleph', undef, undef, undef, '\otimes', '\oplus', '\empty', '\cap', # 13th row
1131 '\cup', undef, undef, undef, undef, undef, '\in', '\notin',
1132 undef, '\nabla', undef, undef, undef, '\prod', undef, '\cdot', # 14th row
1133 undef, '\wedge', '\vee', undef, undef, undef, undef, undef,
1134 undef, '\<', undef, undef, undef, '\sum', undef, undef, # 15th row
1136 undef, '\>', '\int', (undef) x 5, # 16th row
1140 $xfont{$_} = ['symbol', chr($i)] if defined $_;
1145 # This list was autogenerated by the following script:
1146 # Some hand-editing is required since MSSYMB.TEX is obsolete.
1149 ## extract_texchar.pl PLAIN.TEX MSSYMB.TEX
1152 #%fonts = (2 => "cmsy", 3 => "cmex", '\\msx@' => msam, '\\msy@' => msbm, );
1154 #while (defined ($_ = <ARGV>)) {
1155 # $list{$fonts{$2}}[hex $3] = $1
1156 # if /^\s*\\mathchardef(\\\w+)=\"\d([23]|\\ms[xy]\@)([\da-fA-F]+)\s+/o;
1159 #for $font (keys %list) {
1160 # print "\@$font = (\n ";
1161 # for $i (0 .. $#{$list{$font}}/8) {
1162 # print join ', ', map {packit($_)} @{$list{$font}}[ 8*$i .. 8*$i+7 ];
1170 # if (defined $cs) {
1171 # #$cs =~ s/\\\\/\\\\\\\\/g;
1179 undef, '\cdotp', '\times', '\ast', '\div', '\diamond', '\pm', '\mp',
1180 '\oplus', '\ominus', '\otimes', '\oslash', '\odot', '\bigcirc', '\circ', '\bullet',
1181 '\asymp', '\equiv', '\subseteq', '\supseteq', '\leq', '\geq', '\preceq', '\succeq',
1182 '\sim', '\approx', '\subset', '\supset', '\ll', '\gg', '\prec', '\succ',
1183 '\leftarrow', '\rightarrow', '\uparrow', '\downarrow', '\leftrightarrow', '\nearrow', '\searrow', '\simeq',
1184 '\Leftarrow', '\Rightarrow', '\Uparrow', '\Downarrow', '\Leftrightarrow', '\nwarrow', '\swarrow', '\propto',
1185 '\prime', '\infty', '\in', '\ni', '\bigtriangleup', '\bigtriangledown', '\not', '\mapstochar',
1186 '\forall', '\exists', '\neg', '\emptyset', '\Re', '\Im', '\top', '\perp',
1187 '\aleph', undef, undef, undef, undef, undef, undef, undef,
1188 undef, undef, undef, undef, undef, undef, undef, undef,
1189 undef, undef, undef, undef, undef, undef, undef, undef,
1190 undef, undef, undef, '\cup', '\cap', '\uplus', '\wedge', '\vee',
1191 '\vdash', '\dashv', undef, undef, undef, undef, undef, undef,
1192 '\langle', '\rangle', '\mid', '\parallel', undef, undef, '\setminus', '\wr',
1193 undef, '\amalg', '\nabla', '\smallint', '\sqcup', '\sqcap', '\sqsubseteq', '\sqsupseteq',
1194 undef, '\dagger', '\ddagger', undef, '\clubsuit', '\diamondsuit', '\heartsuit', '\spadesuit',
1198 undef, undef, undef, undef, undef, undef, undef, undef, # 0-7
1199 undef, undef, undef, undef, undef, undef, undef, undef, # 8-15
1200 undef, undef, undef, undef, undef, undef, undef, undef, # 16-23
1201 undef, undef, undef, undef, undef, undef, undef, undef, # 24-31
1202 undef, undef, undef, undef, undef, undef, undef, undef, # 32-39
1203 undef, undef, undef, undef, undef, undef, undef, undef, # 40-47
1204 undef, undef, undef, undef, undef, undef, undef, undef, # 48-55
1205 undef, undef, undef, undef, undef, undef, undef, undef, # 56-63
1206 undef, undef, undef, undef, undef, undef, '\bigsqcup', undef, # 64-71
1207 '\ointop', undef, '\bigodot', undef, '\bigoplus', undef, '\bigotimes', undef, # 72-79
1208 '\sum', '\prod', '\intop', '\bigcup', '\bigcap', '\biguplus', '\bigwedge', '\bigvee', # 80-87
1209 undef, undef, undef, undef, undef, undef, undef, undef,
1210 '\coprod', undef, undef, undef, undef, undef, undef, undef,
1214 '\boxdot', '\boxplus', '\boxtimes', '\square', '\blacksquare', '\centerdot', '\lozenge', '\blacklozenge',
1215 '\circlearrowright', '\circlearrowleft', '\rightleftharpoons', '\leftrightharpoons', '\boxminus', '\Vdash', '\Vvdash', '\vDash',
1216 '\twoheadrightarrow', '\twoheadleftarrow', '\leftleftarrows', '\rightrightarrows', '\upuparrows', '\downdownarrows', '\upharpoonright', '\downharpoonright',
1217 '\upharpoonleft', '\downharpoonleft', '\rightarrowtail', '\leftarrowtail', '\leftrightarrows', '\rightleftarrows', '\Lsh', '\Rsh',
1218 '\rightsquigarrow', '\leftrightsquigarrow', '\looparrowleft', '\looparrowright', '\circeq', '\succsim', '\gtrsim', '\gtrapprox',
1219 '\multimap', '\therefore', '\because', '\doteqdot', '\triangleq', '\precsim', '\lesssim', '\lessapprox',
1220 '\eqslantless', '\eqslantgtr', '\curlyeqprec', '\curlyeqsucc', '\preccurlyeq', '\leqq', '\leqslant', '\lessgtr',
1221 '\backprime', undef, '\risingdotseq', '\fallingdotseq', '\succcurlyeq', '\geqq', '\geqslant', '\gtrless',
1222 '\sqsubset', '\sqsupset', '\vartriangleright', '\vartriangleleft', '\trianglerighteq', '\trianglelefteq', '\bigstar', '\between',
1223 '\blacktriangledown', '\blacktriangleright', '\blacktriangleleft', undef, undef, '\vartriangle', '\blacktriangle', '\triangledown',
1224 '\eqcirc', '\lesseqgtr', '\gtreqless', '\lesseqqgtr', '\gtreqqless', '\yen', '\Rrightarrow', '\Lleftarrow',
1225 '\checkmark', '\veebar', '\barwedge', '\doublebarwedge', '\angle', '\measuredangle', '\sphericalangle', '\varpropto',
1226 '\smallsmile', '\smallfrown', '\Subset', '\Supset', '\Cup', '\Cap', '\curlywedge', '\curlyvee',
1227 '\leftthreetimes', '\rightthreetimes', '\subseteqq', '\supseteqq', '\bumpeq', '\Bumpeq', '\lll', '\ggg',
1228 '\ulcorner', '\urcorner', '\circledR', '\circledS', '\pitchfork', '\dotplus', '\backsim', '\backsimeq',
1229 '\llcorner', '\lrcorner', '\maltese', '\complement', '\intercal', '\circledcirc', '\circledast', '\circleddash',
1233 '\lvertneqq', '\gvertneqq', '\nleq', '\ngeq', '\nless', '\ngtr', '\nprec', '\nsucc',
1234 '\lneqq', '\gneqq', '\nleqslant', '\ngeqslant', '\lneq', '\gneq', '\npreceq', '\nsucceq',
1235 '\precnsim', '\succnsim', '\lnsim', '\gnsim', '\nleqq', '\ngeqq', '\precneqq', '\succneqq',
1236 '\precnapprox', '\succnapprox', '\lnapprox', '\gnapprox', '\nsim', '\ncong', undef, undef,
1237 '\varsubsetneq', '\varsupsetneq', '\nsubseteqq', '\nsupseteqq', '\subsetneqq', '\supsetneqq', '\varsubsetneqq', '\varsupsetneqq',
1238 '\subsetneq', '\supsetneq', '\nsubseteq', '\nsupseteq', '\nparallel', '\nmid', '\nshortmid', '\nshortparallel',
1239 '\nvdash', '\nVdash', '\nvDash', '\nVDash', '\ntrianglerighteq', '\ntrianglelefteq', '\ntriangleleft', '\ntriangleright',
1240 '\nleftarrow', '\nrightarrow', '\nLeftarrow', '\nRightarrow', '\nLeftrightarrow', '\nleftrightarrow', '\divideontimes', '\varnothing',
1241 '\nexists', undef, undef, undef, undef, undef, undef, undef,
1242 undef, undef, undef, undef, undef, undef, undef, undef,
1243 undef, undef, undef, undef, undef, undef, undef, undef,
1244 undef, undef, undef, undef, undef, undef, undef, undef,
1245 undef, undef, undef, undef, undef, undef, '\mho', '\eth',
1246 '\eqsim', '\beth', '\gimel', '\daleth', '\lessdot', '\gtrdot', '\ltimes', '\rtimes',
1247 '\shortmid', '\shortparallel', '\smallsetminus', '\thicksim', '\thickapprox', '\approxeq', '\succapprox', '\precapprox',
1248 '\curvearrowleft', '\curvearrowright', '\digamma', '\varkappa', undef, '\hslash', '\hbar', '\backepsilon',
1251 # Temporary workaround against Tk's \n (only cmsy contains often-used \otimes):
1253 $cmsy[ord "\n"] = undef;
1255 for $font (qw(cmsy cmex msam msbm)) {
1256 for $num (0 .. $#{$font}) {
1257 $xfont{$$font[$num]} = [$font, chr($num)] if defined $$font[$num];
1262 \int \intop \oint \ointop \restriction \upharpoonright
1263 \Doteq \doteqdot \doublecup \Cup \doublecap \Cap
1264 \llless \lll \gggtr \ggg \lnot \neg \land \wedge
1265 \lor \vee \le \leq \ge \geq \owns \ni \gets \leftarrow
1266 \to \rightarrow \< \langle \> \rangle \| \parallel
1269 for $from (keys %aliases) {
1270 $xfont{$from} = $xfont{$aliases{$from}} if exists $xfont{$aliases{$from}};
1274 # Autoload methods go after =cut, and are processed by the autosplit program.
1281 Text::TeX -- Perl module for parsing of C<TeX>.
1288 my($eaten,$txt) = (shift,shift);
1289 print "Comment: `", $eaten->[1], "'\n" if defined $eaten->[1];
1290 print "@{$txt->{waitfors}} ", ref $eaten, ": `", $eaten->[0], "'";
1291 if (defined $eaten->[3]) {
1292 my @arr = @{ $eaten->[3] };
1294 print " ", $_->print;
1300 my $file = new Text::TeX::OpenFile 'test.tex',
1301 'defaultact' => \&report;
1306 A new C<TeX> parser is created by
1308 $file = new Text::TeX::OpenFile $filename, attr1 => $val1, ...;
1310 $filename may be C<undef>, in this case the text to parse may be
1311 specified in the attribute C<string>.
1313 Recognized attributes are:
1319 contains the text to parse before parsing $filename.
1323 denotes a procedure to submit C<output tokens> to.
1327 gives a hash of C<descriptors> for C<input token>. A sane default is
1332 A call to the method C<process> launches the parser.
1336 When the parser is running, it processes input stream by splitting it
1337 into C<input tokens> using some I<heuristics> similar to the actual
1338 rules of TeX tokenizer. However, since it does not use I<the exact
1339 rules>, the resulting tokens may be wrong if some advanced TeX commands
1340 are used, say, the character classes are changed.
1342 This should not be of any concern if the stream in question is a
1343 "user" file, but is important for "packages".
1347 The processed C<input tokens> are handed to the digester, which
1348 handles them according to the provided C<tokens> attribute.
1350 =head2 C<tokens> attribute
1352 This is a hash reference which describes how the C<input tokens>
1353 should be handled. A key to this hash is a literal like C<^> or
1354 C<\fraction>. A value should be another hash reference, with the
1355 following keys recognized:
1361 Into which class to bless the token. Several predefined classes are
1362 provided. The default is C<Text::TeX::Token>.
1366 What kind of special processing to do with the input after the
1367 C<class> methods are called. Recognized C<Type>s are:
1373 When a token of this C<Type> is encountered, it is converted into
1374 C<Text::TeX::BegArgsToken>. Then the arguments are processed as usual,
1375 and an C<output token> of type C<Text::TeX::ArgToken> is inserted
1376 between them. Finally, after all the arguments are processed, an
1377 C<output token> C<Text::TeX::EndArgsToken> is inserted.
1379 The first element of these simulated C<output tokens> is an array
1380 reference with the first element being the initial C<output token>
1381 which generated this sequence. The second element of the internal
1382 array is the number of arguments required by the C<input token>. The
1383 C<Text::TeX::ArgToken> token has a third element, which is the ordinal
1384 of the argument which ends immediately before this token.
1386 If requested, a token C<Text::TeX::LookAhead> may be returned instead
1387 of C<Text::TeX::EndArgsToken>. The additional elements of
1388 C<$token-E<gt>[0]> are: the reference to the corresponding C<lookahead>
1389 attribute, the relevant key (text of following token) and the
1390 corresponding value.
1392 In such a case the input token which was looked-ahead would generate
1393 an output token of type C<Text::TeX::BegArgsTokenLookedAhead> (if it
1394 usually generates C<Text::TeX::BegArgsToken>).
1398 Means that this macro introduces a local change, which should be
1399 undone at the end of the enclosing block. At the end of the block an
1400 output event C<Text::TeX::EndLocal> is delivered, with C<$token-E<gt>[0]>
1401 being the output token for the I<local> event starting.
1403 Useful for font switching.
1409 Some additional keys may be recognized by the code for the particular
1416 number of arguments to the macro.
1420 gives the matching token for a I<starting delimiter> token.
1424 number of tokens to swallow literally and put into the relevant slot
1425 of the C<output token>. The surrounding braces are stripped.
1429 is used with C<eatargs==1>. Denotes that the matching token is also
1430 C<eatargs==1>, and the swallowed tokens should coincide (like with
1431 C<\begin{blah} ... \end{blah}>).
1435 is a hash with keys being texts of tokens which need to be treated
1436 specially after the end of arguments for the current token. If the
1437 corresponding text does indeed follow the token, a token
1438 C<Text::TeX::LookAhead> is returned instead of
1439 C<Text::TeX::EndArgsToken>.
1443 =head2 Symbol font table
1445 The hash %Text::TeX::xfont contains the translation table from TeX
1446 tokens into the corresponding font elements. The values are array
1447 references of the form C<[fontname, char]>. Currently the only font
1448 supported is C<symbol>.
1454 Ilya Zakharevich, ilya@math.ohio-state.edu