]> git.lyx.org Git - lyx.git/blob - src/output_xhtml.cpp
Fix typo.
[lyx.git] / src / output_xhtml.cpp
1 /**
2  * \file output_xhtml.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Richard Heck
7  * 
8  * This code is based upon output_docbook.cpp
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "output_xhtml.h"
16
17 #include "Buffer.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
20 #include "Counters.h"
21 #include "Layout.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
26 #include "sgml.h"
27 #include "Text.h"
28 #include "TextClass.h"
29
30 #include "support/lassert.h"
31 #include "support/debug.h"
32 #include "support/lstrings.h"
33
34 #include <vector>
35
36 using namespace std;
37 using namespace lyx::support;
38
39 namespace lyx {
40
41 namespace html {
42
43 docstring escapeChar(char_type c)
44 {
45         docstring str;
46         switch (c) {
47         case ' ':
48                 str += " ";
49                 break;
50         case '&':
51                 str += "&amp;";
52                 break;
53         case '<':
54                 str += "&lt;";
55                 break;
56         case '>':
57                 str += "&gt;";
58                 break;
59         default:
60                 str += c;
61                 break;
62         }
63         return str;
64 }
65
66
67 // escape what needs escaping
68 docstring htmlize(docstring const & str) {
69         odocstringstream d;
70         docstring::const_iterator it = str.begin();
71         docstring::const_iterator en = str.end();
72         for (; it != en; ++it)
73                 d << escapeChar(*it);
74         return d.str();
75 }
76
77
// Tells whether s names one of the tags treated as font tags,
// currently "em" and "strong".
bool isFontTag(string const & s)
{
	if (s == "em")
		return true;
	return s == "strong"; // others?
}
82 } // namespace html
83
84
85 docstring StartTag::asTag() const
86 {
87         string output = "<" + tag_;
88         if (!attr_.empty())
89                 output += " " + attr_;
90         output += ">";
91         return from_utf8(output);
92 }
93
94
95 docstring StartTag::asEndTag() const
96 {
97         string output = "</" + tag_ + ">";
98         return from_utf8(output);
99 }
100
101
102 docstring EndTag::asEndTag() const
103 {
104         string output = "</" + tag_ + ">";
105         return from_utf8(output);
106 }
107
108
109 docstring CompTag::asTag() const
110 {
111         string output = "<" + tag_;
112         if (!attr_.empty())
113                 output += " " + attr_;
114         output += " />";
115         return from_utf8(output);
116 }
117
118
119 ////////////////////////////////////////////////////////////////
120 ///
121 /// XHTMLStream
122 ///
123 ////////////////////////////////////////////////////////////////
124
125 XHTMLStream::XHTMLStream(odocstream & os) 
126                 :os_(os)
127 {}
128
129
130 void XHTMLStream::cr() 
131 {
132         // tabs?
133         os_ << from_ascii("\n");
134 }
135
136
137 bool XHTMLStream::closeFontTags()
138 {
139         // first, we close any open font tags we can close
140         StartTag curtag = tag_stack_.back();
141         while (html::isFontTag(curtag.tag_)) {
142                 os_ << curtag.asEndTag();
143                 tag_stack_.pop_back();
144                 if (tag_stack_.empty())
145                         // this probably shouldn't happen, since then the
146                         // font tags weren't in any other tag. but that
147                         // problem will likely be caught elsewhere.
148                         return true;
149                 curtag = tag_stack_.back();
150         }
151         // so we've hit a non-font tag. let's see if any of the
152         // remaining tags are font tags.
153         TagStack::const_iterator it = tag_stack_.begin();
154         TagStack::const_iterator en = tag_stack_.end();
155         bool noFontTags = true;
156         for (; it != en; ++it) {
157                 if (html::isFontTag(it->tag_)) {
158                         LYXERR0("Font tag `" << it->tag_ << "' still open in closeFontTags().");
159                         noFontTags = false;
160                 }
161         }
162         return noFontTags;
163 }
164
165
166 void XHTMLStream::clearTagDeque()
167 {
168         while (!pending_tags_.empty()) {
169                 StartTag const & tag = pending_tags_.front();
170                 // tabs?
171                 os_ << tag.asTag();
172                 tag_stack_.push_back(tag);
173                 pending_tags_.pop_front();
174         }
175 }
176
177
178 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
179 {
180         clearTagDeque();
181         os_ << html::htmlize(d);
182         return *this;
183 }
184
185
186 XHTMLStream & XHTMLStream::operator<<(const char * s)
187 {
188         clearTagDeque();
189         os_ << html::htmlize(from_ascii(s));
190         return *this;
191 }
192
193
194 XHTMLStream & XHTMLStream::operator<<(char_type c)
195 {
196         clearTagDeque();
197         os_ << html::escapeChar(c);
198         return *this;
199 }
200
201
202 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag) 
203 {
204         pending_tags_.push_back(tag);
205         if (tag.keepempty_)
206                 clearTagDeque();
207         return *this;
208 }
209
210
211 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag) 
212 {
213         clearTagDeque();
214         // tabs?
215         os_ << tag.asTag();
216         return *this;
217 }
218
219
220 bool    XHTMLStream::isTagOpen(string const & stag)
221 {
222         TagStack::const_iterator sit = tag_stack_.begin();
223         TagStack::const_iterator const sen = tag_stack_.end();
224         for (; sit != sen; ++sit)
225                 // we could check for the
226                 if (sit->tag_ == stag) 
227                         return true;
228         return false;
229 }
230
231
232 // this is complicated, because we want to make sure that
233 // everything is properly nested. the code ought to make 
234 // sure of that, but we won't assert (yet) if we run into
235 // a problem. we'll just output error messages and try our
236 // best to make things work.
237 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
238 {
239         // first make sure we're not closing an empty tag
240         if (!pending_tags_.empty()) {
241                 StartTag const & stag = pending_tags_.back();
242                 if (etag.tag_ == stag.tag_)  {
243                         // we have <tag></tag>, so we discard it and remove it 
244                         // from the pending_tags_.
245                         pending_tags_.pop_back();
246                         return *this;
247                 }
248                 // there is a pending tag that isn't the one we are trying
249                 // to close. 
250                 // is this tag itself pending?
251                 // non-const iterators because we may call erase().
252                 TagDeque::iterator dit = pending_tags_.begin();
253                 TagDeque::iterator const den = pending_tags_.end();
254                 for (; dit != den; ++dit) {
255                         if (dit->tag_ == etag.tag_) {
256                                 // it was pending, so we just erase it
257                                 LYXERR0("Tried to close pending tag `" << etag.tag_ 
258                                         << "' when other tags were pending. Tag discarded.");
259                                 pending_tags_.erase(dit);
260                                 return *this;
261                         }
262                 }
263                 // so etag isn't itself pending. is it even open?
264                 if (!isTagOpen(etag.tag_)) {
265                         LYXERR0("Tried to close `" << etag.tag_ 
266                                  << "' when tag was not open. Tag discarded.");
267                         return *this;
268                 }
269                 // ok, so etag is open.
270                 // our strategy will be as below: we will do what we need to 
271                 // do to close this tag.
272                 LYXERR0("Closing tag `" << etag.tag_ 
273                         << "' when other tags are pending. Discarded pending tags:");
274                 for (dit = pending_tags_.begin(); dit != den; ++dit)
275                         LYXERR0(dit->tag_);
276                 // clear the pending tags...
277                 pending_tags_.clear();
278                 // ...and then just fall through.
279         }
280
281         // is the tag we are closing the last one we opened?
282         if (etag.tag_ == tag_stack_.back().tag_) {
283                 // output it...
284                 os_ << etag.asEndTag();
285                 // ...and forget about it
286                 tag_stack_.pop_back();
287                 return *this;
288         } 
289         
290         // we are trying to close a tag other than the one last opened. 
291         // let's first see if this particular tag is still open somehow.
292         if (!isTagOpen(etag.tag_)) {
293                 LYXERR0("Tried to close `" << etag.tag_ 
294                         << "' when tag was not open. Tag discarded.");
295                 return *this;
296         }
297         
298         // so the tag was opened, but other tags have been opened since
299         // and not yet closed.
300         // if it's a font tag, though...
301         if (html::isFontTag(etag.tag_)) {
302                 // it won't be a problem if the other tags open since this one
303                 // are also font tags.
304                 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
305                 TagStack::const_reverse_iterator ren = tag_stack_.rend();
306                 for (; rit != ren; ++rit) {
307                         if (rit->tag_ == etag.tag_)
308                                 break;
309                         if (!html::isFontTag(rit->tag_)) {
310                                 // we'll just leave it and, presumably, have to close it later.
311                                 LYXERR0("Unable to close font tag `" << etag.tag_ 
312                                         << "' due to open non-font tag `" << rit->tag_ << "'.");
313                                 return *this;
314                         }
315                 }
316                 
317                 // so we have e.g.:
318                 //    <em>this is <strong>bold
319                 // and are being asked to closed em. we want:
320                 //    <em>this is <strong>bold</strong></em><strong>
321                 // first, we close the intervening tags...
322                 StartTag curtag = tag_stack_.back();
323                 // ...remembering them in a stack.
324                 TagStack fontstack;
325                 while (curtag.tag_ != etag.tag_) {
326                         os_ << curtag.asEndTag();
327                         fontstack.push_back(curtag);
328                         tag_stack_.pop_back();
329                         curtag = tag_stack_.back();
330                 }
331                 // now close our tag...
332                 os_ << etag.asEndTag();
333                 // ...and restore the other tags.
334                 rit = fontstack.rbegin();
335                 ren = fontstack.rend();
336                 for (; rit != ren; ++rit)
337                         pending_tags_.push_back(*rit);
338                 return *this;
339         }
340         
341         // it wasn't a font tag.
342         // so other tags were opened before this one and not properly closed. 
343         // so we'll close them, too. that may cause other issues later, but it 
344         // at least guarantees proper nesting.
345         LYXERR0("Closing tag `" << etag.tag_ 
346                 << "' when other tags are open, namely:");
347         StartTag curtag = tag_stack_.back();
348         while (curtag.tag_ != etag.tag_) {
349                 LYXERR0(curtag.tag_);
350                 os_ << curtag.asEndTag();
351                 tag_stack_.pop_back();
352                 curtag = tag_stack_.back();
353         }
354         // curtag is now the one we actually want.
355         os_ << curtag.asEndTag();
356         tag_stack_.pop_back();
357         
358         return *this;
359 }
360
361 // End code for XHTMLStream
362
363 namespace {
364         
365 // convenience functions
366
367 inline void openTag(XHTMLStream & xs, Layout const & lay)
368 {
369         xs << StartTag(lay.htmltag(), lay.htmlattr());
370 }
371
372
373 inline void closeTag(XHTMLStream & xs, Layout const & lay)
374 {
375         xs << EndTag(lay.htmltag());
376 }
377
378
379 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
380 {
381         xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
382 }
383
384
385 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
386 {
387         xs << EndTag(lay.htmllabeltag());
388 }
389
390
391 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
392 {
393         xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
394 }
395
396
397 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
398 {
399         xs << EndTag(lay.htmlitemtag());
400 }
401
402 // end of convenience functions
403
404 ParagraphList::const_iterator searchParagraphHtml(
405         ParagraphList::const_iterator p,
406         ParagraphList::const_iterator const & pend)
407 {
408         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
409                 ;
410
411         return p;
412 }
413
414
415 ParagraphList::const_iterator searchEnvironmentHtml(
416                 ParagraphList::const_iterator const pstart,
417                 ParagraphList::const_iterator const & pend)
418 {
419         ParagraphList::const_iterator p = pstart;
420         Layout const & bstyle = p->layout();
421         size_t const depth = p->params().depth();
422         for (++p; p != pend; ++p) {
423                 Layout const & style = p->layout();
424                 // It shouldn't happen that e.g. a section command occurs inside
425                 // a quotation environment, at a higher depth, but as of 6/2009,
426                 // it can happen. We pretend that it's just at lowest depth.
427                 if (style.latextype == LATEX_COMMAND)
428                         return p;
429                 // If depth is down, we're done
430                 if (p->params().depth() < depth)
431                         return p;
432                 // If depth is up, we're not done
433                 if (p->params().depth() > depth)
434                         continue;
435                 // Now we know we are at the same depth
436                 if (style.latextype == LATEX_PARAGRAPH
437                     || style.latexname() != bstyle.latexname())
438                         return p;
439         }
440         return pend;
441 }
442
443
444 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
445                                             XHTMLStream & xs,
446                                             OutputParams const & runparams,
447                                             Text const & text,
448                                             ParagraphList::const_iterator const & pbegin,
449                                             ParagraphList::const_iterator const & pend)
450 {
451         ParagraphList::const_iterator const begin = text.paragraphs().begin();
452         ParagraphList::const_iterator par = pbegin;
453         for (; par != pend; ++par) {
454                 Layout const & lay = par->layout();
455                 if (!lay.counter.empty())
456                         buf.params().documentClass().counters().step(lay.counter);
457                 // FIXME We should see if there's a label to be output and
458                 // do something with it.
459                 if (par != pbegin)
460                         xs.cr();
461
462                 // FIXME Should we really allow anything other than 'p' here?
463                 
464                 // If we are already in a paragraph, and this is the first one, then we
465                 // do not want to open the paragraph tag.
466                 bool const opened = 
467                         (par == pbegin && runparams.html_in_par) ? false : true;
468                 if (opened)
469                         openTag(xs, lay);
470                 docstring const deferred = 
471                         par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
472
473                 // We want to issue the closing tag if either:
474                 //   (i)  We opened it, and either html_in_par is false,
475                 //        or we're not in the last paragraph, anyway.
476                 //   (ii) We didn't open it and html_in_par is true, 
477                 //        but we are in the first par, and there is a next par.
478                 ParagraphList::const_iterator nextpar = par;
479                 nextpar++;
480                 bool const needclose = 
481                         (opened && (!runparams.html_in_par || nextpar != pend))
482                         || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
483                 if (needclose) {
484                         closeTag(xs, lay);
485                         xs.cr();
486                 }
487                 if (!deferred.empty()) {
488                         xs << deferred;
489                         xs.cr();
490                 }
491         }
492         return pend;
493 }
494
495
496 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
497                                 XHTMLStream & xs,
498                                 OutputParams const & runparams,
499                                 Text const & text,
500                                 ParagraphList::const_iterator const & pbegin,
501                                 ParagraphList::const_iterator const & pend) 
502 {
503         xs << StartTag("h2", "class='bibliography'");
504         xs << pbegin->layout().labelstring(false);
505         xs << EndTag("h2");
506         xs.cr();
507         xs << StartTag("div", "class='bibliography'");
508         xs.cr();
509         makeParagraphs(buf, xs, runparams, text, pbegin, pend);
510         xs << EndTag("div");
511         return pend;
512 }
513
514
515 bool isNormalEnv(Layout const & lay)
516 {
517         return lay.latextype == LATEX_ENVIRONMENT;
518 }
519
520         
521 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
522                                               XHTMLStream & xs,
523                                               OutputParams const & runparams,
524                                               Text const & text,
525                                               ParagraphList::const_iterator const & pbegin,
526                                               ParagraphList::const_iterator const & pend) 
527 {
528         ParagraphList::const_iterator const begin = text.paragraphs().begin();
529         ParagraphList::const_iterator par = pbegin;
530         Layout const & bstyle = par->layout();
531         depth_type const origdepth = pbegin->params().depth();
532
533         // open tag for this environment
534         openTag(xs, bstyle);
535         xs.cr();
536
537         // we will on occasion need to remember a layout from before.
538         Layout const * lastlay = 0;
539
540         while (par != pend) {
541                 Layout const & style = par->layout();
542                 // the counter only gets stepped if we're in some kind of list,
543                 // or if it's the first time through.
544                 if (!style.counter.empty() && (par == pbegin || !isNormalEnv(style)))
545                         buf.params().documentClass().counters().step(style.counter);
546                 ParagraphList::const_iterator send;
547                 // this will be positive, if we want to skip the initial word
548                 // (if it's been taken for the label).
549                 pos_type sep = 0;
550
551                 switch (style.latextype) {
552                 case LATEX_ENVIRONMENT:
553                 case LATEX_LIST_ENVIRONMENT:
554                 case LATEX_ITEM_ENVIRONMENT: {
555                         // There are two possiblities in this case. 
556                         // One is that we are still in the environment in which we 
557                         // started---which we will be if the depth is the same.
558                         if (par->params().depth() == origdepth) {
559                                 LASSERT(bstyle == style, /* */);
560                                 if (lastlay != 0) {
561                                         closeItemTag(xs, *lastlay);
562                                         lastlay = 0;
563                                 }
564                                 bool const labelfirst = style.htmllabelfirst();
565                                 if (isNormalEnv(style)) {
566                                         // in this case, we print the label only for the first 
567                                         // paragraph (as in a theorem).
568                                         openItemTag(xs, style);
569                                         if (par == pbegin && style.htmllabeltag() != "NONE") {
570                                                 docstring const lbl = 
571                                                                 pbegin->expandLabel(style, buf.params(), false);
572                                                 if (!lbl.empty()) {
573                                                         openLabelTag(xs, style);
574                                                         xs << lbl;
575                                                         closeLabelTag(xs, style);
576                                                 }
577                                                 xs.cr();
578                                         }
579                                 }       else { // some kind of list
580                                         if (!labelfirst)
581                                                 openItemTag(xs, style);
582                                         if (style.labeltype == LABEL_MANUAL
583                                             && style.htmllabeltag() != "NONE") {
584                                                 openLabelTag(xs, style);
585 //                                              sep = par->firstWordLyXHTML(xs, runparams);
586                                                 closeLabelTag(xs, style);
587                                                 xs.cr();
588                                         }
589                                         else if (style.labeltype != LABEL_NO_LABEL
590                                                  && style.htmllabeltag() != "NONE") {
591                                                 openLabelTag(xs, style);
592                                                 xs << par->expandLabel(style, buf.params(), false);
593                                                 closeLabelTag(xs, style);
594                                                 xs.cr();
595                                         }
596                                         if (labelfirst)
597                                                 openItemTag(xs, style);
598                                         else
599                                                 xs << StartTag("span", "class='" + to_utf8(style.name()) + " inneritem'");
600                                 }
601                                 par->simpleLyXHTMLOnePar(buf, xs, runparams, 
602                                         text.outerFont(distance(begin, par)), sep);
603                                 if (!isNormalEnv(style) && !labelfirst)
604                                         xs << EndTag("span");
605                                 ++par;
606                                 // We may not want to close the tag yet, in particular,
607                                 // if we're not at the end...
608                                 if (par != pend 
609                                         //  and are doing items...
610                                          && style.latextype == LATEX_ITEM_ENVIRONMENT
611                                          // and if the depth has changed...
612                                          && par->params().depth() != origdepth) {
613                                          // then we'll save this layout for later, and close it when
614                                          // we get another item.
615                                         lastlay = &style;
616                                 } else
617                                         closeItemTag(xs, style);
618                                 xs.cr();
619                         }
620                         // The other possibility is that the depth has increased, in which
621                         // case we need to recurse.
622                         else {
623                                 send = searchEnvironmentHtml(par, pend);
624                                 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
625                         }
626                         break;
627                 }
628                 case LATEX_PARAGRAPH:
629                         send = searchParagraphHtml(par, pend);
630                         par = makeParagraphs(buf, xs, runparams, text, par, send);
631                         break;
632                 // Shouldn't happen
633                 case LATEX_BIB_ENVIRONMENT:
634                         send = par;
635                         ++send;
636                         par = makeParagraphs(buf, xs, runparams, text, par, send);
637                         break;
638                 // Shouldn't happen
639                 case LATEX_COMMAND:
640                         ++par;
641                         break;
642                 }
643         }
644
645         if (lastlay != 0)
646                 closeItemTag(xs, *lastlay);
647         closeTag(xs, bstyle);
648         xs.cr();
649         return pend;
650 }
651
652
653 void makeCommand(Buffer const & buf,
654                                           XHTMLStream & xs,
655                                           OutputParams const & runparams,
656                                           Text const & text,
657                                           ParagraphList::const_iterator const & pbegin)
658 {
659         Layout const & style = pbegin->layout();
660         if (!style.counter.empty())
661                 buf.params().documentClass().counters().step(style.counter);
662
663         openTag(xs, style);
664
665         // Label around sectioning number:
666         // FIXME Probably need to account for LABEL_MANUAL
667         if (style.labeltype != LABEL_NO_LABEL) {
668                 openLabelTag(xs, style);
669                 xs << pbegin->expandLabel(style, buf.params(), false);
670                 closeLabelTag(xs, style);
671                 // Otherwise the label might run together with the text
672                 xs << from_ascii(" ");
673         }
674
675         ParagraphList::const_iterator const begin = text.paragraphs().begin();
676         pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
677                         text.outerFont(distance(begin, pbegin)));
678         closeTag(xs, style);
679         xs.cr();
680 }
681
682 } // end anonymous namespace
683
684
685 void xhtmlParagraphs(Text const & text,
686                        Buffer const & buf,
687                        XHTMLStream & xs,
688                        OutputParams const & runparams)
689 {
690         ParagraphList const & paragraphs = text.paragraphs();
691         ParagraphList::const_iterator par = paragraphs.begin();
692         ParagraphList::const_iterator pend = paragraphs.end();
693
694         OutputParams ourparams = runparams;
695         while (par != pend) {
696                 Layout const & style = par->layout();
697                 ParagraphList::const_iterator lastpar = par;
698                 ParagraphList::const_iterator send;
699
700                 switch (style.latextype) {
701                 case LATEX_COMMAND: {
702                         // The files with which we are working never have more than
703                         // one paragraph in a command structure.
704                         // FIXME 
705                         // if (ourparams.html_in_par)
706                         //   fix it so we don't get sections inside standard, e.g.
707                         // note that we may then need to make runparams not const, so we
708                         // can communicate that back.
709                         // FIXME Maybe this fix should be in the routines themselves, in case
710                         // they are called from elsewhere.
711                         makeCommand(buf, xs, ourparams, text, par);
712                         ++par;
713                         break;
714                 }
715                 case LATEX_ENVIRONMENT:
716                 case LATEX_LIST_ENVIRONMENT:
717                 case LATEX_ITEM_ENVIRONMENT: {
718                         // FIXME Same fix here.
719                         send = searchEnvironmentHtml(par, pend);
720                         par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
721                         break;
722                 }
723                 case LATEX_BIB_ENVIRONMENT: {
724                         // FIXME Same fix here.
725                         send = searchEnvironmentHtml(par, pend);
726                         par = makeBibliography(buf, xs, ourparams, text, par, send);
727                         break;
728                 }
729                 case LATEX_PARAGRAPH:
730                         send = searchParagraphHtml(par, pend);
731                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
732                         break;
733                 }
734                 // FIXME??
735                 // makeEnvironment may process more than one paragraphs and bypass pend
736                 if (distance(lastpar, par) >= distance(lastpar, pend))
737                         break;
738         }
739 }
740
741
742 } // namespace lyx