2 * \file output_xhtml.cpp
3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * This code is based upon output_docbook.cpp
10 * Full author contact details are available in file CREDITS.
15 #include "output_xhtml.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
28 #include "TextClass.h"
30 #include "support/lassert.h"
31 #include "support/debug.h"
32 #include "support/lstrings.h"
37 using namespace lyx::support;
// Takes a single character and returns a docstring. Only the signature is
// visible in this excerpt; presumably the result is the escaped form of c
// suitable for XHTML output -- TODO confirm against the full body.
43 docstring escapeChar(char_type c)
67 // escape what needs escaping
// Takes the input string by const reference and returns a docstring.
// The visible code sets up a begin/end iterator pair and walks every
// character of str; the loop body and the return statement are not part
// of this excerpt (presumably each character is passed through
// escapeChar() and appended to the result -- TODO confirm).
68 docstring htmlize(docstring const & str) {
70 docstring::const_iterator it = str.begin();
71 docstring::const_iterator en = str.end();
72 for (; it != en; ++it)
// Returns true when s is one of the tag names treated as a font tag.
// At present that is exactly "em" and "strong".
78 bool isFontTag(string const & s)
80 return s == "em" || s == "strong"; // others?
84 ////////////////////////////////////////////////////////////////
88 ////////////////////////////////////////////////////////////////
// Constructs an XHTMLStream on top of the output stream os. The
// initializer list and body are not visible in this excerpt (presumably
// os is what the other members refer to as os_ -- TODO confirm).
90 XHTMLStream::XHTMLStream(odocstream & os)
// Body not visible in this excerpt. NOTE(review): judging by the name this
// emits a line break on the underlying stream -- confirm.
95 void XHTMLStream::cr()
// Closes the font tags (as defined by html::isFontTag) that sit on top of
// tag_stack_: for each one a closing tag is written to os_ and the entry is
// popped. Once a non-font tag is reached, the remaining stack is scanned
// and an error is logged for every font tag still open beneath it.
// NOTE(review): the return statements are not visible in this excerpt; the
// noFontTags flag suggests the result reports whether all font tags could
// be closed -- confirm.
102 bool XHTMLStream::closeFontTags()
104 // first, we close any open font tags we can close
// NOTE(review): no emptiness check is visible before this back() call;
// confirm that tag_stack_ is guaranteed to be non-empty on entry.
105 StartTag curtag = tag_stack_.back();
106 while (html::isFontTag(curtag.tag_)) {
107 os_ << "</" << curtag.tag_ << ">";
108 tag_stack_.pop_back();
// Guard the back() call below: stop once the stack has been emptied.
// (The statement controlled by this `if` is not visible in this excerpt.)
109 if (tag_stack_.empty())
110 // this probably shouldn't happen, since then the
111 // font tags weren't in any other tag. but that
112 // problem will likely be caught elsewhere.
114 curtag = tag_stack_.back();
116 // so we've hit a non-font tag. let's see if any of the
117 // remaining tags are font tags.
118 TagStack::const_iterator it = tag_stack_.begin();
119 TagStack::const_iterator en = tag_stack_.end();
120 bool noFontTags = true;
121 for (; it != en; ++it) {
122 if (html::isFontTag(it->tag_)) {
// A font tag buried under a non-font tag cannot be closed here without
// breaking nesting, so it is only reported.
123 LYXERR0("Font tag `" << it->tag_ << "' still open in closeFontTags().");
// Flushes pending_tags_ front to back: each pending start tag is written to
// os_ as "<tag attr>", recorded on tag_stack_ as an open tag, and then
// removed from the pending deque.
131 void XHTMLStream::clearTagDeque()
133 while (!pending_tags_.empty()) {
134 StartTag const & tag = pending_tags_.front();
// Note that a space is always written between the tag name and attr_,
// whether or not attr_ is empty.
136 os_ << "<" << tag.tag_ << " " << tag.attr_ << ">";
// push_back() copies the tag, so the copy on tag_stack_ stays valid after
// the referenced front element is popped on the next line.
137 tag_stack_.push_back(tag);
138 pending_tags_.pop_front();
// Writes text content: d is passed through html::htmlize() and the result
// is sent to os_. Returns an XHTMLStream reference (the return statement,
// and any statement preceding the write, are not visible in this excerpt).
142 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
144 // I'm tempted to make sure here that there are no tags in the input
146 os_ << html::htmlize(d);
// Writes a single character: c is passed through escapeChar() and the
// result is sent to os_. Returns an XHTMLStream reference (the return
// statement, and any statement preceding the write, are not visible in
// this excerpt).
152 XHTMLStream & XHTMLStream::operator<<(char_type c)
155 os_ << escapeChar(c);
// Queues a start tag instead of writing it immediately: the tag is appended
// to pending_tags_. Elsewhere in this file clearTagDeque() is what writes
// pending tags to os_, and the EndTag operator drops a pending tag that is
// closed again straight away.
161 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag)
163 pending_tags_.push_back(tag);
// Writes a self-closing ("complete") tag to os_ in the form
// "<tag attr />". Statements between the signature and the write are not
// visible in this excerpt.
170 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag)
// A space is always written between the tag name and attr_.
174 os_ << "<" << tag.tag_ << " " << tag.attr_ << " />";
// Searches tag_stack_ from begin() to end() for an entry whose tag_ equals
// stag. Only the tag name is compared. The return statements are not
// visible in this excerpt (presumably true on a match and false otherwise
// -- TODO confirm).
179 bool XHTMLStream::isTagOpen(string const & stag)
181 TagStack::const_iterator sit = tag_stack_.begin();
182 TagStack::const_iterator const sen = tag_stack_.end();
183 for (; sit != sen; ++sit)
184 // we could check for the
185 if (sit->tag_ == stag)
191 // this is complicated, because we want to make sure that
192 // everything is properly nested. the code ought to make
193 // sure of that, but we won't assert (yet) if we run into
194 // a problem. we'll just output error messages and try our
195 // best to make things work.
//
// Overview of the visible logic, in order:
//  1. If tags are pending and the most recent one matches etag, the pair
//     is empty and the pending tag is dropped without any output.
//  2. If etag matches an older pending tag, that tag is erased and an
//     error is logged.
//  3. If etag is neither pending nor open, an error is logged.
//  4. Otherwise all pending tags are discarded and processing continues.
//  5. If etag is the innermost open tag, it is closed and popped.
//  6. If etag is a font tag with only font tags opened after it, those are
//     closed, etag is closed, and the others are queued to be reopened.
//  7. Otherwise every tag opened after etag is closed, then etag itself.
// NOTE(review): the return statements (and a few other lines) are not
// visible in this excerpt; the early exits implied above should be
// confirmed against the full source.
196 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
198 // first make sure we're not closing an empty tag
199 if (!pending_tags_.empty()) {
200 StartTag const & stag = pending_tags_.back();
201 if (etag.tag_ == stag.tag_) {
202 // we have <tag></tag>, so we discard it and remove it
203 // from the pending_tags_.
204 pending_tags_.pop_back();
207 // there is a pending tag that isn't the one we are trying
209 // is this tag itself pending?
210 // non-const iterators because we may call erase().
211 TagDeque::iterator dit = pending_tags_.begin();
212 TagDeque::iterator const den = pending_tags_.end();
213 for (; dit != den; ++dit) {
214 if (dit->tag_ == etag.tag_) {
215 // it was pending, so we just erase it
216 LYXERR0("Tried to close pending tag `" << etag.tag_
217 << "' when other tags were pending. Tag discarded.");
// NOTE(review): erase() invalidates dit and den; the loop must not
// continue after this point. The statement that leaves the loop is not
// visible in this excerpt -- confirm.
218 pending_tags_.erase(dit);
222 // so etag isn't itself pending. is it even open?
223 if (!isTagOpen(etag.tag_)) {
224 LYXERR0("Tried to close `" << etag.tag_
225 << "' when tag was not open. Tag discarded.");
228 // ok, so etag is open.
229 // our strategy will be as below: we will do what we need to
230 // do to close this tag.
231 LYXERR0("Closing tag `" << etag.tag_
232 << "' when other tags are pending. Discarded pending tags:");
// The body of this loop is not visible in this excerpt (presumably it
// logs each discarded pending tag -- TODO confirm).
233 for (dit = pending_tags_.begin(); dit != den; ++dit)
235 // clear the pending tags...
236 pending_tags_.clear();
237 // ...and then just fall through.
240 // is the tag we are closing the last one we opened?
// NOTE(review): no emptiness check on tag_stack_ is visible before this
// back() call -- confirm it cannot be reached with an empty stack.
241 if (etag.tag_ == tag_stack_.back().tag_) {
243 os_ << "</" << etag.tag_ << ">";
244 // ...and forget about it
245 tag_stack_.pop_back();
249 // we are trying to close a tag other than the one last opened.
250 // let's first see if this particular tag is still open somehow.
251 if (!isTagOpen(etag.tag_)) {
252 LYXERR0("Tried to close `" << etag.tag_
253 << "' when tag was not open. Tag discarded.");
257 // so the tag was opened, but other tags have been opened since
258 // and not yet closed.
259 // if it's a font tag, though...
260 if (html::isFontTag(etag.tag_)) {
261 // it won't be a problem if the other tags open since this one
262 // are also font tags.
263 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
264 TagStack::const_reverse_iterator ren = tag_stack_.rend();
// Walk from the innermost open tag outwards looking for a non-font tag.
// (Lines following the error message below are not visible in this
// excerpt, including whatever stops the scan once etag itself is reached.)
265 for (; rit != ren; ++rit) {
266 if (!html::isFontTag(rit->tag_)) {
267 // we'll just leave it and, presumably, have to close it later.
268 LYXERR0("Unable to close font tag `" << etag.tag_
269 << "' due to open non-font tags.");
275 // <em>this is <strong>bold
276 // and are being asked to close em. we want:
277 // <em>this is <strong>bold</strong></em><strong>
278 // first, we close the intervening tags...
279 StartTag curtag = tag_stack_.back();
280 // ...remembering them in a stack.
// NOTE(review): the declaration of fontstack is not visible in this
// excerpt.
282 while (curtag.tag_ != etag.tag_) {
283 os_ << "</" << curtag.tag_ << ">";
284 fontstack.push_back(curtag);
285 tag_stack_.pop_back();
286 curtag = tag_stack_.back();
288 // now close our tag...
289 os_ << "</" << etag.tag_ << ">";
290 // ...and restore the other tags.
// fontstack holds the closed tags innermost-first, so iterating it in
// reverse queues them outermost-first on pending_tags_.
291 rit = fontstack.rbegin();
292 ren = fontstack.rend();
293 for (; rit != ren; ++rit)
294 pending_tags_.push_back(*rit);
298 // it wasn't a font tag.
299 // so other tags were opened before this one and not properly closed.
300 // so we'll close them, too. that may cause other issues later, but it
301 // at least guarantees proper nesting.
302 LYXERR0("Closing tag `" << etag.tag_
303 << "' when other tags are open, namely:");
304 StartTag curtag = tag_stack_.back();
// Close and pop every tag above etag. The isTagOpen() check earlier found
// etag on the stack, so the loop reaches it before the stack runs empty.
305 while (curtag.tag_ != etag.tag_) {
306 LYXERR0(curtag.tag_);
307 os_ << "</" << curtag.tag_ << ">";
308 tag_stack_.pop_back();
309 curtag = tag_stack_.back();
311 // curtag is now the one we actually want.
312 os_ << "</" << curtag.tag_ << ">";
313 tag_stack_.pop_back();
320 ///////////////////////////////////////////////////////////////
321 // OLD STUFF to be replaced
323 // FIXME This needs to be protected somehow.
// File-level list of tag names. The string-based openTag() below appends to
// it and the string-based closeTag() inspects its last element.
324 static vector<string> taglist;
// Writes an opening tag to os and records the tag name in taglist.
// The output is "<tag>" when attr is empty and "<tag attr>" otherwise.
// NOTE(review): lines between the signature and the write, and the return
// statement, are not visible in this excerpt; there may be an early exit
// for tags that should not be emitted -- confirm.
326 bool openTag(odocstream & os, string const & tag, string const & attr)
330 os << from_ascii("<" + tag + (attr.empty() ? "" : " " + attr) + ">");
331 taglist.push_back(tag);
// Writes the closing tag "</tag>" to os after two sanity checks against
// taglist: an error is logged when taglist is empty, and another when its
// last entry differs from tag.
// NOTE(review): the statements following each check, any removal from
// taglist, and the return statements are not visible in this excerpt.
336 bool closeTag(odocstream & os, string const & tag)
340 // FIXME Check for proper nesting
341 if (taglist.empty()){
342 LYXERR0("Last tag not found when closing `" << tag << "'!");
345 string const & lasttag = taglist.back();
346 if (lasttag != tag) {
347 LYXERR0("Last tag was `" << lasttag << "' when closing `" << tag << "'!");
351 os << from_ascii("</" + tag + ">");
// Opens the main tag of a layout: forwards lay.htmltag() and
// lay.htmlattr() to html::openTag() and returns its result.
359 bool openTag(odocstream & os, Layout const & lay)
361 return html::openTag(os, lay.htmltag(), lay.htmlattr());
// Closes the main tag of a layout: forwards lay.htmltag() to
// html::closeTag() and returns its result.
365 bool closeTag(odocstream & os, Layout const & lay)
367 return html::closeTag(os, lay.htmltag());
// Opens the label tag of a layout: forwards lay.htmllabeltag() and
// lay.htmllabelattr() to html::openTag() and returns its result.
371 bool openLabelTag(odocstream & os, Layout const & lay)
373 return html::openTag(os, lay.htmllabeltag(), lay.htmllabelattr());
// Closes the label tag of a layout: forwards lay.htmllabeltag() to
// html::closeTag() and returns its result.
377 bool closeLabelTag(odocstream & os, Layout const & lay)
379 return html::closeTag(os, lay.htmllabeltag());
// Opens the item tag of a layout: forwards lay.htmlitemtag() and
// lay.htmlitemattr() to html::openTag() and returns its result.
383 bool openItemTag(odocstream & os, Layout const & lay)
385 return html::openTag(os, lay.htmlitemtag(), lay.htmlitemattr());
// Closes the item tag of a layout: forwards lay.htmlitemtag() to
// html::closeTag() and returns its result.
389 bool closeItemTag(odocstream & os, Layout const & lay)
391 return html::closeTag(os, lay.htmlitemtag());
394 // end of old stuff to be replaced
395 ///////////////////////////////////////////////////////////////
// Finds the end of a run of plain paragraphs. Starting from the paragraph
// AFTER p (p itself is not tested), the iterator is advanced while it has
// not reached pend and the paragraph's layout has latextype
// LATEX_PARAGRAPH. The return statement is not visible in this excerpt
// (presumably the advanced iterator is returned -- TODO confirm).
397 ParagraphList::const_iterator searchParagraphHtml(
398 ParagraphList::const_iterator p,
399 ParagraphList::const_iterator const & pend)
401 for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
// Scans forward from the paragraph after pstart, comparing each paragraph
// with the layout and depth of pstart, to locate where the environment
// begun at pstart ends. The checks, in order: a LATEX_COMMAND layout, a
// depth lower than the starting depth, a depth higher than the starting
// depth, and finally (at equal depth) a LATEX_PARAGRAPH layout or a layout
// whose latexname() differs from the starting one.
// NOTE(review): the statement controlled by each `if` and the final return
// are not visible in this excerpt; the existing comments indicate which
// cases end the scan and which continue it.
408 ParagraphList::const_iterator searchEnvironmentHtml(
409 ParagraphList::const_iterator const pstart,
410 ParagraphList::const_iterator const & pend)
412 ParagraphList::const_iterator p = pstart;
413 Layout const & bstyle = p->layout();
414 size_t const depth = p->params().depth();
415 for (++p; p != pend; ++p) {
416 Layout const & style = p->layout();
417 // It shouldn't happen that e.g. a section command occurs inside
418 // a quotation environment, at a higher depth, but as of 6/2009,
419 // it can happen. We pretend that it's just at lowest depth.
420 if (style.latextype == LATEX_COMMAND)
422 // If depth is down, we're done
423 if (p->params().depth() < depth)
425 // If depth is up, we're not done
426 if (p->params().depth() > depth)
428 // Now we know we are at the same depth
429 if (style.latextype == LATEX_PARAGRAPH
430 || style.latexname() != bstyle.latexname())
// Outputs the paragraphs in [pbegin, pend). For each paragraph the
// layout's counter (if any) is stepped, the layout's tag is opened unless
// this is the first paragraph and runparams.html_in_par is set, the
// paragraph content is produced by simpleLyXHTMLOnePar(), and any deferred
// output it returns is written afterwards followed by a newline.
// NOTE(review): two parameter lines (the ones supplying `os` and `text`),
// the declaration of `opened`, the advance of `nextpar`, the use of
// `needClose` and the return statement are not visible in this excerpt.
437 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
439 OutputParams const & runparams,
441 ParagraphList::const_iterator const & pbegin,
442 ParagraphList::const_iterator const & pend)
// `begin` is the start of the whole paragraph list; it is used below to
// turn `par` into an index for outerFont().
444 ParagraphList::const_iterator const begin = text.paragraphs().begin();
445 ParagraphList::const_iterator par = pbegin;
446 for (; par != pend; ++par) {
447 Layout const & lay = par->layout();
448 if (!lay.counter.empty())
449 buf.params().documentClass().counters().step(lay.counter);
450 // FIXME We should see if there's a label to be output and
451 // do something with it.
455 // FIXME Should we really allow anything other than 'p' here?
457 // If we are already in a paragraph, and this is the first one, then we
458 // do not want to open the paragraph tag.
460 (par == pbegin && runparams.html_in_par) ? false : openTag(os, lay);
461 docstring const deferred = par->simpleLyXHTMLOnePar(buf, os, runparams,
462 text.outerFont(distance(begin, par)));
464 // We want to issue the closing tag if either:
465 // (i) We opened it, and either html_in_par is false,
466 // or we're not in the last paragraph, anyway.
467 // (ii) We didn't open it and html_in_par is true,
468 // but we are in the first par, and there is a next par.
469 ParagraphList::const_iterator nextpar = par;
471 bool const needClose =
472 (opened && (!runparams.html_in_par || nextpar != pend))
473 || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
478 if (!deferred.empty())
479 os << deferred << '\n';
// Outputs a bibliography: an <h2 class='bibliography'> heading containing
// the label string of the first paragraph's layout, then an opening
// <div class='bibliography'>, then the paragraphs in [pbegin, pend) via
// makeParagraphs().
// NOTE(review): two parameter lines (the ones supplying `os` and `text`),
// the closing of the heading and of the div, and the return statement are
// not visible in this excerpt.
485 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
487 OutputParams const & runparams,
489 ParagraphList::const_iterator const & pbegin,
490 ParagraphList::const_iterator const & pend)
492 os << "<h2 class='bibliography'>"
493 << pbegin->layout().labelstring(false)
495 << "<div class='bibliography'>\n";
496 makeParagraphs(buf, os, runparams, text, pbegin, pend);
// True when the layout is a plain environment (latextype equal to
// LATEX_ENVIRONMENT), as opposed to the list and item environment types.
502 bool isNormalEnv(Layout const & lay)
504 return lay.latextype == LATEX_ENVIRONMENT;
// Outputs the environment that starts at pbegin and runs up to pend.
// The environment's main tag is opened first. Each paragraph is then
// handled according to its layout's latextype:
//  - environment / list / item types at the original depth produce an item
//    (item tag, optional label, paragraph content);
//  - the same types at another depth are handled by a recursive call on
//    the nested range found by searchEnvironmentHtml();
//  - LATEX_PARAGRAPH runs are delegated to makeParagraphs().
// Finally the environment's main tag is closed.
// NOTE(review): many lines are not visible in this excerpt, among them two
// parameter lines (supplying `os` and `text`), the declaration of `sep`,
// several conditions, the `break` statements and the return. Comments
// below describe only what the visible lines establish.
508 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
510 OutputParams const & runparams,
512 ParagraphList::const_iterator const & pbegin,
513 ParagraphList::const_iterator const & pend)
// `begin` is the start of the whole paragraph list; it is used below to
// turn `par` into an index for outerFont().
515 ParagraphList::const_iterator const begin = text.paragraphs().begin();
516 ParagraphList::const_iterator par = pbegin;
517 Layout const & bstyle = par->layout();
518 depth_type const origdepth = pbegin->params().depth();
520 // Open tag for this environment
521 bool const main_tag_opened = openTag(os, bstyle);
524 // we will on occasion need to remember a layout from before.
525 Layout const * lastlay = 0;
527 while (par != pend) {
528 Layout const & style = par->layout();
529 // the counter only gets stepped if we're in some kind of list,
530 // or if it's the first time through.
531 if (!style.counter.empty() && (par == pbegin || !isNormalEnv(style)))
532 buf.params().documentClass().counters().step(style.counter);
533 ParagraphList::const_iterator send;
534 // this will be positive, if we want to skip the initial word
535 // (if it's been taken for the label).
538 switch (style.latextype) {
539 case LATEX_ENVIRONMENT:
540 case LATEX_LIST_ENVIRONMENT:
541 case LATEX_ITEM_ENVIRONMENT: {
542 // There are two possibilities in this case.
543 // One is that we are still in the environment in which we
544 // started---which we will be if the depth is the same.
545 if (par->params().depth() == origdepth) {
546 LASSERT(bstyle == style, /* */);
// NOTE(review): lastlay starts out null; the condition guarding this
// dereference is on a line not visible in this excerpt -- confirm.
548 closeItemTag(os, *lastlay);
551 bool item_tag_opened = false;
552 bool const labelfirst = style.htmllabelfirst();
553 bool madelabel = false;
554 if (isNormalEnv(style)) {
555 // in this case, we print the label only for the first
556 // paragraph (as in a theorem).
557 item_tag_opened = openItemTag(os, style);
// A label tag name of "NONE" means no label is output.
558 if (par == pbegin && style.htmllabeltag() != "NONE") {
559 docstring const lbl =
560 pbegin->expandLabel(style, buf.params(), false);
562 bool const label_tag_opened = openLabelTag(os, style);
564 if (label_tag_opened)
565 closeLabelTag(os, style);
569 } else { // some kind of list
571 item_tag_opened = openItemTag(os, style);
// Manual labels: the label is taken from the paragraph's own first word;
// firstWordLyXHTML() returns the value stored in sep.
572 if (style.labeltype == LABEL_MANUAL
573 && style.htmllabeltag() != "NONE") {
574 madelabel = openLabelTag(os, style);
575 sep = par->firstWordLyXHTML(os, runparams);
577 closeLabelTag(os, style);
// Any other labelled list: the label text comes from expandLabel().
580 else if (style.labeltype != LABEL_NO_LABEL
581 && style.htmllabeltag() != "NONE") {
582 madelabel = openLabelTag(os, style);
583 os << par->expandLabel(style, buf.params(), false);
585 closeLabelTag(os, style);
589 item_tag_opened = openItemTag(os, style);
591 os << "<span class='" << style.name() << "inneritem'>";
// Output the paragraph content; sep is passed as the last argument (see
// the comment at its declaration about skipping the initial word).
593 par->simpleLyXHTMLOnePar(buf, os, runparams,
594 text.outerFont(distance(begin, par)), sep);
595 if (!isNormalEnv(style) && !labelfirst && madelabel)
598 if (item_tag_opened) {
599 // We may not want to close the tag yet, in particular,
600 // if we're not at the end...
602 // and are doing items...
603 && style.latextype == LATEX_ITEM_ENVIRONMENT
604 // and if the depth has changed...
605 && par->params().depth() != origdepth) {
606 // then we'll save this layout for later, and close it when
607 // we get another item.
610 closeItemTag(os, style);
614 // The other possibility is that the depth has increased, in which
615 // case we need to recurse.
617 send = searchEnvironmentHtml(par, pend);
618 par = makeEnvironmentHtml(buf, os, runparams, text, par, send);
622 case LATEX_PARAGRAPH:
623 send = searchParagraphHtml(par, pend);
624 par = makeParagraphs(buf, os, runparams, text, par, send);
627 case LATEX_BIB_ENVIRONMENT:
630 par = makeParagraphs(buf, os, runparams, text, par, send);
// NOTE(review): as above, the guard for this dereference of lastlay is not
// visible in this excerpt.
640 closeItemTag(os, *lastlay);
642 closeTag(os, bstyle);
// Outputs a single command-type paragraph (pbegin). The layout's counter,
// if any, is stepped; the layout's main tag is opened; unless the layout
// has LABEL_NO_LABEL, the expanded label is written inside the layout's
// label tag; then the paragraph content is produced by
// simpleLyXHTMLOnePar().
// NOTE(review): two parameter lines (the ones supplying `os` and `text`),
// the separator written after the label, and the closing of the main tag
// are not visible in this excerpt.
648 void makeCommand(Buffer const & buf,
650 OutputParams const & runparams,
652 ParagraphList::const_iterator const & pbegin)
654 Layout const & style = pbegin->layout();
655 if (!style.counter.empty())
656 buf.params().documentClass().counters().step(style.counter);
658 bool const main_tag_opened = openTag(os, style);
660 // Label around sectioning number:
661 // FIXME Probably need to account for LABEL_MANUAL
662 if (style.labeltype != LABEL_NO_LABEL) {
663 bool const label_tag_opened = openLabelTag(os, style);
664 os << pbegin->expandLabel(style, buf.params(), false);
// Only close the label tag if openLabelTag() reported that it opened one.
665 if (label_tag_opened)
666 closeLabelTag(os, style);
667 // Otherwise the label might run together with the text
// `begin` is the start of the whole paragraph list; it is used to turn
// pbegin into an index for outerFont().
671 ParagraphList::const_iterator const begin = text.paragraphs().begin();
672 pbegin->simpleLyXHTMLOnePar(buf, os, runparams,
673 text.outerFont(distance(begin, pbegin)));
679 } // end anonymous namespace
// Entry point for writing the paragraphs of `text`. Walks the paragraph
// list and dispatches on each paragraph's layout latextype:
//  - LATEX_COMMAND        -> makeCommand() for that one paragraph;
//  - environment / list / item types -> makeEnvironmentHtml() on the range
//    found by searchEnvironmentHtml();
//  - LATEX_BIB_ENVIRONMENT -> makeBibliography() on the range found by
//    searchEnvironmentHtml();
//  - LATEX_PARAGRAPH      -> makeParagraphs() on the range found by
//    searchParagraphHtml().
// A local copy of runparams (ourparams) is what gets passed to the helpers.
// NOTE(review): two parameter lines (the ones supplying `buf` and `os`),
// the `break` statements, the advance of `par` after makeCommand(), and the
// statement controlled by the final `if` are not visible in this excerpt.
682 void xhtmlParagraphs(Text const & text,
685 OutputParams const & runparams)
687 ParagraphList const & paragraphs = text.paragraphs();
688 ParagraphList::const_iterator par = paragraphs.begin();
689 ParagraphList::const_iterator pend = paragraphs.end();
691 OutputParams ourparams = runparams;
692 while (par != pend) {
693 Layout const & style = par->layout();
// Remember where this iteration started, for the overrun check at the end
// of the loop body.
694 ParagraphList::const_iterator lastpar = par;
695 ParagraphList::const_iterator send;
697 switch (style.latextype) {
698 case LATEX_COMMAND: {
699 // The files with which we are working never have more than
700 // one paragraph in a command structure.
702 // if (ourparams.html_in_par)
703 // fix it so we don't get sections inside standard, e.g.
704 // note that we may then need to make runparams not const, so we
705 // can communicate that back.
706 // FIXME Maybe this fix should be in the routines themselves, in case
707 // they are called from elsewhere.
708 makeCommand(buf, os, ourparams, text, par);
712 case LATEX_ENVIRONMENT:
713 case LATEX_LIST_ENVIRONMENT:
714 case LATEX_ITEM_ENVIRONMENT: {
715 // FIXME Same fix here.
716 send = searchEnvironmentHtml(par, pend);
717 par = makeEnvironmentHtml(buf, os, ourparams, text, par, send);
720 case LATEX_BIB_ENVIRONMENT: {
721 // FIXME Same fix here.
722 send = searchEnvironmentHtml(par, pend);
723 par = makeBibliography(buf, os, ourparams, text, par, send);
726 case LATEX_PARAGRAPH:
727 send = searchParagraphHtml(par, pend);
728 par = makeParagraphs(buf, os, ourparams, text, par, send);
732 // makeEnvironment may process more than one paragraph and bypass pend
// True when par has advanced from lastpar at least as far as pend lies
// from lastpar, i.e. par has reached or passed the end of the list.
733 if (distance(lastpar, par) >= distance(lastpar, pend))