]> git.lyx.org Git - lyx.git/blob - src/output_xhtml.cpp
3b1a223d455aa6e2b2b7bc88c911b64552095c70
[lyx.git] / src / output_xhtml.cpp
1 /**
2  * \file output_xhtml.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Richard Heck
7  * 
8  * This code is based upon output_docbook.cpp
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "output_xhtml.h"
16
17 #include "Buffer.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
20 #include "Counters.h"
21 #include "Layout.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
26 #include "sgml.h"
27 #include "Text.h"
28 #include "TextClass.h"
29
30 #include "support/lassert.h"
31 #include "support/debug.h"
32 #include "support/lstrings.h"
33
34 #include <vector>
35
36 using namespace std;
37 using namespace lyx::support;
38
39 namespace lyx {
40
41 namespace html {
42
43 docstring escapeChar(char_type c)
44 {
45         docstring str;
46         switch (c) {
47         case ' ':
48                 str += " ";
49                 break;
50         case '&':
51                 str += "&amp;";
52                 break;
53         case '<':
54                 str += "&lt;";
55                 break;
56         case '>':
57                 str += "&gt;";
58                 break;
59         default:
60                 str += c;
61                 break;
62         }
63         return str;
64 }
65
66
67 // escape what needs escaping
68 docstring htmlize(docstring const & str) {
69         odocstringstream d;
70         docstring::const_iterator it = str.begin();
71         docstring::const_iterator en = str.end();
72         for (; it != en; ++it)
73                 d << escapeChar(*it);
74         return d.str();
75 }
76
77
/// Tell whether \p s names one of the tags treated as font tags.
/// At present these are "em" and "strong" only (others?).
bool isFontTag(std::string const & s)
{
	if (s == "em")
		return true;
	return s == "strong";
}
82 } // namespace html
83
84
85 docstring StartTag::asTag() const
86 {
87         string output = "<" + tag_;
88         if (!attr_.empty())
89                 output += " " + attr_;
90         output += ">";
91         return from_utf8(output);
92 }
93
94
95 docstring StartTag::asEndTag() const
96 {
97         string output = "</" + tag_ + ">";
98         return from_utf8(output);
99 }
100
101
102 docstring EndTag::asEndTag() const
103 {
104         string output = "</" + tag_ + ">";
105         return from_utf8(output);
106 }
107
108
109 docstring CompTag::asTag() const
110 {
111         string output = "<" + tag_;
112         if (!attr_.empty())
113                 output += " " + attr_;
114         output += " />";
115         return from_utf8(output);
116 }
117
118
119 ////////////////////////////////////////////////////////////////
120 ///
121 /// XHTMLStream
122 ///
123 ////////////////////////////////////////////////////////////////
124
125 XHTMLStream::XHTMLStream(odocstream & os) 
126                 :os_(os)
127 {}
128
129
130 void XHTMLStream::cr() 
131 {
132         // tabs?
133         os_ << from_ascii("\n");
134 }
135
136
137 bool XHTMLStream::closeFontTags()
138 {
139         // first, we close any open font tags we can close
140         StartTag curtag = tag_stack_.back();
141         while (html::isFontTag(curtag.tag_)) {
142                 os_ << curtag.asEndTag();
143                 tag_stack_.pop_back();
144                 if (tag_stack_.empty())
145                         // this probably shouldn't happen, since then the
146                         // font tags weren't in any other tag. but that
147                         // problem will likely be caught elsewhere.
148                         return true;
149                 curtag = tag_stack_.back();
150         }
151         // so we've hit a non-font tag. let's see if any of the
152         // remaining tags are font tags.
153         TagStack::const_iterator it = tag_stack_.begin();
154         TagStack::const_iterator en = tag_stack_.end();
155         bool noFontTags = true;
156         for (; it != en; ++it) {
157                 if (html::isFontTag(it->tag_)) {
158                         LYXERR0("Font tag `" << it->tag_ << "' still open in closeFontTags().");
159                         noFontTags = false;
160                 }
161         }
162         return noFontTags;
163 }
164
165
166 void XHTMLStream::clearTagDeque()
167 {
168         while (!pending_tags_.empty()) {
169                 StartTag const & tag = pending_tags_.front();
170                 // tabs?
171                 os_ << tag.asTag();
172                 tag_stack_.push_back(tag);
173                 pending_tags_.pop_front();
174         }
175 }
176
177 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
178 {
179         // I'm tempted to make sure here that there are no tags in the input
180         clearTagDeque();
181         os_ << html::htmlize(d);
182         return *this;
183 }
184
185
186 XHTMLStream & XHTMLStream::operator<<(char_type c)
187 {
188         clearTagDeque();
189         os_ << html::escapeChar(c);
190         return *this;
191 }
192
193
194 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag) 
195 {
196         pending_tags_.push_back(tag);
197         if (tag.keepempty_)
198                 clearTagDeque();
199         return *this;
200 }
201
202
203 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag) 
204 {
205         clearTagDeque();
206         // tabs?
207         os_ << tag.asTag();
208         return *this;
209 }
210
211
212 bool    XHTMLStream::isTagOpen(string const & stag)
213 {
214         TagStack::const_iterator sit = tag_stack_.begin();
215         TagStack::const_iterator const sen = tag_stack_.end();
216         for (; sit != sen; ++sit)
217                 // we could check for the
218                 if (sit->tag_ == stag) 
219                         return true;
220         return false;
221 }
222
223
224 // this is complicated, because we want to make sure that
225 // everything is properly nested. the code ought to make 
226 // sure of that, but we won't assert (yet) if we run into
227 // a problem. we'll just output error messages and try our
228 // best to make things work.
229 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
230 {
231         // first make sure we're not closing an empty tag
232         if (!pending_tags_.empty()) {
233                 StartTag const & stag = pending_tags_.back();
234                 if (etag.tag_ == stag.tag_)  {
235                         // we have <tag></tag>, so we discard it and remove it 
236                         // from the pending_tags_.
237                         pending_tags_.pop_back();
238                         return *this;
239                 }
240                 // there is a pending tag that isn't the one we are trying
241                 // to close. 
242                 // is this tag itself pending?
243                 // non-const iterators because we may call erase().
244                 TagDeque::iterator dit = pending_tags_.begin();
245                 TagDeque::iterator const den = pending_tags_.end();
246                 for (; dit != den; ++dit) {
247                         if (dit->tag_ == etag.tag_) {
248                                 // it was pending, so we just erase it
249                                 LYXERR0("Tried to close pending tag `" << etag.tag_ 
250                                         << "' when other tags were pending. Tag discarded.");
251                                 pending_tags_.erase(dit);
252                                 return *this;
253                         }
254                 }
255                 // so etag isn't itself pending. is it even open?
256                 if (!isTagOpen(etag.tag_)) {
257                         LYXERR0("Tried to close `" << etag.tag_ 
258                                  << "' when tag was not open. Tag discarded.");
259                         return *this;
260                 }
261                 // ok, so etag is open.
262                 // our strategy will be as below: we will do what we need to 
263                 // do to close this tag.
264                 LYXERR0("Closing tag `" << etag.tag_ 
265                         << "' when other tags are pending. Discarded pending tags:");
266                 for (dit = pending_tags_.begin(); dit != den; ++dit)
267                         LYXERR0(dit->tag_);
268                 // clear the pending tags...
269                 pending_tags_.clear();
270                 // ...and then just fall through.
271         }
272
273         // is the tag we are closing the last one we opened?
274         if (etag.tag_ == tag_stack_.back().tag_) {
275                 // output it...
276                 os_ << etag.asEndTag();
277                 // ...and forget about it
278                 tag_stack_.pop_back();
279                 return *this;
280         } 
281         
282         // we are trying to close a tag other than the one last opened. 
283         // let's first see if this particular tag is still open somehow.
284         if (!isTagOpen(etag.tag_)) {
285                 LYXERR0("Tried to close `" << etag.tag_ 
286                         << "' when tag was not open. Tag discarded.");
287                 return *this;
288         }
289         
290         // so the tag was opened, but other tags have been opened since
291         // and not yet closed.
292         // if it's a font tag, though...
293         if (html::isFontTag(etag.tag_)) {
294                 // it won't be a problem if the other tags open since this one
295                 // are also font tags.
296                 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
297                 TagStack::const_reverse_iterator ren = tag_stack_.rend();
298                 for (; rit != ren; ++rit) {
299                         if (rit->tag_ == etag.tag_)
300                                 break;
301                         if (!html::isFontTag(rit->tag_)) {
302                                 // we'll just leave it and, presumably, have to close it later.
303                                 LYXERR0("Unable to close font tag `" << etag.tag_ 
304                                         << "' due to open non-font tag `" << rit->tag_ << "'.");
305                                 return *this;
306                         }
307                 }
308                 
309                 // so we have e.g.:
310                 //    <em>this is <strong>bold
311                 // and are being asked to closed em. we want:
312                 //    <em>this is <strong>bold</strong></em><strong>
313                 // first, we close the intervening tags...
314                 StartTag curtag = tag_stack_.back();
315                 // ...remembering them in a stack.
316                 TagStack fontstack;
317                 while (curtag.tag_ != etag.tag_) {
318                         os_ << curtag.asEndTag();
319                         fontstack.push_back(curtag);
320                         tag_stack_.pop_back();
321                         curtag = tag_stack_.back();
322                 }
323                 // now close our tag...
324                 os_ << etag.asEndTag();
325                 // ...and restore the other tags.
326                 rit = fontstack.rbegin();
327                 ren = fontstack.rend();
328                 for (; rit != ren; ++rit)
329                         pending_tags_.push_back(*rit);
330                 return *this;
331         }
332         
333         // it wasn't a font tag.
334         // so other tags were opened before this one and not properly closed. 
335         // so we'll close them, too. that may cause other issues later, but it 
336         // at least guarantees proper nesting.
337         LYXERR0("Closing tag `" << etag.tag_ 
338                 << "' when other tags are open, namely:");
339         StartTag curtag = tag_stack_.back();
340         while (curtag.tag_ != etag.tag_) {
341                 LYXERR0(curtag.tag_);
342                 os_ << curtag.asEndTag();
343                 tag_stack_.pop_back();
344                 curtag = tag_stack_.back();
345         }
346         // curtag is now the one we actually want.
347         os_ << curtag.asEndTag();
348         tag_stack_.pop_back();
349         
350         return *this;
351 }
352
353 // End code for XHTMLStream
354
355 namespace {
356         
357 // convenience functions
358
359 inline void openTag(XHTMLStream & xs, Layout const & lay)
360 {
361         xs << StartTag(lay.htmltag(), lay.htmlattr());
362 }
363
364
365 inline void closeTag(XHTMLStream & xs, Layout const & lay)
366 {
367         xs << EndTag(lay.htmltag());
368 }
369
370
371 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
372 {
373         xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
374 }
375
376
377 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
378 {
379         xs << EndTag(lay.htmllabeltag());
380 }
381
382
383 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
384 {
385         xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
386 }
387
388
389 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
390 {
391         xs << EndTag(lay.htmlitemtag());
392 }
393
394 // end of convenience functions
395
396 ParagraphList::const_iterator searchParagraphHtml(
397         ParagraphList::const_iterator p,
398         ParagraphList::const_iterator const & pend)
399 {
400         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
401                 ;
402
403         return p;
404 }
405
406
407 ParagraphList::const_iterator searchEnvironmentHtml(
408                 ParagraphList::const_iterator const pstart,
409                 ParagraphList::const_iterator const & pend)
410 {
411         ParagraphList::const_iterator p = pstart;
412         Layout const & bstyle = p->layout();
413         size_t const depth = p->params().depth();
414         for (++p; p != pend; ++p) {
415                 Layout const & style = p->layout();
416                 // It shouldn't happen that e.g. a section command occurs inside
417                 // a quotation environment, at a higher depth, but as of 6/2009,
418                 // it can happen. We pretend that it's just at lowest depth.
419                 if (style.latextype == LATEX_COMMAND)
420                         return p;
421                 // If depth is down, we're done
422                 if (p->params().depth() < depth)
423                         return p;
424                 // If depth is up, we're not done
425                 if (p->params().depth() > depth)
426                         continue;
427                 // Now we know we are at the same depth
428                 if (style.latextype == LATEX_PARAGRAPH
429                     || style.latexname() != bstyle.latexname())
430                         return p;
431         }
432         return pend;
433 }
434
435
436 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
437                                             XHTMLStream & xs,
438                                             OutputParams const & runparams,
439                                             Text const & text,
440                                             ParagraphList::const_iterator const & pbegin,
441                                             ParagraphList::const_iterator const & pend)
442 {
443         ParagraphList::const_iterator const begin = text.paragraphs().begin();
444         ParagraphList::const_iterator par = pbegin;
445         for (; par != pend; ++par) {
446                 Layout const & lay = par->layout();
447                 if (!lay.counter.empty())
448                         buf.params().documentClass().counters().step(lay.counter);
449                 // FIXME We should see if there's a label to be output and
450                 // do something with it.
451                 if (par != pbegin)
452                         xs.cr();
453
454                 // FIXME Should we really allow anything other than 'p' here?
455                 
456                 // If we are already in a paragraph, and this is the first one, then we
457                 // do not want to open the paragraph tag.
458                 bool const opened = 
459                         (par == pbegin && runparams.html_in_par) ? false : true;
460                 if (opened)
461                         openTag(xs, lay);
462                 docstring const deferred = 
463                         par->simpleLyXHTMLOnePar(buf, xs, runparams, text.outerFont(distance(begin, par)));
464
465                 // We want to issue the closing tag if either:
466                 //   (i)  We opened it, and either html_in_par is false,
467                 //        or we're not in the last paragraph, anyway.
468                 //   (ii) We didn't open it and html_in_par is true, 
469                 //        but we are in the first par, and there is a next par.
470                 ParagraphList::const_iterator nextpar = par;
471                 nextpar++;
472                 bool const needclose = 
473                         (opened && (!runparams.html_in_par || nextpar != pend))
474                         || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
475                 if (needclose) {
476                         closeTag(xs, lay);
477                         xs.cr();
478                 }
479                 if (!deferred.empty()) {
480                         xs << deferred;
481                         xs.cr();
482                 }
483         }
484         return pend;
485 }
486
487
488 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
489                                 XHTMLStream & xs,
490                                 OutputParams const & runparams,
491                                 Text const & text,
492                                 ParagraphList::const_iterator const & pbegin,
493                                 ParagraphList::const_iterator const & pend) 
494 {
495         xs << StartTag("h2", "class='bibliography'");
496         xs << pbegin->layout().labelstring(false);
497         xs << EndTag("h2");
498         xs.cr();
499         xs << StartTag("div", "class='bibliography'");
500         xs.cr();
501         makeParagraphs(buf, xs, runparams, text, pbegin, pend);
502         xs << EndTag("div");
503         return pend;
504 }
505
506
507 bool isNormalEnv(Layout const & lay)
508 {
509         return lay.latextype == LATEX_ENVIRONMENT;
510 }
511
512         
513 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
514                                               XHTMLStream & xs,
515                                               OutputParams const & runparams,
516                                               Text const & text,
517                                               ParagraphList::const_iterator const & pbegin,
518                                               ParagraphList::const_iterator const & pend) 
519 {
520         ParagraphList::const_iterator const begin = text.paragraphs().begin();
521         ParagraphList::const_iterator par = pbegin;
522         Layout const & bstyle = par->layout();
523         depth_type const origdepth = pbegin->params().depth();
524
525         // open tag for this environment
526         openTag(xs, bstyle);
527         xs.cr();
528
529         // we will on occasion need to remember a layout from before.
530         Layout const * lastlay = 0;
531
532         while (par != pend) {
533                 Layout const & style = par->layout();
534                 // the counter only gets stepped if we're in some kind of list,
535                 // or if it's the first time through.
536                 if (!style.counter.empty() && (par == pbegin || !isNormalEnv(style)))
537                         buf.params().documentClass().counters().step(style.counter);
538                 ParagraphList::const_iterator send;
539                 // this will be positive, if we want to skip the initial word
540                 // (if it's been taken for the label).
541                 pos_type sep = 0;
542
543                 switch (style.latextype) {
544                 case LATEX_ENVIRONMENT:
545                 case LATEX_LIST_ENVIRONMENT:
546                 case LATEX_ITEM_ENVIRONMENT: {
547                         // There are two possiblities in this case. 
548                         // One is that we are still in the environment in which we 
549                         // started---which we will be if the depth is the same.
550                         if (par->params().depth() == origdepth) {
551                                 LASSERT(bstyle == style, /* */);
552                                 if (lastlay != 0) {
553                                         closeItemTag(xs, *lastlay);
554                                         lastlay = 0;
555                                 }
556                                 bool const labelfirst = style.htmllabelfirst();
557                                 if (isNormalEnv(style)) {
558                                         // in this case, we print the label only for the first 
559                                         // paragraph (as in a theorem).
560                                         openItemTag(xs, style);
561                                         if (par == pbegin && style.htmllabeltag() != "NONE") {
562                                                 docstring const lbl = 
563                                                                 pbegin->expandLabel(style, buf.params(), false);
564                                                 if (!lbl.empty()) {
565                                                         openLabelTag(xs, style);
566                                                         xs << lbl;
567                                                         closeLabelTag(xs, style);
568                                                 }
569                                                 xs.cr();
570                                         }
571                                 }       else { // some kind of list
572                                         if (!labelfirst)
573                                                 openItemTag(xs, style);
574                                         if (style.labeltype == LABEL_MANUAL
575                                             && style.htmllabeltag() != "NONE") {
576                                                 openLabelTag(xs, style);
577 //                                              sep = par->firstWordLyXHTML(xs, runparams);
578                                                 closeLabelTag(xs, style);
579                                                 xs.cr();
580                                         }
581                                         else if (style.labeltype != LABEL_NO_LABEL
582                                                  && style.htmllabeltag() != "NONE") {
583                                                 openLabelTag(xs, style);
584                                                 xs << par->expandLabel(style, buf.params(), false);
585                                                 closeLabelTag(xs, style);
586                                                 xs.cr();
587                                         }
588                                         if (labelfirst)
589                                                 openItemTag(xs, style);
590                                         else
591                                                 xs << StartTag("span", "class='" + to_utf8(style.name()) + " inneritem'>");
592                                 }
593                                 par->simpleLyXHTMLOnePar(buf, xs, runparams, 
594                                         text.outerFont(distance(begin, par)), sep);
595                                 if (!isNormalEnv(style) && !labelfirst)
596                                         xs << EndTag("span");
597                                 ++par;
598                                 // We may not want to close the tag yet, in particular,
599                                 // if we're not at the end...
600                                 if (par != pend 
601                                         //  and are doing items...
602                                          && style.latextype == LATEX_ITEM_ENVIRONMENT
603                                          // and if the depth has changed...
604                                          && par->params().depth() != origdepth) {
605                                          // then we'll save this layout for later, and close it when
606                                          // we get another item.
607                                         lastlay = &style;
608                                 } else
609                                         closeItemTag(xs, style);
610                                 xs.cr();
611                         }
612                         // The other possibility is that the depth has increased, in which
613                         // case we need to recurse.
614                         else {
615                                 send = searchEnvironmentHtml(par, pend);
616                                 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
617                         }
618                         break;
619                 }
620                 case LATEX_PARAGRAPH:
621                         send = searchParagraphHtml(par, pend);
622                         par = makeParagraphs(buf, xs, runparams, text, par, send);
623                         break;
624                 // Shouldn't happen
625                 case LATEX_BIB_ENVIRONMENT:
626                         send = par;
627                         ++send;
628                         par = makeParagraphs(buf, xs, runparams, text, par, send);
629                         break;
630                 // Shouldn't happen
631                 case LATEX_COMMAND:
632                         ++par;
633                         break;
634                 }
635         }
636
637         if (lastlay != 0)
638                 closeItemTag(xs, *lastlay);
639         closeTag(xs, bstyle);
640         xs.cr();
641         return pend;
642 }
643
644
645 void makeCommand(Buffer const & buf,
646                                           XHTMLStream & xs,
647                                           OutputParams const & runparams,
648                                           Text const & text,
649                                           ParagraphList::const_iterator const & pbegin)
650 {
651         Layout const & style = pbegin->layout();
652         if (!style.counter.empty())
653                 buf.params().documentClass().counters().step(style.counter);
654
655         openTag(xs, style);
656
657         // Label around sectioning number:
658         // FIXME Probably need to account for LABEL_MANUAL
659         if (style.labeltype != LABEL_NO_LABEL) {
660                 openLabelTag(xs, style);
661                 xs << pbegin->expandLabel(style, buf.params(), false);
662                 closeLabelTag(xs, style);
663                 // Otherwise the label might run together with the text
664                 xs << from_ascii(" ");
665         }
666
667         ParagraphList::const_iterator const begin = text.paragraphs().begin();
668         pbegin->simpleLyXHTMLOnePar(buf, xs, runparams,
669                         text.outerFont(distance(begin, pbegin)));
670         closeTag(xs, style);
671         xs.cr();
672 }
673
674 } // end anonymous namespace
675
676
677 void xhtmlParagraphs(Text const & text,
678                        Buffer const & buf,
679                        XHTMLStream & xs,
680                        OutputParams const & runparams)
681 {
682         ParagraphList const & paragraphs = text.paragraphs();
683         ParagraphList::const_iterator par = paragraphs.begin();
684         ParagraphList::const_iterator pend = paragraphs.end();
685
686         OutputParams ourparams = runparams;
687         while (par != pend) {
688                 Layout const & style = par->layout();
689                 ParagraphList::const_iterator lastpar = par;
690                 ParagraphList::const_iterator send;
691
692                 switch (style.latextype) {
693                 case LATEX_COMMAND: {
694                         // The files with which we are working never have more than
695                         // one paragraph in a command structure.
696                         // FIXME 
697                         // if (ourparams.html_in_par)
698                         //   fix it so we don't get sections inside standard, e.g.
699                         // note that we may then need to make runparams not const, so we
700                         // can communicate that back.
701                         // FIXME Maybe this fix should be in the routines themselves, in case
702                         // they are called from elsewhere.
703                         makeCommand(buf, xs, ourparams, text, par);
704                         ++par;
705                         break;
706                 }
707                 case LATEX_ENVIRONMENT:
708                 case LATEX_LIST_ENVIRONMENT:
709                 case LATEX_ITEM_ENVIRONMENT: {
710                         // FIXME Same fix here.
711                         send = searchEnvironmentHtml(par, pend);
712                         par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
713                         break;
714                 }
715                 case LATEX_BIB_ENVIRONMENT: {
716                         // FIXME Same fix here.
717                         send = searchEnvironmentHtml(par, pend);
718                         par = makeBibliography(buf, xs, ourparams, text, par, send);
719                         break;
720                 }
721                 case LATEX_PARAGRAPH:
722                         send = searchParagraphHtml(par, pend);
723                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
724                         break;
725                 }
726                 // FIXME??
727                 // makeEnvironment may process more than one paragraphs and bypass pend
728                 if (distance(lastpar, par) >= distance(lastpar, pend))
729                         break;
730         }
731 }
732
733
734 } // namespace lyx