]> git.lyx.org Git - features.git/blob - src/output_xhtml.cpp
I am now going to proceed to break XHTML output, and then reconstruct it
[features.git] / src / output_xhtml.cpp
1 /**
2  * \file output_xhtml.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author Richard Heck
7  * 
8  * This code is based upon output_docbook.cpp
9  *
10  * Full author contact details are available in file CREDITS.
11  */
12
13 #include <config.h>
14
15 #include "output_xhtml.h"
16
17 #include "Buffer.h"
18 #include "buffer_funcs.h"
19 #include "BufferParams.h"
20 #include "Counters.h"
21 #include "Layout.h"
22 #include "OutputParams.h"
23 #include "Paragraph.h"
24 #include "ParagraphList.h"
25 #include "ParagraphParameters.h"
26 #include "sgml.h"
27 #include "Text.h"
28 #include "TextClass.h"
29
30 #include "support/lassert.h"
31 #include "support/debug.h"
32 #include "support/lstrings.h"
33
34 #include <vector>
35
36 using namespace std;
37 using namespace lyx::support;
38
39 namespace lyx {
40
41 namespace html {
42
43 docstring escapeChar(char_type c)
44 {
45         docstring str;
46         switch (c) {
47         case ' ':
48                 str += " ";
49                 break;
50         case '&':
51                 str += "&amp;";
52                 break;
53         case '<':
54                 str += "&lt;";
55                 break;
56         case '>':
57                 str += "&gt;";
58                 break;
59         default:
60                 str += c;
61                 break;
62         }
63         return str;
64 }
65
66
67 // escape what needs escaping
68 docstring htmlize(docstring const & str) {
69         odocstringstream d;
70         docstring::const_iterator it = str.begin();
71         docstring::const_iterator en = str.end();
72         for (; it != en; ++it)
73                 d << escapeChar(*it);
74         return d.str();
75 }
76
77
// Tell whether \p s is the name of one of the tags we treat as font
// tags. At present these are just "em" and "strong". Others?
bool isFontTag(std::string const & s)
{
	if (s == "em")
		return true;
	return s == "strong";
}
82 } // namespace html
83
84
85 docstring StartTag::asTag() const
86 {
87         string output = "<" + tag_;
88         if (!attr_.empty())
89                 output += " " + attr_;
90         output += ">";
91         return from_utf8(output);
92 }
93
94
95 docstring StartTag::asEndTag() const
96 {
97         string output = "</" + tag_ + ">";
98         return from_utf8(output);
99 }
100
101
102 docstring EndTag::asEndTag() const
103 {
104         string output = "</" + tag_ + ">";
105         return from_utf8(output);
106 }
107
108
109 docstring CompTag::asTag() const
110 {
111         string output = "<" + tag_;
112         if (!attr_.empty())
113                 output += " " + attr_;
114         output += " />";
115         return from_utf8(output);
116 }
117
118
119 ////////////////////////////////////////////////////////////////
120 ///
121 /// XHTMLStream
122 ///
123 ////////////////////////////////////////////////////////////////
124
125 XHTMLStream::XHTMLStream(odocstream & os) 
126                 :os_(os)
127 {}
128
129
130 void XHTMLStream::cr() 
131 {
132         // tabs?
133         os_ << from_ascii("\n");
134 }
135
136
137 bool XHTMLStream::closeFontTags()
138 {
139         // first, we close any open font tags we can close
140         StartTag curtag = tag_stack_.back();
141         while (html::isFontTag(curtag.tag_)) {
142                 os_ << curtag.asEndTag();
143                 tag_stack_.pop_back();
144                 if (tag_stack_.empty())
145                         // this probably shouldn't happen, since then the
146                         // font tags weren't in any other tag. but that
147                         // problem will likely be caught elsewhere.
148                         return true;
149                 curtag = tag_stack_.back();
150         }
151         // so we've hit a non-font tag. let's see if any of the
152         // remaining tags are font tags.
153         TagStack::const_iterator it = tag_stack_.begin();
154         TagStack::const_iterator en = tag_stack_.end();
155         bool noFontTags = true;
156         for (; it != en; ++it) {
157                 if (html::isFontTag(it->tag_)) {
158                         LYXERR0("Font tag `" << it->tag_ << "' still open in closeFontTags().");
159                         noFontTags = false;
160                 }
161         }
162         return noFontTags;
163 }
164
165
166 void XHTMLStream::clearTagDeque()
167 {
168         while (!pending_tags_.empty()) {
169                 StartTag const & tag = pending_tags_.front();
170                 // tabs?
171                 os_ << tag.asTag();
172                 tag_stack_.push_back(tag);
173                 pending_tags_.pop_front();
174         }
175 }
176
177 XHTMLStream & XHTMLStream::operator<<(docstring const & d)
178 {
179         // I'm tempted to make sure here that there are no tags in the input
180         clearTagDeque();
181         os_ << html::htmlize(d);
182         return *this;
183 }
184
185
186 XHTMLStream & XHTMLStream::operator<<(char_type c)
187 {
188         clearTagDeque();
189         os_ << html::escapeChar(c);
190         return *this;
191 }
192
193
194 XHTMLStream & XHTMLStream::operator<<(StartTag const & tag) 
195 {
196         pending_tags_.push_back(tag);
197         if (tag.keepempty_)
198                 clearTagDeque();
199         return *this;
200 }
201
202
203 XHTMLStream & XHTMLStream::operator<<(CompTag const & tag) 
204 {
205         clearTagDeque();
206         // tabs?
207         os_ << tag.asTag();
208         return *this;
209 }
210
211
212 bool    XHTMLStream::isTagOpen(string const & stag)
213 {
214         TagStack::const_iterator sit = tag_stack_.begin();
215         TagStack::const_iterator const sen = tag_stack_.end();
216         for (; sit != sen; ++sit)
217                 // we could check for the
218                 if (sit->tag_ == stag) 
219                         return true;
220         return false;
221 }
222
223
224 // this is complicated, because we want to make sure that
225 // everything is properly nested. the code ought to make 
226 // sure of that, but we won't assert (yet) if we run into
227 // a problem. we'll just output error messages and try our
228 // best to make things work.
229 XHTMLStream & XHTMLStream::operator<<(EndTag const & etag)
230 {
231         // first make sure we're not closing an empty tag
232         if (!pending_tags_.empty()) {
233                 StartTag const & stag = pending_tags_.back();
234                 if (etag.tag_ == stag.tag_)  {
235                         // we have <tag></tag>, so we discard it and remove it 
236                         // from the pending_tags_.
237                         pending_tags_.pop_back();
238                         return *this;
239                 }
240                 // there is a pending tag that isn't the one we are trying
241                 // to close. 
242                 // is this tag itself pending?
243                 // non-const iterators because we may call erase().
244                 TagDeque::iterator dit = pending_tags_.begin();
245                 TagDeque::iterator const den = pending_tags_.end();
246                 for (; dit != den; ++dit) {
247                         if (dit->tag_ == etag.tag_) {
248                                 // it was pending, so we just erase it
249                                 LYXERR0("Tried to close pending tag `" << etag.tag_ 
250                                         << "' when other tags were pending. Tag discarded.");
251                                 pending_tags_.erase(dit);
252                                 return *this;
253                         }
254                 }
255                 // so etag isn't itself pending. is it even open?
256                 if (!isTagOpen(etag.tag_)) {
257                         LYXERR0("Tried to close `" << etag.tag_ 
258                                  << "' when tag was not open. Tag discarded.");
259                         return *this;
260                 }
261                 // ok, so etag is open.
262                 // our strategy will be as below: we will do what we need to 
263                 // do to close this tag.
264                 LYXERR0("Closing tag `" << etag.tag_ 
265                         << "' when other tags are pending. Discarded pending tags:");
266                 for (dit = pending_tags_.begin(); dit != den; ++dit)
267                         LYXERR0(dit->tag_);
268                 // clear the pending tags...
269                 pending_tags_.clear();
270                 // ...and then just fall through.
271         }
272
273         // is the tag we are closing the last one we opened?
274         if (etag.tag_ == tag_stack_.back().tag_) {
275                 // output it...
276                 os_ << etag.asEndTag();
277                 // ...and forget about it
278                 tag_stack_.pop_back();
279                 return *this;
280         } 
281         
282         // we are trying to close a tag other than the one last opened. 
283         // let's first see if this particular tag is still open somehow.
284         if (!isTagOpen(etag.tag_)) {
285                 LYXERR0("Tried to close `" << etag.tag_ 
286                         << "' when tag was not open. Tag discarded.");
287                 return *this;
288         }
289         
290         // so the tag was opened, but other tags have been opened since
291         // and not yet closed.
292         // if it's a font tag, though...
293         if (html::isFontTag(etag.tag_)) {
294                 // it won't be a problem if the other tags open since this one
295                 // are also font tags.
296                 TagStack::const_reverse_iterator rit = tag_stack_.rbegin();
297                 TagStack::const_reverse_iterator ren = tag_stack_.rend();
298                 for (; rit != ren; ++rit) {
299                         if (!html::isFontTag(rit->tag_)) {
300                                 // we'll just leave it and, presumably, have to close it later.
301                                 LYXERR0("Unable to close font tag `" << etag.tag_ 
302                                         << "' due to open non-font tags.");
303                                 return *this;
304                         }
305                 }
306                 
307                 // so we have e.g.:
308                 //    <em>this is <strong>bold
309                 // and are being asked to closed em. we want:
310                 //    <em>this is <strong>bold</strong></em><strong>
311                 // first, we close the intervening tags...
312                 StartTag curtag = tag_stack_.back();
313                 // ...remembering them in a stack.
314                 TagStack fontstack;
315                 while (curtag.tag_ != etag.tag_) {
316                         os_ << curtag.asEndTag();
317                         fontstack.push_back(curtag);
318                         tag_stack_.pop_back();
319                         curtag = tag_stack_.back();
320                 }
321                 // now close our tag...
322                 os_ << etag.asEndTag();
323                 // ...and restore the other tags.
324                 rit = fontstack.rbegin();
325                 ren = fontstack.rend();
326                 for (; rit != ren; ++rit)
327                         pending_tags_.push_back(*rit);
328                 return *this;
329         }
330         
331         // it wasn't a font tag.
332         // so other tags were opened before this one and not properly closed. 
333         // so we'll close them, too. that may cause other issues later, but it 
334         // at least guarantees proper nesting.
335         LYXERR0("Closing tag `" << etag.tag_ 
336                 << "' when other tags are open, namely:");
337         StartTag curtag = tag_stack_.back();
338         while (curtag.tag_ != etag.tag_) {
339                 LYXERR0(curtag.tag_);
340                 os_ << curtag.asEndTag();
341                 tag_stack_.pop_back();
342                 curtag = tag_stack_.back();
343         }
344         // curtag is now the one we actually want.
345         os_ << curtag.asEndTag();
346         tag_stack_.pop_back();
347         
348         return *this;
349 }
350
351 // End code for XHTMLStream
352
353 namespace {
354         
355 // convenience functions
356
357 inline void openTag(XHTMLStream & xs, Layout const & lay)
358 {
359         xs << StartTag(lay.htmltag(), lay.htmlattr());
360 }
361
362
363 inline void closeTag(XHTMLStream & xs, Layout const & lay)
364 {
365         xs << EndTag(lay.htmltag());
366 }
367
368
369 inline void openLabelTag(XHTMLStream & xs, Layout const & lay)
370 {
371         xs << StartTag(lay.htmllabeltag(), lay.htmllabelattr());
372 }
373
374
375 inline void closeLabelTag(XHTMLStream & xs, Layout const & lay)
376 {
377         xs << EndTag(lay.htmllabeltag());
378 }
379
380
381 inline void openItemTag(XHTMLStream & xs, Layout const & lay)
382 {
383         xs << StartTag(lay.htmlitemtag(), lay.htmlitemattr(), true);
384 }
385
386
387 inline void closeItemTag(XHTMLStream & xs, Layout const & lay)
388 {
389         xs << EndTag(lay.htmlitemtag());
390 }
391
392 // end of convenience functions
393
394 ParagraphList::const_iterator searchParagraphHtml(
395         ParagraphList::const_iterator p,
396         ParagraphList::const_iterator const & pend)
397 {
398         for (++p; p != pend && p->layout().latextype == LATEX_PARAGRAPH; ++p)
399                 ;
400
401         return p;
402 }
403
404
405 ParagraphList::const_iterator searchEnvironmentHtml(
406                 ParagraphList::const_iterator const pstart,
407                 ParagraphList::const_iterator const & pend)
408 {
409         ParagraphList::const_iterator p = pstart;
410         Layout const & bstyle = p->layout();
411         size_t const depth = p->params().depth();
412         for (++p; p != pend; ++p) {
413                 Layout const & style = p->layout();
414                 // It shouldn't happen that e.g. a section command occurs inside
415                 // a quotation environment, at a higher depth, but as of 6/2009,
416                 // it can happen. We pretend that it's just at lowest depth.
417                 if (style.latextype == LATEX_COMMAND)
418                         return p;
419                 // If depth is down, we're done
420                 if (p->params().depth() < depth)
421                         return p;
422                 // If depth is up, we're not done
423                 if (p->params().depth() > depth)
424                         continue;
425                 // Now we know we are at the same depth
426                 if (style.latextype == LATEX_PARAGRAPH
427                     || style.latexname() != bstyle.latexname())
428                         return p;
429         }
430         return pend;
431 }
432
433
434 ParagraphList::const_iterator makeParagraphs(Buffer const & buf,
435                                             XHTMLStream & xs,
436                                             OutputParams const & runparams,
437                                             Text const & text,
438                                             ParagraphList::const_iterator const & pbegin,
439                                             ParagraphList::const_iterator const & pend)
440 {
441         ParagraphList::const_iterator const begin = text.paragraphs().begin();
442         ParagraphList::const_iterator par = pbegin;
443         for (; par != pend; ++par) {
444                 Layout const & lay = par->layout();
445                 if (!lay.counter.empty())
446                         buf.params().documentClass().counters().step(lay.counter);
447                 // FIXME We should see if there's a label to be output and
448                 // do something with it.
449                 if (par != pbegin)
450                         xs.cr();
451
452                 // FIXME Should we really allow anything other than 'p' here?
453                 
454                 // If we are already in a paragraph, and this is the first one, then we
455                 // do not want to open the paragraph tag.
456                 bool const opened = 
457                         (par == pbegin && runparams.html_in_par) ? false : true;
458                 if (opened)
459                         openTag(xs, lay);
460                 docstring const deferred = from_ascii("");
461 //                              par->simpleLyXHTMLOnePar(buf, os, runparams, text.outerFont(distance(begin, par)));
462
463                 // We want to issue the closing tag if either:
464                 //   (i)  We opened it, and either html_in_par is false,
465                 //        or we're not in the last paragraph, anyway.
466                 //   (ii) We didn't open it and html_in_par is true, 
467                 //        but we are in the first par, and there is a next par.
468                 ParagraphList::const_iterator nextpar = par;
469                 nextpar++;
470                 bool const needclose = 
471                         (opened && (!runparams.html_in_par || nextpar != pend))
472                         || (!opened && runparams.html_in_par && par == pbegin && nextpar != pend);
473                 if (needclose) {
474                         closeTag(xs, lay);
475                         xs.cr();
476                 }
477                 if (!deferred.empty()) {
478                         xs << deferred;
479                         xs.cr();
480                 }
481         }
482         return pend;
483 }
484
485
486 ParagraphList::const_iterator makeBibliography(Buffer const & buf,
487                                 XHTMLStream & xs,
488                                 OutputParams const & runparams,
489                                 Text const & text,
490                                 ParagraphList::const_iterator const & pbegin,
491                                 ParagraphList::const_iterator const & pend) 
492 {
493         xs << StartTag("h2", "class='bibliography'");
494         xs << pbegin->layout().labelstring(false);
495         xs << EndTag("h2");
496         xs.cr();
497         xs << StartTag("div", "class='bibliography'");
498         xs.cr();
499         makeParagraphs(buf, xs, runparams, text, pbegin, pend);
500         xs << EndTag("div");
501         return pend;
502 }
503
504
505 bool isNormalEnv(Layout const & lay)
506 {
507         return lay.latextype == LATEX_ENVIRONMENT;
508 }
509
510         
511 ParagraphList::const_iterator makeEnvironmentHtml(Buffer const & buf,
512                                               XHTMLStream & xs,
513                                               OutputParams const & runparams,
514                                               Text const & text,
515                                               ParagraphList::const_iterator const & pbegin,
516                                               ParagraphList::const_iterator const & pend) 
517 {
518         ParagraphList::const_iterator const begin = text.paragraphs().begin();
519         ParagraphList::const_iterator par = pbegin;
520         Layout const & bstyle = par->layout();
521         depth_type const origdepth = pbegin->params().depth();
522
523         // open tag for this environment
524         openTag(xs, bstyle);
525         xs.cr();
526
527         // we will on occasion need to remember a layout from before.
528         Layout const * lastlay = 0;
529
530         while (par != pend) {
531                 Layout const & style = par->layout();
532                 // the counter only gets stepped if we're in some kind of list,
533                 // or if it's the first time through.
534                 if (!style.counter.empty() && (par == pbegin || !isNormalEnv(style)))
535                         buf.params().documentClass().counters().step(style.counter);
536                 ParagraphList::const_iterator send;
537                 // this will be positive, if we want to skip the initial word
538                 // (if it's been taken for the label).
539                 pos_type sep = 0;
540
541                 switch (style.latextype) {
542                 case LATEX_ENVIRONMENT:
543                 case LATEX_LIST_ENVIRONMENT:
544                 case LATEX_ITEM_ENVIRONMENT: {
545                         // There are two possiblities in this case. 
546                         // One is that we are still in the environment in which we 
547                         // started---which we will be if the depth is the same.
548                         if (par->params().depth() == origdepth) {
549                                 LASSERT(bstyle == style, /* */);
550                                 if (lastlay != 0) {
551                                         closeItemTag(xs, *lastlay);
552                                         lastlay = 0;
553                                 }
554                                 bool const labelfirst = style.htmllabelfirst();
555                                 if (isNormalEnv(style)) {
556                                         // in this case, we print the label only for the first 
557                                         // paragraph (as in a theorem).
558                                         openItemTag(xs, style);
559                                         if (par == pbegin && style.htmllabeltag() != "NONE") {
560                                                 docstring const lbl = 
561                                                                 pbegin->expandLabel(style, buf.params(), false);
562                                                 if (!lbl.empty()) {
563                                                         openLabelTag(xs, style);
564                                                         xs << lbl;
565                                                         closeLabelTag(xs, style);
566                                                 }
567                                                 xs.cr();
568                                         }
569                                 }       else { // some kind of list
570                                         if (!labelfirst)
571                                                 openItemTag(xs, style);
572                                         if (style.labeltype == LABEL_MANUAL
573                                             && style.htmllabeltag() != "NONE") {
574                                                 openLabelTag(xs, style);
575 //                                              sep = par->firstWordLyXHTML(xs, runparams);
576                                                 closeLabelTag(xs, style);
577                                                 xs.cr();
578                                         }
579                                         else if (style.labeltype != LABEL_NO_LABEL
580                                                  && style.htmllabeltag() != "NONE") {
581                                                 openLabelTag(xs, style);
582                                                 xs << par->expandLabel(style, buf.params(), false);
583                                                 closeLabelTag(xs, style);
584                                                 xs.cr();
585                                         }
586                                         if (labelfirst)
587                                                 openItemTag(xs, style);
588                                         else
589                                                 xs << StartTag("span", "class='" + to_utf8(style.name()) + " inneritem'>");
590                                 }
591 //                              par->simpleLyXHTMLOnePar(buf, os, runparams, 
592 //                                      text.outerFont(distance(begin, par)), sep);
593                                 if (!isNormalEnv(style) && !labelfirst)
594                                         xs << EndTag("span");
595                                 ++par;
596                                 // We may not want to close the tag yet, in particular,
597                                 // if we're not at the end...
598                                 if (par != pend 
599                                         //  and are doing items...
600                                          && style.latextype == LATEX_ITEM_ENVIRONMENT
601                                          // and if the depth has changed...
602                                          && par->params().depth() != origdepth) {
603                                          // then we'll save this layout for later, and close it when
604                                          // we get another item.
605                                         lastlay = &style;
606                                 } else
607                                         closeItemTag(xs, style);
608                                 xs.cr();
609                         }
610                         // The other possibility is that the depth has increased, in which
611                         // case we need to recurse.
612                         else {
613                                 send = searchEnvironmentHtml(par, pend);
614                                 par = makeEnvironmentHtml(buf, xs, runparams, text, par, send);
615                         }
616                         break;
617                 }
618                 case LATEX_PARAGRAPH:
619                         send = searchParagraphHtml(par, pend);
620                         par = makeParagraphs(buf, xs, runparams, text, par, send);
621                         break;
622                 // Shouldn't happen
623                 case LATEX_BIB_ENVIRONMENT:
624                         send = par;
625                         ++send;
626                         par = makeParagraphs(buf, xs, runparams, text, par, send);
627                         break;
628                 // Shouldn't happen
629                 case LATEX_COMMAND:
630                         ++par;
631                         break;
632                 }
633         }
634
635         if (lastlay != 0)
636                 closeItemTag(xs, *lastlay);
637         closeTag(xs, bstyle);
638         xs.cr();
639         return pend;
640 }
641
642
643 void makeCommand(Buffer const & buf,
644                                           XHTMLStream & xs,
645                                           OutputParams const & runparams,
646                                           Text const & text,
647                                           ParagraphList::const_iterator const & pbegin)
648 {
649         Layout const & style = pbegin->layout();
650         if (!style.counter.empty())
651                 buf.params().documentClass().counters().step(style.counter);
652
653         openTag(xs, style);
654
655         // Label around sectioning number:
656         // FIXME Probably need to account for LABEL_MANUAL
657         if (style.labeltype != LABEL_NO_LABEL) {
658                 openLabelTag(xs, style);
659                 xs << pbegin->expandLabel(style, buf.params(), false);
660                 closeLabelTag(xs, style);
661                 // Otherwise the label might run together with the text
662                 xs << from_ascii(" ");
663         }
664
665         ParagraphList::const_iterator const begin = text.paragraphs().begin();
666 //      pbegin->simpleLyXHTMLOnePar(buf, os, runparams,
667 //                      text.outerFont(distance(begin, pbegin)));
668         closeTag(xs, style);
669         xs.cr();
670 }
671
672 } // end anonymous namespace
673
674
675 void xhtmlParagraphs(Text const & text,
676                        Buffer const & buf,
677                        XHTMLStream & xs,
678                        OutputParams const & runparams)
679 {
680         ParagraphList const & paragraphs = text.paragraphs();
681         ParagraphList::const_iterator par = paragraphs.begin();
682         ParagraphList::const_iterator pend = paragraphs.end();
683
684         OutputParams ourparams = runparams;
685         while (par != pend) {
686                 Layout const & style = par->layout();
687                 ParagraphList::const_iterator lastpar = par;
688                 ParagraphList::const_iterator send;
689
690                 switch (style.latextype) {
691                 case LATEX_COMMAND: {
692                         // The files with which we are working never have more than
693                         // one paragraph in a command structure.
694                         // FIXME 
695                         // if (ourparams.html_in_par)
696                         //   fix it so we don't get sections inside standard, e.g.
697                         // note that we may then need to make runparams not const, so we
698                         // can communicate that back.
699                         // FIXME Maybe this fix should be in the routines themselves, in case
700                         // they are called from elsewhere.
701                         makeCommand(buf, xs, ourparams, text, par);
702                         ++par;
703                         break;
704                 }
705                 case LATEX_ENVIRONMENT:
706                 case LATEX_LIST_ENVIRONMENT:
707                 case LATEX_ITEM_ENVIRONMENT: {
708                         // FIXME Same fix here.
709                         send = searchEnvironmentHtml(par, pend);
710                         par = makeEnvironmentHtml(buf, xs, ourparams, text, par, send);
711                         break;
712                 }
713                 case LATEX_BIB_ENVIRONMENT: {
714                         // FIXME Same fix here.
715                         send = searchEnvironmentHtml(par, pend);
716                         par = makeBibliography(buf, xs, ourparams, text, par, send);
717                         break;
718                 }
719                 case LATEX_PARAGRAPH:
720                         send = searchParagraphHtml(par, pend);
721                         par = makeParagraphs(buf, xs, ourparams, text, par, send);
722                         break;
723                 }
724                 // FIXME??
725                 // makeEnvironment may process more than one paragraphs and bypass pend
726                 if (distance(lastpar, par) >= distance(lastpar, pend))
727                         break;
728         }
729 }
730
731
732 } // namespace lyx