Fix bug #7212: Paragraph::forToc has to include the labelString.

[lyx.git] / src / Paragraph.cpp
diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp

index 5843ba959d9696e48b214c753cd371d9b43b5543..67965ef4c5950da18f47d9143db248161b87d68b 100644 (file)
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -51,6 +51,7 @@
  
  #include "insets/InsetBibitem.h"
  #include "insets/InsetLabel.h"
+#include "insets/InsetSpecialChar.h"
  
  #include "support/debug.h"
  #include "support/docstring_list.h"
@@ -71,7 +72,7 @@ namespace lyx {
  namespace {
  /// Inset identifier (above 0x10ffff, for ucs-4)
  char_type const META_INSET = 0x200001;
-};
+} // anonymous namespace
  
  
  /////////////////////////////////////////////////////////////////////
@@ -172,6 +173,20 @@ public:
                 return result;
         }
  
+       FontSpan const & getRange(pos_type pos) const // range() of the first entry of ranges_ for which inside(pos) holds
+       {
+               /// shared empty span, returned by reference when no entry of ranges_ contains pos
+               static FontSpan empty_;
+               RangesIterator et = ranges_.end();
+               RangesIterator it = ranges_.begin();
+               for (; it != et; ++it) { // linear scan; the first matching entry wins
+                       if(it->inside(pos)) {
+                               return it->range();
+                       }
+               }
+               return empty_; // no entry matched pos
+       }
+
         bool needsRefresh() const {
                 return needs_refresh_;
         }
@@ -357,6 +372,8 @@ public:
                 return speller_change_number > speller_state_.currentChangeNumber();
         }
  
+       bool ignoreWord(docstring const & word) const ;
+       
         void setMisspelled(pos_type from, pos_type to, SpellChecker::Result state)
         {
                 pos_type textsize = owner_->size();
@@ -1327,20 +1344,35 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
  void Paragraph::Private::validate(LaTeXFeatures & features) const
  {
         if (layout_->inpreamble && inset_owner_) {
+               bool const is_command = layout_->latextype == LATEX_COMMAND;
                 Buffer const & buf = inset_owner_->buffer();
                 BufferParams const & bp = buf.params();
                 Font f;
                 TexRow tr;
+               // Using a string stream here circumvents the encoding
+               // switching machinery of odocstream. Therefore the
+               // output is wrong if this paragraph contains content
+               // that needs to switch encoding.
                 odocstringstream ods;
-               // we have to provide all the optional arguments here, even though
-               // the last one is the only one we care about.
+               if (is_command) {
+                       ods << '\\' << from_ascii(layout_->latexname());
+                       // we have to provide all the optional arguments here, even though
+                       // the last one is the only one we care about.
+                       // Separate handling of optional argument inset.
+                       if (layout_->optargs != 0 || layout_->reqargs != 0)
+                               latexArgInsets(*owner_, ods, features.runparams(),
+                                                                                        layout_->reqargs, layout_->optargs);
+                       else
+                               ods << from_ascii(layout_->latexparam());
+               }
+               docstring::size_type const length = ods.str().length();
+               // this will output "{" at the beginning, but not at the end
                 owner_->latex(bp, f, ods, tr, features.runparams(), 0, -1, true);
-               docstring const d = ods.str();
-               if (!d.empty()) {
-                       // this will have "{" at the beginning, but not at the end
-                       string const content = to_utf8(d);
-                       string const cmd = layout_->latexname();
-                       features.addPreambleSnippet("\\" + cmd + content + "}");
+               if (ods.str().length() > length) {
+                       if (is_command)
+                               ods << '}';
+                       string const snippet = to_utf8(ods.str());
+                       features.addPreambleSnippet(snippet);
                 }
         }
  
@@ -2368,12 +2400,16 @@ void Paragraph::latex(BufferParams const & bparams,
                         open_font = false;
                 }
  
+               string const running_lang = runparams.use_polyglossia ?
+                       running_font.language()->polyglossia() : running_font.language()->babel();
                 // close babel's font environment before opening CJK.
-               if (!running_font.language()->babel().empty() &&
+               string const lang_end_command = runparams.use_polyglossia ?
+                       "\\end{$$lang}" : lyxrc.language_command_end;
+               if (!running_lang.empty() &&
                     font.language()->encoding()->package() == Encoding::CJK) {
-                               string end_tag = subst(lyxrc.language_command_end,
+                               string end_tag = subst(lang_end_command,
                                                         "$$lang",
-                                                       running_font.language()->babel());
+                                                       running_lang);
                                 os << from_ascii(end_tag);
                                 column += end_tag.length();
                 }
@@ -2511,7 +2547,7 @@ void Paragraph::latex(BufferParams const & bparams,
         if (allowcust && d->endTeXParParams(bparams, os, texrow, runparams)
             && runparams.encoding != prev_encoding) {
                 runparams.encoding = prev_encoding;
-               if (!bparams.useXetex)
+               if (!runparams.isFullUnicode())
                         os << setEncoding(prev_encoding->iconvName());
         }
  
@@ -2754,7 +2790,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                                 // We don't want to escape the entities. Note that
                                 // it is safe to do this, since str can otherwise
                                 // only be "-". E.g., it can't be "<".
-                               xs << XHTMLStream::NextRaw() << str;
+                               xs << XHTMLStream::ESCAPE_NONE << str;
                         } else
                                 xs << c;
                 }
@@ -2798,17 +2834,24 @@ bool Paragraph::isWordSeparator(pos_type pos) const
         char_type const c = d->text_[pos];
         // We want to pass the ' and escape chars to the spellchecker
         static docstring const quote = from_utf8(lyxrc.spellchecker_esc_chars + '\'');
-       return (!isLetterChar(c) && !isDigit(c) && !contains(quote, c))
+       return (!isLetterChar(c) && !isDigitASCII(c) && !contains(quote, c))
                 || pos == size();
  }
  
  
+bool Paragraph::isSameSpellRange(pos_type pos1, pos_type pos2) const // true if both positions map to equal speller ranges
+{
+       return pos1 == pos2 // identical positions: no range lookup needed
+               || d->speller_state_.getRange(pos1) == d->speller_state_.getRange(pos2); // NOTE(review): two positions outside every range both yield the empty span and presumably compare equal - confirm this is intended
+}
+
+
  bool Paragraph::isChar(pos_type pos) const
  {
         if (Inset const * inset = getInset(pos))
                 return inset->isChar();
         char_type const c = d->text_[pos];
-       return !isLetterChar(c) && !isDigit(c) && !lyx::isSpace(c);
+       return !isLetterChar(c) && !isDigitASCII(c) && !lyx::isSpace(c); // plain text: true when c is neither a letter, a digit per isDigitASCII(), nor a space
  }
  
  
@@ -2906,7 +2949,7 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
                     || (c == '\n' && (options & AS_STR_NEWLINES)))
                         os.put(c);
                 else if (c == META_INSET && (options & AS_STR_INSETS)) {
-                       getInset(i)->tocString(os);
+                       getInset(i)->toString(os);
                         if (getInset(i)->asInsetMath())
                                 os << " ";
                 }
@@ -2916,6 +2959,24 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
  }
  
  
+void Paragraph::forToc(docstring & os, size_t maxlen) const // appends this paragraph's label string and text to os for use in the TOC
+{
+       if (!d->params_.labelString().empty())
+               os += d->params_.labelString() + ' '; // label first, followed by one space; appended without checking maxlen
+       for (pos_type i = 0; i < size() && os.length() < maxlen; ++i) { // the length of os is tested before each position only
+               if (isDeleted(i))
+                       continue; // deleted positions contribute nothing
+               char_type const c = d->text_[i];
+               if (isPrintable(c))
+                       os += c;
+               else if (c == '\t' || c == '\n')
+                       os += ' '; // tabs and newlines are replaced by a space
+               else if (c == META_INSET)
+                       getInset(i)->forToc(os, maxlen); // delegate to the inset at this position, passing the same os and maxlen
+       }
+}
+
+
  docstring Paragraph::stringify(pos_type beg, pos_type end, int options, OutputParams & runparams) const
  {
         odocstringstream os;
@@ -3070,7 +3131,7 @@ int Paragraph::checkBiblio(Buffer const & buffer)
  {
         // FIXME From JS:
         // This is getting more and more a mess. ...We really should clean
-       // up this bibitem issue for 1.6. See also bug 2743.
+       // up this bibitem issue for 1.6.
  
         // Add bibitem insets if necessary
         if (d->layout_->labeltype != LABEL_BIBLIO)
@@ -3258,36 +3319,44 @@ void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
  }
  
  
-bool Paragraph::find(docstring const & str, bool cs, bool mw,
-               pos_type pos, bool del) const
+int Paragraph::find(docstring const & str, bool cs, bool mw, // returns pos - start_pos after a full match (skipped insets included), 0 otherwise
+               pos_type start_pos, bool del) const // cs: compare exactly instead of via uppercase(); mw: require separators around the match; del: allow deleted positions
  {
+       pos_type pos = start_pos; // paragraph cursor; advances further than i whenever insets are skipped
         int const strsize = str.length();
         int i = 0;
         pos_type const parsize = d->text_.size();
-       for (i = 0; pos + i < parsize; ++i) {
-               if (i >= strsize)
-                       break;
-               if (cs && str[i] != d->text_[pos + i])
+       for (i = 0; i < strsize && pos < parsize; ++i, ++pos) { // i indexes str, pos indexes the paragraph
+               // Skip hyphenation and ligature-break insets so that they do not interrupt a match
+               while (pos < parsize - 1 && isInset(pos)) { // the last position of the paragraph is never skipped
+                       const InsetSpecialChar *isc = dynamic_cast<const InsetSpecialChar*>(getInset(pos));
+                       if (isc == 0 // not an InsetSpecialChar at all
+                           || (isc->kind() != InsetSpecialChar::HYPHENATION
+                               && isc->kind() != InsetSpecialChar::LIGATURE_BREAK))
+                               break; // any other inset takes part in the comparison below
+                       pos++;
+               }
+               if (cs && str[i] != d->text_[pos])
                         break;
-               if (!cs && uppercase(str[i]) != uppercase(d->text_[pos + i]))
+               if (!cs && uppercase(str[i]) != uppercase(d->text_[pos]))
                         break;
-               if (!del && isDeleted(pos + i))
+               if (!del && isDeleted(pos)) // a deleted position ends the match unless del is set
                         break;
         }
  
         if (i != strsize)
-               return false;
+               return 0; // str was not matched completely
  
         // if necessary, check whether string matches word
         if (mw) {
-               if (pos > 0 && !isWordSeparator(pos - 1))
-                       return false;
-               if (pos + strsize < parsize
-                       && !isWordSeparator(pos + strsize))
-                       return false;
+               if (start_pos > 0 && !isWordSeparator(start_pos - 1)) // position just before the match must be a separator
+                       return 0;
+               if (pos < parsize // pos is now one past the last matched position
+                       && !isWordSeparator(pos))
+                       return 0;
         }
  
-       return true;
+       return pos - start_pos; // also 0 when str is empty
  }
  
  
@@ -3517,6 +3586,21 @@ bool Paragraph::needsSpellCheck() const
  }
  
  
+bool Paragraph::Private::ignoreWord(docstring const & word) const // true if word should not be passed to the spell checker
+{
+       // Ignore words with digits
+       // FIXME: make this customizable
+       // (note that some checkers ignore words with digits by default)
+       docstring::const_iterator cit = word.begin();
+       docstring::const_iterator const end = word.end();
+       for (; cit != end; ++cit) {
+               if (isNumber((*cit)))
+                       return true; // the first character for which isNumber() holds decides
+       }
+       return false; // no such character found; this includes the empty word
+}
+
+
  SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
         WordLangTuple & wl, docstring_list & suggestions,
         bool do_suggestion, bool check_learned) const
@@ -3533,7 +3617,7 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
         if (from == to || from >= size())
                 return result;
  
-       docstring word = asString(from, to, AS_STR_INSETS + AS_STR_SKIPDELETE);
+       docstring word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
         Language * lang = d->getSpellLanguage(from);
  
         wl = WordLangTuple(word, lang);
@@ -3542,10 +3626,7 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
                 return result;
  
         if (needsSpellCheck() || check_learned) {
-               // Ignore words with digits
-               // FIXME: make this customizable
-               // (note that some checkers ignore words with digits by default)
-               if (!hasDigit(word)) {
+               if (!d->ignoreWord(word)) {
                         bool const trailing_dot = to < size() && d->text_[to] == '.';
                         result = speller->check(wl);
                         if (SpellChecker::misspelled(result) && trailing_dot) {
@@ -3555,6 +3636,11 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
                                         LYXERR(Debug::GUI, "misspelled word is correct with dot: \"" <<
                                            word << "\" [" <<
                                            from << ".." << to << "]");
+                               } else {
+                                       // spell check with dot appended failed
+                                       // restore original word/lang value
+                                       word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
+                                       wl = WordLangTuple(word, lang);
                                 }
                         }
                 }