X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FParagraph.cpp;h=67965ef4c5950da18f47d9143db248161b87d68b;hb=68fe13dfa1ed24891d930b0d9b8b79e217c7f1e4;hp=1e90a95cb1462cb618019b1486b62b032cae799e;hpb=07924ac300f68eaf4825951af6a6ac3c14d6edd6;p=lyx.git

diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp
index 1e90a95cb1..67965ef4c5 100644
--- a/src/Paragraph.cpp
+++ b/src/Paragraph.cpp
@@ -51,6 +51,7 @@
 
 #include "insets/InsetBibitem.h"
 #include "insets/InsetLabel.h"
+#include "insets/InsetSpecialChar.h"
 
 #include "support/debug.h"
 #include "support/docstring_list.h"
@@ -71,7 +72,7 @@ namespace lyx {
 namespace {
 /// Inset identifier (above 0x10ffff, for ucs-4)
 char_type const META_INSET = 0x200001;
-};
+}
 
 
 /////////////////////////////////////////////////////////////////////
@@ -172,6 +173,20 @@ public:
 		return result;
 	}
 
+	/// Look up the stored range whose inside(pos) test succeeds.
+	/// Falls back to a shared empty span when no range matches.
+	FontSpan const & getRange(pos_type pos) const
+	{
+		static FontSpan empty_;
+		RangesIterator const last = ranges_.end();
+		for (RangesIterator cur = ranges_.begin(); cur != last; ++cur) {
+			if (cur->inside(pos))
+				return cur->range();
+		}
+		// no entry of ranges_ matched pos
+		return empty_;
+	}
+
 	bool needsRefresh() const {
 		return needs_refresh_;
 	}
@@ -357,6 +372,8 @@ public:
 		return speller_change_number > speller_state_.currentChangeNumber();
 	}
 
+	bool ignoreWord(docstring const & word) const ;
+	
 	void setMisspelled(pos_type from, pos_type to, SpellChecker::Result state)
 	{
 		pos_type textsize = owner_->size();
@@ -1111,7 +1128,7 @@ void Paragraph::Private::latexInset(BufferParams const & bparams,
 		texrow.start(owner_->id(), i + 1);
 		column = 0;
 	} else {
-		column += os.tellp() - len;
+		column += (unsigned int)(os.tellp() - len);
 	}
 
 	if (owner_->isDeleted(i))
@@ -1327,20 +1344,35 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i,
 void Paragraph::Private::validate(LaTeXFeatures & features) const
 {
 	if (layout_->inpreamble && inset_owner_) {
+		bool const is_command = layout_->latextype == LATEX_COMMAND;
 		Buffer const & buf = inset_owner_->buffer();
 		BufferParams const & bp = buf.params();
 		Font f;
 		TexRow tr;
+		// Using a string stream here circumvents the encoding
+		// switching machinery of odocstream. Therefore the
+		// output is wrong if this paragraph contains content
+		// that needs to switch encoding.
 		odocstringstream ods;
-		// we have to provide all the optional arguments here, even though
-		// the last one is the only one we care about.
+		if (is_command) {
+			ods << '\\' << from_ascii(layout_->latexname());
+			// we have to provide all the optional arguments here, even though
+			// the last one is the only one we care about.
+			// Separate handling of optional argument inset.
+			if (layout_->optargs != 0 || layout_->reqargs != 0)
+				latexArgInsets(*owner_, ods, features.runparams(),
+											 layout_->reqargs, layout_->optargs);
+			else
+				ods << from_ascii(layout_->latexparam());
+		}
+		docstring::size_type const length = ods.str().length();
+		// this will output "{" at the beginning, but not at the end
 		owner_->latex(bp, f, ods, tr, features.runparams(), 0, -1, true);
-		docstring const d = ods.str();
-		if (!d.empty()) {
-			// this will have "{" at the beginning, but not at the end
-			string const content = to_utf8(d);
-			string const cmd = layout_->latexname();
-			features.addPreambleSnippet("\\" + cmd + content + "}");
+		if (ods.str().length() > length) {
+			if (is_command)
+				ods << '}';
+			string const snippet = to_utf8(ods.str());
+			features.addPreambleSnippet(snippet);
 		}
 	}
 
@@ -2368,12 +2400,16 @@ void Paragraph::latex(BufferParams const & bparams,
 			open_font = false;
 		}
 
+		string const running_lang = runparams.use_polyglossia ?
+			running_font.language()->polyglossia() : running_font.language()->babel();
 		// close babel's font environment before opening CJK.
-		if (!running_font.language()->babel().empty() &&
+		string const lang_end_command = runparams.use_polyglossia ?
+			"\\end{$$lang}" : lyxrc.language_command_end;
+		if (!running_lang.empty() &&
 		    font.language()->encoding()->package() == Encoding::CJK) {
-				string end_tag = subst(lyxrc.language_command_end,
+				string end_tag = subst(lang_end_command,
 							"$$lang",
-							running_font.language()->babel());
+							running_lang);
 				os << from_ascii(end_tag);
 				column += end_tag.length();
 		}
@@ -2511,7 +2547,7 @@ void Paragraph::latex(BufferParams const & bparams,
 	if (allowcust && d->endTeXParParams(bparams, os, texrow, runparams)
 	    && runparams.encoding != prev_encoding) {
 		runparams.encoding = prev_encoding;
-		if (!bparams.useXetex)
+		if (!runparams.isFullUnicode())
 			os << setEncoding(prev_encoding->iconvName());
 	}
 
@@ -2754,7 +2790,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
 				// We don't want to escape the entities. Note that
 				// it is safe to do this, since str can otherwise
 				// only be "-". E.g., it can't be "<".
-				xs << XHTMLStream::NextRaw() << str;
+				xs << XHTMLStream::ESCAPE_NONE << str;
 			} else
 				xs << c;
 		}
@@ -2798,17 +2834,24 @@ bool Paragraph::isWordSeparator(pos_type pos) const
 	char_type const c = d->text_[pos];
 	// We want to pass the ' and escape chars to the spellchecker
 	static docstring const quote = from_utf8(lyxrc.spellchecker_esc_chars + '\'');
-	return (!isLetterChar(c) && !isDigit(c) && !contains(quote, c))
+	return (!isLetterChar(c) && !isDigitASCII(c) && !contains(quote, c))
 		|| pos == size();
 }
 
 
+bool Paragraph::isSameSpellRange(pos_type pos1, pos_type pos2) const
+{
+	if (pos1 == pos2) return true; // identical positions need no range lookup
+	return d->speller_state_.getRange(pos1) == d->speller_state_.getRange(pos2);
+}
+
+
 bool Paragraph::isChar(pos_type pos) const
 {
 	if (Inset const * inset = getInset(pos))
 		return inset->isChar();
 	char_type const c = d->text_[pos];
-	return !isLetterChar(c) && !isDigit(c) && !lyx::isSpace(c);
+	return !isLetterChar(c) && !isDigitASCII(c) && !lyx::isSpace(c);
 }
 
 
@@ -2906,7 +2949,7 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
 		    || (c == '\n' && (options & AS_STR_NEWLINES)))
 			os.put(c);
 		else if (c == META_INSET && (options & AS_STR_INSETS)) {
-			getInset(i)->tocString(os);
+			getInset(i)->toString(os);
 			if (getInset(i)->asInsetMath())
 				os << " ";
 		}
@@ -2916,6 +2959,24 @@ docstring Paragraph::asString(pos_type beg, pos_type end, int options) const
 }
 
 
+void Paragraph::forToc(docstring & os, size_t maxlen) const
+{
+	docstring const & label = d->params_.labelString();
+	if (!label.empty())
+		os += label + ' ';
+	for (pos_type pos = 0; pos < size() && os.length() < maxlen; ++pos) {
+		if (isDeleted(pos)) continue; // deleted positions contribute nothing
+		char_type const ch = d->text_[pos];
+		if (isPrintable(ch))
+			os += ch;
+		else if (ch == '\t' || ch == '\n')
+			os += ' '; // tabs and newlines are appended as a blank
+		else if (ch == META_INSET)
+			getInset(pos)->forToc(os, maxlen); // the inset appends its own text
+	}
+}
+
+
 docstring Paragraph::stringify(pos_type beg, pos_type end, int options, OutputParams & runparams) const
 {
 	odocstringstream os;
@@ -3070,7 +3131,7 @@ int Paragraph::checkBiblio(Buffer const & buffer)
 {
 	// FIXME From JS:
 	// This is getting more and more a mess. ...We really should clean
-	// up this bibitem issue for 1.6. See also bug 2743.
+	// up this bibitem issue for 1.6.
 
 	// Add bibitem insets if necessary
 	if (d->layout_->labeltype != LABEL_BIBLIO)
@@ -3258,36 +3319,44 @@ void Paragraph::changeCase(BufferParams const & bparams, pos_type pos,
 }
 
 
-bool Paragraph::find(docstring const & str, bool cs, bool mw,
-		pos_type pos, bool del) const
+int Paragraph::find(docstring const & str, bool cs, bool mw,
+		pos_type start_pos, bool del) const
 {
+	pos_type pos = start_pos;
 	int const strsize = str.length();
 	int i = 0;
 	pos_type const parsize = d->text_.size();
-	for (i = 0; pos + i < parsize; ++i) {
-		if (i >= strsize)
-			break;
-		if (cs && str[i] != d->text_[pos + i])
+	for (i = 0; i < strsize && pos < parsize; ++i, ++pos) {
+		// Ignore ligature break and hyphenation chars while searching
+		while (pos < parsize - 1 && isInset(pos)) {
+			const InsetSpecialChar *isc = dynamic_cast<const InsetSpecialChar*>(getInset(pos));
+			if (isc == 0
+			    || (isc->kind() != InsetSpecialChar::HYPHENATION
+				&& isc->kind() != InsetSpecialChar::LIGATURE_BREAK))
+				break;
+			pos++;
+		}
+		if (cs && str[i] != d->text_[pos])
 			break;
-		if (!cs && uppercase(str[i]) != uppercase(d->text_[pos + i]))
+		if (!cs && uppercase(str[i]) != uppercase(d->text_[pos]))
 			break;
-		if (!del && isDeleted(pos + i))
+		if (!del && isDeleted(pos))
 			break;
 	}
 
 	if (i != strsize)
-		return false;
+		return 0;
 
 	// if necessary, check whether string matches word
 	if (mw) {
-		if (pos > 0 && !isWordSeparator(pos - 1))
-			return false;
-		if (pos + strsize < parsize
-			&& !isWordSeparator(pos + strsize))
-			return false;
+		if (start_pos > 0 && !isWordSeparator(start_pos - 1))
+			return 0;
+		if (pos < parsize
+			&& !isWordSeparator(pos))
+			return 0;
 	}
 
-	return true;
+	return pos - start_pos;
 }
 
 
@@ -3517,6 +3586,21 @@ bool Paragraph::needsSpellCheck() const
 }
 
 
+bool Paragraph::Private::ignoreWord(docstring const & word) const
+{
+	// A word is ignored as soon as isNumber() accepts one of its characters.
+	// FIXME: make this customizable
+	// (note that some checkers ignore words with digits by default)
+	docstring::size_type const len = word.size();
+	for (docstring::size_type idx = 0; idx != len; ++idx) {
+		if (isNumber(word[idx]))
+			return true;
+	}
+	// no character of the word was classified as a number
+	return false;
+}
+
+
 SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
 	WordLangTuple & wl, docstring_list & suggestions,
 	bool do_suggestion, bool check_learned) const
@@ -3533,7 +3617,7 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
 	if (from == to || from >= size())
 		return result;
 
-	docstring word = asString(from, to, AS_STR_INSETS + AS_STR_SKIPDELETE);
+	docstring word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
 	Language * lang = d->getSpellLanguage(from);
 
 	wl = WordLangTuple(word, lang);
@@ -3542,10 +3626,7 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
 		return result;
 
 	if (needsSpellCheck() || check_learned) {
-		// Ignore words with digits
-		// FIXME: make this customizable
-		// (note that some checkers ignore words with digits by default)
-		if (!hasDigit(word)) {
+		if (!d->ignoreWord(word)) {
 			bool const trailing_dot = to < size() && d->text_[to] == '.';
 			result = speller->check(wl);
 			if (SpellChecker::misspelled(result) && trailing_dot) {
@@ -3555,6 +3636,11 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to,
 					LYXERR(Debug::GUI, "misspelled word is correct with dot: \"" <<
 					   word << "\" [" <<
 					   from << ".." << to << "]");
+				} else {
+					// spell check with dot appended failed
+					// restore original word/lang value
+					word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE);
+					wl = WordLangTuple(word, lang);
 				}
 			}
 		}