X-Git-Url: https://git.lyx.org/gitweb/?a=blobdiff_plain;f=src%2FParagraph.cpp;h=8ba663939b563fa170b6d6988fb37380f054df0f;hb=e0fe63f31cac1f4e9936d7ba8e2d93f25117adb0;hp=c7aa8b9513c86b706a9b575636aa760385806350;hpb=370044551c4128e75552ffa3d3a7e706d4b5eadc;p=lyx.git diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index c7aa8b9513..8ba663939b 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -112,7 +112,7 @@ public: if (range_.first > pos) { range_.first += offset; range_.last += offset; - } else if (range_.last > pos) { + } else if (range_.last >= pos) { range_.last += offset; } } @@ -288,14 +288,14 @@ public: /// Output the surrogate pair formed by \p c and \p next to \p os. /// \return the number of characters written. - int latexSurrogatePair(odocstream & os, char_type c, char_type next, + int latexSurrogatePair(otexstream & os, char_type c, char_type next, OutputParams const &); /// Output a space in appropriate formatting (or a surrogate pair /// if the next character is a combining character). /// \return whether a surrogate pair was output. bool simpleTeXBlanks(OutputParams const &, - odocstream &, TexRow & texrow, + otexstream &, pos_type i, unsigned int & column, Font const & font, @@ -304,21 +304,21 @@ public: /// Output consecutive unicode chars, belonging to the same script as /// specified by the latex macro \p ltx, to \p os starting from \p i. /// \return the number of characters written. - int writeScriptChars(odocstream & os, docstring const & ltx, + int writeScriptChars(otexstream & os, docstring const & ltx, Change const &, Encoding const &, pos_type & i); /// This could go to ParagraphParameters if we want to. - int startTeXParParams(BufferParams const &, odocstream &, TexRow &, + int startTeXParParams(BufferParams const &, otexstream &, OutputParams const &) const; /// This could go to ParagraphParameters if we want to. - int endTeXParParams(BufferParams const &, odocstream &, TexRow &, - OutputParams const &) const; + bool endTeXParParams(BufferParams const &, otexstream &, + OutputParams const &) const; /// void latexInset(BufferParams const &, - odocstream &, - TexRow & texrow, OutputParams &, + otexstream &, + OutputParams &, Font & running_font, Font & basefont, Font const & outerfont, @@ -330,30 +330,32 @@ public: /// void latexSpecialChar( - odocstream & os, + otexstream & os, OutputParams const & runparams, Font const & running_font, Change const & running_change, Layout const & style, pos_type & i, + pos_type end_pos, unsigned int & column); /// bool latexSpecialT1( char_type const c, - odocstream & os, + otexstream & os, pos_type i, unsigned int & column); /// bool latexSpecialTypewriter( char_type const c, - odocstream & os, + otexstream & os, pos_type i, unsigned int & column); /// bool latexSpecialPhrase( - odocstream & os, + otexstream & os, pos_type & i, + pos_type end_pos, unsigned int & column, OutputParams const & runparams); @@ -391,12 +393,9 @@ public: { pos_type textsize = owner_->size(); // check for sane arguments - if (to < from || from >= textsize) + if (to <= from || from >= textsize) return; - FontSpan fp = FontSpan(from, to); - // don't mark end of paragraph - if (fp.last >= textsize) - fp.last = textsize - 1; + FontSpan fp = FontSpan(from, to - 1); speller_state_.setRange(fp, state); } @@ -840,7 +839,7 @@ int Paragraph::eraseChars(pos_type start, pos_type end, bool trackChanges) } -int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c, +int Paragraph::Private::latexSurrogatePair(otexstream & os, char_type c, char_type next, OutputParams const & runparams) { // Writing next here may circumvent a possible font change between @@ -869,7 +868,7 @@ int Paragraph::Private::latexSurrogatePair(odocstream & os, char_type c, bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams, - odocstream & os, TexRow & texrow, + otexstream & os, pos_type i, unsigned int & column, Font const & font, @@ -902,8 +901,7 @@ bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams, || text_[i - 1] == ':' || text_[i - 1] == '!'))) { os << '\n'; - texrow.newline(); - texrow.start(owner_->id(), i + 1); + os.texrow().start(owner_->id(), i + 1); column = 0; } else if (style.free_spacing) { os << '~'; @@ -914,7 +912,7 @@ bool Paragraph::Private::simpleTeXBlanks(OutputParams const & runparams, } -int Paragraph::Private::writeScriptChars(odocstream & os, +int Paragraph::Private::writeScriptChars(otexstream & os, docstring const & ltx, Change const & runningChange, Encoding const & encoding, @@ -1008,8 +1006,7 @@ bool Paragraph::Private::isTextAt(string const & str, pos_type pos) const void Paragraph::Private::latexInset(BufferParams const & bparams, - odocstream & os, - TexRow & texrow, + otexstream & os, OutputParams & runparams, Font & running_font, Font & basefont, @@ -1024,7 +1021,7 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, LASSERT(inset, /**/); if (style.pass_thru) { - inset->plaintext(os, runparams); + inset->plaintext(os.os(), runparams); return; } @@ -1052,8 +1049,7 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, os << "\\protect "; } - texrow.newline(); - texrow.start(owner_->id(), i + 1); + os.texrow().start(owner_->id(), i + 1); column = 0; } @@ -1069,7 +1065,7 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, } bool close = false; - odocstream::pos_type const len = os.tellp(); + odocstream::pos_type const len = os.os().tellp(); if (inset->forceLTR() && running_font.isRightToLeft() @@ -1095,7 +1091,7 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, // ArabTeX, though, cannot handle this special behavior, it seems. bool arabtex = basefont.language()->lang() == "arabic_arabtex" || running_font.language()->lang() == "arabic_arabtex"; - if (open_font && inset->noFontChange()) { + if (open_font && !inset->inheritFont()) { bool closeLanguage = arabtex || basefont.isRightToLeft() == running_font.isRightToLeft(); unsigned int count = running_font.latexWriteEndChanges(os, @@ -1118,10 +1114,12 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, } } - int tmp; + int prev_rows = os.texrow().rows(); try { - tmp = inset->latex(os, runparams); + runparams.lastid = id_; + runparams.lastpos = i; + inset->latex(os, runparams); } catch (EncodingException & e) { // add location information and throw again. e.par_id = id_; @@ -1136,12 +1134,11 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, os << '}'; } - if (tmp) { - texrow.newlines(tmp); - texrow.start(owner_->id(), i + 1); + if (os.texrow().rows() > prev_rows) { + os.texrow().start(owner_->id(), i + 1); column = 0; } else { - column += (unsigned int)(os.tellp() - len); + column += (unsigned int)(os.os().tellp() - len); } if (owner_->isDeleted(i)) @@ -1149,22 +1146,24 @@ void Paragraph::Private::latexInset(BufferParams const & bparams, } -void Paragraph::Private::latexSpecialChar( - odocstream & os, - OutputParams const & runparams, - Font const & running_font, - Change const & running_change, - Layout const & style, - pos_type & i, - unsigned int & column) +void Paragraph::Private::latexSpecialChar(otexstream & os, + OutputParams const & runparams, + Font const & running_font, + Change const & running_change, + Layout const & style, + pos_type & i, + pos_type end_pos, + unsigned int & column) { char_type const c = text_[i]; if (style.pass_thru || runparams.pass_thru) { - if (c != '\0') - // FIXME UNICODE: This can fail if c cannot - // be encoded in the current encoding. + if (c != '\0') { + Encoding const * const enc = runparams.encoding; + if (enc && enc->latexChar(c, true).empty()) + throw EncodingException(c); os.put(c); + } return; } @@ -1245,7 +1244,7 @@ void Paragraph::Private::latexSpecialChar( default: // LyX, LaTeX etc. - if (latexSpecialPhrase(os, i, column, runparams)) + if (latexSpecialPhrase(os, i, end_pos, column, runparams)) return; if (c == '\0') @@ -1281,7 +1280,7 @@ void Paragraph::Private::latexSpecialChar( } -bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os, +bool Paragraph::Private::latexSpecialT1(char_type const c, otexstream & os, pos_type i, unsigned int & column) { switch (c) { @@ -1309,7 +1308,7 @@ bool Paragraph::Private::latexSpecialT1(char_type const c, odocstream & os, } -bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream & os, +bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os, pos_type i, unsigned int & column) { switch (c) { @@ -1331,7 +1330,10 @@ bool Paragraph::Private::latexSpecialTypewriter(char_type const c, odocstream & } -bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i, +/// \param end_pos +/// If [start_pos, end_pos) does not include entirely the special phrase, then +/// do not apply the macro transformation. +bool Paragraph::Private::latexSpecialPhrase(otexstream & os, pos_type & i, pos_type end_pos, unsigned int & column, OutputParams const & runparams) { // FIXME: if we have "LaTeX" with a font @@ -1341,7 +1343,8 @@ bool Paragraph::Private::latexSpecialPhrase(odocstream & os, pos_type & i, // "words" for some definition of word for (size_t pnr = 0; pnr < phrases_nr; ++pnr) { - if (!isTextAt(special_phrases[pnr].phrase, i)) + if (!isTextAt(special_phrases[pnr].phrase, i) + || (end_pos != -1 && i + int(special_phrases[pnr].phrase.size()) > end_pos)) continue; if (runparams.moving_arg) os << "\\protect"; @@ -1361,26 +1364,27 @@ void Paragraph::Private::validate(LaTeXFeatures & features) const Buffer const & buf = inset_owner_->buffer(); BufferParams const & bp = buf.params(); Font f; - TexRow tr; + TexRow texrow; // Using a string stream here circumvents the encoding // switching machinery of odocstream. Therefore the // output is wrong if this paragraph contains content // that needs to switch encoding. odocstringstream ods; + otexstream os(ods, texrow); if (is_command) { - ods << '\\' << from_ascii(layout_->latexname()); + os << '\\' << from_ascii(layout_->latexname()); // we have to provide all the optional arguments here, even though // the last one is the only one we care about. // Separate handling of optional argument inset. if (layout_->optargs != 0 || layout_->reqargs != 0) - latexArgInsets(*owner_, ods, features.runparams(), - layout_->reqargs, layout_->optargs); + latexArgInsets(*owner_, os, features.runparams(), + layout_->reqargs, layout_->optargs); else - ods << from_ascii(layout_->latexparam()); + os << from_ascii(layout_->latexparam()); } docstring::size_type const length = ods.str().length(); // this will output "{" at the beginning, but not at the end - owner_->latex(bp, f, ods, tr, features.runparams(), 0, -1, true); + owner_->latex(bp, f, os, features.runparams(), 0, -1, true); if (ods.str().length() > length) { if (is_command) ods << '}'; @@ -2122,13 +2126,12 @@ string const corrected_env(string const & suffix, string const & env, } -void adjust_row_column(string const & str, TexRow & texrow, int & column) +void adjust_column(string const & str, int & column) { if (!contains(str, "\n")) column += str.size(); else { string tmp; - texrow.newline(); column = rsplit(str, tmp, '\n').size(); } } @@ -2137,8 +2140,7 @@ void adjust_row_column(string const & str, TexRow & texrow, int & column) int Paragraph::Private::startTeXParParams(BufferParams const & bparams, - odocstream & os, TexRow & texrow, - OutputParams const & runparams) const + otexstream & os, OutputParams const & runparams) const { int column = 0; @@ -2187,7 +2189,7 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams, else output = corrected_env(begin_tag, "flushright", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); + adjust_column(output, column); break; } case LYX_ALIGN_RIGHT: { string output; @@ -2196,13 +2198,13 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams, else output = corrected_env(begin_tag, "flushleft", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); + adjust_column(output, column); break; } case LYX_ALIGN_CENTER: { string output; output = corrected_env(begin_tag, "center", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); + adjust_column(output, column); break; } } @@ -2211,16 +2213,13 @@ int Paragraph::Private::startTeXParParams(BufferParams const & bparams, } -int Paragraph::Private::endTeXParParams(BufferParams const & bparams, - odocstream & os, TexRow & texrow, - OutputParams const & runparams) const +bool Paragraph::Private::endTeXParParams(BufferParams const & bparams, + otexstream & os, OutputParams const & runparams) const { - int column = 0; - LyXAlignment const curAlign = params_.align(); if (curAlign == layout_->align) - return column; + return false; switch (curAlign) { case LYX_ALIGN_NONE: @@ -2232,13 +2231,12 @@ int Paragraph::Private::endTeXParParams(BufferParams const & bparams, case LYX_ALIGN_LEFT: case LYX_ALIGN_RIGHT: case LYX_ALIGN_CENTER: - if (runparams.moving_arg) { + if (runparams.moving_arg) os << "\\protect"; - column = 8; - } break; } + string output; string const end_tag = "\n\\par\\end"; InsetCode code = ownerCode(); bool const lastpar = runparams.isLastPar; @@ -2251,40 +2249,34 @@ int Paragraph::Private::endTeXParParams(BufferParams const & bparams, case LYX_ALIGN_DECIMAL: break; case LYX_ALIGN_LEFT: { - string output; if (owner_->getParLanguage(bparams)->babel() != "hebrew") output = corrected_env(end_tag, "flushleft", code, lastpar); else output = corrected_env(end_tag, "flushright", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); break; } case LYX_ALIGN_RIGHT: { - string output; if (owner_->getParLanguage(bparams)->babel() != "hebrew") output = corrected_env(end_tag, "flushright", code, lastpar); else output = corrected_env(end_tag, "flushleft", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); break; } case LYX_ALIGN_CENTER: { - string output; output = corrected_env(end_tag, "center", code, lastpar); os << from_ascii(output); - adjust_row_column(output, texrow, column); break; } } - return column; + return !output.empty() || lastpar; } // This one spits out the text of the paragraph void Paragraph::latex(BufferParams const & bparams, Font const & outerfont, - odocstream & os, TexRow & texrow, + otexstream & os, OutputParams const & runparams, int start_pos, int end_pos, bool force) const { @@ -2331,7 +2323,7 @@ void Paragraph::latex(BufferParams const & bparams, Encoding const * const prev_encoding = runparams.encoding; - texrow.start(id(), 0); + os.texrow().start(id(), 0); // if the paragraph is empty, the loop will not be entered at all if (empty()) { @@ -2340,8 +2332,7 @@ void Paragraph::latex(BufferParams const & bparams, ++column; } if (allowcust) - column += d->startTeXParParams(bparams, os, texrow, - runparams); + column += d->startTeXParParams(bparams, os, runparams); } for (pos_type i = 0; i < size(); ++i) { @@ -2372,12 +2363,11 @@ void Paragraph::latex(BufferParams const & bparams, if (allowcust) column += d->startTeXParParams(bparams, os, - texrow, runparams); } - Change const & change = runparams.inDeletedInset ? runparams.changeOfDeletedInset - : lookupChange(i); + Change const & change = runparams.inDeletedInset + ? runparams.changeOfDeletedInset : lookupChange(i); if (bparams.outputChanges && runningChange != change) { if (open_font) { @@ -2436,8 +2426,9 @@ void Paragraph::latex(BufferParams const & bparams, if (!runparams.pass_thru && !style.pass_thru && runparams.encoding->package() != Encoding::none && font.language()->encoding()->package() != Encoding::none) { - pair const enc_switch = switchEncoding(os, bparams, - runparams, *(font.language()->encoding())); + pair const enc_switch = + switchEncoding(os.os(), bparams, runparams, + *(font.language()->encoding())); if (enc_switch.first) { column += enc_switch.second; runparams.encoding = font.language()->encoding(); @@ -2480,8 +2471,7 @@ void Paragraph::latex(BufferParams const & bparams, // latexSpecialChar ignores spaces if // style.pass_thru is false. if (i != body_pos - 1) { - if (d->simpleTeXBlanks( - runparams, os, texrow, + if (d->simpleTeXBlanks(runparams, os, i, column, font, style)) { // A surrogate pair was output. We // must not call latexSpecialChar @@ -2503,8 +2493,7 @@ void Paragraph::latex(BufferParams const & bparams, // and then split to handle the two modes separately. if (c == META_INSET) { if (i >= start_pos && (end_pos == -1 || i < end_pos)) { - d->latexInset(bparams, os, - texrow, rp, running_font, + d->latexInset(bparams, os, rp, running_font, basefont, outerfont, open_font, runningChange, style, i, column); } @@ -2512,7 +2501,7 @@ void Paragraph::latex(BufferParams const & bparams, if (i >= start_pos && (end_pos == -1 || i < end_pos)) { try { d->latexSpecialChar(os, rp, running_font, runningChange, - style, i, column); + style, i, end_pos, column); } catch (EncodingException & e) { if (runparams.dryrun) { os << "<" << _("LyX Warning: ") @@ -2562,7 +2551,7 @@ void Paragraph::latex(BufferParams const & bparams, os << "}]~"; } - if (allowcust && d->endTeXParParams(bparams, os, texrow, runparams) + if (allowcust && d->endTeXParParams(bparams, os, runparams) && runparams.encoding != prev_encoding) { runparams.encoding = prev_encoding; if (!runparams.isFullUnicode()) @@ -2733,10 +2722,11 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf, bool emph_flag = false; bool bold_flag = false; - string closing_tag; Layout const & style = *d->layout_; + xs.startParagraph(allowEmpty()); + if (!runparams.for_toc && runparams.html_make_pars) { // generate a magic label for this paragraph string const attr = "id='" + magicLabel() + "'"; @@ -2816,6 +2806,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf, } xs.closeFontTags(); + xs.endParagraph(); return retval; } @@ -2847,13 +2838,40 @@ bool Paragraph::isLineSeparator(pos_type pos) const bool Paragraph::isWordSeparator(pos_type pos) const { + if (pos == size()) + return true; if (Inset const * inset = getInset(pos)) return !inset->isLetter(); + // if we have a hard hyphen (no en- or emdash) or apostrophe + // we pass this to the spell checker + // FIXME: this method is subject to change, visit + // https://bugzilla.mozilla.org/show_bug.cgi?id=355178 + // to get an impression how complex this is. + if (isHardHyphenOrApostrophe(pos)) + return false; char_type const c = d->text_[pos]; - // We want to pass the ' and escape chars to the spellchecker - static docstring const quote = from_utf8(lyxrc.spellchecker_esc_chars + '\''); - return (!isLetterChar(c) && !isDigitASCII(c) && !contains(quote, c)) - || pos == size(); + // We want to pass the escape chars to the spellchecker + docstring const escape_chars = from_utf8(lyxrc.spellchecker_esc_chars); + return !isLetterChar(c) && !isDigitASCII(c) && !contains(escape_chars, c); +} + + +bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const +{ + pos_type const psize = size(); + if (pos >= psize) + return false; + char_type const c = d->text_[pos]; + if (c != '-' && c != '\'') + return false; + int nextpos = pos + 1; + int prevpos = pos > 0 ? pos - 1 : 0; + if ((nextpos == psize || isSpace(nextpos)) + && (pos == 0 || isSpace(prevpos))) + return false; + return c == '\'' + || ((nextpos == psize || d->text_[nextpos] != '-') + && (pos == 0 || d->text_[prevpos] != '-')); } @@ -3345,12 +3363,12 @@ int Paragraph::find(docstring const & str, bool cs, bool mw, int i = 0; pos_type const parsize = d->text_.size(); for (i = 0; i < strsize && pos < parsize; ++i, ++pos) { - // Ignore ligature break and hyphenation chars while searching + // Ignore "invisible" letters such as ligature breaks + // and hyphenation chars while searching while (pos < parsize - 1 && isInset(pos)) { - const InsetSpecialChar *isc = dynamic_cast(getInset(pos)); - if (isc == 0 - || (isc->kind() != InsetSpecialChar::HYPHENATION - && isc->kind() != InsetSpecialChar::LIGATURE_BREAK)) + odocstringstream os; + getInset(pos)->toString(os); + if (!getInset(pos)->isLetter() || !os.str().empty()) break; pos++; } @@ -3644,6 +3662,7 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to, return result; if (needsSpellCheck() || check_learned) { + pos_type end = to; if (!d->ignoreWord(word)) { bool const trailing_dot = to < size() && d->text_[to] == '.'; result = speller->check(wl); @@ -3655,28 +3674,33 @@ SpellChecker::Result Paragraph::spellCheck(pos_type & from, pos_type & to, word << "\" [" << from << ".." << to << "]"); } else { - // spell check with dot appended failed + // spell check with dot appended failed too // restore original word/lang value word = asString(from, to, AS_STR_INSETS | AS_STR_SKIPDELETE); wl = WordLangTuple(word, lang); } } } - d->setMisspelled(from, to, result); + if (!SpellChecker::misspelled(result)) { + // area up to the begin of the next word is not misspelled + while (end < size() && isWordSeparator(end)) + ++end; + } + d->setMisspelled(from, end, result); } else { result = d->speller_state_.getState(from); } - bool const misspelled_ = SpellChecker::misspelled(result) ; - if (misspelled_ && do_suggestion) - speller->suggest(wl, suggestions); - else if (misspelled_) + if (do_suggestion) + suggestions.clear(); + + if (SpellChecker::misspelled(result)) { LYXERR(Debug::GUI, "misspelled word: \"" << word << "\" [" << from << ".." << to << "]"); - else - suggestions.clear(); - + if (do_suggestion) + speller->suggest(wl, suggestions); + } return result; } @@ -3765,9 +3789,14 @@ void Paragraph::spellCheck() const } -bool Paragraph::isMisspelled(pos_type pos) const +bool Paragraph::isMisspelled(pos_type pos, bool check_boundary) const { - return SpellChecker::misspelled(d->speller_state_.getState(pos)); + bool result = SpellChecker::misspelled(d->speller_state_.getState(pos)); + if (result || pos <= 0 || pos > size()) + return result; + if (check_boundary && (pos == size() || isWordSeparator(pos))) + result = SpellChecker::misspelled(d->speller_state_.getState(pos - 1)); + return result; }