From 8aa37c43a1e7bc6038322183c63aecdd2024ab59 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Tue, 24 Feb 2015 21:58:27 +0100 Subject: [PATCH] Fix plaintext output of dashes (bug #3647) Previously, consecutive dashes in .lyx files were combined to endash and emdash in some cases, and in other cases they were output as is. This made the code complicated, and resulted in inconsistencies (bug #3647). Now, a dash in a .lyx file is always a dash in the output, for all flavours. The special handling is moved to the input side, so that you still get an endash if you type two hyphens. If needed, this can be changed or made customizable without the need to update the file format again. Many thanks for the fruitful mailing list discussion, which contributed significantly to the final version. --- development/FORMAT | 5 ++ lib/lyx2lyx/LyX.py | 2 +- lib/lyx2lyx/lyx_2_2.py | 69 +++++++++++++++++- src/Paragraph.cpp | 72 +++---------------- src/Text.cpp | 45 +++++++++++- src/tex2lyx/Context.cpp | 5 +- src/tex2lyx/Context.h | 2 + src/tex2lyx/test/CJK.lyx.lyx | 2 +- src/tex2lyx/test/CJKutf8.lyx.lyx | 2 +- src/tex2lyx/test/DummyDocument.lyx.lyx | 2 +- src/tex2lyx/test/Dummy~Document.lyx.lyx | 2 +- src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx | 2 +- src/tex2lyx/test/algo2e.lyx.lyx | 2 +- .../test/box-color-size-space-align.lyx.lyx | 2 +- src/tex2lyx/test/test-insets.lyx.lyx | 6 +- src/tex2lyx/test/test-memoir.lyx.lyx | 2 +- src/tex2lyx/test/test-modules.lyx.lyx | 2 +- .../test/test-refstyle-theorems.lyx.lyx | 2 +- src/tex2lyx/test/test-scr.lyx.lyx | 2 +- src/tex2lyx/test/test-structure.lyx.lyx | 2 +- src/tex2lyx/test/test.lyx.lyx | 2 +- src/tex2lyx/test/verbatim.lyx.lyx | 2 +- src/tex2lyx/text.cpp | 20 +++++- src/version.h | 4 +- 24 files changed, 169 insertions(+), 89 deletions(-) diff --git a/development/FORMAT b/development/FORMAT index ea3fa8a03f..080906011f 100644 --- a/development/FORMAT +++ b/development/FORMAT @@ -11,6 +11,11 @@ adjustments are made to tex2lyx and bugs are 
fixed in lyx2lyx. ----------------------- +2015-02-24 Georg Baum + * Format incremented to 481 + "--" and "---" are not treated as endash and emdash anymore, since + we have unicode symbols for that now (bug 3647). + 2015-01-09 Jürgen Spitzmüller * Format incremented to 480: Add self-defined Question* and Question lemma types to diff --git a/lib/lyx2lyx/LyX.py b/lib/lyx2lyx/LyX.py index 658d0ec61e..eef9220a43 100644 --- a/lib/lyx2lyx/LyX.py +++ b/lib/lyx2lyx/LyX.py @@ -85,7 +85,7 @@ format_relation = [("0_06", [200], minor_versions("0.6" , 4)), ("1_6", range(277,346), minor_versions("1.6" , 10)), ("2_0", range(346,414), minor_versions("2.0", 8)), ("2_1", range(414,475), minor_versions("2.1", 0)), - ("2_2", range(475,481), minor_versions("2.2", 0)) + ("2_2", range(475,482), minor_versions("2.2", 0)) ] #################################################################### diff --git a/lib/lyx2lyx/lyx_2_2.py b/lib/lyx2lyx/lyx_2_2.py index eeea01e436..b890b0bec1 100644 --- a/lib/lyx2lyx/lyx_2_2.py +++ b/lib/lyx2lyx/lyx_2_2.py @@ -480,6 +480,71 @@ def revert_question_env(document): i = j + +def convert_dashes(document): + "convert -- and --- to \\twohyphens and \\threehyphens" + + if document.backend != "latex": + return + + i = 0 + while i < len(document.body): + words = document.body[i].split() + if len(words) > 1 and words[0] == "\\begin_inset" and \ + words[1] in ["ERT", "Formula", "IPA"]: + # must not replace anything in math + # filtering out IPA makes Text::readParToken() more simple + # skip ERT as well since it is not needed there + j = find_end_of_inset(document.body, i) + if j == -1: + document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i)) + i += 1 + else: + i = j + continue + while True: + j = document.body[i].find("--") + if j == -1: + break + front = document.body[i][:j] + back = document.body[i][j+2:] + # We can have an arbitrary number of consecutive hyphens. 
+ # These must be split into the corresponding number of two and three hyphens + # We must match what LaTeX does: First try emdash, then endash, then single hyphen + if back.find("-") == 0: + back = back[1:] + if len(back) > 0: + document.body.insert(i+1, back) + document.body[i] = front + "\\threehyphens" + else: + if len(back) > 0: + document.body.insert(i+1, back) + document.body[i] = front + "\\twohyphens" + i += 1 + + +def revert_dashes(document): + "convert \\twohyphens and \\threehyphens to -- and ---" + + i = 0 + while i < len(document.body): + replaced = False + if document.body[i].find("\\twohyphens") >= 0: + document.body[i] = document.body[i].replace("\\twohyphens", "--") + replaced = True + if document.body[i].find("\\threehyphens") >= 0: + document.body[i] = document.body[i].replace("\\threehyphens", "---") + replaced = True + if replaced and i+1 < len(document.body) and \ + (document.body[i+1].find("\\") != 0 or \ + document.body[i+1].find("\\twohyphens") == 0 or + document.body[i+1].find("\\threehyphens") == 0) and \ + len(document.body[i]) + len(document.body[i+1]) <= 80: + document.body[i] = document.body[i] + document.body[i+1] + document.body[i+1:i+2] = [] + else: + i += 1 + ## # Conversion hub @@ -495,10 +560,12 @@ convert = [ [477, []], [478, []], [479, []], - [480, []] + [480, []], + [481, [convert_dashes]] ] revert = [ + [480, [revert_dashes]], [479, [revert_question_env]], [478, [revert_beamer_lemma]], [477, [revert_xarrow]], diff --git a/src/Paragraph.cpp b/src/Paragraph.cpp index 0570753fa9..b0fd796f47 100644 --- a/src/Paragraph.cpp +++ b/src/Paragraph.cpp @@ -364,12 +364,6 @@ public: pos_type i, unsigned int & column); /// - bool latexSpecialTypewriter( - char_type const c, - otexstream & os, - pos_type i, - unsigned int & column); - /// bool latexSpecialPhrase( otexstream & os, pos_type & i, @@ -1216,12 +1210,6 @@ void Paragraph::Private::latexSpecialChar(otexstream & os, && lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column)) 
return; - // \tt font needs special treatment - if (!runparams.inIPA - && running_font.fontInfo().family() == TYPEWRITER_FAMILY - && latexSpecialTypewriter(c, os, i, column)) - return; - // Otherwise, we use what LaTeX provides us. switch (c) { case '\\': @@ -1242,6 +1230,14 @@ void Paragraph::Private::latexSpecialChar(otexstream & os, break; case '-': os << '-'; + if (i + 1 < end_pos && text_[i+1] == '-') { + // Prevent "--" becoming an endash and "---" becoming + // an emdash. + // Within \ttfamily, "--" is merged to "-" (no endash) + // so we avoid this rather irritating ligature as well + os << "{}"; + column += 2; + } break; case '\"': os << "\\char`\\\"{}"; @@ -1401,28 +1397,6 @@ bool Paragraph::Private::latexSpecialT3(char_type const c, otexstream & os, } -bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os, - pos_type i, unsigned int & column) -{ - switch (c) { - case '-': - // within \ttfamily, "--" is merged to "-" (no endash) - // so we avoid this rather irritating ligature - if (i + 1 < int(text_.size()) && text_[i + 1] == '-') { - os << "-{}"; - column += 2; - } else - os << '-'; - return true; - - // everything else has to be checked separately - // (depending on the encoding) - default: - return false; - } -} - - /// \param end_pos /// If [start_pos, end_pos) does not include entirely the special phrase, then /// do not apply the macro transformation. 
@@ -3159,31 +3133,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf, } } else { char_type c = getUChar(buf.masterBuffer()->params(), i); - - if (style.pass_thru || runparams.pass_thru) - xs << c; - else if (c == '-' && !runparams.inIPA && - font.fontInfo().family() != TYPEWRITER_FAMILY) { - docstring str; - int j = i + 1; - if (j < size() && d->text_[j] == '-') { - j += 1; - if (j < size() && d->text_[j] == '-') { - str += from_ascii("&mdash;"); - i += 2; - } else { - str += from_ascii("&ndash;"); - i += 1; - } - } - else - str += c; - // We don't want to escape the entities. Note that - // it is safe to do this, since str can otherwise - // only be "-". E.g., it can't be "<". - xs << XHTMLStream::ESCAPE_NONE << str; - } else - xs << c; + xs << c; } font_old = font.fontInfo(); } @@ -3258,9 +3208,7 @@ bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const if ((nextpos == psize || isSpace(nextpos)) && (pos == 0 || isSpace(prevpos))) return false; - return c == '\'' - || ((nextpos == psize || d->text_[nextpos] != '-') - && (pos == 0 || d->text_[prevpos] != '-')); + return true; } diff --git a/src/Text.cpp b/src/Text.cpp index a448a5c7ee..cfb4f5c42a 100644 --- a/src/Text.cpp +++ b/src/Text.cpp @@ -498,6 +498,23 @@ void Text::readParToken(Paragraph & par, Lexer & lex, inset->read(lex); inset->setBuffer(*buf); par.insertInset(par.size(), inset.release(), font, change); + } else if (token == "\\twohyphens" || token == "\\threehyphens") { + // Ideally, this should be done by lyx2lyx, but lyx2lyx does not know the + // running font and does not know anything about layouts (and CopyStyle). 
+ Layout const & layout(par.layout()); + FontInfo info = font.fontInfo(); + info.realize(layout.resfont); + if (layout.pass_thru || info.family() == TYPEWRITER_FAMILY) { + if (token == "\\twohyphens") + par.insert(par.size(), from_ascii("--"), font, change); + else + par.insert(par.size(), from_ascii("---"), font, change); + } else { + if (token == "\\twohyphens") + par.insertChar(par.size(), 0x2013, font, change); + else + par.insertChar(par.size(), 0x2014, font, change); + } } else if (token == "\\backslash") { par.appendChar('\\', font, change); } else if (token == "\\LyXTable") { @@ -1019,14 +1036,36 @@ void Text::insertChar(Cursor & cur, char_type c) } } - par.insertChar(cur.pos(), c, cur.current_font, + pos_type pos = cur.pos(); + if (!cur.paragraph().isPassThru() && owner_->lyxCode() != IPA_CODE && + cur.current_font.fontInfo().family() != TYPEWRITER_FAMILY && + c == '-' && pos > 0) { + if (par.getChar(pos - 1) == '-') { + // convert "--" to endash + par.eraseChar(pos - 1, cur.buffer()->params().track_changes); + c = 0x2013; + pos--; + } else if (par.getChar(pos - 1) == 0x2013) { + // convert "---" to emdash + par.eraseChar(pos - 1, cur.buffer()->params().track_changes); + c = 0x2014; + pos--; + } else if (par.getChar(pos - 1) == 0x2014) { + // convert "----" to "-" + par.eraseChar(pos - 1, cur.buffer()->params().track_changes); + c = '-'; + pos--; + } + } + + par.insertChar(pos, c, cur.current_font, cur.buffer()->params().track_changes); cur.checkBufferStructure(); // cur.screenUpdateFlags(Update::Force); bool boundary = cur.boundary() - || tm.isRTLBoundary(cur.pit(), cur.pos() + 1); - setCursor(cur, cur.pit(), cur.pos() + 1, false, boundary); + || tm.isRTLBoundary(cur.pit(), pos + 1); + setCursor(cur, cur.pit(), pos + 1, false, boundary); charInserted(cur); } diff --git a/src/tex2lyx/Context.cpp b/src/tex2lyx/Context.cpp index 6de70f9c1f..2f49a071f5 100644 --- a/src/tex2lyx/Context.cpp +++ b/src/tex2lyx/Context.cpp @@ -82,7 +82,8 @@ Context::Context(bool 
need_layout_, : need_layout(need_layout_), need_end_layout(false), need_end_deeper(false), has_item(false), deeper_paragraph(false), - new_layout_allowed(true), textclass(textclass_), + new_layout_allowed(true), merging_hyphens_allowed(true), + textclass(textclass_), layout(layout_), parent_layout(parent_layout_), font(font_) { @@ -240,6 +241,8 @@ void Context::dump(ostream & os, string const & desc) const os << "deeper_paragraph "; if (new_layout_allowed) os << "new_layout_allowed "; + if (merging_hyphens_allowed) + os << "merging_hyphens_allowed "; if (!extra_stuff.empty()) os << "extrastuff=[" << extra_stuff << "] "; if (!par_extra_stuff.empty()) diff --git a/src/tex2lyx/Context.h b/src/tex2lyx/Context.h index cf006f3222..ad95f02b0b 100644 --- a/src/tex2lyx/Context.h +++ b/src/tex2lyx/Context.h @@ -146,6 +146,8 @@ public: * would not work. */ bool new_layout_allowed; + /// May -- be converted to endash and --- to emdash? + bool merging_hyphens_allowed; /// Did we output anything yet in any context? 
static bool empty; diff --git a/src/tex2lyx/test/CJK.lyx.lyx b/src/tex2lyx/test/CJK.lyx.lyx index 16c3aef21f..8db24eddbc 100644 --- a/src/tex2lyx/test/CJK.lyx.lyx +++ b/src/tex2lyx/test/CJK.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/CJKutf8.lyx.lyx b/src/tex2lyx/test/CJKutf8.lyx.lyx index a463be6ef0..1e98d78c60 100644 --- a/src/tex2lyx/test/CJKutf8.lyx.lyx +++ b/src/tex2lyx/test/CJKutf8.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/DummyDocument.lyx.lyx b/src/tex2lyx/test/DummyDocument.lyx.lyx index f8e633d283..0d9fd27658 100644 --- a/src/tex2lyx/test/DummyDocument.lyx.lyx +++ b/src/tex2lyx/test/DummyDocument.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/Dummy~Document.lyx.lyx b/src/tex2lyx/test/Dummy~Document.lyx.lyx index 3fe1ec2a95..c92cfb50cc 100644 --- a/src/tex2lyx/test/Dummy~Document.lyx.lyx +++ b/src/tex2lyx/test/Dummy~Document.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx index d74702eb47..fdabbdc478 100644 --- a/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx +++ b/src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/algo2e.lyx.lyx b/src/tex2lyx/test/algo2e.lyx.lyx index 6f5c5b6bc3..0f81d3b2fa 100644 --- a/src/tex2lyx/test/algo2e.lyx.lyx +++ b/src/tex2lyx/test/algo2e.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 
\begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx index c45285a727..b83dfee0db 100644 --- a/src/tex2lyx/test/box-color-size-space-align.lyx.lyx +++ b/src/tex2lyx/test/box-color-size-space-align.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/test-insets.lyx.lyx b/src/tex2lyx/test/test-insets.lyx.lyx index cfdb976a5c..9a80d4854e 100644 --- a/src/tex2lyx/test/test-insets.lyx.lyx +++ b/src/tex2lyx/test/test-insets.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article @@ -3507,7 +3507,7 @@ A long table \begin_inset Caption Standard \begin_layout Standard -A long table -- continued +A long table – continued \end_layout \end_inset @@ -6769,7 +6769,7 @@ fy ligature break. \end_layout \begin_layout Standard -There are dashes: endash in short form -- and long form –, emdash is alike: --- and —. If we really want several hyphens in a row, we need to separate them: - +There are dashes: endash in short form – and long form –, emdash is alike: — and —. 
If we really want several hyphens in a row, we need to separate them: - \begin_inset ERT status collapsed diff --git a/src/tex2lyx/test/test-memoir.lyx.lyx b/src/tex2lyx/test/test-memoir.lyx.lyx index 33fcffcc6a..a072761988 100644 --- a/src/tex2lyx/test/test-memoir.lyx.lyx +++ b/src/tex2lyx/test/test-memoir.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass memoir diff --git a/src/tex2lyx/test/test-modules.lyx.lyx b/src/tex2lyx/test/test-modules.lyx.lyx index 537bb512ba..4bde71a59c 100644 --- a/src/tex2lyx/test/test-modules.lyx.lyx +++ b/src/tex2lyx/test/test-modules.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass amsart diff --git a/src/tex2lyx/test/test-refstyle-theorems.lyx.lyx b/src/tex2lyx/test/test-refstyle-theorems.lyx.lyx index 69329b7b4e..07654f917e 100644 --- a/src/tex2lyx/test/test-refstyle-theorems.lyx.lyx +++ b/src/tex2lyx/test/test-refstyle-theorems.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass book diff --git a/src/tex2lyx/test/test-scr.lyx.lyx b/src/tex2lyx/test/test-scr.lyx.lyx index 4e85153e21..31bd868c3e 100644 --- a/src/tex2lyx/test/test-scr.lyx.lyx +++ b/src/tex2lyx/test/test-scr.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass scrbook diff --git a/src/tex2lyx/test/test-structure.lyx.lyx b/src/tex2lyx/test/test-structure.lyx.lyx index 88178202a5..7cb176df8d 100644 --- a/src/tex2lyx/test/test-structure.lyx.lyx +++ b/src/tex2lyx/test/test-structure.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/test.lyx.lyx b/src/tex2lyx/test/test.lyx.lyx index 1be3357997..08c0097767 100644 --- 
a/src/tex2lyx/test/test.lyx.lyx +++ b/src/tex2lyx/test/test.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/test/verbatim.lyx.lyx b/src/tex2lyx/test/verbatim.lyx.lyx index 7b2c177c1e..94dca029ce 100644 --- a/src/tex2lyx/test/verbatim.lyx.lyx +++ b/src/tex2lyx/test/verbatim.lyx.lyx @@ -1,5 +1,5 @@ #LyX file created by tex2lyx 2.2 -\lyxformat 480 +\lyxformat 481 \begin_document \begin_header \textclass article diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index f2fe12def5..ce0443b7aa 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -2387,9 +2387,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cat() == catOther || t.cat() == catAlign || t.cat() == catParameter) { - // This translates "&" to "\\&" which may be wrong... context.check_layout(os); - os << t.cs(); + if (t.asInput() == "-" && p.next_token().asInput() == "-" && + context.merging_hyphens_allowed && + context.font.family != "ttfamily" && + !context.layout->pass_thru) { + if (p.next_next_token().asInput() == "-") { + // --- is emdash + os << to_utf8(docstring(1, 0x2014)); + p.get_token(); + } else + // -- is endash + os << to_utf8(docstring(1, 0x2013)); + p.get_token(); + } else + // This translates "&" to "\\&" which may be wrong... 
+ os << t.cs(); } else if (p.isParagraph()) { @@ -3240,7 +3253,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, else if (t.cs() == "textipa") { context.check_layout(os); begin_inset(os, "IPA\n"); + bool merging_hyphens_allowed = context.merging_hyphens_allowed; + context.merging_hyphens_allowed = false; parse_text_in_inset(p, os, FLAG_ITEM, outer, context); + context.merging_hyphens_allowed = merging_hyphens_allowed; end_inset(os); preamble.registerAutomaticallyLoadedPackage("tipa"); preamble.registerAutomaticallyLoadedPackage("tipx"); diff --git a/src/version.h b/src/version.h index f29b0494c1..d1c747612a 100644 --- a/src/version.h +++ b/src/version.h @@ -36,8 +36,8 @@ extern char const * const lyx_version_info; // Do not remove the comment below, so we get merge conflict in // independent branches. Instead add your own. -#define LYX_FORMAT_LYX 480 // spitz: question and question* environments -#define LYX_FORMAT_TEX2LYX 480 +#define LYX_FORMAT_LYX 481 // gb: endash and emdash +#define LYX_FORMAT_TEX2LYX 481 #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX #ifndef _MSC_VER -- 2.39.2