git.lyx.org Git - features.git/commitdiff
Fix plaintext output of dashes (bug #3647)
author Georg Baum <baum@lyx.org>
Tue, 24 Feb 2015 20:58:27 +0000 (21:58 +0100)
committer Georg Baum <baum@lyx.org>
Tue, 24 Feb 2015 21:06:12 +0000 (22:06 +0100)
Previously, consecutive dashes in .lyx files were combined to endash and emdash
in some cases, and in other cases they were output as is. This made the code
complicated, and resulted in inconsistencies (bug #3647).
Now, a dash in a .lyx file is always a dash in the output, for all flavours.
The special handling is moved to the input side, so that you still get an
endash if you type two hyphens. If needed, this can be changed or made
customizable without the need to update the file format again. Many thanks
for the fruitful mailing list discussion, which contributed significantly to
the final version.

24 files changed:
development/FORMAT
lib/lyx2lyx/LyX.py
lib/lyx2lyx/lyx_2_2.py
src/Paragraph.cpp
src/Text.cpp
src/tex2lyx/Context.cpp
src/tex2lyx/Context.h
src/tex2lyx/test/CJK.lyx.lyx
src/tex2lyx/test/CJKutf8.lyx.lyx
src/tex2lyx/test/DummyDocument.lyx.lyx
src/tex2lyx/test/Dummy~Document.lyx.lyx
src/tex2lyx/test/XeTeX-polyglossia.lyx.lyx
src/tex2lyx/test/algo2e.lyx.lyx
src/tex2lyx/test/box-color-size-space-align.lyx.lyx
src/tex2lyx/test/test-insets.lyx.lyx
src/tex2lyx/test/test-memoir.lyx.lyx
src/tex2lyx/test/test-modules.lyx.lyx
src/tex2lyx/test/test-refstyle-theorems.lyx.lyx
src/tex2lyx/test/test-scr.lyx.lyx
src/tex2lyx/test/test-structure.lyx.lyx
src/tex2lyx/test/test.lyx.lyx
src/tex2lyx/test/verbatim.lyx.lyx
src/tex2lyx/text.cpp
src/version.h

index ea3fa8a03f9afffc0f65366403ef2491c5198e3d..080906011f12ee7621eabea5df9620b92c40c59e 100644 (file)
@@ -11,6 +11,11 @@ adjustments are made to tex2lyx and bugs are fixed in lyx2lyx.
 
 -----------------------
 
+2015-02-24 Georg Baum  <Georg.Baum@post.rwth-aachen.de>
+       * Format incremented to 481
+         "--" and "---" are not treated as endash and emdash anymore, since
+         we have unicode symbols for that now (bug 3647).
+
 2015-01-09 Jürgen Spitzmüller <spitz@lyx.org>
        * Format incremented to 480:
          Add self-defined Question* and Question lemma types to
index 658d0ec61e0263c4e57237934469ca9665015b59..eef9220a43847fd2edfe7fd014fa8b31cda3b42d 100644 (file)
@@ -85,7 +85,7 @@ format_relation = [("0_06",    [200], minor_versions("0.6" , 4)),
                    ("1_6", range(277,346), minor_versions("1.6" , 10)),
                    ("2_0", range(346,414), minor_versions("2.0", 8)),
                    ("2_1", range(414,475), minor_versions("2.1", 0)),
-                   ("2_2", range(475,481), minor_versions("2.2", 0))
+                   ("2_2", range(475,482), minor_versions("2.2", 0))
                   ]
 
 ####################################################################
index eeea01e436d3612ae6468009031a8f1434cf1fe3..b890b0bec1797fe2070ae473226f7c3b1535a75d 100644 (file)
@@ -480,6 +480,71 @@ def revert_question_env(document):
 
         i = j
 
+
+def convert_dashes(document):
+    "convert -- and --- to \\twohyphens and \\threehyphens"
+
+    if document.backend != "latex":
+        return
+
+    i = 0
+    while i < len(document.body):
+        words = document.body[i].split()
+        if len(words) > 1 and words[0] == "\\begin_inset" and \
+           words[1] in ["ERT", "Formula", "IPA"]:
+            # must not replace anything in math
+            # filtering out IPA makes Text::readParToken() more simple
+            # skip ERT as well since it is not needed there
+            j = find_end_of_inset(document.body, i)
+            if j == -1:
+                document.warning("Malformed LyX document: Can't find end of " + words[1] + " inset at line " + str(i))
+                i += 1
+            else:
+                i = j
+            continue
+        while True:
+            j = document.body[i].find("--")
+            if j == -1:
+                break
+            front = document.body[i][:j]
+            back = document.body[i][j+2:]
+            # We can have an arbitrary number of consecutive hyphens.
+            # These must be split into the corresponding number of two and three hyphens
+            # We must match what LaTeX does: First try emdash, then endash, then single hyphen
+            if back.find("-") == 0:
+                back = back[1:]
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                document.body[i] = front + "\\threehyphens"
+            else:
+                if len(back) > 0:
+                    document.body.insert(i+1, back)
+                document.body[i] = front + "\\twohyphens"
+        i += 1
+
+
+def revert_dashes(document):
+    "convert \\twohyphens and \\threehyphens to -- and ---"
+
+    i = 0
+    while i < len(document.body):
+        replaced = False
+        if document.body[i].find("\\twohyphens") >= 0:
+            document.body[i] = document.body[i].replace("\\twohyphens", "--")
+            replaced = True
+        if document.body[i].find("\\threehyphens") >= 0:
+            document.body[i] = document.body[i].replace("\\threehyphens", "---")
+            replaced = True
+        if replaced and i+1 < len(document.body) and \
+           (document.body[i+1].find("\\") != 0 or \
+            document.body[i+1].find("\\twohyphens") == 0 or
+            document.body[i+1].find("\\threehyphens") == 0) and \
+           len(document.body[i]) + len(document.body[i+1]) <= 80:
+            document.body[i] = document.body[i] + document.body[i+1]
+            document.body[i+1:i+2] = []
+        else:
+            i += 1
+
   
 ##
 # Conversion hub
@@ -495,10 +560,12 @@ convert = [
            [477, []],
            [478, []],
            [479, []],
-           [480, []]
+           [480, []],
+           [481, [convert_dashes]]
           ]
 
 revert =  [
+           [480, [revert_dashes]],
            [479, [revert_question_env]],
            [478, [revert_beamer_lemma]],
            [477, [revert_xarrow]],
index 0570753fa9788a042bff406a175af3ebd48800d7..b0fd796f47e0927b2e879155efe7b3734f975108 100644 (file)
@@ -364,12 +364,6 @@ public:
                pos_type i,
                unsigned int & column);
        ///
-       bool latexSpecialTypewriter(
-               char_type const c,
-               otexstream & os,
-               pos_type i,
-               unsigned int & column);
-       ///
        bool latexSpecialPhrase(
                otexstream & os,
                pos_type & i,
@@ -1216,12 +1210,6 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
            && lyxrc.fontenc == "T1" && latexSpecialT1(c, os, i, column))
                return;
 
-       // \tt font needs special treatment
-       if (!runparams.inIPA
-            && running_font.fontInfo().family() == TYPEWRITER_FAMILY
-            && latexSpecialTypewriter(c, os, i, column))
-               return;
-
        // Otherwise, we use what LaTeX provides us.
        switch (c) {
        case '\\':
@@ -1242,6 +1230,14 @@ void Paragraph::Private::latexSpecialChar(otexstream & os,
                break;
        case '-':
                os << '-';
+               if (i + 1 < end_pos && text_[i+1] == '-') {
+                       // Prevent "--" becoming an endash and "---" becoming
+                       // an emdash.
+                       // Within \ttfamily, "--" is merged to "-" (no endash)
+                       // so we avoid this rather irritating ligature as well
+                       os << "{}";
+                       column += 2;
+               }
                break;
        case '\"':
                os << "\\char`\\\"{}";
@@ -1401,28 +1397,6 @@ bool Paragraph::Private::latexSpecialT3(char_type const c, otexstream & os,
 }
 
 
-bool Paragraph::Private::latexSpecialTypewriter(char_type const c, otexstream & os,
-       pos_type i, unsigned int & column)
-{
-       switch (c) {
-       case '-':
-               // within \ttfamily, "--" is merged to "-" (no endash)
-               // so we avoid this rather irritating ligature
-               if (i + 1 < int(text_.size()) && text_[i + 1] == '-') {
-                       os << "-{}";
-                       column += 2;
-               } else
-                       os << '-';
-               return true;
-
-       // everything else has to be checked separately
-       // (depending on the encoding)
-       default:
-               return false;
-       }
-}
-
-
 /// \param end_pos
 ///   If [start_pos, end_pos) does not include entirely the special phrase, then
 ///   do not apply the macro transformation.
@@ -3159,31 +3133,7 @@ docstring Paragraph::simpleLyXHTMLOnePar(Buffer const & buf,
                        }
                } else {
                        char_type c = getUChar(buf.masterBuffer()->params(), i);
-
-                       if (style.pass_thru || runparams.pass_thru)
-                               xs << c;
-                       else if (c == '-' && !runparams.inIPA &&
-                                font.fontInfo().family() != TYPEWRITER_FAMILY) {
-                               docstring str;
-                               int j = i + 1;
-                               if (j < size() && d->text_[j] == '-') {
-                                       j += 1;
-                                       if (j < size() && d->text_[j] == '-') {
-                                               str += from_ascii("&mdash;");
-                                               i += 2;
-                                       } else {
-                                               str += from_ascii("&ndash;");
-                                               i += 1;
-                                       }
-                               }
-                               else
-                                       str += c;
-                               // We don't want to escape the entities. Note that
-                               // it is safe to do this, since str can otherwise
-                               // only be "-". E.g., it can't be "<".
-                               xs << XHTMLStream::ESCAPE_NONE << str;
-                       } else
-                               xs << c;
+                       xs << c;
                }
                font_old = font.fontInfo();
        }
@@ -3258,9 +3208,7 @@ bool Paragraph::isHardHyphenOrApostrophe(pos_type pos) const
        if ((nextpos == psize || isSpace(nextpos))
                && (pos == 0 || isSpace(prevpos)))
                return false;
-       return c == '\''
-               || ((nextpos == psize || d->text_[nextpos] != '-')
-               && (pos == 0 || d->text_[prevpos] != '-'));
+       return true;
 }
 
 
index a448a5c7eebadc98947b2ffe2bc974adf43546e7..cfb4f5c42ad8661c6be73882f64db940afe7b3cf 100644 (file)
@@ -498,6 +498,23 @@ void Text::readParToken(Paragraph & par, Lexer & lex,
                inset->read(lex);
                inset->setBuffer(*buf);
                par.insertInset(par.size(), inset.release(), font, change);
+       } else if (token == "\\twohyphens" || token == "\\threehyphens") {
+               // Ideally, this should be done by lyx2lyx, but lyx2lyx does not know the
+               // running font and does not know anything about layouts (and CopyStyle).
+               Layout const & layout(par.layout());
+               FontInfo info = font.fontInfo();
+               info.realize(layout.resfont);
+               if (layout.pass_thru || info.family() == TYPEWRITER_FAMILY) {
+                       if (token == "\\twohyphens")
+                               par.insert(par.size(), from_ascii("--"), font, change);
+                       else
+                               par.insert(par.size(), from_ascii("---"), font, change);
+               } else {
+                       if (token == "\\twohyphens")
+                               par.insertChar(par.size(), 0x2013, font, change);
+                       else
+                               par.insertChar(par.size(), 0x2014, font, change);
+               }
        } else if (token == "\\backslash") {
                par.appendChar('\\', font, change);
        } else if (token == "\\LyXTable") {
@@ -1019,14 +1036,36 @@ void Text::insertChar(Cursor & cur, char_type c)
                }
        }
 
-       par.insertChar(cur.pos(), c, cur.current_font,
+       pos_type pos = cur.pos();
+       if (!cur.paragraph().isPassThru() && owner_->lyxCode() != IPA_CODE &&
+           cur.current_font.fontInfo().family() != TYPEWRITER_FAMILY &&
+           c == '-' && pos > 0) {
+               if (par.getChar(pos - 1) == '-') {
+                       // convert "--" to endash
+                       par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
+                       c = 0x2013;
+                       pos--;
+               } else if (par.getChar(pos - 1) == 0x2013) {
+                       // convert "---" to emdash
+                       par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
+                       c = 0x2014;
+                       pos--;
+               } else if (par.getChar(pos - 1) == 0x2014) {
+                       // convert "----" to "-"
+                       par.eraseChar(pos - 1, cur.buffer()->params().track_changes);
+                       c = '-';
+                       pos--;
+               }
+       }
+
+       par.insertChar(pos, c, cur.current_font,
                cur.buffer()->params().track_changes);
        cur.checkBufferStructure();
 
 //             cur.screenUpdateFlags(Update::Force);
        bool boundary = cur.boundary()
-               || tm.isRTLBoundary(cur.pit(), cur.pos() + 1);
-       setCursor(cur, cur.pit(), cur.pos() + 1, false, boundary);
+               || tm.isRTLBoundary(cur.pit(), pos + 1);
+       setCursor(cur, cur.pit(), pos + 1, false, boundary);
        charInserted(cur);
 }
 
index 6de70f9c1f3b5d2c81c51ac0b4995d5fe88eace8..2f49a071f5f663dd5df3b3a108cafaefe0e5225c 100644 (file)
@@ -82,7 +82,8 @@ Context::Context(bool need_layout_,
        : need_layout(need_layout_),
          need_end_layout(false), need_end_deeper(false),
          has_item(false), deeper_paragraph(false),
-         new_layout_allowed(true), textclass(textclass_),
+         new_layout_allowed(true), merging_hyphens_allowed(true),
+         textclass(textclass_),
          layout(layout_), parent_layout(parent_layout_),
          font(font_)
 {
@@ -240,6 +241,8 @@ void Context::dump(ostream & os, string const & desc) const
                os << "deeper_paragraph ";
        if (new_layout_allowed)
                os << "new_layout_allowed ";
+       if (merging_hyphens_allowed)
+               os << "merging_hyphens_allowed ";
        if (!extra_stuff.empty())
                os << "extrastuff=[" << extra_stuff << "] ";
        if (!par_extra_stuff.empty())
index cf006f3222964c6aa50d7d2f725045f3253d5924..ad95f02b0b8bd374be5b90532146e9faeb2f468f 100644 (file)
@@ -146,6 +146,8 @@ public:
         * would not work.
         */
        bool new_layout_allowed;
+       /// May -- be converted to endash and --- to emdash?
+       bool merging_hyphens_allowed;
        /// Did we output anything yet in any context?
        static bool empty;
 
index 16c3aef21f52faf096d77a1093ee7943321eb637..8db24eddbc964950f3f8e42bee14d768b18073e3 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index a463be6ef041283059c2797f38bbdf0332901f2a..1e98d78c60286b1a9d64ad5dda77a4fccf0041fc 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index f8e633d2839418b93deb73d6f26b0b66c9a1dfc5..0d9fd276587da3612b16b2ff5ddd32f5d37d60c4 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index 3fe1ec2a952768fdee4b6de7fb795c5e4a122c9a..c92cfb50cc667571d075cf8388d8d8c4365c898f 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index d74702eb47f71871e42dc726c879318356ca9c83..fdabbdc47803a3cf5a419da0874f8e229064d0ce 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index 6f5c5b6bc3cd0726773ce6438f05f9cfe2e3c198..0f81d3b2fa5a275c54936cd66db166f5c88ada9a 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index c45285a727d184e6a8e0e5361a32167fdcacb88b..b83dfee0db801e6094edc5dca930bdc280542eb5 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index cfdb976a5cd6208b98c4a9692ec9db3b41224a0e..9a80d4854e7d0031d6571d16acc27bcd1141a95b 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
@@ -3507,7 +3507,7 @@ A long table
 \begin_inset Caption Standard
 
 \begin_layout Standard
-A long table -- continued
+A long table  continued
 \end_layout
 
 \end_inset
@@ -6769,7 +6769,7 @@ fy ligature break.
 \end_layout
 
 \begin_layout Standard
-There are dashes: endash in short form -- and long form –, emdash is alike: --- and —. If we really want several hyphens in a row, we need to separate them: -
+There are dashes: endash in short form – and long form –, emdash is alike: — and —. If we really want several hyphens in a row, we need to separate them: -
 \begin_inset ERT
 status collapsed
 
index 33fcffcc6a736da3af49290b18215ebdb2fda05b..a072761988c630834743a8e354429303cd12391d 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass memoir
index 537bb512baa9e9ac821cec46a008c0d9fffda4a1..4bde71a59c8b0e3365d9de207cb91854c649b434 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass amsart
index 69329b7b4e5c7060c7505f133d29dde8e719a471..07654f917ee36a549d81fc0cbd7a9dda4e15403d 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass book
index 4e85153e2178cf253e746528c1d2e6bb7e9fd8bc..31bd868c3eb55af815a2c897419745f86e082f19 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass scrbook
index 88178202a5f433fda637399d5f87d10497f09b84..7cb176df8da5f0b1af6c598326cd64f8661d29c0 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index 1be3357997fa0d37464a8ad116cd3cce810fb38d..08c00977678dd8352a2700fee8e5d1142c60ef27 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index 7b2c177c1e63472e864ea99dd7ac16a3c8e75607..94dca029cea81082e765fcab8b36b2a5e1320f2a 100644 (file)
@@ -1,5 +1,5 @@
 #LyX file created by tex2lyx 2.2
-\lyxformat 480
+\lyxformat 481
 \begin_document
 \begin_header
 \textclass article
index f2fe12def5dbe36ad70bfca283d103a82ed923e0..ce0443b7aa3c218003f8b0c2b9d10c0b9b4044bf 100644 (file)
@@ -2387,9 +2387,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                else if (t.cat() == catOther ||
                               t.cat() == catAlign ||
                               t.cat() == catParameter) {
-                       // This translates "&" to "\\&" which may be wrong...
                        context.check_layout(os);
-                       os << t.cs();
+                       if (t.asInput() == "-" && p.next_token().asInput() == "-" &&
+                           context.merging_hyphens_allowed &&
+                           context.font.family != "ttfamily" &&
+                           !context.layout->pass_thru) {
+                               if (p.next_next_token().asInput() == "-") {
+                                       // --- is emdash
+                                       os << to_utf8(docstring(1, 0x2014));
+                                       p.get_token();
+                               } else
+                                       // -- is endash
+                                       os << to_utf8(docstring(1, 0x2013));
+                               p.get_token();
+                       } else
+                               // This translates "&" to "\\&" which may be wrong...
+                               os << t.cs();
                }
 
                else if (p.isParagraph()) {
@@ -3240,7 +3253,10 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer,
                else if (t.cs() == "textipa") {
                        context.check_layout(os);
                        begin_inset(os, "IPA\n");
+                       bool merging_hyphens_allowed = context.merging_hyphens_allowed;
+                       context.merging_hyphens_allowed = false;
                        parse_text_in_inset(p, os, FLAG_ITEM, outer, context);
+                       context.merging_hyphens_allowed = merging_hyphens_allowed;
                        end_inset(os);
                        preamble.registerAutomaticallyLoadedPackage("tipa");
                        preamble.registerAutomaticallyLoadedPackage("tipx");
index f29b0494c1645291e21d363120e8bbe077258772..d1c747612a09dc973b3bc7fdabf5dc12df3bb226 100644 (file)
@@ -36,8 +36,8 @@ extern char const * const lyx_version_info;
 
 // Do not remove the comment below, so we get merge conflict in
 // independent branches. Instead add your own.
-#define LYX_FORMAT_LYX 480 // spitz: question and question* environments
-#define LYX_FORMAT_TEX2LYX 480
+#define LYX_FORMAT_LYX 481 // gb: endash and emdash
+#define LYX_FORMAT_TEX2LYX 481
 
 #if LYX_FORMAT_TEX2LYX != LYX_FORMAT_LYX
 #ifndef _MSC_VER