From 552da336ac9ef253edd0716c02f070d95755d479 Mon Sep 17 00:00:00 2001 From: Jean-Marc Lasgouttes Date: Sun, 16 Nov 2008 23:24:56 +0000 Subject: [PATCH] Now tex2lyx is able to parse stuff that is in unicodesymbols file, as well as accents. Time will tell how robust this is. All support for InsetLatexAccent has been removed. I did not do the support for \inputencoding yet, because it is more difficult to get right... git-svn-id: svn://svn.lyx.org/lyx/lyx-devel/trunk@27596 a592a061-630c-0410-9148-cb99ea01b6c8 --- src/tex2lyx/test/test.ltx | 6 +++ src/tex2lyx/text.cpp | 85 +++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 48 deletions(-) diff --git a/src/tex2lyx/test/test.ltx b/src/tex2lyx/test/test.ltx index c94df237b9..fe37fb5ebe 100644 --- a/src/tex2lyx/test/test.ltx +++ b/src/tex2lyx/test/test.ltx @@ -75,6 +75,12 @@ foo & bar \\ bar & foo \end{tabular} +Let's try a few unicode characters: the (R) symbol \textregistered +(and the same one with braces \textregistered{} and a space after) or +maybe an accented a \'{a} or this one \'a or this \^\i. + +Watch out: \textregistered should be glued to its successor here. + Final Text. \end{document} diff --git a/src/tex2lyx/text.cpp b/src/tex2lyx/text.cpp index 3d56a919f2..a34ccc448a 100644 --- a/src/tex2lyx/text.cpp +++ b/src/tex2lyx/text.cpp @@ -17,6 +17,7 @@ #include "tex2lyx.h" #include "Context.h" +#include "Encoding.h" #include "FloatList.h" #include "Layout.h" #include "Length.h" @@ -2239,21 +2240,6 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, handle_ert(os, oss.str(), context); } -#if 0 -//FIXME: rewrite this - else if (t.cs() == "\"") { - context.check_layout(os); - string const name = p.verbatim_item(); - if (name == "a") os << '\xe4'; - else if (name == "o") os << '\xf6'; - else if (name == "u") os << '\xfc'; - else if (name == "A") os << '\xc4'; - else if (name == "O") os << '\xd6'; - else if (name == "U") os << '\xdc'; - else handle_ert(os, "\"{" + name + "}", context); - } -#endif - // Problem: \= creates a tabstop inside the tabbing environment // and else an accent. In the latter case we really would want // \={o} instead of \= o. @@ -2264,33 +2250,22 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, || t.cs() == "'" || t.cs() == "`" || t.cs() == "~" || t.cs() == "." || t.cs() == "=") { // we need the trim as the LyX parser chokes on such spaces - // The argument of InsetLatexAccent is parsed as a - // subset of LaTeX, so don't parse anything here, - // but use the raw argument. - // Otherwise we would convert \~{\i} wrongly. - // This will of course not translate \~{\ss} to \~{ß}, - // but that does at least compile and does only look - // strange on screen. - context.check_layout(os); - os << "\\i \\" << t.cs() << "{" - << trim(p.verbatim_item(), " ") - << "}\n"; - } - -#if 0 -//FIXME: rewrite this - else if (t.cs() == "ss") { - context.check_layout(os); - os << "\xdf"; - skip_braces(p); // eat {} - } -#endif - - else if (t.cs() == "i" || t.cs() == "j" || t.cs() == "l" || - t.cs() == "L") { context.check_layout(os); - os << "\\i \\" << t.cs() << "{}\n"; - skip_braces(p); // eat {} + // try to see whether the string is in unicodesymbols + docstring rem; + string command = t.asInput() + "{" + + trim(p.verbatim_item()) + + "}"; + docstring s = encodings.fromLaTeXCommand(from_utf8(command), rem); + if (!s.empty()) { + if (!rem.empty()) + cerr << "When parsing " << command + << ", result is " << to_utf8(s) + << "+" << to_utf8(rem) << endl; + os << to_utf8(s); + } else + // we did not find a non-ert version + handle_ert(os, command, context); } else if (t.cs() == "\\") { @@ -2544,6 +2519,18 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, } else { + // try to see whether the string is in unicodesymbols + docstring rem; + docstring s = encodings.fromLaTeXCommand(from_utf8(t.asInput()), rem); + if (!s.empty()) { + if (!rem.empty()) + cerr << "When parsing " << t.cs() + << ", result is " << to_utf8(s) + << "+" << to_utf8(rem) << endl; + context.check_layout(os); + os << to_utf8(s); + skip_braces(p); // eat {} + } //cerr << "#: " << t << " mode: " << mode << endl; // heuristic: read up to next non-nested space /* @@ -2557,14 +2544,16 @@ void parse_text(Parser & p, ostream & os, unsigned flags, bool outer, cerr << "found ERT: " << s << endl; handle_ert(os, s + ' ', context); */ - string name = t.asInput(); - if (p.next_token().asInput() == "*") { - // Starred commands like \vspace*{} - p.get_token(); // Eat '*' - name += '*'; + else { + string name = t.asInput(); + if (p.next_token().asInput() == "*") { + // Starred commands like \vspace*{} + p.get_token(); // Eat '*' + name += '*'; + } + if (!parse_command(name, p, os, outer, context)) + handle_ert(os, name, context); } - if (! parse_command(name, p, os, outer, context)) - handle_ert(os, name, context); } if (flags & FLAG_LEAVE) { -- 2.39.2