From 14a5d07df69bb493d8ec21947f177e3e62e1ffb1 Mon Sep 17 00:00:00 2001 From: Georg Baum Date: Mon, 11 Nov 2013 21:52:14 +0100 Subject: [PATCH] Fix encoding for copying LaTeX from clipboard If we call tex2lyx on a temporary file created from the clipboard, the file is always in utf8 encoding, without any temporary encoding changes, even if it contains encoding-changing LaTeX commands. Therefore, we must tell tex2lyx to use a fixed utf8 encoding for the whole file, and this is done using the new latexclipboard format. Previously, tex2lyx assumed the encoding was latin1. As a side effect, the -e option is now also documented in the man page. --- lib/configure.py | 2 ++ src/CutAndPaste.cpp | 2 +- src/tex2lyx/Parser.cpp | 21 ++++++++++++++++----- src/tex2lyx/Parser.h | 8 +++++--- src/tex2lyx/tex2lyx.1in | 15 +++++++++++++++ src/tex2lyx/tex2lyx.cpp | 25 +++++++++++++++++++++++-- 6 files changed, 62 insertions(+), 11 deletions(-) diff --git a/lib/configure.py b/lib/configure.py index 115fb34ffb..a641d43ce8 100644 --- a/lib/configure.py +++ b/lib/configure.py @@ -553,6 +553,7 @@ def checkFormatEntries(dtl_tools): \Format luatex tex "LaTeX (LuaTeX)" "" "" "%%" "document,menu=export" "" \Format pdflatex tex "LaTeX (pdflatex)" "" "" "%%" "document,menu=export" "" \Format xetex tex "LaTeX (XeTeX)" "" "" "%%" "document,menu=export" "" +\Format latexclipboard tex "LaTeX (clipboard)" "" "" "%%" "" "" \Format text txt "Plain text" a "" "%%" "document,menu=export" "text/plain" \Format text2 txt "Plain text (pstotext)" "" "" "%%" "document" "" \Format text3 txt "Plain text (ps2ascii)" "" "" "%%" "document" "" @@ -665,6 +666,7 @@ def checkConverterEntries(): path, t2l = checkProg('a LaTeX/Noweb -> LyX converter', [in_binary_subdir, in_binary_subdir + version_suffix, in_binary_dir, in_binary_dir + version_suffix, 'tex2lyx' + version_suffix, 'tex2lyx'], rc_entry = [r'''\converter latex lyx "%% -f $$i $$o" "" +\converter latexclipboard lyx "%% -fixedenc utf8 -f $$i $$o" "" \converter literate 
lyx "%% -n -m noweb -f $$i $$o" ""'''], not_found = 'tex2lyx') if path == '': logger.warning("Failed to find tex2lyx on your system.") diff --git a/src/CutAndPaste.cpp b/src/CutAndPaste.cpp index 874b3411a5..ca014be600 100644 --- a/src/CutAndPaste.cpp +++ b/src/CutAndPaste.cpp @@ -1096,7 +1096,7 @@ bool pasteClipboardText(Cursor & cur, ErrorList & errorList, bool asParagraphs, // Then try TeX and HTML Clipboard::TextType types[2] = {Clipboard::HtmlTextType, Clipboard::LaTeXTextType}; - string names[2] = {"html", "latex"}; + string names[2] = {"html", "latexclipboard"}; for (int i = 0; i < 2; ++i) { if (type != types[i] && type != Clipboard::AnyTextType) continue; diff --git a/src/tex2lyx/Parser.cpp b/src/tex2lyx/Parser.cpp index 1f9ae6c3c5..23a1bf1a01 100644 --- a/src/tex2lyx/Parser.cpp +++ b/src/tex2lyx/Parser.cpp @@ -154,10 +154,14 @@ iparserdocstream & iparserdocstream::get(char_type &c) // -Parser::Parser(idocstream & is) - : lineno_(0), pos_(0), iss_(0), is_(is), encoding_iconv_("UTF-8"), - theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) +Parser::Parser(idocstream & is, std::string const & fixedenc) + : lineno_(0), pos_(0), iss_(0), is_(is), + encoding_iconv_(fixedenc.empty() ? 
"UTF-8" : fixedenc), + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES), + fixed_enc_(!fixedenc.empty()) { + if (fixed_enc_) + is_.setEncoding(fixedenc); } @@ -165,7 +169,9 @@ Parser::Parser(string const & s) : lineno_(0), pos_(0), iss_(new idocstringstream(from_utf8(s))), is_(*iss_), encoding_iconv_("UTF-8"), - theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES) + theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES), + // An idocstringstream can not change the encoding + fixed_enc_(true) { } @@ -261,7 +267,12 @@ bool Parser::setEncoding(std::string const & e) { //cerr << "setting encoding to " << e << std::endl; encoding_iconv_ = e; - is_.setEncoding(e); + // If the encoding is fixed, we must not change the stream encoding + // (because the whole input uses that encoding, e.g. if it comes from + // the clipboard). We still need to track the original encoding in + // encoding_iconv_, so that the generated output is correct. + if (!fixed_enc_) + is_.setEncoding(e); return true; } diff --git a/src/tex2lyx/Parser.h b/src/tex2lyx/Parser.h index ca0a917f43..15832f9fa7 100644 --- a/src/tex2lyx/Parser.h +++ b/src/tex2lyx/Parser.h @@ -172,7 +172,7 @@ class Parser { Parser & operator=(Parser const & p); public: /// - Parser(idocstream & is); + Parser(idocstream & is, std::string const & fixedenc); /// Parser(std::string const & s); /// @@ -351,10 +351,12 @@ private: std::string encoding_iconv_; /// CatCode theCatcode_[256]; - // + /// cat_type theCatcodesType_; - // + /// cat_type curr_cat_; + /// + bool fixed_enc_; }; diff --git a/src/tex2lyx/tex2lyx.1in b/src/tex2lyx/tex2lyx.1in index c91560a262..3f219622a4 100644 --- a/src/tex2lyx/tex2lyx.1in +++ b/src/tex2lyx/tex2lyx.1in @@ -21,6 +21,8 @@ options. 
.PP \fBtex2lyx\fR [ \fB\-userdir\fR \fIuserdir\fR ] [ \fB\-systemdir\fR \fIsystemdir\fR ] [ \fB\-f\fR ] [ \fB\-n\fR ] [ \fB\-c\fR \fItextclass\fR ] +[ \fB\-e\fR \fIencoding\fR ] +[ \fB\-fixedenc\fR \fIencoding\fR ] [\ \fB\-m\fR \fImodule1\fR[,\fImodule2\fR...]] [\ \fB\-s\fR\ \fIsfile1\fR[,\fIsfile2\fR...]] [ \fB\-skipchildren\fR ] [ \fB\-roundtrip\fR ] [ \fB\-copyfiles\fR ] \fIinputfile\fR [ \fIoutputfile\fR ] @@ -53,6 +55,15 @@ are loaded in the given order. If a module \fIfoo\fR depends on a module Force. \fBtex2lyx\fR will not run if the .lyx file it would generate already exists. Use the \fB\-f\fR option (carefully) to clobber any existing files. .TP +.BI \-e +Specify the default encoding using the LaTeX name as defined in the encodings +file. \fBtex2lyx\fR will use this encoding, but switch if it finds any +encoding-changing commands in the input. +.TP +.BI \-fixedenc +Specify the encoding using the LaTeX name as defined in the encodings file. +\fBtex2lyx\fR will ignore any encoding-changing commands in the input. +.TP .BI \-n Noweb. Translate a noweb (aka literate programming) file. This should be (almost?) equivalent to running \*[lq]noweb2lyx foo.tex foo.lyx\*[rq]. 
This option @@ -419,10 +430,14 @@ is your personal LyX directory, then the following files are read by tex2lyx: User's personal layout files for document classes .IP "\fI\s-1MY_LYXDIR\s0\fR/syntax.default" 4 User's personal syntax file +.IP "\fI\s-1MY_LYXDIR\s0\fR/encodings" 4 +User's personal encoding definition file .IP "\fI\s-1LIBDIR\s0\fR/layouts/*.layout" 4 System-wide layout files for document classes .IP "\fI\s-1LIBDIR\s0\fR/lib/syntax.default" 4 System-wide LaTeX syntax file +.IP "\fI\s-1LIBDIR\s0\fR/lib/encodings" 4 +System-wide encoding definition file .SH "SEE ALSO" \fIlyx@version_suffix@\fR\|(1), \fIlatex\fR\|(1) .SH "AUTHORS" diff --git a/src/tex2lyx/tex2lyx.cpp b/src/tex2lyx/tex2lyx.cpp index 8772d6bea7..9e3257cf73 100644 --- a/src/tex2lyx/tex2lyx.cpp +++ b/src/tex2lyx/tex2lyx.cpp @@ -494,7 +494,7 @@ void read_syntaxfile(FileName const & file_name) // modeled after TeX. // Unknown tokens are just silently ignored, this helps us to skip some // reLyX specific things. - Parser p(is); + Parser p(is, string()); while (p.good()) { Token const & t = p.get_token(); if (t.cat() == catEscape) { @@ -519,6 +519,7 @@ void read_syntaxfile(FileName const & file_name) string documentclass; string default_encoding; +bool fixed_encoding = false; string syntaxfile; bool copy_files = false; bool overwrite_files = false; @@ -537,6 +538,7 @@ int parse_help(string const &, string const &) "\t-m mod1[,mod2...] Load the given modules.\n" "\t-copyfiles Copy all included files to the directory of outfile.lyx.\n" "\t-e encoding Set the default encoding (latex name).\n" + "\t-fixedenc encoding Like -e, but ignore encoding changing commands while parsing.\n" "\t-f Force overwrite of .lyx files.\n" "\t-help Print this message and quit.\n" "\t-n translate literate programming (noweb, sweave,... 
) file.\n" @@ -605,6 +607,16 @@ int parse_encoding(string const & arg, string const &) } +int parse_fixed_encoding(string const & arg, string const &) +{ + if (arg.empty()) + error_message("Missing encoding string after -fixedenc switch"); + default_encoding = arg; + fixed_encoding = true; + return 1; +} + + int parse_syntaxfile(string const & arg, string const &) { if (arg.empty()) @@ -686,6 +698,7 @@ void easyParse(int & argc, char * argv[]) cmdmap["-c"] = parse_class; cmdmap["-m"] = parse_module; cmdmap["-e"] = parse_encoding; + cmdmap["-fixedenc"] = parse_fixed_encoding; cmdmap["-f"] = parse_force; cmdmap["-s"] = parse_syntaxfile; cmdmap["-n"] = parse_noweb; @@ -775,6 +788,14 @@ bool roundtripMode() } +string fixedEncoding() +{ + if (fixed_encoding) + return default_encoding; + return ""; +} + + namespace { /*! @@ -803,7 +824,7 @@ bool tex2lyx(idocstream & is, ostream & os, string encoding) } } - Parser p(is); + Parser p(is, fixed_encoding ? default_encoding : string()); p.setEncoding(encoding); //p.dump(); -- 2.39.2