\end_layout
\begin_layout Description
-Allowbreak:
+allowbreak:
\bar under
\begin_inset Box Boxed
\end_layout
+\begin_layout Description
+baselineskip%:
+\begin_inset Box Boxed
+position "t"
+hor_pos "c"
+has_inner_box 1
+inner_pos "t"
+use_parbox 1
+use_makebox 0
+width "250baselineskip%"
+special "none"
+height "50baselineskip%"
+height_special "none"
+thickness "4baselineskip%"
+separation "9baselineskip%"
+shadowsize "4pt"
+framecolor "black"
+backgroundcolor "none"
+status open
+
+\begin_layout Plain Layout
+test
+\end_layout
+
+\end_inset
+
+
+\begin_inset CommandInset line
+LatexCommand rule
+offset "40baselineskip%"
+width "800baselineskip%"
+height "5.3baselineskip%"
+
+\end_inset
+
+
+\end_layout
+
+\begin_deeper
+\begin_layout Standard
+\begin_inset VSpace 200baselineskip%
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+Vertical space above this paragraph is 2·baselineskip.
+\end_layout
+
+\begin_layout Standard
+\begin_inset space \hspace*{}
+\length 75.2baselineskip%
+\end_inset
+
+Paragraph with
+\begin_inset space \hspace{}
+\length 135baselineskip%
+\end_inset
+
+horizontal space insets using baselineskip.
+\end_layout
+
+\end_deeper
\end_body
\end_document
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
'''
-This module offers several free functions to help with lyx2lyx'ing.
-More documentaton is below, but here is a quick guide to what
+This module offers several free functions to help with lyx2lyx'ing.
+More documentation is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
add_to_preamble(document, text):
default index is 0, so the material is inserted at the beginning.
Prepends a comment "% Added by lyx2lyx" to text.
-put_cmd_in_ert(arg):
- Here arg should be a list of strings (lines), which we want to
+put_cmd_in_ert(cmd):
+ Here cmd should be a list of strings (lines), which we want to
wrap in ERT. Returns a list of strings so wrapped.
A call to this routine will often go something like this:
i = find_token('\\begin_inset FunkyInset', ...)
from parser_tools import find_token, find_end_of_inset
from unicode_symbols import unicode_reps
-
# This will accept either a list of lines or a single line.
# It is bad practice to pass something with embedded newlines,
# though we will handle that.
# It should really be a list.
def insert_to_preamble(document, text, index = 0):
""" Insert text to the preamble at a given line"""
-
+
if not type(text) is list:
# split on \n just in case
# it'll give us the one element list we want
# if there's no \n, too
text = text.split('\n')
-
+
text.insert(0, "% Added by lyx2lyx")
document.preamble[index:index] = text
-def put_cmd_in_ert(arg):
- '''
- arg should be a list of lines we want to wrap in ERT.
- Returns a list of strings, with the lines so wrapped.
- '''
-
+# A dictionary of Unicode->LICR mappings for use in a Unicode string's translate() method
+# Created from the reversed list to keep the first of alternative definitions.
+licr_table = dict((ord(ch), cmd) for cmd, ch in unicode_reps[::-1])
+
+def put_cmd_in_ert(cmd):
+ """
+ Return ERT inset wrapping `cmd` as a list of strings.
+
+ `cmd` can be a string or list of lines. Non-ASCII characters are converted
+ to the respective LICR macros if defined in unicodesymbols.
+ """
ret = ["\\begin_inset ERT", "status collapsed", "", "\\begin_layout Plain Layout", ""]
- # It will be faster for us to work with a single string internally.
- # That way, we only go through the unicode_reps loop once.
- if type(arg) is list:
- s = "\n".join(arg)
+ # It will be faster to work with a single string internally.
+ if isinstance(cmd, list):
+ cmd = u"\n".join(cmd)
else:
- s = arg
- for rep in unicode_reps:
- s = s.replace(rep[1], rep[0])
- s = s.replace('\\', "\\backslash\n")
- ret += s.splitlines()
+ cmd = u"%s" % cmd # ensure it is an unicode instance
+ cmd = cmd.translate(licr_table)
+ cmd = cmd.replace("\\", "\\backslash\n")
+ ret += cmd.splitlines()
ret += ["\\end_layout", "", "\\end_inset"]
return ret
def latex_length(slen):
- '''
+ '''
Convert lengths to their LaTeX representation. Returns (bool, length),
where the bool tells us if it was a percentage, and the length is the
LaTeX representation.
# the + always precedes the -
# Convert relative lengths to LaTeX units
- units = {"text%":"\\textwidth", "col%":"\\columnwidth",
- "page%":"\\paperwidth", "line%":"\\linewidth",
- "theight%":"\\textheight", "pheight%":"\\paperheight"}
+ units = {"col%": "\\columnwidth",
+ "text%": "\\textwidth",
+ "page%": "\\paperwidth",
+ "line%": "\\linewidth",
+ "theight%": "\\textheight",
+ "pheight%": "\\paperheight",
+ "baselineskip%": "\\baselineskip"
+ }
for unit in list(units.keys()):
i = slen.find(unit)
if i == -1:
import sys, os
from parser_tools import find_token, find_end_of, find_tokens, get_value
-from unicode_symbols import read_unicodesymbols
+from unicode_symbols import unicode_reps
####################################################################
# Private helper functions
return l
-# FIXME: Remove this function if the version imported from unicode_symbols works.
-# This function was the predecessor from that function, that in the meanwhile got
-# new fixes.
-def read_unicodesymbols2():
- " Read the unicodesymbols list of unicode characters and corresponding commands."
-
- # Provide support for both python 2 and 3
- PY2 = sys.version_info[0] == 2
- if not PY2:
- unichr = chr
- # End of code to support for both python 2 and 3
-
- pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
- fp = open(os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols'))
- spec_chars = []
- # Two backslashes, followed by some non-word character, and then a character
- # in brackets. The idea is to check for constructs like: \"{u}, which is how
- # they are written in the unicodesymbols file; but they can also be written
- # as: \"u or even \" u.
- r = re.compile(r'\\\\(\W)\{(\w)\}')
- for line in fp.readlines():
- if line[0] != '#' and line.strip() != "":
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # replace \" by " (for characters with diaeresis)
- try:
- [ucs4,command,dead] = line.split(None,2)
- if command[0:1] != "\\":
- continue
- spec_chars.append([command, unichr(eval(ucs4))])
- except:
- continue
- m = r.match(command)
- if m != None:
- command = "\\\\"
- # If the character is a double-quote, then we need to escape it, too,
- # since it is done that way in the LyX file.
- if m.group(1) == "\"":
- command += "\\"
- commandbl = command
- command += m.group(1) + m.group(2)
- commandbl += m.group(1) + ' ' + m.group(2)
- spec_chars.append([command, unichr(eval(ucs4))])
- spec_chars.append([commandbl, unichr(eval(ucs4))])
- fp.close()
- return spec_chars
-
-
def extract_argument(line):
'Extracts a LaTeX argument from the start of line. Returns (arg, rest).'
return retval
-unicode_reps = read_unicodesymbols()
-
#Bug 5022....
#Might should do latex2ert first, then deal with stuff that DOESN'T
#end up inside ERT. That routine could be modified so that it returned
if len(words) > 1 and words[0] == "\\begin_inset" and \
words[1] in ["CommandInset", "External", "Formula", "Graphics", "listings"]:
# must not replace anything in insets that store LaTeX contents in .lyx files
- # (math and command insets withut overridden read() and write() methods
+ # (math and command insets without overridden read() and write() methods)
j = find_end_of_inset(document.body, i)
if j == -1:
- document.warning("Malformed LyX document: Can't find end of Formula inset at line " + str(i))
+ document.warning("Malformed LyX document: Can't find end of inset at line " + str(i))
i += 1
else:
i = j
from parser_tools import (del_token, del_value, del_complete_lines,
find_complete_lines, find_end_of, find_end_of_layout, find_end_of_inset,
find_re, find_token, find_token_backwards, get_containing_inset,
- get_containing_layout, get_bool_value, get_value, get_quoted_value)
-# find_tokens, find_token_exact, is_in_inset,
-# check_token, get_option_value
+ get_containing_layout, get_bool_value, get_value, get_quoted_value,
+ is_in_inset)
+# find_tokens, find_token_exact, check_token, get_option_value
-from lyx2lyx_tools import add_to_preamble, put_cmd_in_ert, revert_font_attrs, \
- insert_to_preamble
-# get_ert, lyx2latex, \
-# lyx2verbatim, length_in_bp, convert_info_insets
-# latex_length, revert_flex_inset, hex2ratio, str2bool
+from lyx2lyx_tools import (add_to_preamble, put_cmd_in_ert, revert_font_attrs,
+ insert_to_preamble, latex_length)
+# get_ert, lyx2latex, lyx2verbatim, length_in_bp, convert_info_insets
+# revert_flex_inset, hex2ratio, str2bool
####################################################################
# Private helper functions
continue
if get_value(document.body, 'template', i, j) == "Date":
document.body[i : j + 1] = put_cmd_in_ert("\\today ")
- i += 1
- continue
+ i = j+1 # skip inset
def convert_inputenc(document):
" Replace no longer supported input encoding settings. "
- i = find_token(document.header, "\\inputenc", 0)
+ i = find_token(document.header, "\\inputenc")
if i == -1:
return
if get_value(document.header, "\\inputencoding", i) == "pt254":
while i+1 < len(lines):
i += 1
line = lines[i]
- # skip lines without any dashes:
+ # skip lines without dashes:
if not re.search(u"[\u2013\u2014]|\\twohyphens|\\threehyphens", line):
continue
# skip label width string (see bug 10243):
continue
# do not touch hyphens in some insets (cf. lyx_2_2.convert_dashes):
try:
- value, start, end = get_containing_inset(lines, i)
+ inset_type, start, end = get_containing_inset(lines, i)
except TypeError: # no containing inset
- value, start, end = "no inset", -1, -1
- if (value.split()[0] in
+ inset_type, start, end = "no inset", -1, -1
+ if (inset_type.split()[0] in
["CommandInset", "ERT", "External", "Formula",
"FormulaMacro", "Graphics", "IPA", "listings"]
- or value == "Flex Code"):
+ or inset_type == "Flex Code"):
i = end
continue
try:
- layout, start, end, j = get_containing_layout(lines, i)
+ layoutname, start, end, j = get_containing_layout(lines, i)
except TypeError: # no (or malformed) containing layout
document.warning("Malformed LyX document: "
"Can't find layout at line %d" % i)
continue
- if layout == "LyX-Code":
+ if layoutname == "LyX-Code":
i = end
continue
# literal dash followed by a word or no-break space:
- if re.search(u"[\u2013\u2014]([\w\u00A0]|$)", line,
- flags=re.UNICODE):
+ if re.search(u"[\u2013\u2014]([\w\u00A0]|$)",
+ line, flags=re.UNICODE):
has_literal_dashes = True
# ligature dash followed by word or no-break space on next line:
if (re.search(r"(\\twohyphens|\\threehyphens)", line) and
'"ligature" dashes.\n Line breaks may have changed. '
'See UserGuide chapter 3.9.1 for details.')
break
- if has_literal_dashes:
+
+ if has_literal_dashes and not has_ligature_dashes:
use_dash_ligatures = False
- elif has_ligature_dashes:
+ elif has_ligature_dashes and not has_literal_dashes:
use_dash_ligatures = True
+
# insert the setting if there is a preferred value
if use_dash_ligatures is not None:
- i = find_token(document.header, "\\graphics")
- document.header.insert(i, "\\use_dash_ligatures %s"
+ document.header.insert(-1, "\\use_dash_ligatures %s"
% str(use_dash_ligatures).lower())
def revert_baselineskip(document):
- " Revert baselineskips to TeX code "
- i = 0
- vspaceLine = 0
- hspaceLine = 0
- while True:
- regexp = re.compile(r'^.*baselineskip%.*$')
- i = find_re(document.body, regexp, i)
- if i == -1:
- return
- vspaceLine = find_token(document.body, "\\begin_inset VSpace", i)
- if vspaceLine == i:
- # output VSpace inset as TeX code
- # first read out the values
- beg = document.body[i].rfind("VSpace ");
- end = document.body[i].rfind("baselineskip%");
- baselineskip = float(document.body[i][beg + 7:end]);
- # we store the value in percent, thus divide by 100
- baselineskip = baselineskip/100;
- baselineskip = str(baselineskip);
- # check if it is the starred version
- if document.body[i].find('*') != -1:
- star = '*'
- else:
- star = ''
- # now output TeX code
- endInset = find_end_of_inset(document.body, i)
- if endInset == -1:
- document.warning("Malformed LyX document: Missing '\\end_inset' of VSpace inset.")
- return
- else:
- document.body[vspaceLine: endInset + 1] = put_cmd_in_ert("\\vspace" + star + '{' + baselineskip + "\\baselineskip}")
- hspaceLine = find_token(document.body, "\\begin_inset space \\hspace", i - 1)
- document.warning("hspaceLine: " + str(hspaceLine))
- document.warning("i: " + str(i))
- if hspaceLine == i - 1:
- # output space inset as TeX code
- # first read out the values
- beg = document.body[i].rfind("\\length ");
- end = document.body[i].rfind("baselineskip%");
- baselineskip = float(document.body[i][beg + 7:end]);
- document.warning("baselineskip: " + str(baselineskip))
- # we store the value in percent, thus divide by 100
- baselineskip = baselineskip/100;
- baselineskip = str(baselineskip);
- # check if it is the starred version
- if document.body[i-1].find('*') != -1:
- star = '*'
- else:
- star = ''
- # now output TeX code
- endInset = find_end_of_inset(document.body, i)
- if endInset == -1:
- document.warning("Malformed LyX document: Missing '\\end_inset' of space inset.")
- return
- else:
- document.body[hspaceLine: endInset + 1] = put_cmd_in_ert("\\hspace" + star + '{' + baselineskip + "\\baselineskip}")
-
- i = i + 1
+ " Revert baselineskips to TeX code "
+ i = 0
+ regexp = re.compile(r'.*baselineskip%.*')
+ while True:
+ i = i + 1
+ i = find_re(document.body, regexp, i)
+ if i == -1:
+ return
+ if document.body[i].startswith("\\begin_inset VSpace"):
+ # output VSpace inset as TeX code
+ end = find_end_of_inset(document.body, i)
+ if end == -1:
+ document.warning("Malformed LyX document: "
+ "Can't find end of VSpace inset at line %d." % i)
+ continue
+ # read out the value
+ baselineskip = document.body[i].split()[-1]
+ # check if it is the starred version
+ star = '*' if '*' in document.body[i] else ''
+ # now output TeX code
+ cmd = "\\vspace%s{%s}" %(star, latex_length(baselineskip)[1])
+ document.body[i:end+1] = put_cmd_in_ert(cmd)
+ i += 8
+ continue
+ begin, end = is_in_inset(document.body, i, "\\begin_inset space \\hspace")
+ if begin != - 1:
+ # output space inset as TeX code
+ baselineskip = document.body[i].split()[-1]
+ star = '*' if '*' in document.body[i-1] else ''
+ cmd = "\\hspace%s{%s}" %(star, latex_length(baselineskip)[1])
+ document.body[begin:end+1] = put_cmd_in_ert(cmd)
def revert_rotfloat(document):
More documentaton is below, but here is a quick guide to what
they do. Optional arguments are marked by brackets.
-find_token(lines, token, start[, end[, ignorews]]):
+find_token(lines, token[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
token is found at the beginning. Returns -1 if not
found.
in whitespace do not count, except that there must be no
extra whitespace following token itself.
-find_token_exact(lines, token, start[, end]):
+find_token_exact(lines, token[, start[, end]]):
As find_token, but with ignorews set to True.
-find_tokens(lines, tokens, start[, end[, ignorews]]):
+find_tokens(lines, tokens[, start[, end[, ignorews]]]):
Returns the first line i, start <= i < end, on which
one of the tokens in tokens is found at the beginning.
Returns -1 if not found.
in whitespace do not count, except that there must be no
extra whitespace following token itself.
-find_tokens_exact(lines, token, start[, end]):
+find_tokens_exact(lines, token[, start[, end]]):
As find_tokens, but with ignorews True.
find_token_backwards(lines, token, start):
is_in_inset(document.body, i, "\\begin_inset Tabular")
returns (-1,-1) if `i` is not within a "Tabular" inset (i.e. a table).
If it is, then it returns the line on which the table begins and the one
- on which it ends. Note that this pair will evaulate to
- boolean True, so
+ on which it ends.
+ Note that this pair will evaluate to boolean True, so (with the optional
+ default value set to False)
if is_in_inset(..., default=False):
will do what you expect.
"""
--- /dev/null
+# This file is part of lyx2lyx
+# -*- coding: utf-8 -*-
+# Copyright (C) 2018 The LyX team
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+" This module tests the auxiliary functions for lyx2lyx."
+
+from lyx2lyx_tools import *
+
+import unittest
+
+class TestParserTools(unittest.TestCase):
+
+ def test_put_cmd_in_ert(self):
+ ert = ['\\begin_inset ERT',
+ 'status collapsed',
+ '',
+ '\\begin_layout Plain Layout',
+ '',
+ u'\\backslash',
+ u'texttt{Gr\\backslash',
+ u'"{u}\\backslash',
+ u'ss{}e}',
+ '\\end_layout',
+ '',
+ '\\end_inset']
+ self.assertEqual(put_cmd_in_ert(u"\\texttt{Grüße}"), ert)
+ self.assertEqual(put_cmd_in_ert([u"\\texttt{Grüße}"]), ert)
+
+ def test_latex_length(self):
+ self.assertEqual(latex_length("-30.5col%"), (True, "-0.305\\columnwidth"))
+ self.assertEqual(latex_length("35baselineskip%"), (True, "0.35\\baselineskip"))
+ self.assertEqual(latex_length("11em"), (False, "11em"))
+ self.assertEqual(latex_length("-0.4pt"), (False, "-0.4pt"))
+
+
+
+if __name__ == '__main__':
+ unittest.main()
" Import unicode_reps from this module for access to the unicode<->LaTeX mapping. "
-import sys, os, re
+import sys, os, re, codecs
# Provide support for both python 2 and 3
PY2 = sys.version_info[0] == 2
def read_unicodesymbols():
" Read the unicodesymbols list of unicode characters and corresponding commands."
- pathname = os.path.abspath(os.path.dirname(sys.argv[0]))
+ pathname = os.path.abspath(os.path.dirname(__file__))
filename = os.path.join(pathname.strip('lyx2lyx'), 'unicodesymbols')
- # For python 3+ we have to specify the encoding for those systems
- # where the default is not UTF-8
- fp = open(filename, encoding="utf8") if (not PY2) else open(filename)
+ # Read as Unicode strings in both Python 2 and 3
+ # Specify the encoding for those systems where the default is not UTF-8
+ fp = codecs.open(filename, encoding="utf8")
- spec_chars = []
# A backslash, followed by some non-word character, and then a character
# in brackets. The idea is to check for constructs like: \"{u}, which is how
# they are written in the unicodesymbols file; but they can also be written
# The two backslashes in the string literal are needed to specify a literal
# backslash in the regex. Without r prefix, these would be four backslashes.
r = re.compile(r'\\(\W)\{(\w)\}')
+
+ spec_chars = []
for line in fp.readlines():
- if line[0] != '#' and line.strip() != "":
- # Note: backslashes in the string literals with r prefix are not escaped,
- # so one backslash in the source file equals one backslash in memory.
- # Without r prefix backslahses are escaped, so two backslashes in the
- # source file equal one backslash in memory.
- line=line.replace(' "',' ') # remove all quotation marks with spaces before
- line=line.replace('" ',' ') # remove all quotation marks with spaces after
- line=line.replace(r'\"','"') # unescape "
- line=line.replace(r'\\','\\') # unescape \
- try:
- [ucs4,command,dead] = line.split(None,2)
- if command[0:1] != "\\":
- continue
- if (line.find("notermination=text") < 0 and
- line.find("notermination=both") < 0 and command[-1] != "}"):
- command = command + "{}"
- spec_chars.append([command, unichr(eval(ucs4))])
- except:
+ if not line.strip() or line.startswith('#'):
+ # skip empty lines and comments
+ continue
+ # Note: backslashes in the string literals with r prefix are not escaped,
+ # so one backslash in the source file equals one backslash in memory.
+ # Without r prefix backslashes are escaped, so two backslashes in the
+ # source file equal one backslash in memory.
+ line=line.replace(' "',' ') # remove all quotation marks with spaces before
+ line=line.replace('" ',' ') # remove all quotation marks with spaces after
+ line=line.replace(r'\"','"') # unescape "
+ line=line.replace(r'\\','\\') # unescape \
+ try:
+ [ucs4,command,dead] = line.split(None,2)
+ if command[0:1] != "\\":
continue
- m = r.match(command)
- if m != None:
- command = "\\"
- commandbl = command
- command += m.group(1) + m.group(2)
- commandbl += m.group(1) + ' ' + m.group(2)
- spec_chars.append([command, unichr(eval(ucs4))])
- spec_chars.append([commandbl, unichr(eval(ucs4))])
+ literal_char = unichr(int(ucs4, 16))
+ if (line.find("notermination=text") < 0 and
+ line.find("notermination=both") < 0 and command[-1] != "}"):
+ command = command + "{}"
+ spec_chars.append([command, literal_char])
+ except:
+ continue
+ m = r.match(command)
+ if m != None:
+ command = "\\"
+ commandbl = command
+ command += m.group(1) + m.group(2)
+ commandbl += m.group(1) + ' ' + m.group(2)
+ spec_chars.append([command, literal_char])
+ spec_chars.append([commandbl, literal_char])
fp.close()
return spec_chars
unicode_reps = read_unicodesymbols()
+