int const max_count = 50;
int count = 0;
+ // Maximum number of binary chars allowed for latex detection
+ int const max_bin = 5;
+
string str;
string format;
bool firstLine = true;
bool backslash = false;
+ bool maybelatex = false;
+ int binchars = 0;
int dollars = 0;
- while ((count++ < max_count) && format.empty()) {
+ while ((count++ < max_count) && format.empty() && binchars <= max_bin) {
if (ifs.eof())
break;
contains(str, "$$") ||
contains(str, "\\[") ||
contains(str, "\\]"))
- format = "latex";
+ maybelatex = true;
else {
if (contains(str, '\\'))
backslash = true;
dollars += count_char(str, '$');
+ if (backslash && dollars > 1)
+ // inline equation
+ maybelatex = true;
}
+
+ binchars += count_bin_chars(str);
}
- if (format.empty() && backslash && dollars > 1)
- // inline equation
+ if (format.empty() && binchars <= max_bin && maybelatex)
format = "latex";
if (format.empty()) {
}
+/// Count the characters of \a str (assumed to be utf-8 encoded) that hint
+/// at binary rather than textual content, i.e. the characters belonging to
+/// the unicode classes Zl, Zp, Cc, Cf, Cs, Co or Cn.
+/// \return the number of such characters.
+int count_bin_chars(string const & str)
+{
+	// simplified() trims the string and replaces each run of white space
+	// with a single space, so '\t', '\n', '\v', '\f' and '\r' are never
+	// counted although they are control characters.
+	QString const qstr = toqstr(str).simplified();
+	int count = 0;
+	QString::const_iterator cit = qstr.begin();
+	QString::const_iterator end = qstr.end();
+	for (; cit != end; ++cit) {
+		QChar::Category cat = cit->category();
+		// QString holds UTF-16 code units, so a character outside the
+		// BMP is stored as a surrogate pair. Classify a well-formed
+		// pair by the code point it encodes instead of counting both
+		// halves as Cs; unpaired surrogates are still reported as Cs.
+		if (cit->isHighSurrogate() && cit + 1 != end
+		    && (cit + 1)->isLowSurrogate()) {
+			cat = QChar::category(
+				QChar::surrogateToUcs4(*cit, *(cit + 1)));
+			// the low surrogate has been consumed together with
+			// the high one
+			++cit;
+		}
+		switch (cat) {
+		case QChar::Separator_Line:      // Zl
+		case QChar::Separator_Paragraph: // Zp
+		case QChar::Other_Control:       // Cc
+		case QChar::Other_Format:        // Cf
+		case QChar::Other_Surrogate:     // Cs
+		case QChar::Other_PrivateUse:    // Co
+		case QChar::Other_NotAssigned:   // Cn
+			++count;
+			break;
+		default:
+			break;
+		}
+	}
+	return count;
+}
+
+
docstring const trim(docstring const & a, char const * p)
{
LASSERT(p, return a);
/// Count all occurrences of char \a chr inside \a str
int count_char(docstring const & str, docstring::value_type chr);
+/** Count all occurrences of binary chars inside \a str.
+ It is assumed that \a str is utf-8 encoded and that a binary char
+ belongs to the unicode class names Zl, Zp, Cc, Cf, Cs, Co, or Cn
+ (excluding white space characters such as '\t', '\n', '\v', '\f', '\r').
+ See http://www.unicode.org/Public/6.2.0/ucd/UnicodeData.txt
+*/
+int count_bin_chars(std::string const & str);
+
/** Trims characters off the end and beginning of a string.
\code
trim("ccabccc", "c") == "ab".