3 * This file is part of LyX, the document processor.
4 * Licence details can be found in the file COPYING.
8 * Full author contact details are available in file CREDITS.
11 // This file contains most of the magic that extracts "context
12 // information" from the unstructured layout-oriented stuff in
17 #include "MathExtern.h"
19 #include "InsetMathAMSArray.h"
20 #include "InsetMathArray.h"
21 #include "InsetMathChar.h"
22 #include "InsetMathDelim.h"
23 #include "InsetMathDiff.h"
24 #include "InsetMathExFunc.h"
25 #include "InsetMathExInt.h"
26 #include "InsetMathFont.h"
27 #include "InsetMathFrac.h"
28 #include "InsetMathLim.h"
29 #include "InsetMathMatrix.h"
30 #include "InsetMathNumber.h"
31 #include "InsetMathScript.h"
32 #include "InsetMathString.h"
33 #include "InsetMathSymbol.h"
35 #include "MathParser.h"
36 #include "MathStream.h"
40 #include "support/debug.h"
41 #include "support/docstream.h"
42 #include "support/FileName.h"
43 #include "support/filetools.h"
44 #include "support/gettext.h"
45 #include "support/lstrings.h"
46 #include "support/TempFile.h"
47 #include "support/textutils.h"
48 #include "support/unique_ptr.h"
56 using namespace lyx::support;
// Names of the math functions that isKnownFunction() accepts.
// The list is terminated by a null pointer so that it can be walked
// without knowing its length.
static char const * function_names[] = {
	"arccos", "arcsin", "arctan", "arg", "bmod",
	"cos", "cosh", "cot", "coth", "csc", "deg",
	"det", "dim", "exp", "gcd", "hom", "inf", "ker",
	"lg", "lim", "liminf", "limsup", "ln", "log",
	"max", "min", "sec", "sin", "sinh", "sup",
	"tan", "tanh", "Pr", nullptr
};
81 static size_t const npos = lyx::docstring::npos;
83 // define a function for tests
84 typedef bool TestItemFunc(MathAtom const &);
86 // define a function for replacing subexpressions
87 typedef MathAtom ReplaceArgumentFunc(const MathData & ar);
91 // try to extract a super/subscript
92 // modify iterator position to point behind the thing
93 bool extractScript(MathData & ar,
94 MathData::iterator & pos, MathData::iterator last, bool superscript)
96 // nothing to get here
100 // is this a scriptinset?
101 if (!(*pos)->asScriptInset())
104 // do we want superscripts only?
105 if (superscript && !(*pos)->asScriptInset()->hasUp())
108 // it is a scriptinset, use it.
115 // try to extract an "argument" to some function.
116 // returns position behind the argument
117 MathData::iterator extractArgument(MathData & ar,
118 MathData::iterator pos, MathData::iterator last,
119 ExternalMath kind, bool function = false)
121 // nothing to get here
125 // something delimited _is_ an argument
126 if ((*pos)->asDelimInset()) {
127 // leave out delimiters if this is a function argument
128 // unless we are doing MathML, in which case we do want
130 if (function && kind != MATHML && kind != HTML) {
131 MathData const & arg = (*pos)->asDelimInset()->cell(0);
132 MathData::const_iterator cur = arg.begin();
133 MathData::const_iterator end = arg.end();
135 ar.push_back(*cur++);
141 // if there's one, get following superscript only if this
142 // isn't a function argument
144 extractScript(ar, pos, last, true);
148 // always take the first thing, no matter what it is
151 // go ahead if possible
156 // if the next item is a super/subscript, it most certainly belongs
157 // to the thing we have
158 extractScript(ar, pos, last, false);
162 // but it might be more than that.
163 // FIXME: not implemented
164 //for (MathData::iterator it = pos + 1; it != last; ++it) {
165 // // always take the first thing, no matter
167 // ar.push_back(*it);
175 // returns sequence of char with same code starting at it up to end
176 // it might be less, though...
177 docstring charSequence
178 (MathData::const_iterator it, MathData::const_iterator end)
181 for (; it != end && (*it)->asCharInset(); ++it)
182 s += (*it)->getChar();
187 void extractStrings(MathData & ar)
189 //lyxerr << "\nStrings from: " << ar << endl;
190 for (size_t i = 0; i < ar.size(); ++i) {
191 if (!ar[i]->asCharInset())
193 docstring s = charSequence(ar.begin() + i, ar.end());
194 ar[i] = MathAtom(new InsetMathString(s));
195 ar.erase(i + 1, i + s.size());
197 //lyxerr << "\nStrings to: " << ar << endl;
201 void extractMatrices(MathData & ar)
203 //lyxerr << "\nMatrices from: " << ar << endl;
204 // first pass for explicitly delimited stuff
205 for (size_t i = 0; i < ar.size(); ++i) {
206 InsetMathDelim const * const inset = ar[i]->asDelimInset();
209 MathData const & arr = inset->cell(0);
212 if (!arr.front()->asGridInset())
214 ar[i] = MathAtom(new InsetMathMatrix(*(arr.front()->asGridInset()),
215 inset->left_, inset->right_));
218 // second pass for AMS "pmatrix" etc
219 for (size_t i = 0; i < ar.size(); ++i) {
220 InsetMathAMSArray const * const inset = ar[i]->asAMSArrayInset();
222 string left = inset->name_left();
225 string right = inset->name_right();
228 ar[i] = MathAtom(new InsetMathMatrix(*inset, from_ascii(left), from_ascii(right)));
231 //lyxerr << "\nMatrices to: " << ar << endl;
235 // convert this inset somehow to a string
236 bool extractString(MathAtom const & at, docstring & str)
239 str = docstring(1, at->getChar());
242 if (at->asStringInset()) {
243 str = at->asStringInset()->str();
250 // is this a known function?
251 bool isKnownFunction(docstring const & str)
253 for (int i = 0; function_names[i]; ++i) {
254 if (str == function_names[i])
261 // extract a function name from this inset
262 bool extractFunctionName(MathAtom const & at, docstring & str)
264 if (at->asSymbolInset()) {
265 str = at->asSymbolInset()->name();
266 return isKnownFunction(str);
268 if (at->asUnknownInset()) {
269 // assume it is well known...
273 if (at->asFontInset() && at->name() == "mathrm") {
274 // assume it is well known...
275 MathData const & ar = at->asFontInset()->cell(0);
276 str = charSequence(ar.begin(), ar.end());
277 return ar.size() == str.size();
283 bool testString(MathAtom const & at, docstring const & str)
286 return extractString(at, s) && str == s;
290 bool testString(MathAtom const & at, char const * const str)
292 return testString(at, from_ascii(str));
296 bool testSymbol(MathAtom const & at, docstring const & name)
298 return at->asSymbolInset() && at->asSymbolInset()->name() == name;
302 bool testSymbol(MathAtom const & at, char const * const name)
304 return testSymbol(at, from_ascii(name));
308 // search end of nested sequence
309 MathData::iterator endNestSearch(
310 MathData::iterator it,
311 const MathData::iterator& last,
312 TestItemFunc testOpen,
313 TestItemFunc testClose
316 for (int level = 0; it != last; ++it) {
328 // replace nested sequences by a real Insets
331 TestItemFunc testOpen,
332 TestItemFunc testClose,
333 ReplaceArgumentFunc replaceArg)
335 Buffer * buf = ar.buffer();
336 // use indices rather than iterators for the loop because we are going
337 // to modify the array.
338 for (size_t i = 0; i < ar.size(); ++i) {
339 // check whether this is the begin of the sequence
340 if (!testOpen(ar[i]))
343 // search end of sequence
344 MathData::iterator it = ar.begin() + i;
345 MathData::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
349 // replace the original stuff by the new inset
350 ar[i] = replaceArg(MathData(buf, it + 1, jt));
351 ar.erase(it + 1, jt + 1);
358 // split scripts into separate super- and subscript insets. sub goes in
362 void splitScripts(MathData & ar)
364 Buffer * buf = ar.buffer();
365 //lyxerr << "\nScripts from: " << ar << endl;
366 for (size_t i = 0; i < ar.size(); ++i) {
367 InsetMathScript const * script = ar[i]->asScriptInset();
369 // is this a script inset and do we also have a superscript?
370 if (!script || !script->hasUp())
373 // we must have a nucleus if we only have a superscript
374 if (!script->hasDown() && script->nuc().empty())
377 if (script->nuc().size() == 1) {
378 // leave alone sums and integrals
379 InsetMathSymbol const * sym =
380 script->nuc().front()->asSymbolInset();
381 if (sym && (sym->name() == "sum" || sym->name() == "int"))
385 // create extra script inset and move superscript over
386 InsetMathScript * p = ar[i].nucleus()->asScriptInset();
387 auto q = make_unique<InsetMathScript>(buf, true);
388 swap(q->up(), p->up());
389 p->removeScript(true);
391 // if we don't have a subscript, get rid of the ScriptInset
392 if (!script->hasDown()) {
393 MathData arg(p->nuc());
394 MathData::const_iterator it = arg.begin();
395 MathData::const_iterator et = arg.end();
398 ar.insert(i++, *it++);
402 // insert new inset behind
403 ar.insert(i, MathAtom(q.release()));
405 //lyxerr << "\nScripts to: " << ar << endl;
413 void extractExps(MathData & ar)
415 Buffer * buf = ar.buffer();
416 //lyxerr << "\nExps from: " << ar << endl;
417 for (size_t i = 0; i + 1 < ar.size(); ++i) {
419 if (ar[i]->getChar() != 'e')
422 // we need an exponent but no subscript
423 InsetMathScript const * sup = ar[i + 1]->asScriptInset();
424 if (!sup || sup->hasDown())
427 // create a proper exp-inset as replacement
428 ar[i] = MathAtom(new InsetMathExFunc(buf, from_ascii("exp"), sup->cell(1)));
431 //lyxerr << "\nExps to: " << ar << endl;
436 // extract det(...) from |matrix|
438 void extractDets(MathData & ar)
440 Buffer * buf = ar.buffer();
441 //lyxerr << "\ndet from: " << ar << endl;
442 for (MathData::iterator it = ar.begin(); it != ar.end(); ++it) {
443 InsetMathDelim const * del = (*it)->asDelimInset();
448 *it = MathAtom(new InsetMathExFunc(buf, from_ascii("det"), del->cell(0)));
450 //lyxerr << "\ndet to: " << ar << endl;
458 bool isDigitOrSimilar(char_type c)
460 return ('0' <= c && c <= '9') || c == '.';
464 // returns sequence of digits
465 docstring digitSequence
466 (MathData::const_iterator it, MathData::const_iterator end)
469 for (; it != end && (*it)->asCharInset(); ++it) {
470 if (!isDigitOrSimilar((*it)->getChar()))
472 s += (*it)->getChar();
478 void extractNumbers(MathData & ar)
480 //lyxerr << "\nNumbers from: " << ar << endl;
481 for (size_t i = 0; i < ar.size(); ++i) {
482 if (!ar[i]->asCharInset())
484 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
487 docstring s = digitSequence(ar.begin() + i, ar.end());
489 ar[i] = MathAtom(new InsetMathNumber(s));
490 ar.erase(i + 1, i + s.size());
492 //lyxerr << "\nNumbers to: " << ar << endl;
501 bool testOpenParen(MathAtom const & at)
503 return testString(at, "(");
507 bool testCloseParen(MathAtom const & at)
509 return testString(at, ")");
513 MathAtom replaceParenDelims(const MathData & ar)
515 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
516 from_ascii("("), from_ascii(")"), ar, true));
520 bool testOpenBracket(MathAtom const & at)
522 return testString(at, "[");
526 bool testCloseBracket(MathAtom const & at)
528 return testString(at, "]");
532 MathAtom replaceBracketDelims(const MathData & ar)
534 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
535 from_ascii("["), from_ascii("]"), ar, true));
539 bool testOpenVert(MathAtom const & at)
541 return testSymbol(at, "lvert");
545 bool testCloseVert(MathAtom const & at)
547 return testSymbol(at, "rvert");
551 MathAtom replaceVertDelims(const MathData & ar)
553 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
554 from_ascii("lvert"), from_ascii("rvert"), ar, true));
558 bool testOpenAngled(MathAtom const & at)
560 return testSymbol(at, "langle");
564 bool testCloseAngled(MathAtom const & at)
566 return testSymbol(at, "rangle");
570 MathAtom replaceAngledDelims(const MathData & ar)
572 return MathAtom(new InsetMathDelim(const_cast<Buffer *>(ar.buffer()),
573 from_ascii("langle"), from_ascii("rangle"), ar, true));
577 // replace '('...')', '['...']', '|'...'|', and '<'...'>' sequences by a real InsetMathDelim
578 void extractDelims(MathData & ar)
580 //lyxerr << "\nDelims from: " << ar << endl;
581 replaceNested(ar, testOpenParen, testCloseParen, replaceParenDelims);
582 replaceNested(ar, testOpenBracket, testCloseBracket, replaceBracketDelims);
583 replaceNested(ar, testOpenVert, testCloseVert, replaceVertDelims);
584 replaceNested(ar, testOpenAngled, testCloseAngled, replaceAngledDelims);
585 //lyxerr << "\nDelims to: " << ar << endl;
591 // search well-known functions
595 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real InsetMathExFunc
596 // assume 'extractDelims' ran before
597 void extractFunctions(MathData & ar, ExternalMath kind)
599 // FIXME From what I can see, this is quite broken right now, for reasons
600 // I will note below. (RGH)
602 // we need at least two items...
606 Buffer * buf = ar.buffer();
608 //lyxerr << "\nFunctions from: " << ar << endl;
609 for (size_t i = 0; i + 1 < ar.size(); ++i) {
610 MathData::iterator it = ar.begin() + i;
611 MathData::iterator jt = it + 1;
615 // it certainly is if it is well known...
617 // FIXME This will never give us anything. When we get here, *it will
618 // never point at a string, but only at a character. I.e., if we are
619 // working on "sin(x)", then we are seeing:
620 // [char s mathalpha][char i mathalpha][char n mathalpha][delim ( ) [char x mathalpha]]
621 // and of course we will not find the function name "sin" in there, but
624 // It appears that we originally ran extractStrings() before we ran
625 // extractFunctions(), but Andre changed this at f200be55, I think
626 // because this messed up what he was trying to do with "dx" in the
627 // context of integrals.
629 // This could be fixed by looking at a charSequence instead of just at
630 // the various characters, one by one. But I am not sure I understand
631 // exactly what we are trying to do here. And it involves a lot of
633 if (!extractFunctionName(*it, name)) {
634 // is this a user defined function?
635 // probably not, if it doesn't have a name.
636 if (!extractString(*it, name))
638 // it is not if it has no argument
641 // guess so, if this is followed by
642 // a DelimInset with a single item in the cell
643 InsetMathDelim const * del = (*jt)->asDelimInset();
644 if (!del || del->cell(0).size() != 1)
646 // fall through into main branch
649 // do we have an exponent like in
650 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
652 extractScript(exp, jt, ar.end(), true);
654 // create a proper inset as replacement
655 auto p = lyx::make_unique<InsetMathExFunc>(buf, name);
657 // jt points to the "argument". Get hold of this.
658 MathData::iterator st =
659 extractArgument(p->cell(0), jt, ar.end(), kind, true);
661 // replace the function name by a real function inset
662 *it = MathAtom(p.release());
664 // remove the source of the argument from the array
665 ar.erase(it + 1, st);
667 // re-insert exponent
668 ar.insert(i + 1, exp);
669 //lyxerr << "\nFunctions to: " << ar << endl;
678 bool testIntSymbol(MathAtom const & at)
680 return testSymbol(at, from_ascii("int"));
684 bool testIntegral(MathAtom const & at)
688 ( at->asScriptInset()
689 && !at->asScriptInset()->nuc().empty()
690 && testIntSymbol(at->asScriptInset()->nuc().back()) );
695 bool testIntDiff(MathAtom const & at)
697 return testString(at, "d");
701 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real InsetMathExInt
702 // assume 'extractDelims' ran before
703 void extractIntegrals(MathData & ar, ExternalMath kind)
705 // we need at least three items...
709 Buffer * buf = ar.buffer();
711 //lyxerr << "\nIntegrals from: " << ar << endl;
712 for (size_t i = 0; i + 1 < ar.size(); ++i) {
713 MathData::iterator it = ar.begin() + i;
716 MathData::iterator jt =
717 endNestSearch(it, ar.end(), testIntegral, testIntDiff);
719 // something sensible found?
723 // is this an integral name?
724 if (!testIntegral(*it))
727 // core is part from behind the scripts to the 'd'
728 auto p = lyx::make_unique<InsetMathExInt>(buf, from_ascii("int"));
730 // handle scripts if available
731 if (!testIntSymbol(*it)) {
732 p->cell(2) = (*it)->asScriptInset()->down();
733 p->cell(3) = (*it)->asScriptInset()->up();
735 p->cell(0) = MathData(buf, it + 1, jt);
737 // use the "thing" behind the 'd' as differential
738 MathData::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end(), kind);
741 ar.erase(it + 1, tt);
742 *it = MathAtom(p.release());
744 //lyxerr << "\nIntegrals to: " << ar << endl;
748 bool testTermDelimiter(MathAtom const & at)
750 return testString(at, "+") || testString(at, "-");
754 // try to extract a "term", i.e., something delimited by '+' or '-'.
755 // returns position behind the term
756 MathData::iterator extractTerm(MathData & ar,
757 MathData::iterator pos, MathData::iterator last)
759 while (pos != last && !testTermDelimiter(*pos)) {
772 bool testEqualSign(MathAtom const & at)
774 return testString(at, "=");
778 bool testSumSymbol(MathAtom const & p)
780 return testSymbol(p, from_ascii("sum"));
784 bool testSum(MathAtom const & at)
788 ( at->asScriptInset()
789 && !at->asScriptInset()->nuc().empty()
790 && testSumSymbol(at->asScriptInset()->nuc().back()) );
794 // replace '\sum' ['_^'] f(x) sequences by a real InsetMathExInt
795 // assume 'extractDelims' ran before
796 void extractSums(MathData & ar)
798 // we need at least two items...
802 Buffer * buf = ar.buffer();
804 //lyxerr << "\nSums from: " << ar << endl;
805 for (size_t i = 0; i + 1 < ar.size(); ++i) {
806 MathData::iterator it = ar.begin() + i;
808 // is this a sum name?
812 // create a proper inset as replacement
813 auto p = lyx::make_unique<InsetMathExInt>(buf, from_ascii("sum"));
815 // collect lower bound and summation index
816 InsetMathScript const * sub = ar[i]->asScriptInset();
817 if (sub && sub->hasDown()) {
818 // try to figure out the summation index from the subscript
819 MathData const & md = sub->down();
820 MathData::const_iterator xt =
821 find_if(md.begin(), md.end(), &testEqualSign);
822 if (xt != md.end()) {
823 // we found a '=', use everything in front of that as index,
824 // and everything behind as lower index
825 p->cell(1) = MathData(buf, md.begin(), xt);
826 p->cell(2) = MathData(buf, xt + 1, md.end());
828 // use everything as summation index, don't use scripts.
833 // collect upper bound
834 if (sub && sub->hasUp())
835 p->cell(3) = sub->up();
837 // use something behind the script as core
838 MathData::iterator tt = extractTerm(p->cell(0), it + 1, ar.end());
841 ar.erase(it + 1, tt);
842 *it = MathAtom(p.release());
844 //lyxerr << "\nSums to: " << ar << endl;
849 // search differential stuff
852 // tests for 'd' or '\partial'
853 bool testDiffItem(MathAtom const & at)
855 if (testString(at, "d") || testSymbol(at, "partial"))
858 // we may have d^n .../d and splitScripts() has not yet seen it
859 InsetMathScript const * sup = at->asScriptInset();
860 if (sup && !sup->hasDown() && sup->hasUp() && sup->nuc().size() == 1) {
861 MathAtom const & ma = sup->nuc().front();
862 return testString(ma, "d") || testSymbol(ma, "partial");
868 bool testDiffArray(MathData const & ar)
870 return !ar.empty() && testDiffItem(ar.front());
874 bool testDiffFrac(MathAtom const & at)
878 && testDiffArray(at->asFracInset()->cell(0))
879 && testDiffArray(at->asFracInset()->cell(1));
883 void extractDiff(MathData & ar)
885 Buffer * buf = ar.buffer();
886 //lyxerr << "\nDiffs from: " << ar << endl;
887 for (size_t i = 0; i < ar.size(); ++i) {
888 MathData::iterator it = ar.begin() + i;
890 // is this a "differential fraction"?
891 if (!testDiffFrac(*it))
894 InsetMathFrac const * f = (*it)->asFracInset();
896 lyxerr << "should not happen" << endl;
900 // create a proper diff inset
901 auto diff = lyx::make_unique<InsetMathDiff>(buf);
903 // collect function, let jt point behind last used item
904 MathData::iterator jt = it + 1;
906 MathData numer(f->cell(0));
908 if (numer.size() > 1 && numer[1]->asScriptInset()) {
909 // this is something like d^n f(x) / d... or d^n / d...
912 if (numer.size() > 2)
913 diff->cell(0) = MathData(buf, numer.begin() + 2, numer.end());
915 jt = extractTerm(diff->cell(0), jt, ar.end());
917 // simply d f(x) / d... or d/d...
918 if (numer.size() > 1)
919 diff->cell(0) = MathData(buf, numer.begin() + 1, numer.end());
921 jt = extractTerm(diff->cell(0), jt, ar.end());
924 // collect denominator parts
925 MathData denom(f->cell(1));
927 for (MathData::iterator dt = denom.begin(); dt != denom.end();) {
929 MathData::iterator et
930 = find_if(dt + 1, denom.end(), &testDiffItem);
933 MathData::iterator st = et - 1;
934 InsetMathScript const * script = (*st)->asScriptInset();
935 if (script && script->hasUp()) {
936 // things like d.../dx^n
938 if (extractNumber(script->up(), mult)) {
939 //lyxerr << "mult: " << mult << endl;
940 if (mult < 0 || mult > 1000) {
941 lyxerr << "Cannot differentiate less than 0 or more than 1000 times !" << endl;
944 for (int ii = 0; ii < mult; ++ii)
945 diff->addDer(MathData(buf, dt + 1, st));
949 diff->addDer(MathData(buf, dt + 1, et));
955 ar.erase(it + 1, jt);
956 *it = MathAtom(diff.release());
958 //lyxerr << "\nDiffs to: " << ar << endl;
967 bool testRightArrow(MathAtom const & at)
969 return testSymbol(at, "to") || testSymbol(at, "rightarrow");
974 // replace '\lim_{x->x0} f(x)' sequences by a real InsetMathLim
975 // assume 'extractDelims' ran before
976 void extractLims(MathData & ar)
978 Buffer * buf = ar.buffer();
979 //lyxerr << "\nLimits from: " << ar << endl;
980 for (size_t i = 0; i < ar.size(); ++i) {
981 MathData::iterator it = ar.begin() + i;
983 // must be a script inset with a subscript (without superscript)
984 InsetMathScript const * sub = (*it)->asScriptInset();
985 if (!sub || !sub->hasDown() || sub->hasUp() || sub->nuc().size() != 1)
988 // is this a limit function?
989 if (!testSymbol(sub->nuc().front(), "lim"))
992 // subscript must contain a -> symbol
993 MathData const & s = sub->down();
994 MathData::const_iterator st = find_if(s.begin(), s.end(), &testRightArrow);
998 // the -> splits the subscript into x and x0
999 MathData x = MathData(buf, s.begin(), st);
1000 MathData x0 = MathData(buf, st + 1, s.end());
1002 // use something behind the script as core
1004 MathData::iterator tt = extractTerm(f, it + 1, ar.end());
1007 ar.erase(it + 1, tt);
1009 // create a proper inset as replacement
1010 *it = MathAtom(new InsetMathLim(buf, f, x, x0));
1012 //lyxerr << "\nLimits to: " << ar << endl;
1020 void extractStructure(MathData & ar, ExternalMath kind)
1022 //lyxerr << "\nStructure from: " << ar << endl;
1023 if (kind != MATHML && kind != HTML)
1026 extractIntegrals(ar, kind);
1027 if (kind != MATHML && kind != HTML)
1030 extractMatrices(ar);
1031 if (kind != MATHML && kind != HTML) {
1032 extractFunctions(ar, kind);
1039 //lyxerr << "\nStructure to: " << ar << endl;
1045 string captureOutput(string const & cmd, string const & data)
1047 // In order to avoid parsing problems with command interpreters
1048 // we pass input data through a file
1049 // Since the CAS is supposed to read the temp file we need
1050 // to unlock it on windows (bug 10262).
1051 unique_ptr<TempFile> tempfile(new TempFile("casinput"));
1052 tempfile->setAutoRemove(false);
1053 FileName const cas_tmpfile = tempfile->name();
1056 if (cas_tmpfile.empty()) {
1057 lyxerr << "Warning: cannot create temporary file."
1061 ofstream os(cas_tmpfile.toFilesystemEncoding().c_str());
1064 string command = cmd + " < "
1065 + quoteName(cas_tmpfile.toFilesystemEncoding());
1066 lyxerr << "calling: " << cmd
1067 << "\ninput: '" << data << "'" << endl;
1068 cmd_ret const ret = runCommand(command);
1069 cas_tmpfile.removeFile();
1073 size_t get_matching_brace(string const & str, size_t i)
1076 size_t n = str.size();
1078 i = str.find_first_of("{}", i+1);
1091 size_t get_matching_brace_back(string const & str, size_t i)
1095 i = str.find_last_of("{}", i-1);
1108 MathData pipeThroughMaxima(docstring const &, MathData const & ar)
1110 odocstringstream os;
1111 MaximaStream ms(os);
1113 docstring expr = os.str();
1114 docstring const header = from_ascii("simpsum:true;");
1117 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1118 // try to fix missing '*' the hard way
1120 // > echo "2x;" | maxima
1122 // (C1) Incorrect syntax: x is not an infix operator
1126 lyxerr << "checking expr: '" << to_utf8(expr) << "'" << endl;
1127 docstring full = header + "tex(" + expr + ");";
1128 out = captureOutput("maxima", to_utf8(full));
1130 // leave loop if expression syntax is probably ok
1131 if (out.find("Incorrect syntax") == npos)
1134 // search line with "Incorrect syntax"
1135 istringstream is(out);
1139 if (line.find("Incorrect syntax") != npos)
1143 // 2nd next line is the one with caret
1146 size_t pos = line.find('^');
1147 lyxerr << "found caret at pos: '" << pos << "'" << endl;
1148 if (pos == npos || pos < 4)
1149 break; // caret position not found
1150 pos -= 4; // skip the "tex(" part
1151 if (expr[pos] == '*')
1152 break; // two '*' in a row are definitely bad
1153 expr.insert(pos, from_ascii("*"));
1156 vector<string> tmp = getVectorFromString(out, "$$");
1160 out = subst(subst(tmp[1], "\\>", string()), "{\\it ", "\\mathit{");
1161 lyxerr << "output: '" << out << "'" << endl;
1163 // Ugly code that tries to make the result prettier
1164 size_t i = out.find("\\mathchoice");
1166 size_t j = get_matching_brace(out, i + 12);
1167 size_t k = get_matching_brace(out, j + 1);
1168 k = get_matching_brace(out, k + 1);
1169 k = get_matching_brace(out, k + 1);
1170 string mid = out.substr(i + 13, j - i - 13);
1171 if (mid.find("\\over") != npos)
1172 mid = '{' + mid + '}';
1173 out = out.substr(0, i)
1175 + out.substr(k + 1);
1176 //lyxerr << "output: " << out << endl;
1177 i = out.find("\\mathchoice", i);
1180 i = out.find("\\over");
1182 size_t j = get_matching_brace_back(out, i - 1);
1183 if (j == npos || j == 0)
1185 size_t k = get_matching_brace(out, i + 5);
1186 if (k == npos || k + 1 == out.size())
1188 out = out.substr(0, j - 1)
1190 + out.substr(j, i - j)
1191 + out.substr(i + 5, k - i - 4)
1192 + out.substr(k + 2);
1193 //lyxerr << "output: " << out << endl;
1194 i = out.find("\\over", i + 4);
1197 mathed_parse_cell(res, from_utf8(out));
1202 MathData pipeThroughMaple(docstring const & extra, MathData const & ar)
1204 string header = "readlib(latex):\n";
1206 // remove the \\it for variable names
1207 //"#`latex/csname_font` := `\\it `:"
1209 "`latex/csname_font` := ``:\n";
1211 // export matrices in (...) instead of [...]
1213 "`latex/latex/matrix` := "
1214 "subs(`[`=`(`, `]`=`)`,"
1215 "eval(`latex/latex/matrix`)):\n";
1217 // replace \\cdots with proper '*'
1219 "`latex/latex/*` := "
1220 "subs(`\\,`=`\\cdot `,"
1221 "eval(`latex/latex/*`)):\n";
1223 // remove spurious \\noalign{\\medskip} in matrix output
1225 "`latex/latex/matrix`:= "
1226 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
1227 "eval(`latex/latex/matrix`)):\n";
1229 //"#`latex/latex/symbol` "
1230 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
1232 string trailer = "quit;";
1233 odocstringstream os;
1236 string expr = to_utf8(os.str());
1237 lyxerr << "ar: '" << ar << "'\n"
1238 << "ms: '" << expr << "'" << endl;
1240 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1241 // try to fix missing '*' the hard way by using mint
1243 // ... > echo "1A;" | mint -i 1 -S -s -q
1246 // Probably missing an operator such as * p
1248 lyxerr << "checking expr: '" << expr << "'" << endl;
1249 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ';');
1251 break; // expression syntax is ok
1252 istringstream is(out);
1255 if (!prefixIs(line, "on line"))
1256 break; // error message not identified
1258 size_t pos = line.find('^');
1259 if (pos == string::npos || pos < 15)
1260 break; // caret position not found
1261 pos -= 15; // skip the "on line ..." part
1262 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
1263 break; // two '*' in a row are definitely bad
1264 expr.insert(pos, 1, '*');
1267 // FIXME UNICODE Is utf8 encoding correct?
1268 string full = "latex(" + to_utf8(extra) + '(' + expr + "));";
1269 string out = captureOutput("maple -q", header + full + trailer);
1275 mathed_parse_cell(res, from_utf8(out));
1280 MathData pipeThroughOctave(docstring const &, MathData const & ar)
1282 odocstringstream os;
1283 OctaveStream vs(os);
1285 string expr = to_utf8(os.str());
1288 Buffer * buf = const_cast<Buffer *>(ar.buffer());
1289 lyxerr << "pipe: ar: '" << ar << "'\n"
1290 << "pipe: expr: '" << expr << "'" << endl;
1292 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1294 // try to fix missing '*' the hard way
1296 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
1299 lyxerr << "checking expr: '" << expr << "'" << endl;
1300 out = captureOutput("octave -q 2>&1", expr);
1301 lyxerr << "output: '" << out << "'" << endl;
1303 // leave loop if expression syntax is probably ok
1304 if (out.find("parse error:") == string::npos)
1307 // search line with single caret
1308 istringstream is(out);
1312 lyxerr << "skipping line: '" << line << "'" << endl;
1313 if (line.find(">>> ") != string::npos)
1317 // found line with error, next line is the one with caret
1319 size_t pos = line.find('^');
1320 lyxerr << "caret line: '" << line << "'" << endl;
1321 lyxerr << "found caret at pos: '" << pos << "'" << endl;
1322 if (pos == string::npos || pos < 4)
1323 break; // caret position not found
1324 pos -= 4; // skip the ">>> " part
1325 if (expr[pos] == '*')
1326 break; // two '*' in a row are definitely bad
1327 expr.insert(pos, 1, '*');
1330 // remove 'ans = ' taking into account that there may be an
1331 // ansi control sequence before, such as '\033[?1034hans = '
1332 size_t i = out.find("ans = ");
1333 if (i == string::npos)
1335 out = out.substr(i + 6);
1337 // parse output as matrix or single number
1338 MathAtom at(new InsetMathArray(buf, from_ascii("array"), from_utf8(out)));
1339 InsetMathArray const * mat = at->asArrayInset();
1341 if (mat->ncols() == 1 && mat->nrows() == 1)
1342 res.append(mat->cell(0));
1344 res.push_back(MathAtom(
1345 new InsetMathDelim(buf, from_ascii("("), from_ascii(")"))));
1346 res.back().nucleus()->cell(0).push_back(at);
// Maps a Mathematica function name (e.g. "ArcSin") to the corresponding
// lower-case name (e.g. "arcsin"). The comparison is case sensitive.
// Names without an entry in the table are returned unchanged.
std::string fromMathematicaName(std::string const & name)
{
	struct Translation {
		char const * mathematica;
		char const * latex;
	};
	static Translation const table[] = {
		{"Sin",    "sin"},
		{"Sinh",   "sinh"},
		{"ArcSin", "arcsin"},
		{"Cos",    "cos"},
		{"Cosh",   "cosh"},
		{"ArcCos", "arccos"},
		{"Tan",    "tan"},
		{"Tanh",   "tanh"},
		{"ArcTan", "arctan"},
		{"Cot",    "cot"},
		{"Coth",   "coth"},
		{"Csc",    "csc"},
		{"Sec",    "sec"},
		{"Exp",    "exp"},
		{"Log",    "log"},
		{"Arg",    "arg"},
		{"Det",    "det"},
		{"GCD",    "gcd"},
		{"Max",    "max"},
		{"Min",    "min"},
		{"Erf",    "erf"},
		{"Erfc",   "erfc"}
	};

	for (Translation const & t : table) {
		if (name == t.mathematica)
			return t.latex;
	}
	// no translation known: hand the name back as it is
	return name;
}
1380 void prettifyMathematicaOutput(string & out, string const & macroName,
1381 bool roman, bool translate)
1383 string const macro = "\\" + macroName + "{";
1384 size_t const len = macro.length();
1385 size_t i = out.find(macro);
1388 size_t const j = get_matching_brace(out, i + len);
1389 string const name = out.substr(i + len, j - i - len);
1390 out = out.substr(0, i)
1391 + (roman ? "\\mathrm{" : "")
1392 + (translate ? fromMathematicaName(name) : name)
1393 + out.substr(roman ? j : j + 1);
1394 //lyxerr << "output: " << out << endl;
1395 i = out.find(macro, i);
// Builds a "TeXForm[...]" request from a UTF-8 expression string, runs it
// through captureOutput("math", ...), cuts the TeX result out of the reply,
// tidies it up and parses it back with mathed_parse_cell().
// The first (docstring) parameter is unnamed and therefore unused here.
// NOTE(review): the lines that write `ar` into the stream, declare `out` and
// `res`, and return the result are not visible in this excerpt.
1400 MathData pipeThroughMathematica(docstring const &, MathData const & ar)
1402 odocstringstream os;
1403 MathematicaStream ms(os);
1405 // FIXME UNICODE Is utf8 encoding correct?
1406 string const expr = to_utf8(os.str());
1409 lyxerr << "expr: '" << expr << "'" << endl;
// Wrap the expression so that the reply is requested in TeXForm.
1411 string const full = "TeXForm[" + expr + "]";
// "math" is the first argument of captureOutput(); presumably the name of
// the Mathematica kernel executable -- TODO confirm.
1412 out = captureOutput("math", full);
1413 lyxerr << "output: '" << out << "'" << endl;
// The result is expected between the first output label and the next
// input prompt.
1415 size_t pos1 = out.find("Out[1]//TeXForm= ");
1416 size_t pos2 = out.find("In[2]:=");
// Either marker missing: the statement executed in that case is not
// visible in this excerpt.
1418 if (pos1 == string::npos || pos2 == string::npos)
// 17 is the length of the marker "Out[1]//TeXForm= ".
1421 // get everything from pos1+17 to pos2
1422 out = out.substr(pos1 + 17, pos2 - pos1 - 17);
// Replace carriage returns and newlines by spaces.
1423 out = subst(subst(out, '\r', ' '), '\n', ' ');
1425 // tries to make the result prettier
// Mfunction: roman + translated name; Muserfunction: roman only;
// Mvariable: macro wrapper simply removed.
1426 prettifyMathematicaOutput(out, "Mfunction", true, true);
1427 prettifyMathematicaOutput(out, "Muserfunction", true, false);
1428 prettifyMathematicaOutput(out, "Mvariable", false, false);
// Parse the cleaned-up TeX string into `res`.
1431 mathed_parse_cell(res, from_utf8(out));
// Iterates over the atoms of `dat` for output to the TeX stream `wi`.
// The stream's firstitem() flag is set before the loop and cleared again in
// the two places visible below.
// NOTE(review): most of the loop body is not visible in this excerpt, so the
// exact conditions under which the flag is cleared cannot be stated here.
1439 void write(MathData const & dat, TeXMathStream & wi)
1441 wi.firstitem() = true;
1443 for (MathData::const_iterator it = dat.begin(); it != dat.end(); ++it) {
// c is the atom viewed as a char inset; presumably null when the atom is
// not a char inset -- TODO confirm against asCharInset().
1444 InsetMathChar const * const c = (*it)->asCharInset();
1453 wi.firstitem() = false;
1458 wi.firstitem() = false;
// Writes the string `s` to the TeX stream `os`, converting characters with
// Encodings::latexMathChar(). Three paths are visible: one for the
// wsSearchAdv output mode, one for a stream in locked mode, and a general
// one that opens "\ensuremath{" or "\lyxmathsym{" groups as needed.
// Uncodable characters raise EncodingException, which is handled according
// to os.output() (wsDryrun, wsPreview, wsDefault).
// NOTE(review): many lines (declarations of `str`, `cmd`, `space`, the try
// statements, several closing braces and early returns) are not visible in
// this excerpt; the comments below describe only what the visible lines do.
1463 void writeString(docstring const & s, TeXMathStream & os)
// Special handling for the advanced-search output mode (body not visible).
1474 if (os.output() == TeXMathStream::wsSearchAdv) {
// Locked mode: every character is converted with the fixed mode flag `true`.
1479 if (os.lockedMode()) {
1482 for (char_type c : str) {
1484 Encodings::latexMathChar(c, true, os.encoding(), cmd, space);
// Forward the `space` flag filled in by latexMathChar() to the stream.
1486 os.pendingSpace(space);
1487 } catch (EncodingException const & e) {
// The reaction to an uncodable character depends on the output mode.
1488 switch (os.output()) {
1489 case TeXMathStream::wsDryrun: {
// Dry run: a translated warning plus the failing character is written
// into the output itself.
1490 os << "<" << _("LyX Warning: ")
1491 << _("uncodable character") << " '";
1492 os << docstring(1, e.failed_char);
1496 case TeXMathStream::wsPreview: {
1497 // indicate the encoding error by a boxed '?'
1498 os << "{\\fboxsep=1pt\\fbox{?}}";
// The failing character is additionally reported through LYXERR0.
1499 LYXERR0("Uncodable character" << " '"
1500 << docstring(1, e.failed_char)
// Default output mode (handling not visible in this excerpt).
1504 case TeXMathStream::wsDefault:
1514 // We may already be inside an \ensuremath command.
1515 bool in_forced_mode = os.pendingBrace();
1517 // We will take care of matching braces.
1518 os.pendingBrace(false);
// General path: one conversion per character of `str`.
1520 for (char_type const c : str) {
// In forced mode the stream's text-mode flag is passed on as is,
// otherwise its negation is used.
1521 bool mathmode = in_forced_mode ? os.textMode() : !os.textMode();
// Starts out as the plain character; passed to latexMathChar() below.
1522 docstring command(1, c);
1524 bool termination = false;
1526 Encodings::latexMathChar(c, mathmode, os.encoding(), command, termination)) {
1527 if (os.textMode()) {
1528 if (in_forced_mode) {
1529 // we were inside \lyxmathsym
1532 in_forced_mode = false;
// Non-ASCII character while the stream is in text mode: open an
// \ensuremath group and remember that it has to be closed.
1534 if (!isASCII(c) && os.textMode()) {
1535 os << "\\ensuremath{";
1537 in_forced_mode = true;
1539 } else if (isASCII(c) && in_forced_mode) {
1540 // we were inside \ensuremath
1543 in_forced_mode = false;
1545 } else if (!os.textMode()) {
1546 if (in_forced_mode) {
1547 // we were inside \ensuremath
1549 in_forced_mode = false;
// Stream not in text mode: open a \lyxmathsym group and remember that
// it has to be closed.
1551 os << "\\lyxmathsym{";
1552 in_forced_mode = true;
1557 // We may need a space if the command contains a macro
1558 // and the last char is ASCII.
1560 os.pendingSpace(true);
1561 } catch (EncodingException const & e) {
// Same per-output-mode error reporting as in the locked-mode path.
1562 switch (os.output()) {
1563 case TeXMathStream::wsDryrun: {
1564 os << "<" << _("LyX Warning: ")
1565 << _("uncodable character") << " '";
1566 os << docstring(1, e.failed_char);
1570 case TeXMathStream::wsPreview: {
1571 // indicate the encoding error by a boxed '?'
1572 os << "{\\fboxsep=1pt\\fbox{?}}";
1573 LYXERR0("Uncodable character" << " '"
1574 << docstring(1, e.failed_char)
1578 case TeXMathStream::wsDefault:
1586 if (in_forced_mode && os.textMode()) {
1587 // We have to care for closing \lyxmathsym
// Hand the remaining open-group state back to the stream; it was read
// from os.pendingBrace() and cleared before the loop.
1591 os.pendingBrace(in_forced_mode);
// Calls normalize(os) on every atom of `ar`, in order.
1596 void normalize(MathData const & ar, NormalStream & os)
1598 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
1599 (*it)->normalize(os);
// Output of `dat` for the Octave stream: extractStructure() is applied with
// the OCTAVE target, then the atoms of `ar` are visited in order.
// NOTE(review): the declaration of `ar` (presumably a copy of `dat`) and the
// loop body are not visible in this excerpt.
1603 void octave(MathData const & dat, OctaveStream & os)
1606 extractStructure(ar, OCTAVE);
1607 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Output of `dat` for the Maple stream: extractStructure() is applied with
// the MAPLE target, then the atoms of `ar` are visited in order.
// NOTE(review): the declaration of `ar` (presumably a copy of `dat`) and the
// loop body are not visible in this excerpt.
1612 void maple(MathData const & dat, MapleStream & os)
1615 extractStructure(ar, MAPLE);
1616 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Output of `dat` for the Maxima stream: extractStructure() is applied with
// the MAXIMA target, then the atoms of `ar` are visited in order.
// NOTE(review): the declaration of `ar` (presumably a copy of `dat`) and the
// loop body are not visible in this excerpt.
1621 void maxima(MathData const & dat, MaximaStream & os)
1624 extractStructure(ar, MAXIMA);
1625 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Output of `dat` for the Mathematica stream: extractStructure() is applied
// with the MATHEMATICA target, then mathematica(os) is called on every atom
// of `ar`, in order.
// NOTE(review): the declaration of `ar` (presumably a copy of `dat`) is not
// visible in this excerpt.
1630 void mathematica(MathData const & dat, MathematicaStream & os)
1633 extractStructure(ar, MATHEMATICA);
1634 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
1635 (*it)->mathematica(os);
// MathML output of `dat`: extractStructure() is applied with the MATHML
// target, then the result is written depending on the number of atoms.
// NOTE(review): the declaration of `ar`, the first branch condition and the
// bodies of the first two branches are not visible in this excerpt.
1639 void mathmlize(MathData const & dat, MathMLStream & ms)
1642 extractStructure(ar, MATHML);
// Exactly one atom is handled separately (body not visible).
1646 } else if (ar.size() == 1) {
// Otherwise every atom writes itself to the stream, in order.
1651 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
1652 (*it)->mathmlize(ms);
// HTML output of `dat`: extractStructure() is applied with the HTML target,
// then the result is written depending on the number of atoms.
// NOTE(review): the declaration of `ar` and the bodies of the branch and of
// the loop are not visible in this excerpt.
1659 void htmlize(MathData const & dat, HtmlStream & os)
1662 extractStructure(ar, HTML);
// Exactly one atom is handled separately (body not visible).
1665 if (ar.size() == 1) {
// Otherwise the atoms are visited in order.
1669 for (MathData::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Integer overload: `i` is the output parameter for the parsed value.
// NOTE(review): the extraction statement and the return are not visible in
// this excerpt.
1674 // convert this inset somehow to a number
1675 bool extractNumber(MathData const & ar, int & i)
// The character sequence of the whole cell is fed into an input stream.
1677 idocstringstream is(charSequence(ar.begin(), ar.end()));
1679 // Do not convert is implicitly to bool, since that is forbidden in C++11.
// Floating-point overload: `d` is the output parameter for the parsed value.
// NOTE(review): the extraction statement and the return are not visible in
// this excerpt.
1684 bool extractNumber(MathData const & ar, double & d)
// The character sequence of the whole cell is fed into an input stream.
1686 idocstringstream is(charSequence(ar.begin(), ar.end()));
1688 // Do not convert is implicitly to bool, since that is forbidden in C++11.
// Sends the cell `ar` to the external program selected by `lang`.
// "octave", "maxima", "maple" and "mathematica" are dispatched to their
// dedicated pipeThrough* functions. Any other value is handled generically:
// a normalized expression prefixed by `extra` is passed to a script named
// "extern_<lang>" looked up with libFileSearch("mathed", ...), and the
// script's output is parsed back with mathed_parse_cell().
// NOTE(review): the lines writing `ar` into the stream, the "not found"
// test, the declaration of `res` and the return statements are not visible
// in this excerpt.
1693 MathData pipeThroughExtern(string const & lang, docstring const & extra,
1694 MathData const & ar)
1696 if (lang == "octave")
1697 return pipeThroughOctave(extra, ar);
1699 if (lang == "maxima")
1700 return pipeThroughMaxima(extra, ar);
1702 if (lang == "maple")
1703 return pipeThroughMaple(extra, ar);
1705 if (lang == "mathematica")
1706 return pipeThroughMathematica(extra, ar);
1708 // create normalized expression
1709 odocstringstream os;
1710 NormalStream ns(os);
// The request starts with '[', the extra argument and a blank.
1711 os << '[' << extra << ' ';
1714 // FIXME UNICODE Is utf8 encoding correct?
1715 string data = to_utf8(os.str());
1717 // search external script
1718 FileName const file = libFileSearch("mathed", "extern_" + lang);
1720 lyxerr << "converter to '" << lang << "' not found" << endl;
1724 // run external script
1725 string out = captureOutput(file.absFileName(), data);
// Parse the script's output into `res`.
1727 mathed_parse_cell(res, from_utf8(out));