1 // This file contains most of the magic that extracts "context
2 // information" from the unstructured layout-oriented stuff in an
7 #include "math_amsarrayinset.h"
8 #include "math_arrayinset.h"
9 #include "math_charinset.h"
10 #include "math_deliminset.h"
11 #include "math_diffinset.h"
12 #include "math_exfuncinset.h"
13 #include "math_exintinset.h"
14 #include "math_fracinset.h"
15 #include "math_liminset.h"
16 #include "math_matrixinset.h"
17 #include "math_mathmlstream.h"
18 #include "math_numberinset.h"
19 #include "math_scriptinset.h"
20 #include "math_stringinset.h"
21 #include "math_symbolinset.h"
22 #include "math_unknowninset.h"
23 #include "math_parser.h"
26 #include "support/lyxlib.h"
27 #include "support/systemcall.h"
28 #include "support/filetools.h"
33 using std::istringstream;
// Stream-insertion operator for a MathArray.
38 ostream & operator<<(ostream & os, MathArray const & ar)
46 // Signature of a predicate applied to a single inset; used as the
// open/close test by endNestSearch() and replaceNested().
47 typedef bool TestItemFunc(MathInset *);
49 // Signature of a factory that builds a replacement inset from the
// contents of a matched subexpression (see replaceNested()).
50 typedef MathInset * ReplaceArgumentFunc(const MathArray & ar);
54 // try to extract a super/subscript
55 // modify iterator position to point behind the thing
// 'pos' is taken by reference so that the caller's iterator can be moved;
// 'last' is the end of the range that may be inspected.
56 bool extractScript(MathArray & ar,
57 MathArray::iterator & pos, MathArray::iterator last)
59 // nothing to get here
63 // is this a scriptinset?
64 if (!(*pos)->asScriptInset())
67 // it is a scriptinset, use it.
74 // try to extract an "argument" to some function.
75 // returns position behind the argument
// The trailing string parameter is unnamed and defaults to "".
76 MathArray::iterator extractArgument(MathArray & ar,
77 MathArray::iterator pos, MathArray::iterator last, string const & = "")
79 // nothing to get here
83 // something delimited _is_ an argument
84 if ((*pos)->asDelimInset()) {
89 // always take the first thing, no matter what it is
92 // go ahead if possible
97 // if the next item is a subscript, it most certainly belongs to the
99 extractScript(ar, pos, last);
103 // but it might be more than that.
104 // FIXME: not implemented
105 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
106 // // always take the first thing, no matter
108 // ar.push_back(*it);
116 // returns sequence of char with same code starting at it up to end
117 // it might be less, though...
// The loop stops at 'end' or at the first element that is not a char inset.
119 (MathArray::const_iterator it, MathArray::const_iterator end)
122 for (; it != end && (*it)->asCharInset(); ++it)
123 s += (*it)->getChar();
// Collapses runs of char insets in 'ar' into single MathStringInsets.
128 void extractStrings(MathArray & ar)
130 //lyxerr << "\nStrings from: " << ar << "\n";
131 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
132 if (!ar[i]->asCharInset())
// collect the characters of the run that starts at position i
134 string s = charSequence(ar.begin() + i, ar.end());
// the first element of the run becomes the string inset ...
135 ar[i].reset(new MathStringInset(s));
// ... and the other s.size() - 1 elements of the run are removed
136 ar.erase(i + 1, i + s.size());
138 //lyxerr << "\nStrings to: " << ar << "\n";
// Returns the nucleus of the only element of 'ar', or 0 if 'ar' does not
// hold exactly one element.
142 MathInset * singleItem(MathArray & ar)
144 return ar.size() == 1 ? ar.begin()->nucleus() : 0;
// Replaces grid-like insets in 'ar' by MathMatrixInsets, in two passes.
148 void extractMatrices(MathArray & ar)
150 //lyxerr << "\nMatrices from: " << ar << "\n";
151 // first pass for explicitly delimited stuff
152 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
153 MathDelimInset * del = (*it)->asDelimInset();
// cell 0 of the delimiter must consist of exactly one grid inset
156 MathInset * arr = singleItem(del->cell(0));
157 if (!arr || !arr->asGridInset())
159 *it = MathAtom(new MathMatrixInset(*(arr->asGridInset())));
162 // second pass for AMS "pmatrix" etc
163 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
164 MathAMSArrayInset * ams = (*it)->asAMSArrayInset();
167 *it = MathAtom(new MathMatrixInset(*ams));
169 //lyxerr << "\nMatrices to: " << ar << "\n";
173 // convert this inset somehow to a string
// The result is written to 'str': either a one-character string built
// from getChar(), or the str() of a string inset.
174 bool extractString(MathInset * p, string & str)
179 str = string(1, p->getChar());
182 if (p->asStringInset()) {
183 str = p->asStringInset()->str();
190 // convert this inset somehow to a number
// Integer variant: the leading run of char insets of 'ar' (see
// charSequence) is fed into a string stream; 'i' is the output parameter.
191 bool extractNumber(MathArray const & ar, int & i)
193 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// Floating-point variant of extractNumber(): the leading run of char
// insets of 'ar' is fed into a string stream; 'd' is the output parameter.
199 bool extractNumber(MathArray const & ar, double & d)
201 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// True if extractString() succeeds on 'p' and the extracted text equals 'str'.
207 bool testString(MathInset * p, const string & str)
210 return extractString(p, s) && str == s;
214 // search end of nested sequence
// Walks [it, last) applying 'testOpen' and 'testClose' to each element's
// nucleus while keeping a nesting counter 'level'.
215 MathArray::iterator endNestSearch(
216 MathArray::iterator it,
217 MathArray::iterator last,
218 TestItemFunc testOpen,
219 TestItemFunc testClose
222 for (int level = 0; it != last; ++it) {
223 if (testOpen(it->nucleus()))
225 if (testClose(it->nucleus()))
234 // replace nested sequences by real insets
// 'testOpen'/'testClose' recognise the first and last item of a sequence,
// 'replaceArg' builds the replacement inset from the items in between.
237 TestItemFunc testOpen,
238 TestItemFunc testClose,
239 ReplaceArgumentFunc replaceArg
242 // use indices rather than iterators for the loop because we are going
243 // to modify the array.
244 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
245 // check whether this is the begin of the sequence
246 MathArray::iterator it = ar.begin() + i;
247 if (!testOpen(it->nucleus()))
250 // search end of sequence
251 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
255 // create a proper inset as replacement
// only the items strictly between 'it' and 'jt' are handed over
256 MathInset * p = replaceArg(MathArray(it + 1, jt));
258 // replace the original stuff by the new inset
// removes everything behind 'it' up to and including 'jt'
259 ar.erase(it + 1, jt + 1);
267 // split scripts into separate super- and subscript insets. sub goes in
271 void splitScripts(MathArray & ar)
273 //lyxerr << "\nScripts from: " << ar << "\n";
// index-based loop: the array is modified by the insert() below
274 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
275 MathArray::iterator it = ar.begin() + i;
277 // is this script inset?
278 MathScriptInset * p = (*it)->asScriptInset();
282 // no problem if we don't have both...
283 if (!p->hasUp() || !p->hasDown())
286 // create extra script inset and move superscript over
287 MathScriptInset * q = new MathScriptInset;
// exchange the 'up' parts of the new inset q and the original p
289 std::swap(q->up(), p->up());
290 p->removeScript(true);
292 // insert new inset behind
294 ar.insert(i, MathAtom(q));
296 //lyxerr << "\nScripts to: " << ar << "\n";
// Looks for the char 'e' followed by a script inset without subscript and
// replaces the 'e' by a MathExFuncInset named "exp".
304 void extractExps(MathArray & ar)
306 //lyxerr << "\nExps from: " << ar << "\n";
// i + 1 < size: the element behind position i is inspected as well
308 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
309 MathArray::iterator it = ar.begin() + i;
312 MathCharInset const * p = (*it)->asCharInset();
313 if (!p || p->getChar() != 'e')
316 // we need an exponent but no subscript
317 MathScriptInset * sup = (*(it + 1))->asScriptInset();
318 if (!sup || sup->hasDown())
321 // create a proper exp-inset as replacement
// cell(1) of the script inset is passed as the function's argument
322 *it = new MathExFuncInset("exp", sup->cell(1));
325 //lyxerr << "\nExps to: " << ar << "\n";
330 // extract det(...) from |matrix|
332 void extractDets(MathArray & ar)
334 //lyxerr << "\ndet from: " << ar << "\n";
335 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
336 MathDelimInset * del = (*it)->asDelimInset();
// the delimiter inset is replaced by a "det" function of its cell 0
341 *it = new MathExFuncInset("det", del->cell(0));
343 //lyxerr << "\ndet to: " << ar << "\n";
// True for the characters '0'..'9' and for '.'.
351 bool isDigitOrSimilar(char c)
353 return ('0' <= c && c <= '9') || c == '.';
357 // returns sequence of digits
// Only char insets are considered; each char is checked with
// isDigitOrSimilar() before it is appended to the result.
359 (MathArray::const_iterator it, MathArray::const_iterator end)
362 for (; it != end && (*it)->asCharInset(); ++it) {
363 if (!isDigitOrSimilar((*it)->getChar()))
365 s += (*it)->getChar();
// Collapses runs of digit-like char insets in 'ar' into MathNumberInsets.
371 void extractNumbers(MathArray & ar)
373 //lyxerr << "\nNumbers from: " << ar << "\n";
374 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
375 if (!ar[i]->asCharInset())
377 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
// collect the digits of the run that starts at position i
380 string s = digitSequence(ar.begin() + i, ar.end());
// the first element of the run becomes the number inset ...
382 ar[i].reset(new MathNumberInset(s));
// ... and the other s.size() - 1 elements of the run are removed
383 ar.erase(i + 1, i + s.size());
385 //lyxerr << "\nNumbers to: " << ar << "\n";
391 // search delimiters
// True if 'p' converts to the string "(".
394 bool testOpenParan(MathInset * p)
396 return testString(p, "(");
// True if 'p' converts to the string ")".
400 bool testCloseParan(MathInset * p)
402 return testString(p, ")");
// Builds a new MathDelimInset with delimiters "(" and ")" around 'ar'.
// The inset is allocated with new and returned as a raw pointer.
406 MathInset * replaceDelims(const MathArray & ar)
408 return new MathDelimInset("(", ")", ar);
412 // replace '('...')' sequences by a real MathDelimInset
// Implemented via replaceNested() with the parenthesis tests and
// replaceDelims() as the replacement factory.
413 void extractDelims(MathArray & ar)
415 //lyxerr << "\nDelims from: " << ar << "\n";
416 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
417 //lyxerr << "\nDelims to: " << ar << "\n";
423 // search well-known functions
427 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
428 // assume 'extractDelims' ran before
429 void extractFunctions(MathArray & ar)
431 // we need at least two items...
435 //lyxerr << "\nFunctions from: " << ar << "\n";
// index-based loop: the array is modified by erase()/insert() below
436 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
437 MathArray::iterator it = ar.begin() + i;
438 MathArray::iterator jt = it + 1;
442 if ((*it)->asUnknownInset()) {
443 // it certainly is if it is well known...
444 name = (*it)->asUnknownInset()->name();
446 // is this a user defined function?
447 // it is probably not, if it doesn't have a name.
448 if (!extractString((*it).nucleus(), name))
450 // it is not if it has no argument
453 // guess so, if this is followed by
454 // a DelimInset with a single item in the cell
455 MathDelimInset * del = (*jt)->asDelimInset();
456 if (!del || del->cell(0).size() != 1)
458 // fall through into main branch
461 // do we have an exponent like in
462 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
// jt is passed by reference (see extractScript)
464 extractScript(exp, jt, ar.end());
466 // create a proper inset as replacement
467 MathExFuncInset * p = new MathExFuncInset(name);
469 // jt points to the "argument". Get hold of this.
// the argument is stored in cell 0 of the new inset; st points behind it
470 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
472 // replace the function name by a real function inset
475 // remove the source of the argument from the array
476 ar.erase(it + 1, st);
478 // re-insert exponent
479 ar.insert(i + 1, exp);
480 //lyxerr << "\nFunctions to: " << ar << "\n";
// True if 'p' is a symbol inset whose name() equals 'name'.
489 bool testSymbol(MathInset * p, string const & name)
491 return p->asSymbolInset() && p->asSymbolInset()->name() == name;
// True if 'p' is the symbol "int".
495 bool testIntSymbol(MathInset * p)
497 return testSymbol(p, "int");
// True if 'p' converts to the string "d"; used by extractIntegrals() as
// the closing test for endNestSearch().
501 bool testIntDiff(MathInset * p)
503 return testString(p, "d");
507 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
508 // assume 'extractDelims' ran before
// Cells of the new inset as filled below: 0 = core, 1 = differential,
// 2 = subscript, 3 = superscript.
509 void extractIntegrals(MathArray & ar)
511 // we need at least three items...
515 //lyxerr << "\nIntegrals from: " << ar << "\n";
516 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
517 MathArray::iterator it = ar.begin() + i;
519 // is this an integral name?
520 if (!testIntSymbol(it->nucleus()))
// nested search from the "int" symbol, with the 'd' test as closing test
524 MathArray::iterator jt =
525 endNestSearch(it, ar.end(), testIntSymbol, testIntDiff);
527 // something sensible found?
531 // create a proper inset as replacement
532 MathExIntInset * p = new MathExIntInset("int");
534 // collect subscript if any
535 MathArray::iterator st = it + 1;
537 if (MathScriptInset * sub = (*st)->asScriptInset())
538 if (sub->hasDown()) {
539 p->cell(2) = sub->down().data();
543 // collect superscript if any
545 if (MathScriptInset * sup = (*st)->asScriptInset())
547 p->cell(3) = sup->up().data();
551 // core is the part from behind the scripts up to the 'd'
552 p->cell(0) = MathArray(st, jt);
554 // use the "thing" behind the 'd' as differential
555 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
// drop everything behind the "int" position up to tt
558 ar.erase(it + 1, tt);
561 //lyxerr << "\nIntegrals to: " << ar << "\n";
// True if 'at' converts to the string "="; used as find_if predicate in
// extractSums().
570 bool testEqualSign(MathAtom const & at)
572 return testString(at.nucleus(), "=");
577 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
578 // assume 'extractDelims' ran before
// Cells of the new inset as filled below: 0 = core, 1 = summation index,
// 2 = lower bound, 3 = upper bound.
579 void extractSums(MathArray & ar)
581 // we need at least two items...
585 //lyxerr << "\nSums from: " << ar << "\n";
586 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
587 MathArray::iterator it = ar.begin() + i;
589 // is this a sum name?
590 if (!testSymbol(it->nucleus(), "sum"))
593 // create a proper inset as replacement
594 MathExIntInset * p = new MathExIntInset("sum");
596 // collect lower bound and summation index
597 MathArray::iterator st = it + 1;
599 if (MathScriptInset * sub = (*st)->asScriptInset())
600 if (sub->hasDown()) {
601 // try to figure out the summation index from the subscript
// NOTE: this 'ar' and the 'it' below shadow the function-level 'ar'/'it'
// inside this scope; they refer to the subscript's contents.
602 MathArray & ar = sub->down().data();
603 MathArray::iterator it =
604 find_if(ar.begin(), ar.end(), &testEqualSign);
605 if (it != ar.end()) {
606 // we found a '=', use everything in front of that as index,
607 // and everything behind as lower index
608 p->cell(1) = MathArray(ar.begin(), it);
609 p->cell(2) = MathArray(it + 1, ar.end());
611 // use everything as summation index, don't use scripts.
617 // collect upper bound
619 if (MathScriptInset * sup = (*st)->asScriptInset())
621 p->cell(3) = sup->up().data();
625 // use some behind the script as core
626 MathArray::iterator tt = extractArgument(p->cell(0), st, ar.end());
// drop everything behind the "sum" position up to tt
629 ar.erase(it + 1, tt);
632 //lyxerr << "\nSums to: " << ar << "\n";
637 // search differential stuff
640 // tests for 'd' (NOTE: a check for '\partial' is not implemented here)
641 bool testDiffItem(MathAtom const & at)
643 return testString(at.nucleus(), "d");
// True if 'ar' is non-empty and its first element passes testDiffItem().
647 bool testDiffArray(MathArray const & ar)
649 return ar.size() && testDiffItem(ar.front());
// True if 'p' is a fraction inset whose cell 0 and cell 1 both pass
// testDiffArray().
653 bool testDiffFrac(MathInset * p)
655 MathFracInset * f = p->asFracInset();
656 return f && testDiffArray(f->cell(0)) && testDiffArray(f->cell(1));
660 // is this something like ^number?
// 'it' must point to a script inset that also converts to a string; that
// string is then fed into a string stream. 'i' is the output parameter.
661 bool extractDiffExponent(MathArray::iterator it, int & i)
663 if (!(*it)->asScriptInset())
667 if (!extractString((*it).nucleus(), s))
669 istringstream is(s.c_str());
// Replaces "differential fractions" (see testDiffFrac) in 'ar' by
// MathDiffInsets.
675 void extractDiff(MathArray & ar)
677 //lyxerr << "\nDiffs from: " << ar << "\n";
678 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
679 MathArray::iterator it = ar.begin() + i;
681 // is this a "differential fraction"?
682 if (!testDiffFrac(it->nucleus()))
685 MathFracInset * f = (*it)->asFracInset();
687 lyxerr << "should not happen\n";
691 // create a proper diff inset
692 MathDiffInset * diff = new MathDiffInset;
694 // collect function, let jt point behind last used item
695 MathArray::iterator jt = it + 1;
// cell 0 of the fraction is used as numerator
697 MathArray & numer = f->cell(0);
698 if (numer.size() > 1 && numer[1]->asScriptInset()) {
699 // this is something like d^n f(x) / d... or d^n / d...
// function taken from the numerator behind 'd' and the script ...
702 if (numer.size() > 2)
703 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
// ... or from the items following the fraction
705 jt = extractArgument(diff->cell(0), jt, ar.end());
707 // simply d f(x) / d... or d/d...
708 if (numer.size() > 1)
709 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
711 jt = extractArgument(diff->cell(0), jt, ar.end());
714 // collect denominator parts
715 MathArray & denom = f->cell(1);
716 for (MathArray::iterator dt = denom.begin(); dt != denom.end();) {
// et: next item behind dt that passes testDiffItem, or denom.end()
718 MathArray::iterator et = find_if(dt + 1, denom.end(), &testDiffItem);
// st: last item in front of et
721 MathArray::iterator st = et - 1;
722 MathScriptInset * script = (*st)->asScriptInset();
723 if (script && script->hasUp()) {
724 // things like d.../dx^n
726 if (extractNumber(script->up().data(), mult)) {
727 //lyxerr << "mult: " << mult << endl;
// NOTE: this 'i' shadows the outer loop index inside the inner loop;
// the derivative is added 'mult' times.
728 for (int i = 0; i < mult; ++i)
729 diff->addDer(MathArray(dt + 1, st));
733 diff->addDer(MathArray(dt + 1, et));
// drop everything behind the fraction's position up to jt
739 ar.erase(it + 1, jt);
742 //lyxerr << "\nDiffs to: " << ar << "\n";
// Arrow test used by extractLims(); accepts at least the symbols "to"
// and "rightarrow".
751 bool testRightArrow(MathAtom const & at)
754 testSymbol(at.nucleus(), "to") ||
755 testSymbol(at.nucleus(), "rightarrow");
760 // replace '\lim_{x->x0} f(x)' sequences by a real MathLimInset
761 // assume 'extractDelims' ran before
762 void extractLims(MathArray & ar)
764 // we need at least three items...
768 //lyxerr << "\nLimits from: " << ar << "\n";
// i + 2 < size: "lim", its subscript and at least one more item are needed
769 for (MathArray::size_type i = 0; i + 2 < ar.size(); ++i) {
770 MathArray::iterator it = ar.begin() + i;
772 // is this a limit function?
773 if (!testSymbol(it->nucleus(), "lim"))
776 // the next one must be a subscript (without superscript)
777 MathScriptInset * sub = (*(it + 1))->asScriptInset();
778 if (!sub || !sub->hasDown() || sub->hasUp())
781 // and it must contain a -> symbol
782 MathArray & s = sub->down().data();
783 MathArray::iterator st = find_if(s.begin(), s.end(), &testRightArrow);
787 // the -> splits the subscript into x and x0
788 MathArray x = MathArray(s.begin(), st);
789 MathArray x0 = MathArray(st + 1, s.end());
791 // use something behind the script as core
793 MathArray::iterator tt = extractArgument(f, it + 2, ar.end());
795 // create a proper inset as replacement
796 MathLimInset * p = new MathLimInset(f, x, x0);
// drop everything behind the "lim" position up to tt
799 ar.erase(it + 1, tt);
802 //lyxerr << "\nLimits to: " << ar << "\n";
// Driver that applies the individual extract*() passes to 'ar' in place.
810 void extractStructure(MathArray & ar)
816 extractFunctions(ar);
818 extractIntegrals(ar);
// Writes the array to the WriteStream 'wi' by iterating over its elements.
827 void write(MathArray const & dat, WriteStream & wi)
831 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Calls normalize(os) on every element of 'ar', in order.
836 void normalize(MathArray const & ar, NormalStream & os)
838 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
839 (*it)->normalize(os);
// Octave output: runs extractStructure() on 'ar' and then iterates over
// its elements.
843 void octavize(MathArray const & dat, OctaveStream & os)
846 extractStructure(ar);
847 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Maple output: runs extractStructure() on 'ar' and then iterates over
// its elements.
852 void maplize(MathArray const & dat, MapleStream & os)
855 extractStructure(ar);
856 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Mathematica output: runs extractStructure() on 'ar' and then calls
// mathematicize(os) on every element.
861 void mathematicize(MathArray const & dat, MathematicaStream & os)
864 extractStructure(ar);
865 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
866 (*it)->mathematicize(os);
// MathML output: runs extractStructure() on 'ar', then distinguishes by
// the number of elements.
870 void mathmlize(MathArray const & dat, MathMLStream & os)
873 extractStructure(ar);
// a single element is streamed directly via its nucleus
876 else if (ar.size() == 1)
877 os << ar.begin()->nucleus();
// otherwise mathmlize(os) is called on every element
880 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
881 (*it)->mathmlize(os);
// Runs 'cmd' through the shell with 'data' echoed to its standard input,
// redirects the output to a temporary file and reads that file back.
891 string captureOutput(string const & cmd, string const & data)
893 string outfile = lyx::tempName(string(), "mathextern");
// NOTE(review): 'data' and 'cmd' are pasted into the command line without
// any escaping; a single quote inside 'data' ends the quoted echo argument.
894 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
895 lyxerr << "calling: " << full << endl;
// Systemcall::Wait is passed as the start mode
897 dummy.startscript(Systemcall::Wait, full);
898 string out = GetFileContents(outfile);
// the temporary file is removed once its contents have been read
899 lyx::unlink(outfile);
900 lyxerr << "result: '" << out << "'" << endl;
// Sends an expression to "maple -q", wrapped as latex(extra(expr)), and
// parses the returned text with mathed_parse_cell().
905 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
// Maple code sent in front of the actual expression
907 string header = "readlib(latex):\n";
909 // remove the \\it for variable names
910 //"#`latex/csname_font` := `\\it `:"
912 "`latex/csname_font` := ``:\n";
914 // export matrices in (...) instead of [...]
916 "`latex/latex/matrix` := "
917 "subs(`[`=`(`, `]`=`)`,"
918 "eval(`latex/latex/matrix`)):\n";
920 // replace \\cdots with proper '*'
922 "`latex/latex/*` := "
923 "subs(`\\,`=`\\cdot `,"
924 "eval(`latex/latex/*`)):\n";
926 // remove spurious \\noalign{\\medskip} in matrix output
928 "`latex/latex/matrix`:= "
929 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
930 "eval(`latex/latex/matrix`)):\n";
932 //"#`latex/latex/symbol` "
933 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
// Maple code sent behind the expression
935 string trailer = "quit;";
939 string expr = os.str().c_str();
940 lyxerr << "ar: '" << ar << "'\n";
941 lyxerr << "ms: '" << os.str() << "'\n";
943 for (int i = 0; i < 100; ++i) { // at most 100 attempts
944 // try to fix missing '*' the hard way by using mint
946 // ... > echo "1A;" | mint -i 1 -S -s -q
949 // Probably missing an operator such as * p
951 lyxerr << "checking expr: '" << expr << "'\n";
952 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
954 break; // expression syntax is ok
955 istringstream is(out.c_str());
958 if (line.find("on line") != 0)
959 break; // error message not identified
// the '^' in mint's output marks the error column
961 string::size_type pos = line.find('^');
962 if (pos == string::npos || pos < 15)
963 break; // caret position not found
964 pos -= 15; // skip the "on line ..." part
965 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
966 break; // two '*' in a row are definitely bad
// insert the presumably missing operator at the reported position
967 expr.insert(pos, "*");
970 string full = "latex(" + extra + '(' + expr + "));";
971 string out = captureOutput("maple -q", header + full + trailer);
977 mathed_parse_cell(res, out);
// Sends an expression to "octave -q" and turns the answer into a
// MathArray. The first (string) parameter is unnamed and not used.
982 MathArray pipeThroughOctave(string const &, MathArray const & ar)
987 string expr = os.str().c_str();
990 lyxerr << "pipe: ar: '" << ar << "'\n";
991 lyxerr << "pipe: expr: '" << expr << "'\n";
993 for (int i = 0; i < 100; ++i) { // at most 100 attempts
995 // try to fix missing '*' the hard way
997 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
1000 lyxerr << "checking expr: '" << expr << "'\n";
// stderr is merged into stdout so that parse errors can be inspected
1001 out = captureOutput("octave -q 2>&1", expr);
1002 lyxerr << "checking out: '" << out << "'\n";
1004 // leave loop if expression syntax is probably ok
1005 if (out.find("parse error:") == string::npos)
1008 // search line with single caret
1009 istringstream is(out.c_str());
1013 lyxerr << "skipping line: '" << line << "'\n";
1014 if (line.find(">>> ") != string::npos)
1018 // found line with error, next line is the one with caret
1020 string::size_type pos = line.find('^');
1021 lyxerr << "caret line: '" << line << "'\n";
1022 lyxerr << "found caret at pos: '" << pos << "'\n";
1023 if (pos == string::npos || pos < 4)
1024 break; // caret position not found
1025 pos -= 4; // skip the ">>> " part
1026 if (expr[pos] == '*')
1027 break; // two '*' in a row are definitely bad
// insert the presumably missing operator at the reported position
1028 expr.insert(pos, "*");
// drop the first 6 characters of the output
1035 out = out.substr(6);
1037 // parse output as matrix or single number
1038 MathAtom at(new MathArrayInset("array", out));
1039 MathArrayInset const * mat = at.nucleus()->asArrayInset();
// a 1x1 result is appended as plain content ...
1041 if (mat->ncols() == 1 && mat->nrows() == 1)
1042 res.append(mat->cell(0));
// ... anything else is wrapped in a "(" ")" delimiter inset
1044 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1045 res.back()->cell(0).push_back(at);
// Entry point for piping 'ar' through an external program selected by
// 'lang'. "octave" and "maple" have dedicated implementations; any other
// language is handled by a script found via LibFileSearch().
1053 MathArray pipeThroughExtern(string const & lang, string const & extra,
1054 MathArray const & ar)
1056 if (lang == "octave")
1057 return pipeThroughOctave(extra, ar);
1059 if (lang == "maple")
1060 return pipeThroughMaple(extra, ar);
1062 // create normalized expression
1064 NormalStream ns(os);
1065 os << "[" << extra << ' ';
1068 string data = os.str().c_str();
1070 // search external script
// looked up as "extern_" + lang in "mathed"
1071 string file = LibFileSearch("mathed", "extern_" + lang);
1073 lyxerr << "converter to '" << lang << "' not found\n";
1077 // run external script
1078 string out = captureOutput(file, data);
// the script's output is parsed into 'res'
1080 mathed_parse_cell(res, out);