1 // This file contains most of the magic that extracts "context
2 // information" from the unstructured layout-oriented stuff in an
7 #include "math_amsarrayinset.h"
8 #include "math_arrayinset.h"
9 #include "math_charinset.h"
10 #include "math_deliminset.h"
11 #include "math_diffinset.h"
12 #include "math_exfuncinset.h"
13 #include "math_exintinset.h"
14 #include "math_fracinset.h"
15 #include "math_liminset.h"
16 #include "math_matrixinset.h"
17 #include "math_mathmlstream.h"
18 #include "math_numberinset.h"
19 #include "math_scriptinset.h"
20 #include "math_stringinset.h"
21 #include "math_symbolinset.h"
22 #include "math_unknowninset.h"
23 #include "math_parser.h"
26 #include "support/lyxlib.h"
27 #include "support/systemcall.h"
28 #include "support/filetools.h"
33 using std::istringstream;
// Stream-insertion overload so that a MathArray can be written to an ostream
// (used by the lyxerr debug output further down in this file).
38 ostream & operator<<(ostream & os, MathArray const & ar)
46 // Function type for tests on a single inset: takes a MathInset pointer
// and yields a bool (e.g. testOpenParan, testIntegral).
47 typedef bool TestItemFunc(MathInset *);
49 // Function type for replacing subexpressions: receives the matched
// sub-array and returns a raw pointer to the inset that replaces it
// (e.g. replaceDelims).
50 typedef MathInset * ReplaceArgumentFunc(const MathArray & ar);
54 // Try to extract a super/subscript found at 'pos'.
55 // 'pos' is passed by reference so that it can be moved to point behind
// the thing. 'ar' is the array handed in by the caller to receive the script
// (presumably appended to -- TODO confirm).
// Returns bool, presumably true when a script inset was taken (TODO confirm).
56 bool extractScript(MathArray & ar,
57 MathArray::iterator & pos, MathArray::iterator last)
59 // nothing to get here
63 // is this a scriptinset?
64 if (!(*pos)->asScriptInset())
67 // it is a scriptinset, use it.
74 // Try to extract an "argument" to some function, starting at 'pos'.
75 // Returns the position behind the argument.
// 'ar' is the array supplied by the caller for the argument (callers pass
// a cell of the inset under construction). The trailing string parameter is
// unnamed (default ""), so it cannot be referenced inside the body.
76 MathArray::iterator extractArgument(MathArray & ar,
77 MathArray::iterator pos, MathArray::iterator last, string const & = "")
79 // nothing to get here
83 // something delimited _is_ an argument
84 if ((*pos)->asDelimInset()) {
89 // always take the first thing, no matter what it is
92 // go ahead if possible
97 // if the next item is a subscript, it most certainly belongs to the
99 extractScript(ar, pos, last);
103 // but it might be more than that.
104 // FIXME: not implemented
105 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
106 // // always take the first thing, no matter
108 // ar.push_back(*it);
// Builds a string from the chars of the consecutive char insets found at
// the start of [it, end); the loop stops at the first item that is not a
// char inset.
// NOTE(review): the comment below says "with same code", but the loop only
// tests asCharInset() -- confirm whether a code comparison is intended.
116 // returns sequence of char with same code starting at it up to end
117 // it might be less, though...
119 (MathArray::const_iterator it, MathArray::const_iterator end)
122 for (; it != end && (*it)->asCharInset(); ++it)
123 s += (*it)->getChar();
// Collapse runs of char insets in 'ar' into MathStringInsets: the first item
// of a run is reset to a MathStringInset holding the run's text and the
// items from index i + 1 up to i + s.size() are erased (presumably a
// half-open index range, i.e. the rest of the run -- TODO confirm against
// MathArray::erase).
128 void extractStrings(MathArray & ar)
130 //lyxerr << "\nStrings from: " << ar << "\n";
131 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
132 if (!ar[i]->asCharInset())
134 string s = charSequence(ar.begin() + i, ar.end());
135 ar[i].reset(new MathStringInset(s));
136 ar.erase(i + 1, i + s.size());
138 //lyxerr << "\nStrings to: " << ar << "\n";
// Returns the nucleus of the only element if 'ar' holds exactly one item,
// and 0 otherwise.
142 MathInset * singleItem(MathArray & ar)
144 return ar.size() == 1 ? ar.begin()->nucleus() : 0;
// Turn matrix-like constructs in 'ar' into MathMatrixInsets, in two passes:
// first delimiter insets whose cell(0) consists of a single grid inset,
// then AMS array insets. Items are replaced in place.
148 void extractMatrices(MathArray & ar)
150 //lyxerr << "\nMatrices from: " << ar << "\n";
151 // first pass for explicitly delimited stuff
152 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
153 MathDelimInset * del = (*it)->asDelimInset();
156 MathInset * arr = singleItem(del->cell(0));
157 if (!arr || !arr->asGridInset())
159 *it = MathAtom(new MathMatrixInset(*(arr->asGridInset())));
162 // second pass for AMS "pmatrix" etc
163 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
164 MathAMSArrayInset * ams = (*it)->asAMSArrayInset();
167 *it = MathAtom(new MathMatrixInset(*ams));
169 //lyxerr << "\nMatrices to: " << ar << "\n";
173 // convert this inset somehow to a string
// The text is written to 'str': either a one-character string built from
// getChar(), or the text of a MathStringInset. The bool result presumably
// signals whether a conversion was possible (TODO confirm).
174 bool extractString(MathInset * p, string & str)
179 str = string(1, p->getChar());
182 if (p->asStringInset()) {
183 str = p->asStringInset()->str();
190 // convert this array somehow to a number (int target 'i')
// The leading char sequence of 'ar' (see charSequence) is fed into an
// istringstream; presumably 'i' is read from it and the bool result reports
// success (TODO confirm).
191 bool extractNumber(MathArray const & ar, int & i)
193 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// Overload of extractNumber for a double target 'd': the leading char
// sequence of 'ar' (see charSequence) is fed into an istringstream;
// presumably 'd' is read from it and the bool result reports success
// (TODO confirm).
199 bool extractNumber(MathArray const & ar, double & d)
201 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// True if 'p' can be converted to a string by extractString() and that
// string equals 'str'.
207 bool testString(MathInset * p, const string & str)
210 return extractString(p, s) && str == s;
214 // Search the end of a nested sequence within [it, last).
// 'testOpen' and 'testClose' are applied to the nucleus of each item while
// 'level' keeps track of the nesting depth. Callers compare the result
// against the end of the array (presumably 'last' means "no end found" --
// TODO confirm).
215 MathArray::iterator endNestSearch(
216 MathArray::iterator it,
217 MathArray::iterator last,
218 TestItemFunc testOpen,
219 TestItemFunc testClose
222 for (int level = 0; it != last; ++it) {
223 if (testOpen(it->nucleus()))
225 if (testClose(it->nucleus()))
// For every item accepted by 'testOpen', endNestSearch() locates the
// matching closing item 'jt'; 'replaceArg' builds a replacement inset from
// the items strictly between the two, and the items behind 'it' up to
// 'jt + 1' are erased from the array.
234 // replace nested sequences by real insets
237 TestItemFunc testOpen,
238 TestItemFunc testClose,
239 ReplaceArgumentFunc replaceArg
242 // use indices rather than iterators for the loop because we are going
243 // to modify the array.
244 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
245 // check whether this is the beginning of the sequence
246 MathArray::iterator it = ar.begin() + i;
247 if (!testOpen(it->nucleus()))
250 // search end of sequence
251 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
255 // create a proper inset as replacement
256 MathInset * p = replaceArg(MathArray(it + 1, jt));
258 // replace the original stuff by the new inset
259 ar.erase(it + 1, jt + 1);
// Only script insets that have both an upper and a lower script are
// touched: a freshly allocated MathScriptInset takes over the superscript
// (via swap), the superscript is removed from the original, and the new
// inset is inserted into 'ar'.
267 // split scripts into separate super- and subscript insets. sub goes in
271 void splitScripts(MathArray & ar)
273 //lyxerr << "\nScripts from: " << ar << "\n";
274 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
275 MathArray::iterator it = ar.begin() + i;
277 // is this a script inset?
278 MathScriptInset * p = (*it)->asScriptInset();
282 // no problem if we don't have both...
283 if (!p->hasUp() || !p->hasDown())
286 // create extra script inset and move superscript over
287 MathScriptInset * q = new MathScriptInset;
289 std::swap(q->up(), p->up());
290 p->removeScript(true);
292 // insert new inset behind
294 ar.insert(i, MathAtom(q));
296 //lyxerr << "\nScripts to: " << ar << "\n";
// Replace an 'e' char inset that is directly followed by a script inset
// without subscript by MathExFuncInset("exp", ...), using the script's
// cell(1) as the argument. The loop bound 'i + 1 < ar.size()' guarantees
// that 'it + 1' is a valid item.
304 void extractExps(MathArray & ar)
306 //lyxerr << "\nExps from: " << ar << "\n";
308 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
309 MathArray::iterator it = ar.begin() + i;
312 MathCharInset const * p = (*it)->asCharInset();
313 if (!p || p->getChar() != 'e')
316 // we need an exponent but no subscript
317 MathScriptInset * sup = (*(it + 1))->asScriptInset();
318 if (!sup || sup->hasDown())
321 // create a proper exp-inset as replacement
322 *it = new MathExFuncInset("exp", sup->cell(1));
325 //lyxerr << "\nExps to: " << ar << "\n";
330 // extract det(...) from |matrix|
// Matching delimiter insets are replaced in place by
// MathExFuncInset("det", ...) built from the delimiter's cell(0).
332 void extractDets(MathArray & ar)
334 //lyxerr << "\ndet from: " << ar << "\n";
335 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
336 MathDelimInset * del = (*it)->asDelimInset();
341 *it = new MathExFuncInset("det", del->cell(0));
343 //lyxerr << "\ndet to: " << ar << "\n";
// True for the ASCII digits '0'..'9' and for '.', i.e. for the characters
// that may appear in a number literal.
351 bool isDigitOrSimilar(char c)
353 return ('0' <= c && c <= '9') || c == '.';
// Walks the consecutive char insets at the start of [it, end) and appends
// the chars that isDigitOrSimilar() accepts to the result string.
357 // returns sequence of digits
359 (MathArray::const_iterator it, MathArray::const_iterator end)
362 for (; it != end && (*it)->asCharInset(); ++it) {
363 if (!isDigitOrSimilar((*it)->getChar()))
365 s += (*it)->getChar();
// Collapse runs of digit-like char insets in 'ar' into MathNumberInsets:
// the first item of a run is reset to a MathNumberInset holding the digit
// string and the items from index i + 1 up to i + s.size() are erased
// (presumably a half-open index range -- TODO confirm against
// MathArray::erase).
371 void extractNumbers(MathArray & ar)
373 //lyxerr << "\nNumbers from: " << ar << "\n";
374 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
375 if (!ar[i]->asCharInset())
377 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
380 string s = digitSequence(ar.begin() + i, ar.end());
382 ar[i].reset(new MathNumberInset(s));
383 ar.erase(i + 1, i + s.size());
385 //lyxerr << "\nNumbers to: " << ar << "\n";
391 // search delimiters
// True if 'p' converts to the string "(" (see testString).
394 bool testOpenParan(MathInset * p)
396 return testString(p, "(");
// True if 'p' converts to the string ")" (see testString).
400 bool testCloseParan(MathInset * p)
402 return testString(p, ")");
// Wraps 'ar' into a newly allocated MathDelimInset with "(" and ")" as
// delimiters; the raw pointer is handed to the caller.
406 MathInset * replaceDelims(const MathArray & ar)
408 return new MathDelimInset("(", ")", ar);
412 // replace '('...')' sequences by a real MathDelimInset
// Delegates to replaceNested() with the parenthesis tests as open/close
// predicates and replaceDelims() as the replacement factory.
413 void extractDelims(MathArray & ar)
415 //lyxerr << "\nDelims from: " << ar << "\n";
416 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
417 //lyxerr << "\nDelims to: " << ar << "\n";
423 // search well-known functions
427 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
428 // assume 'extractDelims' ran before
// An exponent found between the name and the argument is collected in 'exp'
// and re-inserted behind the new function inset.
429 void extractFunctions(MathArray & ar)
431 // we need at least two items...
435 //lyxerr << "\nFunctions from: " << ar << "\n";
436 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
437 MathArray::iterator it = ar.begin() + i;
438 MathArray::iterator jt = it + 1;
442 if ((*it)->asUnknownInset()) {
443 // it certainly is if it is well known...
444 name = (*it)->name();
446 // is this a user defined function?
447 // it is probably not, if it doesn't have a name.
448 if (!extractString((*it).nucleus(), name))
450 // it is not if it has no argument
453 // guess so, if this is followed by
454 // a DelimInset with a single item in the cell
455 MathDelimInset * del = (*jt)->asDelimInset();
456 if (!del || del->cell(0).size() != 1)
458 // fall through into main branch
461 // do we have an exponent like in
462 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
464 extractScript(exp, jt, ar.end());
466 // create a proper inset as replacement
467 MathExFuncInset * p = new MathExFuncInset(name);
469 // jt points to the "argument". Get hold of this.
470 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
472 // replace the function name by a real function inset
475 // remove the source of the argument from the array
476 ar.erase(it + 1, st);
478 // re-insert exponent
479 ar.insert(i + 1, exp);
480 //lyxerr << "\nFunctions to: " << ar << "\n";
// True if 'p' is a symbol inset whose name equals 'name'.
489 bool testSymbol(MathInset * p, string const & name)
491 return p->asSymbolInset() && p->asSymbolInset()->name() == name;
// True if 'p' is the symbol named "int".
495 bool testIntSymbol(MathInset * p)
497 return testSymbol(p, "int");
// Integral test on 'p'. One accepted form is a script inset whose non-empty
// nucleus ends in the "int" symbol; presumably the bare "int" symbol is
// accepted as well (extractIntegrals distinguishes the two) -- TODO confirm.
501 bool testIntegral(MathInset * p)
506 && p->asScriptInset()->nuc().size()
507 && testIntSymbol(p->asScriptInset()->nuc().back().nucleus()) );
// True if 'p' converts to the string "d"; used as the closing test when
// searching for the end of an integral.
512 bool testIntDiff(MathInset * p)
514 return testString(p, "d");
// Cells of the created MathExIntInset as filled below: 0 = core between the
// integral sign and the 'd', 1 = differential, 2 = lower script,
// 3 = upper script.
518 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
519 // assume 'extractDelims' ran before
520 void extractIntegrals(MathArray & ar)
522 // we need at least three items...
526 //lyxerr << "\nIntegrals from: " << ar << "\n";
527 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
528 MathArray::iterator it = ar.begin() + i;
531 MathArray::iterator jt =
532 endNestSearch(it, ar.end(), testIntegral, testIntDiff);
534 // something sensible found?
538 // is this an integral name?
539 if (!testIntegral(it->nucleus()))
542 // core is the part from behind the scripts to the 'd'
543 MathExIntInset * p = new MathExIntInset("int");
545 // handle scripts if available
546 if (!testIntSymbol(it->nucleus())) {
547 p->cell(2) = it->nucleus()->asScriptInset()->down();
548 p->cell(3) = it->nucleus()->asScriptInset()->up();
550 p->cell(0) = MathArray(it + 1, jt);
552 // use the "thing" behind the 'd' as differential
553 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
556 ar.erase(it + 1, tt);
559 //lyxerr << "\nIntegrals to: " << ar << "\n";
// True if the atom converts to the string "="; takes a MathAtom so that it
// can be used directly as a find_if predicate over a MathArray.
568 bool testEqualSign(MathAtom const & at)
570 return testString(at.nucleus(), "=");
// True if 'p' is the symbol named "sum".
574 bool testSumSymbol(MathInset * p)
576 return testSymbol(p, "sum");
// Sum test on 'p'. One accepted form is a script inset whose non-empty
// nucleus ends in the "sum" symbol; presumably the bare "sum" symbol is
// accepted as well -- TODO confirm.
580 bool testSum(MathInset * p)
585 && p->asScriptInset()->nuc().size()
586 && testSumSymbol(p->asScriptInset()->nuc().back().nucleus()) );
// Cells of the created MathExIntInset as filled below: 0 = core behind the
// sum sign, 1 = summation index, 2 = lower bound, 3 = upper bound.
590 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
591 // assume 'extractDelims' ran before
592 void extractSums(MathArray & ar)
594 // we need at least two items...
598 //lyxerr << "\nSums from: " << ar << "\n";
599 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
600 MathArray::iterator it = ar.begin() + i;
602 // is this a sum name?
603 if (!testSum(it->nucleus()))
606 // create a proper inset as replacement
607 MathExIntInset * p = new MathExIntInset("sum");
609 // collect lower bound and summation index
610 MathScriptInset * sub = (*it)->asScriptInset();
611 if (sub && sub->hasDown()) {
612 // try to figure out the summation index from the subscript
// note: this local 'ar' (the subscript) shadows the function parameter
// 'ar' inside this if-block
613 MathArray & ar = sub->down();
614 MathArray::iterator xt =
615 find_if(ar.begin(), ar.end(), &testEqualSign);
616 if (xt != ar.end()) {
617 // we found a '=', use everything in front of that as index,
618 // and everything behind as lower index
619 p->cell(1) = MathArray(ar.begin(), xt);
620 p->cell(2) = MathArray(xt + 1, ar.end());
622 // use everything as summation index, don't use scripts.
627 // collect upper bound
628 if (sub && sub->hasUp())
629 p->cell(3) = sub->up();
631 // use something behind the script as core
632 MathArray::iterator tt = extractArgument(p->cell(0), it + 1, ar.end());
635 ar.erase(it + 1, tt);
638 //lyxerr << "\nSums to: " << ar << "\n";
643 // search differential stuff
646 // tests for 'd' or '\partial'
// NOTE(review): only the string "d" is compared in the return expression;
// a test for '\partial' is not present there.
647 bool testDiffItem(MathAtom const & at)
649 return testString(at.nucleus(), "d");
// True if 'ar' is non-empty and its first item passes testDiffItem().
653 bool testDiffArray(MathArray const & ar)
655 return ar.size() && testDiffItem(ar.front());
// True if 'p' is a fraction inset whose cell(0) and cell(1) both start
// with a differential item (see testDiffArray), e.g. d.../d...
659 bool testDiffFrac(MathInset * p)
661 MathFracInset * f = p->asFracInset();
662 return f && testDiffArray(f->cell(0)) && testDiffArray(f->cell(1));
666 // is this something like ^number?
// '*it' has to be a script inset and convertible by extractString(); the
// resulting text is then parsed through an istringstream (presumably into
// 'i', with the bool result reporting success -- TODO confirm).
667 bool extractDiffExponent(MathArray::iterator it, int & i)
669 if (!(*it)->asScriptInset())
673 if (!extractString((*it).nucleus(), s))
675 istringstream is(s.c_str());
// Replace "differential fractions" (see testDiffFrac) in 'ar' by
// MathDiffInsets. cell(0) of the new inset receives the differentiated
// expression, taken from the numerator and/or from the argument following
// the fraction; every differentiation variable found in the denominator is
// registered through addDer(), repeated 'mult' times for forms like
// d.../dx^n. The items consumed behind the fraction are erased.
681 void extractDiff(MathArray & ar)
683 //lyxerr << "\nDiffs from: " << ar << "\n";
684 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
685 MathArray::iterator it = ar.begin() + i;
687 // is this a "differential fraction"?
688 if (!testDiffFrac(it->nucleus()))
691 MathFracInset * f = (*it)->asFracInset();
693 lyxerr << "should not happen\n";
697 // create a proper diff inset
698 MathDiffInset * diff = new MathDiffInset;
700 // collect function, let jt point behind last used item
701 MathArray::iterator jt = it + 1;
703 MathArray & numer = f->cell(0);
704 if (numer.size() > 1 && numer[1]->asScriptInset()) {
705 // this is something like d^n f(x) / d... or d^n / d...
708 if (numer.size() > 2)
709 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
711 jt = extractArgument(diff->cell(0), jt, ar.end());
713 // simply d f(x) / d... or d/d...
714 if (numer.size() > 1)
715 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
717 jt = extractArgument(diff->cell(0), jt, ar.end());
720 // collect denominator parts
721 MathArray & denom = f->cell(1);
722 for (MathArray::iterator dt = denom.begin(); dt != denom.end();) {
724 MathArray::iterator et = find_if(dt + 1, denom.end(), &testDiffItem);
727 MathArray::iterator st = et - 1;
728 MathScriptInset * script = (*st)->asScriptInset();
729 if (script && script->hasUp()) {
730 // things like d.../dx^n
732 if (extractNumber(script->up(), mult)) {
733 //lyxerr << "mult: " << mult << endl;
// note: this 'i' shadows the index of the outermost loop
734 for (int i = 0; i < mult; ++i)
735 diff->addDer(MathArray(dt + 1, st));
739 diff->addDer(MathArray(dt + 1, et));
745 ar.erase(it + 1, jt);
748 //lyxerr << "\nDiffs to: " << ar << "\n";
// Tests whether the atom is one of the arrow symbols "to" or "rightarrow";
// takes a MathAtom so that it can serve as a find_if predicate.
757 bool testRightArrow(MathAtom const & at)
760 testSymbol(at.nucleus(), "to") ||
761 testSymbol(at.nucleus(), "rightarrow");
766 // replace '\lim_{x->x0} f(x)' sequences by a real MathLimInset
767 // assume 'extractDelims' ran before
// The loop bound 'i + 2 < ar.size()' guarantees that 'it + 1' (the
// subscript) and 'it + 2' (start of the core) are valid items.
768 void extractLims(MathArray & ar)
770 // we need at least three items...
774 //lyxerr << "\nLimits from: " << ar << "\n";
775 for (MathArray::size_type i = 0; i + 2 < ar.size(); ++i) {
776 MathArray::iterator it = ar.begin() + i;
778 // is this a limit function?
779 if (!testSymbol(it->nucleus(), "lim"))
782 // the next one must be a subscript (without superscript)
783 MathScriptInset * sub = (*(it + 1))->asScriptInset();
784 if (!sub || !sub->hasDown() || sub->hasUp())
787 // and it must contain a -> symbol
788 MathArray & s = sub->down();
789 MathArray::iterator st = find_if(s.begin(), s.end(), &testRightArrow);
793 // the -> splits the subscript into x and x0
794 MathArray x = MathArray(s.begin(), st);
795 MathArray x0 = MathArray(st + 1, s.end());
797 // use something behind the script as core
799 MathArray::iterator tt = extractArgument(f, it + 2, ar.end());
801 // create a proper inset as replacement
802 MathLimInset * p = new MathLimInset(f, x, x0);
805 ar.erase(it + 1, tt);
808 //lyxerr << "\nLimits to: " << ar << "\n";
// Top-level driver used by the exporters in this file: runs the individual
// extract* passes over 'ar' in a fixed order, modifying it in place.
816 void extractStructure(MathArray & ar)
818 extractIntegrals(ar);
824 extractFunctions(ar);
// Writes a MathArray to a WriteStream while maintaining the stream's
// firstitem() flag: it is set to true before the loop and reset to false
// inside it. The loop runs over a local 'ar' (presumably a working copy of
// 'dat' -- TODO confirm).
833 void write(MathArray const & dat, WriteStream & wi)
837 wi.firstitem() = true;
838 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
840 wi.firstitem() = false;
// Calls normalize(os) on every item of 'ar', in order.
845 void normalize(MathArray const & ar, NormalStream & os)
847 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
848 (*it)->normalize(os);
// Octave export: extractStructure() is run on a local, modifiable 'ar'
// (presumably a copy of the const input 'dat' -- TODO confirm) whose items
// are then processed in order for the OctaveStream.
852 void octavize(MathArray const & dat, OctaveStream & os)
855 extractStructure(ar);
856 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Maple export: extractStructure() is run on a local, modifiable 'ar'
// (presumably a copy of the const input 'dat' -- TODO confirm) whose items
// are then processed in order for the MapleStream.
861 void maplize(MathArray const & dat, MapleStream & os)
864 extractStructure(ar);
865 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Mathematica export: extractStructure() is run on a local, modifiable 'ar'
// (presumably a copy of the const input 'dat' -- TODO confirm), then
// mathematicize(os) is called on each of its items in order.
870 void mathematicize(MathArray const & dat, MathematicaStream & os)
873 extractStructure(ar);
874 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
875 (*it)->mathematicize(os);
// MathML export: extractStructure() is run on a local, modifiable 'ar'
// (presumably a copy of the const input 'dat' -- TODO confirm). An array of
// exactly one item is written through its nucleus directly; otherwise
// mathmlize(os) is called on each item in order.
879 void mathmlize(MathArray const & dat, MathMLStream & os)
882 extractStructure(ar);
885 else if (ar.size() == 1)
886 os << ar.begin()->nucleus();
889 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
890 (*it)->mathmlize(os);
// Runs 'cmd' through a shell command line that echoes 'data' into the
// command's stdin and redirects its stdout to a temporary file. The file
// is read back into 'out' and removed afterwards; the command line and the
// result are logged to lyxerr.
// NOTE(review): 'data' is placed between single quotes without escaping,
// so a single quote inside 'data' ends the quoting and the rest is
// interpreted by the shell -- escape it if 'data' can hold arbitrary text.
900 string captureOutput(string const & cmd, string const & data)
902 string outfile = lyx::tempName(string(), "mathextern");
903 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
904 lyxerr << "calling: " << full << endl;
906 dummy.startscript(Systemcall::Wait, full);
907 string out = GetFileContents(outfile);
908 lyx::unlink(outfile);
909 lyxerr << "result: '" << out << "'" << endl;
// Sends 'ar' to Maple and parses the LaTeX it prints back.
// A Maple header tweaks the latex() output, then "latex(<extra>(<expr>));"
// is run through "maple -q". Before that, up to 100 rounds of "mint" syntax
// checks try to repair the expression by inserting a '*' at the position of
// the reported error caret.
914 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
916 string header = "readlib(latex):\n";
918 // remove the \\it for variable names
919 //"#`latex/csname_font` := `\\it `:"
921 "`latex/csname_font` := ``:\n";
923 // export matrices in (...) instead of [...]
925 "`latex/latex/matrix` := "
926 "subs(`[`=`(`, `]`=`)`,"
927 "eval(`latex/latex/matrix`)):\n";
929 // have products rendered with \\cdot (substituted for \\,) as a proper '*'
931 "`latex/latex/*` := "
932 "subs(`\\,`=`\\cdot `,"
933 "eval(`latex/latex/*`)):\n";
935 // remove spurious \\noalign{\\medskip} in matrix output
937 "`latex/latex/matrix`:= "
938 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
939 "eval(`latex/latex/matrix`)):\n";
941 //"#`latex/latex/symbol` "
942 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
944 string trailer = "quit;";
948 string expr = os.str().c_str();
949 lyxerr << "ar: '" << ar << "'\n";
950 lyxerr << "ms: '" << os.str() << "'\n";
952 for (int i = 0; i < 100; ++i) { // at most 100 attempts
953 // try to fix missing '*' the hard way by using mint
955 // ... > echo "1A;" | mint -i 1 -S -s -q
958 // Probably missing an operator such as * p
960 lyxerr << "checking expr: '" << expr << "'\n";
961 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
963 break; // expression syntax is ok
964 istringstream is(out.c_str());
967 if (line.find("on line") != 0)
968 break; // error message not identified
970 string::size_type pos = line.find('^');
971 if (pos == string::npos || pos < 15)
972 break; // caret position not found
973 pos -= 15; // skip the "on line ..." part
// NOTE(review): 'pos' comes from mint's output and is not checked against
// expr.size() before it is used for indexing and insertion.
974 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
975 break; // two '*' in a row are definitely bad
976 expr.insert(pos, "*");
979 string full = "latex(" + extra + '(' + expr + "));";
980 string out = captureOutput("maple -q", header + full + trailer);
986 mathed_parse_cell(res, out);
// Sends 'ar' to Octave ("octave -q 2>&1") and converts the answer back.
// The first (string) parameter is unnamed and therefore unused. Up to 100
// rounds try to repair "parse error:" results by inserting a '*' at the
// position of the caret that Octave prints below the ">>> " echo line. The
// output is finally parsed as a MathArrayInset: a 1x1 result is appended as
// its single cell, anything else is wrapped in a "(" ")" delimiter inset.
991 MathArray pipeThroughOctave(string const &, MathArray const & ar)
996 string expr = os.str().c_str();
999 lyxerr << "pipe: ar: '" << ar << "'\n";
1000 lyxerr << "pipe: expr: '" << expr << "'\n";
1002 for (int i = 0; i < 100; ++i) { // at most 100 attempts
1004 // try to fix missing '*' the hard way
1006 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
1009 lyxerr << "checking expr: '" << expr << "'\n";
1010 out = captureOutput("octave -q 2>&1", expr);
1011 lyxerr << "checking out: '" << out << "'\n";
1013 // leave loop if expression syntax is probably ok
1014 if (out.find("parse error:") == string::npos)
1017 // search line with single caret
1018 istringstream is(out.c_str());
1022 lyxerr << "skipping line: '" << line << "'\n";
1023 if (line.find(">>> ") != string::npos)
1027 // found line with error, next line is the one with caret
1029 string::size_type pos = line.find('^');
1030 lyxerr << "caret line: '" << line << "'\n";
1031 lyxerr << "found caret at pos: '" << pos << "'\n";
1032 if (pos == string::npos || pos < 4)
1033 break; // caret position not found
1034 pos -= 4; // skip the ">>> " part
// NOTE(review): 'pos' comes from Octave's output and is not checked against
// expr.size() before it is used for indexing and insertion.
1035 if (expr[pos] == '*')
1036 break; // two '*' in a row are definitely bad
1037 expr.insert(pos, "*");
1044 out = out.substr(6);
1046 // parse output as matrix or single number
1047 MathAtom at(new MathArrayInset("array", out));
1048 MathArrayInset const * mat = at.nucleus()->asArrayInset();
1050 if (mat->ncols() == 1 && mat->nrows() == 1)
1051 res.append(mat->cell(0));
1053 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1054 res.back()->cell(0).push_back(at);
// Dispatches 'ar' to the external program selected by 'lang'.
// "octave" and "maple" are handled by their dedicated helpers. For any
// other language a script named "extern_<lang>" is looked up with
// LibFileSearch("mathed", ...), fed an expression that starts with
// "[<extra> " through captureOutput(), and its output is parsed with
// mathed_parse_cell(). A missing script is reported on lyxerr.
1062 MathArray pipeThroughExtern(string const & lang, string const & extra,
1063 MathArray const & ar)
1065 if (lang == "octave")
1066 return pipeThroughOctave(extra, ar);
1068 if (lang == "maple")
1069 return pipeThroughMaple(extra, ar);
1071 // create normalized expression
1073 NormalStream ns(os);
1074 os << "[" << extra << ' ';
1077 string data = os.str().c_str();
1079 // search external script
1080 string file = LibFileSearch("mathed", "extern_" + lang);
1082 lyxerr << "converter to '" << lang << "' not found\n";
1086 // run external script
1087 string out = captureOutput(file, data);
1089 mathed_parse_cell(res, out);