1 // This file contains most of the magic that extracts "context
2 // information" from the unstructured layout-oriented stuff in an
7 #include "math_amsarrayinset.h"
8 #include "math_arrayinset.h"
9 #include "math_charinset.h"
10 #include "math_deliminset.h"
11 #include "math_diffinset.h"
12 #include "math_exfuncinset.h"
13 #include "math_exintinset.h"
14 #include "math_fracinset.h"
15 #include "math_liminset.h"
16 #include "math_matrixinset.h"
17 #include "math_mathmlstream.h"
18 #include "math_numberinset.h"
19 #include "math_scriptinset.h"
20 #include "math_stringinset.h"
21 #include "math_symbolinset.h"
22 #include "math_unknowninset.h"
23 #include "math_parser.h"
26 #include "support/lyxlib.h"
27 #include "support/systemcall.h"
28 #include "support/filetools.h"
33 using std::istringstream;
// stream output operator for a MathArray
38 ostream & operator<<(ostream & os, MathArray const & ar)
46 // function type for predicates that test a single MathAtom
47 typedef bool TestItemFunc(MathAtom const &);
49 // function type for callbacks that build the replacement MathAtom
// for a subexpression that is handed over as a MathArray
50 typedef MathAtom ReplaceArgumentFunc(const MathArray & ar);
54 // try to extract a super/subscript
55 // modify iterator position to point behind the thing
// 'pos' is passed by reference so that the caller sees the new position;
// 'last' is the end of the range (callers in this file pass ar.end()
// or their own 'last').
56 bool extractScript(MathArray & ar,
57 MathArray::iterator & pos, MathArray::iterator last)
59 // nothing to get here
63 // is this a scriptinset?
64 if (!(*pos)->asScriptInset())
67 // it is a scriptinset, use it.
74 // try to extract an "argument" to some function.
75 // returns position behind the argument
// callers pass the cell that should receive the argument as 'ar'.
// the trailing string parameter is unnamed and defaults to "".
76 MathArray::iterator extractArgument(MathArray & ar,
77 MathArray::iterator pos, MathArray::iterator last, string const & = "")
79 // nothing to get here
83 // something delimited _is_ an argument
84 if ((*pos)->asDelimInset()) {
89 // always take the first thing, no matter what it is
92 // go ahead if possible
97 // if the next item is a subscript, it most certainly belongs to the
99 extractScript(ar, pos, last);
103 // but it might be more than that.
104 // FIXME: not implemented
105 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
106 // // always take the first thing, no matter
108 // ar.push_back(*it);
116 // returns sequence of char with same code starting at it up to end
117 // it might be less, though...
119 (MathArray::const_iterator it, MathArray::const_iterator end)
// collect the leading run of char insets; stop at the first item for
// which asCharInset() fails or when 'end' is reached
122 for (; it != end && (*it)->asCharInset(); ++it)
123 s += (*it)->getChar();
// collapse each run of adjacent char insets in 'ar' into one
// MathStringInset
128 void extractStrings(MathArray & ar)
130 //lyxerr << "\nStrings from: " << ar << "\n";
131 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
132 if (!ar[i]->asCharInset())
134 string s = charSequence(ar.begin() + i, ar.end());
// the first item of the run is overwritten by the string inset, the
// remaining s.size() - 1 items of the run are erased
135 ar[i] = MathAtom(new MathStringInset(s));
136 ar.erase(i + 1, i + s.size());
138 //lyxerr << "\nStrings to: " << ar << "\n";
// replace grid-like insets in 'ar' by MathMatrixInsets
142 void extractMatrices(MathArray & ar)
144 //lyxerr << "\nMatrices from: " << ar << "\n";
145 // first pass for explicitly delimited stuff
// (a delim inset whose first cell starts with a grid inset)
146 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
147 if (!ar[i]->asDelimInset())
149 MathArray const & arr = ar[i]->asDelimInset()->cell(0);
152 if (!arr.front()->asGridInset())
// the whole delim inset is replaced by a matrix built from that grid
154 ar[i] = MathAtom(new MathMatrixInset(*(arr.front()->asGridInset())));
157 // second pass for AMS "pmatrix" etc
158 for (MathArray::size_type i = 0; i < ar.size(); ++i)
159 if (ar[i]->asAMSArrayInset())
160 ar[i] = MathAtom(new MathMatrixInset(*(ar[i]->asGridInset())));
161 //lyxerr << "\nMatrices to: " << ar << "\n";
165 // convert this inset somehow to a string
// the text is written to 'str'
166 bool extractString(MathAtom const & at, string & str)
// a single character becomes a one-character string
169 str = string(1, at->getChar());
// a string inset yields its stored text
172 if (at->asStringInset()) {
173 str = at->asStringInset()->str();
180 // convert this inset somehow to a number
// integer variant: the leading char sequence of 'ar' is fed to an
// istringstream; 'i' is the output parameter
181 bool extractNumber(MathArray const & ar, int & i)
183 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// floating point variant: the leading char sequence of 'ar' is fed to an
// istringstream; 'd' is the output parameter
189 bool extractNumber(MathArray const & ar, double & d)
191 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// true if 'at' can be converted to a string by extractString() and that
// string equals 'str'
197 bool testString(MathAtom const & at, const string & str)
200 return extractString(at, s) && str == s;
204 // search end of nested sequence
// 'testOpen' and 'testClose' are the predicates for opening and closing
// items; the scan starts at 'it' with nesting level 0 and never goes
// past 'last'.
205 MathArray::iterator endNestSearch(
206 MathArray::iterator it,
207 MathArray::iterator last,
208 TestItemFunc testOpen,
209 TestItemFunc testClose
212 for (int level = 0; it != last; ++it) {
224 // replace nested sequences by real insets
// for every item accepted by 'testOpen' the end of the sequence is located
// with endNestSearch() and the enclosed items are handed to 'replaceArg',
// whose result takes the place of the opening item.
227 TestItemFunc testOpen,
228 TestItemFunc testClose,
229 ReplaceArgumentFunc replaceArg
232 // use indices rather than iterators for the loop because we are going
233 // to modify the array.
234 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
235 // check whether this is the begin of the sequence
236 if (!testOpen(ar[i]))
239 // search end of sequence
240 MathArray::iterator it = ar.begin() + i;
241 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
245 // replace the original stuff by the new inset
246 ar.erase(it + 1, jt + 1);
248 // create a proper inset as replacement
// NOTE(review): the erase above has already removed [it + 1, jt + 1) from
// 'ar', so 'jt' no longer refers to the closing item when the range
// (it + 1, jt) is copied here -- confirm that this order is intended.
249 ar[i] = replaceArg(MathArray(it + 1, jt));
256 // split scripts into separate super- and subscript insets. sub goes in
260 void splitScripts(MathArray & ar)
262 //lyxerr << "\nScripts from: " << ar << "\n";
263 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
264 // is this script inset?
265 if (!ar[i]->asScriptInset())
268 // no problem if we don't have both...
269 if (!ar[i]->asScriptInset()->hasUp())
271 if (!ar[i]->asScriptInset()->hasDown())
274 // create extra script inset and move superscript over
275 MathScriptInset * p = ar[i].nucleus()->asScriptInset();
276 MathScriptInset * q = new MathScriptInset(true);
// the superscript cell is swapped into the new inset before the
// superscript is removed from the original one
277 std::swap(q->up(), p->up());
278 p->removeScript(true);
280 // insert new inset behind
282 ar.insert(i, MathAtom(q));
284 //lyxerr << "\nScripts to: " << ar << "\n";
// replace an 'e' that is followed by a pure superscript by a
// MathExFuncInset named "exp"
292 void extractExps(MathArray & ar)
294 //lyxerr << "\nExps from: " << ar << "\n";
// stop one item before the end because ar[i + 1] is inspected below
295 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
297 if (ar[i]->getChar() != 'e')
300 // we need an exponent but no subscript
301 MathScriptInset const * sup = ar[i + 1]->asScriptInset();
302 if (!sup || sup->hasDown())
305 // create a proper exp-inset as replacement
// (cell(1) of the script inset becomes the argument)
306 ar[i] = MathAtom(new MathExFuncInset("exp", sup->cell(1)));
309 //lyxerr << "\nExps to: " << ar << "\n";
314 // extract det(...) from |matrix|
316 void extractDets(MathArray & ar)
318 //lyxerr << "\ndet from: " << ar << "\n";
319 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
320 MathDelimInset const * del = (*it)->asDelimInset();
// the delim inset is replaced by a "det" function applied to its first cell
325 *it = MathAtom(new MathExFuncInset("det", del->cell(0)));
327 //lyxerr << "\ndet to: " << ar << "\n";
// true for the decimal digits '0'..'9' and for the decimal point '.'
335 bool isDigitOrSimilar(char c)
337 return ('0' <= c && c <= '9') || c == '.';
341 // returns sequence of digits
343 (MathArray::const_iterator it, MathArray::const_iterator end)
// walk over the leading char insets and collect their characters as long
// as isDigitOrSimilar() accepts them
346 for (; it != end && (*it)->asCharInset(); ++it) {
347 if (!isDigitOrSimilar((*it)->getChar()))
349 s += (*it)->getChar();
// collapse each run of digit-like char insets in 'ar' into one
// MathNumberInset
355 void extractNumbers(MathArray & ar)
357 //lyxerr << "\nNumbers from: " << ar << "\n";
358 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
359 if (!ar[i]->asCharInset())
361 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
364 string s = digitSequence(ar.begin() + i, ar.end());
// the first item of the run is overwritten by the number inset, the
// remaining s.size() - 1 items of the run are erased
366 ar[i] = MathAtom(new MathNumberInset(s));
367 ar.erase(i + 1, i + s.size());
369 //lyxerr << "\nNumbers to: " << ar << "\n";
375 // search delimiters
// true if 'at' is the string "("
378 bool testOpenParan(MathAtom const & at)
380 return testString(at, "(");
// true if 'at' is the string ")"
384 bool testCloseParan(MathAtom const & at)
386 return testString(at, ")");
// wrap 'ar' in a MathDelimInset with "(" and ")" as delimiters
390 MathAtom replaceDelims(const MathArray & ar)
392 return MathAtom(new MathDelimInset("(", ")", ar));
396 // replace '('...')' sequences by a real MathDelimInset
// (replaceNested() does the work, driven by the paren predicates and
// replaceDelims())
397 void extractDelims(MathArray & ar)
399 //lyxerr << "\nDelims from: " << ar << "\n";
400 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
401 //lyxerr << "\nDelims to: " << ar << "\n";
407 // search well-known functions
411 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
412 // assume 'extractDelims' ran before
413 void extractFunctions(MathArray & ar)
415 // we need at least two items...
419 //lyxerr << "\nFunctions from: " << ar << "\n";
420 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
// 'it' is the candidate function name, 'jt' the item right behind it
421 MathArray::iterator it = ar.begin() + i;
422 MathArray::iterator jt = it + 1;
426 if ((*it)->asUnknownInset()) {
427 // it certainly is if it is well known...
428 name = (*it)->name();
430 // is this a user defined function?
431 // it is probably not, if it doesn't have a name.
432 if (!extractString(*it, name))
434 // it is not if it has no argument
437 // guess so, if this is followed by
438 // a DelimInset with a single item in the cell
439 MathDelimInset const * del = (*jt)->asDelimInset();
440 if (!del || del->cell(0).size() != 1)
442 // fall through into main branch
445 // do we have an exponent like in
446 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
// (extractScript() takes 'jt' by reference, so it can move it on)
448 extractScript(exp, jt, ar.end());
450 // create a proper inset as replacement
451 MathExFuncInset * p = new MathExFuncInset(name);
453 // jt points to the "argument". Get hold of this.
454 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
456 // replace the function name by a real function inset
459 // remove the source of the argument from the array
460 ar.erase(it + 1, st);
462 // re-insert exponent
463 ar.insert(i + 1, exp);
464 //lyxerr << "\nFunctions to: " << ar << "\n";
// true if 'at' is a symbol inset whose name equals 'name'
473 bool testSymbol(MathAtom const & at, string const & name)
475 return at->asSymbolInset() && at->asSymbolInset()->name() == name;
// true if 'at' is the symbol named "int"
479 bool testIntSymbol(MathAtom const & at)
481 return testSymbol(at, "int");
485 bool testIntegral(MathAtom const & at)
// a script inset qualifies if its nucleus is not empty and the last item
// of the nucleus is the int symbol
489 ( at->asScriptInset()
490 && at->asScriptInset()->nuc().size()
491 && testIntSymbol(at->asScriptInset()->nuc().back()) );
// true if 'at' is the string "d" (the differential marker of an integral)
496 bool testIntDiff(MathAtom const & at)
498 return testString(at, "d");
502 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
503 // assume 'extractDelims' ran before
504 void extractIntegrals(MathArray & ar)
506 // we need at least three items...
510 //lyxerr << "\nIntegrals from: " << ar << "\n";
511 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
512 MathArray::iterator it = ar.begin() + i;
// integrals may nest: an integral sign opens, a 'd' closes
515 MathArray::iterator jt =
516 endNestSearch(it, ar.end(), testIntegral, testIntDiff);
518 // something sensible found?
522 // is this an integral name?
523 if (!testIntegral(*it))
526 // core is the part from behind the scripts up to the 'd'
527 MathExIntInset * p = new MathExIntInset("int");
529 // handle scripts if available
// (cell(2) takes the subscript, cell(3) the superscript)
530 if (!testIntSymbol(*it)) {
531 p->cell(2) = (*it)->asScriptInset()->down();
532 p->cell(3) = (*it)->asScriptInset()->up();
// cell(0) takes everything between the integral sign and the 'd'
534 p->cell(0) = MathArray(it + 1, jt);
536 // use the "thing" behind the 'd' as differential
537 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
// drop everything that has been moved into the new inset
540 ar.erase(it + 1, tt);
543 //lyxerr << "\nIntegrals to: " << ar << "\n";
// true if 'at' is the string "="
552 bool testEqualSign(MathAtom const & at)
554 return testString(at, "=");
// true if 'p' is the symbol named "sum"
558 bool testSumSymbol(MathAtom const & p)
560 return testSymbol(p, "sum");
564 bool testSum(MathAtom const & at)
// a script inset qualifies if its nucleus is not empty and the last item
// of the nucleus is the sum symbol
568 ( at->asScriptInset()
569 && at->asScriptInset()->nuc().size()
570 && testSumSymbol(at->asScriptInset()->nuc().back()) );
574 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
575 // assume 'extractDelims' ran before
576 void extractSums(MathArray & ar)
578 // we need at least two items...
582 //lyxerr << "\nSums from: " << ar << "\n";
583 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
584 MathArray::iterator it = ar.begin() + i;
586 // is this a sum name?
590 // create a proper inset as replacement
591 MathExIntInset * p = new MathExIntInset("sum");
593 // collect lower bound and summation index
594 MathScriptInset const * sub = ar[i]->asScriptInset();
595 if (sub && sub->hasDown()) {
596 // try to figure out the summation index from the subscript
// NOTE(review): this local 'ar' shadows the function parameter 'ar'
597 MathArray const & ar = sub->down();
598 MathArray::const_iterator xt =
599 find_if(ar.begin(), ar.end(), &testEqualSign);
600 if (xt != ar.end()) {
601 // we found a '=', use everything in front of that as index,
602 // and everything behind as lower index
603 p->cell(1) = MathArray(ar.begin(), xt);
604 p->cell(2) = MathArray(xt + 1, ar.end());
606 // use everything as summation index, don't use scripts.
611 // collect upper bound
612 if (sub && sub->hasUp())
613 p->cell(3) = sub->up();
615 // use something behind the script as core
616 MathArray::iterator tt = extractArgument(p->cell(0), it + 1, ar.end());
// drop everything that has been moved into the new inset
619 ar.erase(it + 1, tt);
622 //lyxerr << "\nSums to: " << ar << "\n";
627 // search differential stuff
630 // tests for 'd' or '\partial'
// NOTE(review): only the string "d" is tested below; '\partial' is not
// checked here.
631 bool testDiffItem(MathAtom const & at)
633 return testString(at, "d");
// true if 'ar' is not empty and starts with a differential item
637 bool testDiffArray(MathArray const & ar)
639 return ar.size() && testDiffItem(ar.front());
643 bool testDiffFrac(MathAtom const & at)
// both cells of the fraction (cell(0) and cell(1)) have to start with a
// differential item
647 && testDiffArray(at->asFracInset()->cell(0))
648 && testDiffArray(at->asFracInset()->cell(1));
// replace "differential fractions" (see testDiffFrac()) by a MathDiffInset
652 void extractDiff(MathArray & ar)
654 //lyxerr << "\nDiffs from: " << ar << "\n";
655 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
656 MathArray::iterator it = ar.begin() + i;
658 // is this a "differential fraction"?
659 if (!testDiffFrac(*it))
662 MathFracInset const * f = (*it)->asFracInset();
664 lyxerr << "should not happen\n";
668 // create a proper diff inset
669 MathDiffInset * diff = new MathDiffInset;
671 // collect function, let jt point behind last used item
672 MathArray::iterator jt = it + 1;
// cell(0) of the fraction is used as numerator, cell(1) as denominator
674 MathArray const & numer = f->cell(0);
675 if (numer.size() > 1 && numer[1]->asScriptInset()) {
676 // this is something like d^n f(x) / d... or d^n / d...
// items behind the 'd' and its script belong to the function
679 if (numer.size() > 2)
680 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
682 jt = extractArgument(diff->cell(0), jt, ar.end());
684 // simply d f(x) / d... or d/d...
// items behind the 'd' belong to the function
685 if (numer.size() > 1)
686 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
688 jt = extractArgument(diff->cell(0), jt, ar.end());
691 // collect denominator parts
692 MathArray const & denom = f->cell(1);
693 for (MathArray::const_iterator dt = denom.begin(); dt != denom.end();) {
// 'et' is the next differential item behind 'dt' (or the end)
695 MathArray::const_iterator et
696 = find_if(dt + 1, denom.end(), &testDiffItem);
// 'st' is the last item in front of 'et'
699 MathArray::const_iterator st = et - 1;
700 MathScriptInset const * script = (*st)->asScriptInset();
701 if (script && script->hasUp()) {
702 // things like d.../dx^n
704 if (extractNumber(script->up(), mult)) {
705 //lyxerr << "mult: " << mult << endl;
// NOTE(review): this 'i' shadows the index of the outer loop
706 for (int i = 0; i < mult; ++i)
707 diff->addDer(MathArray(dt + 1, st));
711 diff->addDer(MathArray(dt + 1, et));
// drop the consumed items and put the diff inset in place of the fraction
717 ar.erase(it + 1, jt);
718 *it = MathAtom(diff);
720 //lyxerr << "\nDiffs to: " << ar << "\n";
// true if 'at' is the symbol named "to" or "rightarrow"
729 bool testRightArrow(MathAtom const & at)
731 return testSymbol(at, "to") || testSymbol(at, "rightarrow");
736 // replace '\lim_{x->x0} f(x)' sequences by a real MathLimInset
737 // assume 'extractDelims' ran before
738 void extractLims(MathArray & ar)
740 // we need at least three items...
744 //lyxerr << "\nLimits from: " << ar << "\n";
745 for (MathArray::size_type i = 0; i + 2 < ar.size(); ++i) {
746 MathArray::iterator it = ar.begin() + i;
748 // is this a limit function?
749 if (!testSymbol(*it, "lim"))
752 // the next one must be a subscript (without superscript)
753 MathScriptInset const * sub = (*(it + 1))->asScriptInset();
754 if (!sub || !sub->hasDown() || sub->hasUp())
757 // and it must contain a -> symbol
758 MathArray const & s = sub->down();
759 MathArray::const_iterator st = find_if(s.begin(), s.end(), &testRightArrow);
763 // the -> splits the subscript into x and x0
764 MathArray x = MathArray(s.begin(), st);
765 MathArray x0 = MathArray(st + 1, s.end());
767 // use something behind the script as core
769 MathArray::iterator tt = extractArgument(f, it + 2, ar.end());
// drop the subscript and the consumed argument
772 ar.erase(it + 1, tt);
774 // create a proper inset as replacement
775 *it = MathAtom(new MathLimInset(f, x, x0));
777 //lyxerr << "\nLimits to: " << ar << "\n";
// run the individual extraction passes on 'ar'; every pass modifies the
// array in place
785 void extractStructure(MathArray & ar)
787 //lyxerr << "\nStructure from: " << ar << "\n";
788 extractIntegrals(ar);
794 extractFunctions(ar);
800 //lyxerr << "\nStructure to: " << ar << "\n";
// output of a MathArray on a WriteStream
804 void write(MathArray const & dat, WriteStream & wi)
// firstitem() is set before the loop and cleared inside of it
808 wi.firstitem() = true;
809 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
811 wi.firstitem() = false;
// call normalize() on every item of 'ar', in order
816 void normalize(MathArray const & ar, NormalStream & os)
818 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
819 (*it)->normalize(os);
// output for Octave: the structure is extracted first, then the items
// are visited in order
823 void octavize(MathArray const & dat, OctaveStream & os)
826 extractStructure(ar);
827 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// output for Maple: the structure is extracted first, then the items
// are visited in order
832 void maplize(MathArray const & dat, MapleStream & os)
835 extractStructure(ar);
836 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// output for Mathematica: the structure is extracted first, then
// mathematicize() is called on every item in order
841 void mathematicize(MathArray const & dat, MathematicaStream & os)
844 extractStructure(ar);
845 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
846 (*it)->mathematicize(os);
// output as MathML: the structure is extracted first; an array with
// exactly one item is treated separately from the general loop
850 void mathmlize(MathArray const & dat, MathMLStream & os)
853 extractStructure(ar);
856 else if (ar.size() == 1)
860 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
861 (*it)->mathmlize(os);
// pipe 'data' through the shell command 'cmd' and read back what the
// command wrote to its standard output.
// NOTE(review): 'cmd' and 'data' are pasted into the command line without
// escaping ('data' is only wrapped in single quotes), so a single quote
// inside 'data' breaks the quoting -- confirm that only trusted text is
// passed in.
871 string captureOutput(string const & cmd, string const & data)
// the output goes to a temporary file that is read back and removed below
873 string outfile = lyx::tempName(string(), "mathextern");
874 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
875 lyxerr << "calling: " << full << endl;
// run the command line and wait for it
877 dummy.startscript(Systemcall::Wait, full);
878 string out = GetFileContents(outfile);
879 lyx::unlink(outfile);
880 lyxerr << "result: '" << out << "'" << endl;
// send 'ar' to Maple, wrapped in a call to the Maple function 'extra', and
// parse the LaTeX output of Maple into the result
885 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
// the header below is sent to Maple in front of the expression
887 string header = "readlib(latex):\n";
889 // remove the \\it for variable names
890 //"#`latex/csname_font` := `\\it `:"
892 "`latex/csname_font` := ``:\n";
894 // export matrices in (...) instead of [...]
896 "`latex/latex/matrix` := "
897 "subs(`[`=`(`, `]`=`)`,"
898 "eval(`latex/latex/matrix`)):\n";
900 // use \\cdot instead of \\, as the multiplication sign
902 "`latex/latex/*` := "
903 "subs(`\\,`=`\\cdot `,"
904 "eval(`latex/latex/*`)):\n";
906 // remove spurious \\noalign{\\medskip} in matrix output
908 "`latex/latex/matrix`:= "
909 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
910 "eval(`latex/latex/matrix`)):\n";
912 //"#`latex/latex/symbol` "
913 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
915 string trailer = "quit;";
919 string expr = os.str().c_str();
920 lyxerr << "ar: '" << ar << "'\n";
921 lyxerr << "ms: '" << os.str() << "'\n";
923 for (int i = 0; i < 100; ++i) { // at most 100 attempts
924 // try to fix missing '*' the hard way by using mint
926 // ... > echo "1A;" | mint -i 1 -S -s -q
929 // Probably missing an operator such as * p
931 lyxerr << "checking expr: '" << expr << "'\n";
932 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
934 break; // expression syntax is ok
935 istringstream is(out.c_str());
938 if (line.find("on line") != 0)
939 break; // error message not identified
// the column of the caret, minus the 15 characters of the "on line ..."
// part, is used as the position in 'expr' where a '*' gets inserted
941 string::size_type pos = line.find('^');
942 if (pos == string::npos || pos < 15)
943 break; // caret position not found
944 pos -= 15; // skip the "on line ..." part
// NOTE(review): 'pos' is not compared with expr.size() before it is used
// as index and as insert position -- confirm that mint never reports a
// column behind the end of 'expr'.
945 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
946 break; // two '*' in a row are definitely bad
947 expr.insert(pos, "*");
// the final request: latex(extra(expr)), framed by header and trailer
950 string full = "latex(" + extra + '(' + expr + "));";
951 string out = captureOutput("maple -q", header + full + trailer);
957 mathed_parse_cell(res, out);
// send 'ar' to Octave and turn the answer into a MathArray.
// the first (string) parameter is unnamed, i.e. not used by name here.
962 MathArray pipeThroughOctave(string const &, MathArray const & ar)
967 string expr = os.str().c_str();
970 lyxerr << "pipe: ar: '" << ar << "'\n";
971 lyxerr << "pipe: expr: '" << expr << "'\n";
973 for (int i = 0; i < 100; ++i) { // at most 100 attempts
975 // try to fix missing '*' the hard way
977 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
980 lyxerr << "checking expr: '" << expr << "'\n";
// stderr is merged into stdout so that parse errors can be inspected
981 out = captureOutput("octave -q 2>&1", expr);
982 lyxerr << "checking out: '" << out << "'\n";
984 // leave loop if expression syntax is probably ok
985 if (out.find("parse error:") == string::npos)
988 // search line with single caret
989 istringstream is(out.c_str());
993 lyxerr << "skipping line: '" << line << "'\n";
994 if (line.find(">>> ") != string::npos)
998 // found line with error, next line is the one with caret
1000 string::size_type pos = line.find('^');
1001 lyxerr << "caret line: '" << line << "'\n";
1002 lyxerr << "found caret at pos: '" << pos << "'\n";
1003 if (pos == string::npos || pos < 4)
1004 break; // caret position not found
1005 pos -= 4; // skip the ">>> " part
// NOTE(review): 'pos' is not compared with expr.size() before it is used
// as index and as insert position -- confirm that the caret can never be
// behind the end of 'expr'.
1006 if (expr[pos] == '*')
1007 break; // two '*' in a row are definitely bad
1008 expr.insert(pos, "*");
// assumes 'out' has at least 6 characters (presumably a fixed prefix of
// the octave answer is cut off here) -- TODO confirm
1015 out = out.substr(6);
1017 // parse output as matrix or single number
1018 MathAtom at(new MathArrayInset("array", out));
1019 MathArrayInset const * mat = at->asArrayInset();
// a 1x1 result is appended as plain cell content ...
1021 if (mat->ncols() == 1 && mat->nrows() == 1)
1022 res.append(mat->cell(0));
// ... the array inset 'at' is put into a "(" ")" delim inset
1024 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1025 res.back().nucleus()->cell(0).push_back(at);
// evaluate 'ar' with the external system 'lang'. "octave" and "maple" have
// dedicated implementations, every other language is handed to an
// external script named "extern_" + lang that is searched in "mathed".
1033 MathArray pipeThroughExtern(string const & lang, string const & extra,
1034 MathArray const & ar)
1036 if (lang == "octave")
1037 return pipeThroughOctave(extra, ar);
1039 if (lang == "maple")
1040 return pipeThroughMaple(extra, ar);
1042 // create normalized expression
1044 NormalStream ns(os);
// the data sent to the script starts with "[" followed by 'extra'
1045 os << "[" << extra << ' ';
1048 string data = os.str().c_str();
1050 // search external script
1051 string file = LibFileSearch("mathed", "extern_" + lang);
1053 lyxerr << "converter to '" << lang << "' not found\n";
1057 // run external script
1058 string out = captureOutput(file, data);
// the output of the script is parsed into the result
1060 mathed_parse_cell(res, out);