6 // This file contains most of the magic that extracts "context
7 // information" from the unstructured layout-oriented stuff in an
12 #include "math_amsarrayinset.h"
13 #include "math_arrayinset.h"
14 #include "math_charinset.h"
15 #include "math_deliminset.h"
16 #include "math_diffinset.h"
17 #include "math_exfuncinset.h"
18 #include "math_exintinset.h"
19 #include "math_fracinset.h"
20 #include "math_liminset.h"
21 #include "math_matrixinset.h"
22 #include "math_mathmlstream.h"
23 #include "math_numberinset.h"
24 #include "math_scriptinset.h"
25 #include "math_stringinset.h"
26 #include "math_symbolinset.h"
27 #include "math_unknowninset.h"
28 #include "math_parser.h"
31 #include "support/lyxlib.h"
32 #include "support/systemcall.h"
33 #include "support/filetools.h"
38 using std::istringstream;
// Stream-insertion operator that writes a MathArray to an ostream.
43 ostream & operator<<(ostream & os, MathArray const & ar)
51 // predicate type: a function that tests a single MathAtom
52 typedef bool TestItemFunc(MathAtom const &);
54 // callback type: a function that builds a replacement MathAtom from
// the MathArray holding the sub-expression to be replaced
55 typedef MathAtom ReplaceArgumentFunc(const MathArray & ar);
59 // try to extract a super/subscript
60 // modify iterator position to point behind the thing
// 'pos' is taken by reference so the caller sees the advanced position;
// 'last' marks the end of the range that may be inspected.
// Returns a bool (presumably whether a script was found -- confirm).
61 bool extractScript(MathArray & ar,
62 MathArray::iterator & pos, MathArray::iterator last)
64 // nothing to get here
68 // is this a scriptinset?
69 if (!(*pos)->asScriptInset())
72 // it is a scriptinset, use it.
79 // try to extract an "argument" to some function.
80 // returns position behind the argument
// 'ar' is the array handed in by the caller for the argument, [pos, last)
// is the range to look at. The trailing string parameter is unnamed and
// defaults to "", so it cannot be referred to inside the function.
81 MathArray::iterator extractArgument(MathArray & ar,
82 MathArray::iterator pos, MathArray::iterator last, string const & = "")
84 // nothing to get here
88 // something delimited _is_ an argument
89 if ((*pos)->asDelimInset()) {
94 // always take the first thing, no matter what it is
97 // go ahead if possible
102 // if the next item is a subscript, it most certainly belongs to the
104 extractScript(ar, pos, last);
108 // but it might be more than that.
109 // FIXME: not implemented
110 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
111 // // always take the first thing, no matter
113 // ar.push_back(*it);
121 // returns sequence of char with same code starting at it up to end
122 // it might be less, though...
// The loop appends getChar() of each consecutive char inset in [it, end)
// to 's' and stops at the first item that is not a char inset.
124 (MathArray::const_iterator it, MathArray::const_iterator end)
127 for (; it != end && (*it)->asCharInset(); ++it)
128 s += (*it)->getChar();
// Collapse runs of consecutive char insets in 'ar' into MathStringInset
// objects, modifying 'ar' in place.
133 void extractStrings(MathArray & ar)
135 //lyxerr << "\nStrings from: " << ar << "\n";
136 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
// only a char inset can start a string
137 if (!ar[i]->asCharInset())
// collect the chars of the run that starts at position i
139 string s = charSequence(ar.begin() + i, ar.end());
// the first slot of the run receives the new string inset ...
140 ar[i] = MathAtom(new MathStringInset(s));
// ... and the remaining s.size() - 1 slots are removed
141 ar.erase(i + 1, i + s.size());
143 //lyxerr << "\nStrings to: " << ar << "\n";
// Replace matrix-like items of 'ar' by MathMatrixInset objects, in two
// passes over the array.
147 void extractMatrices(MathArray & ar)
149 //lyxerr << "\nMatrices from: " << ar << "\n";
150 // first pass for explicitly delimited stuff
151 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
152 if (!ar[i]->asDelimInset())
// contents of the delimiter inset's cell 0
154 MathArray const & arr = ar[i]->asDelimInset()->cell(0);
// the first item of that cell has to be a grid inset
157 if (!arr.front()->asGridInset())
// build the matrix from the grid found inside the delimiters
159 ar[i] = MathAtom(new MathMatrixInset(*(arr.front()->asGridInset())));
162 // second pass for AMS "pmatrix" etc
163 for (MathArray::size_type i = 0; i < ar.size(); ++i)
164 if (ar[i]->asAMSArrayInset())
165 ar[i] = MathAtom(new MathMatrixInset(*(ar[i]->asGridInset())));
166 //lyxerr << "\nMatrices to: " << ar << "\n";
170 // convert this inset somehow to a string
// The text is written to 'str'. Two sources are visible here: the
// inset's single character (getChar()) and a string inset's str().
171 bool extractString(MathAtom const & at, string & str)
// one-character string built from the inset's char
174 str = string(1, at->getChar());
177 if (at->asStringInset()) {
178 str = at->asStringInset()->str();
185 // convert this array somehow to a number
// int overload: the text fed to the stream is the char sequence found at
// the start of 'ar' (see charSequence); the target is 'i'
186 bool extractNumber(MathArray const & ar, int & i)
188 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// double overload: same input text, the target is 'd'
194 bool extractNumber(MathArray const & ar, double & d)
196 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// true if extractString() succeeds on 'at' and the extracted string
// equals 'str'
202 bool testString(MathAtom const & at, const string & str)
205 return extractString(at, s) && str == s;
209 // search end of nested sequence
// Walks [it, last) with a nesting counter 'level' that starts at 0;
// testOpen and testClose are the predicates for opening and closing items.
// Returns an iterator into the scanned range.
210 MathArray::iterator endNestSearch(
211 MathArray::iterator it,
212 MathArray::iterator last,
213 TestItemFunc testOpen,
214 TestItemFunc testClose
217 for (int level = 0; it != last; ++it) {
229 // replace nested sequences by real insets
// testOpen/testClose recognise the first and the last item of a sequence;
// replaceArg builds the replacement atom from the items in between.
232 TestItemFunc testOpen,
233 TestItemFunc testClose,
234 ReplaceArgumentFunc replaceArg
237 // use indices rather than iterators for the loop because we are going
238 // to modify the array.
239 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
240 // check whether this is the begin of the sequence
241 if (!testOpen(ar[i]))
244 // search end of sequence
245 MathArray::iterator it = ar.begin() + i;
246 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
250 // replace the original stuff by the new inset
251 ar.erase(it + 1, jt + 1);
253 // create a proper inset as replacement
// NOTE(review): the erase above has already removed the items in
// [it + 1, jt], yet the replacement below is built from (it + 1, jt).
// If MathArray::erase behaves like a vector erase, this range no longer
// holds the original sub-expression (and the iterators may be invalid).
// Looks like the replacement should be created before erasing -- confirm.
254 ar[i] = replaceArg(MathArray(it + 1, jt));
261 // split scripts into separate super- and subscript insets. sub goes in
265 void splitScripts(MathArray & ar)
267 //lyxerr << "\nScripts from: " << ar << "\n";
268 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
269 // is this script inset?
270 if (!ar[i]->asScriptInset())
273 // no problem if we don't have both...
274 if (!ar[i]->asScriptInset()->hasUp())
276 if (!ar[i]->asScriptInset()->hasDown())
279 // create extra script inset and move superscript over
// p is the existing inset (needs write access, hence nucleus()),
// q is the freshly allocated one
280 MathScriptInset * p = ar[i].nucleus()->asScriptInset();
281 MathScriptInset * q = new MathScriptInset(true);
// hand p's superscript cell to q, then drop the script from p
282 std::swap(q->up(), p->up());
283 p->removeScript(true);
285 // insert new inset behind
// ownership of q passes to the MathAtom stored in the array
287 ar.insert(i, MathAtom(q));
289 //lyxerr << "\nScripts to: " << ar << "\n";
// Turn an 'e' that is followed by a script inset without subscript into
// MathExFuncInset("exp", ...), using the script's cell(1) as argument.
297 void extractExps(MathArray & ar)
299 //lyxerr << "\nExps from: " << ar << "\n";
// i + 1 must be a valid index because the following item is inspected
300 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
302 if (ar[i]->getChar() != 'e')
305 // we need an exponent but no subscript
306 MathScriptInset const * sup = ar[i + 1]->asScriptInset();
307 if (!sup || sup->hasDown())
310 // create a proper exp-inset as replacement
311 ar[i] = MathAtom(new MathExFuncInset("exp", sup->cell(1)));
314 //lyxerr << "\nExps to: " << ar << "\n";
319 // extract det(...) from |matrix|
// Delimiter insets that qualify are replaced in place by
// MathExFuncInset("det", ...) holding the delimiter's cell 0.
321 void extractDets(MathArray & ar)
323 //lyxerr << "\ndet from: " << ar << "\n";
324 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
325 MathDelimInset const * del = (*it)->asDelimInset();
330 *it = MathAtom(new MathExFuncInset("det", del->cell(0)));
332 //lyxerr << "\ndet to: " << ar << "\n";
// true for the decimal digits '0'..'9' and for the decimal point '.'
340 bool isDigitOrSimilar(char c)
342 return ('0' <= c && c <= '9') || c == '.';
346 // returns sequence of digits
// Only char insets are looked at; each char is checked with
// isDigitOrSimilar(), so '.' counts as part of the sequence as well.
348 (MathArray::const_iterator it, MathArray::const_iterator end)
351 for (; it != end && (*it)->asCharInset(); ++it) {
352 if (!isDigitOrSimilar((*it)->getChar()))
354 s += (*it)->getChar();
// Collapse runs of digit-like char insets in 'ar' into MathNumberInset
// objects, modifying 'ar' in place.
360 void extractNumbers(MathArray & ar)
362 //lyxerr << "\nNumbers from: " << ar << "\n";
363 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
// a number can only start at a char inset ...
364 if (!ar[i]->asCharInset())
// ... whose char is a digit or '.'
366 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
369 string s = digitSequence(ar.begin() + i, ar.end());
// first slot takes the number inset, the other s.size() - 1 slots go away
371 ar[i] = MathAtom(new MathNumberInset(s));
372 ar.erase(i + 1, i + s.size());
374 //lyxerr << "\nNumbers to: " << ar << "\n";
380 // search delimiters
// true if 'at' converts to the string "("
383 bool testOpenParan(MathAtom const & at)
385 return testString(at, "(");
// true if 'at' converts to the string ")"
389 bool testCloseParan(MathAtom const & at)
391 return testString(at, ")");
// wrap 'ar' into a new MathDelimInset with "(" and ")" as delimiters
395 MathAtom replaceDelims(const MathArray & ar)
397 return MathAtom(new MathDelimInset("(", ")", ar));
401 // replace '('...')' sequences by a real MathDelimInset
// implemented via replaceNested() with the parenthesis predicates and
// replaceDelims() as the replacement factory
402 void extractDelims(MathArray & ar)
404 //lyxerr << "\nDelims from: " << ar << "\n";
405 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
406 //lyxerr << "\nDelims to: " << ar << "\n";
412 // search well-known functions
416 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
417 // assume 'extractDelims' ran before
418 void extractFunctions(MathArray & ar)
420 // we need at least two items...
424 //lyxerr << "\nFunctions from: " << ar << "\n";
425 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
// it: candidate function name, jt: the item right behind it
426 MathArray::iterator it = ar.begin() + i;
427 MathArray::iterator jt = it + 1;
431 if ((*it)->asUnknownInset()) {
432 // it certainly is if it is well known...
433 name = (*it)->name();
435 // is this a user defined function?
436 // it is probably not, if it doesn't have a name.
437 if (!extractString(*it, name))
439 // it is not if it has no argument
442 // guess so, if this is followed by
443 // a DelimInset with a single item in the cell
444 MathDelimInset const * del = (*jt)->asDelimInset();
445 if (!del || del->cell(0).size() != 1)
447 // fall through into main branch
450 // do we have an exponent like in
451 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
// extractScript takes jt by reference and may move it past the script
453 extractScript(exp, jt, ar.end());
455 // create a proper inset as replacement
456 MathExFuncInset * p = new MathExFuncInset(name);
458 // jt points to the "argument". Get hold of this.
// the argument goes into cell 0 of the new inset; st is the position
// behind the argument
459 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
461 // replace the function name by a real function inset
464 // remove the source of the argument from the array
465 ar.erase(it + 1, st);
467 // re-insert exponent
468 ar.insert(i + 1, exp);
469 //lyxerr << "\nFunctions to: " << ar << "\n";
// true if 'at' is a symbol inset whose name equals 'name'
478 bool testSymbol(MathAtom const & at, string const & name)
480 return at->asSymbolInset() && at->asSymbolInset()->name() == name;
// true if 'at' is the symbol named "int"
484 bool testIntSymbol(MathAtom const & at)
486 return testSymbol(at, "int");
// Test for an integral item. The visible alternative accepts a script
// inset whose non-empty nucleus ends in the "int" symbol.
490 bool testIntegral(MathAtom const & at)
494 ( at->asScriptInset()
495 && at->asScriptInset()->nuc().size()
496 && testIntSymbol(at->asScriptInset()->nuc().back()) );
// true if 'at' converts to the string "d" (the 'd' of an integral's dx)
501 bool testIntDiff(MathAtom const & at)
503 return testString(at, "d");
507 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
508 // assume 'extractDelims' ran before
509 void extractIntegrals(MathArray & ar)
511 // we need at least three items...
515 //lyxerr << "\nIntegrals from: " << ar << "\n";
516 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
517 MathArray::iterator it = ar.begin() + i;
// integrals may nest: testIntegral opens a level, testIntDiff closes it
520 MathArray::iterator jt =
521 endNestSearch(it, ar.end(), testIntegral, testIntDiff);
523 // something sensible found?
527 // is this an integral name?
528 if (!testIntegral(*it))
531 // core is the part from behind the scripts to the 'd'
532 MathExIntInset * p = new MathExIntInset("int");
534 // handle scripts if available
// not a bare "int" symbol, so *it is used as a script inset here:
// its down() goes to cell 2 and its up() to cell 3
535 if (!testIntSymbol(*it)) {
536 p->cell(2) = (*it)->asScriptInset()->down();
537 p->cell(3) = (*it)->asScriptInset()->up();
// cell 0 receives the items between the integral sign and the 'd'
539 p->cell(0) = MathArray(it + 1, jt);
541 // use the "thing" behind the 'd' as differential
542 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
// drop everything that has been consumed behind the integral sign
545 ar.erase(it + 1, tt);
548 //lyxerr << "\nIntegrals to: " << ar << "\n";
// true if 'at' converts to the string "="
557 bool testEqualSign(MathAtom const & at)
559 return testString(at, "=");
// true if 'p' is the symbol named "sum"
563 bool testSumSymbol(MathAtom const & p)
565 return testSymbol(p, "sum");
// Test for a sum item. The visible alternative accepts a script inset
// whose non-empty nucleus ends in the "sum" symbol.
569 bool testSum(MathAtom const & at)
573 ( at->asScriptInset()
574 && at->asScriptInset()->nuc().size()
575 && testSumSymbol(at->asScriptInset()->nuc().back()) );
579 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
580 // assume 'extractDelims' ran before
581 void extractSums(MathArray & ar)
583 // we need at least two items...
587 //lyxerr << "\nSums from: " << ar << "\n";
588 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
589 MathArray::iterator it = ar.begin() + i;
591 // is this a sum name?
595 // create a proper inset as replacement
// sums reuse MathExIntInset, tagged with the name "sum"
596 MathExIntInset * p = new MathExIntInset("sum");
598 // collect lower bound and summation index
599 MathScriptInset const * sub = ar[i]->asScriptInset();
600 if (sub && sub->hasDown()) {
601 // try to figure out the summation index from the subscript
// NOTE: this local 'ar' shadows the function parameter 'ar' for the
// rest of the enclosing scope; it refers to the subscript contents
602 MathArray const & ar = sub->down();
603 MathArray::const_iterator xt =
604 find_if(ar.begin(), ar.end(), &testEqualSign);
605 if (xt != ar.end()) {
606 // we found a '=', use everything in front of that as index,
607 // and everything behind as lower index
608 p->cell(1) = MathArray(ar.begin(), xt);
609 p->cell(2) = MathArray(xt + 1, ar.end());
611 // use everything as summation index, don't use scripts.
616 // collect upper bound
617 if (sub && sub->hasUp())
618 p->cell(3) = sub->up();
620 // use something behind the script as core
621 MathArray::iterator tt = extractArgument(p->cell(0), it + 1, ar.end());
// drop the consumed core from the array
624 ar.erase(it + 1, tt);
627 //lyxerr << "\nSums to: " << ar << "\n";
632 // search differential stuff
635 // tests for 'd' or '\partial'
// NOTE(review): the return statement below only accepts the string "d";
// nothing here matches '\partial' -- confirm whether that is intended.
636 bool testDiffItem(MathAtom const & at)
638 return testString(at, "d");
// true if 'ar' is non-empty and its first item passes testDiffItem()
642 bool testDiffArray(MathArray const & ar)
644 return ar.size() && testDiffItem(ar.front());
// Test for a "differential fraction": the visible conditions require both
// cell 0 and cell 1 of the fraction inset to pass testDiffArray().
648 bool testDiffFrac(MathAtom const & at)
652 && testDiffArray(at->asFracInset()->cell(0))
653 && testDiffArray(at->asFracInset()->cell(1));
// Replace "differential fractions" (see testDiffFrac) in 'ar' by
// MathDiffInset objects, modifying 'ar' in place.
657 void extractDiff(MathArray & ar)
659 //lyxerr << "\nDiffs from: " << ar << "\n";
660 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
661 MathArray::iterator it = ar.begin() + i;
663 // is this a "differential fraction"?
664 if (!testDiffFrac(*it))
667 MathFracInset const * f = (*it)->asFracInset();
669 lyxerr << "should not happen\n";
673 // create a proper diff inset
674 MathDiffInset * diff = new MathDiffInset;
676 // collect function, let jt point behind last used item
677 MathArray::iterator jt = it + 1;
// cell 0 of the fraction is treated as the numerator
679 MathArray const & numer = f->cell(0);
680 if (numer.size() > 1 && numer[1]->asScriptInset()) {
681 // this is something like d^n f(x) / d... or d^n / d...
// items behind 'd' and its script are taken as the function
684 if (numer.size() > 2)
685 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
687 jt = extractArgument(diff->cell(0), jt, ar.end());
689 // simply d f(x) / d... or d/d...
// items behind the leading 'd' are taken as the function
690 if (numer.size() > 1)
691 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
693 jt = extractArgument(diff->cell(0), jt, ar.end());
696 // collect denominator parts
// cell 1 of the fraction is treated as the denominator
697 MathArray const & denom = f->cell(1);
698 for (MathArray::const_iterator dt = denom.begin(); dt != denom.end();) {
// et: next 'd' behind dt, or denom.end() if there is none
700 MathArray::const_iterator et
701 = find_if(dt + 1, denom.end(), &testDiffItem);
// st: last item of the current part
704 MathArray::const_iterator st = et - 1;
705 MathScriptInset const * script = (*st)->asScriptInset();
706 if (script && script->hasUp()) {
707 // things like d.../dx^n
709 if (extractNumber(script->up(), mult)) {
710 //lyxerr << "mult: " << mult << endl;
// NOTE: this 'int i' shadows the outer loop index 'i'
711 for (int i = 0; i < mult; ++i)
712 diff->addDer(MathArray(dt + 1, st));
716 diff->addDer(MathArray(dt + 1, et));
// remove the consumed items, then put the diff inset in place of the
// fraction
722 ar.erase(it + 1, jt);
723 *it = MathAtom(diff);
725 //lyxerr << "\nDiffs to: " << ar << "\n";
// true if 'at' is the symbol "to" or the symbol "rightarrow"
734 bool testRightArrow(MathAtom const & at)
736 return testSymbol(at, "to") || testSymbol(at, "rightarrow");
741 // replace '\lim_{x->x0} f(x)' sequences by a real MathLimInset
742 // assume 'extractDelims' ran before
743 void extractLims(MathArray & ar)
745 // we need at least three items...
749 //lyxerr << "\nLimits from: " << ar << "\n";
// i + 2 must be a valid index: symbol, subscript and core are needed
750 for (MathArray::size_type i = 0; i + 2 < ar.size(); ++i) {
751 MathArray::iterator it = ar.begin() + i;
753 // is this a limit function?
754 if (!testSymbol(*it, "lim"))
757 // the next one must be a subscript (without superscript)
758 MathScriptInset const * sub = (*(it + 1))->asScriptInset();
759 if (!sub || !sub->hasDown() || sub->hasUp())
762 // and it must contain a -> symbol
763 MathArray const & s = sub->down();
764 MathArray::const_iterator st = find_if(s.begin(), s.end(), &testRightArrow);
768 // the -> splits the subscript into x and x0
769 MathArray x = MathArray(s.begin(), st);
770 MathArray x0 = MathArray(st + 1, s.end());
772 // use something behind the script as core
774 MathArray::iterator tt = extractArgument(f, it + 2, ar.end());
// drop the subscript and the consumed core
777 ar.erase(it + 1, tt);
779 // create a proper inset as replacement
780 *it = MathAtom(new MathLimInset(f, x, x0));
782 //lyxerr << "\nLimits to: " << ar << "\n";
// Entry point used by the exporters in this file: runs the individual
// extract...() passes on 'ar'. Several passes assume that others ran
// before (see their comments), so the call order matters.
790 void extractStructure(MathArray & ar)
792 //lyxerr << "\nStructure from: " << ar << "\n";
793 extractIntegrals(ar);
799 extractFunctions(ar);
805 //lyxerr << "\nStructure to: " << ar << "\n";
// Write the items of an array to a WriteStream.
// The loop iterates over 'ar', not over the parameter 'dat'
// (presumably a local copy of 'dat' -- confirm).
809 void write(MathArray const & dat, WriteStream & wi)
// the stream's firstitem() flag is set before the loop and cleared inside
813 wi.firstitem() = true;
814 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
816 wi.firstitem() = false;
// call normalize(os) on every item of 'ar', in order
821 void normalize(MathArray const & ar, NormalStream & os)
823 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
824 (*it)->normalize(os);
// Octave export: runs extractStructure() on 'ar' and then visits its
// items. 'ar' is presumably a local copy of the const 'dat' -- confirm.
828 void octavize(MathArray const & dat, OctaveStream & os)
831 extractStructure(ar);
832 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Maple export: runs extractStructure() on 'ar' and then visits its
// items. 'ar' is presumably a local copy of the const 'dat' -- confirm.
837 void maplize(MathArray const & dat, MapleStream & os)
840 extractStructure(ar);
841 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
// Mathematica export: runs extractStructure() on 'ar' and then calls
// mathematicize(os) on every item.
// 'ar' is presumably a local copy of the const 'dat' -- confirm.
846 void mathematicize(MathArray const & dat, MathematicaStream & os)
849 extractStructure(ar);
850 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
851 (*it)->mathematicize(os);
// MathML export: runs extractStructure() on 'ar', then distinguishes by
// size (a one-item array is handled separately) before calling
// mathmlize(os) on the items.
// 'ar' is presumably a local copy of the const 'dat' -- confirm.
855 void mathmlize(MathArray const & dat, MathMLStream & os)
858 extractStructure(ar);
861 else if (ar.size() == 1)
865 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
866 (*it)->mathmlize(os);
// Run shell command 'cmd' with 'data' piped to its standard input and
// return what the command wrote to standard output. The output travels
// through a temporary file that is removed afterwards.
876 string captureOutput(string const & cmd, string const & data)
878 string outfile = lyx::tempName(string(), "mathextern");
// NOTE(review): 'data', 'cmd' and 'outfile' are pasted into the shell
// command without escaping. A single quote inside 'data' ends the quoted
// echo argument and the remainder is interpreted by the shell. Confirm
// that callers never pass such data, or escape it here.
879 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
880 lyxerr << "calling: " << full << endl;
// Systemcall::Wait: block until the command has finished
882 dummy.startscript(Systemcall::Wait, full);
883 string out = GetFileContents(outfile);
884 lyx::unlink(outfile);
885 lyxerr << "result: '" << out << "'" << endl;
// Evaluate 'ar' with Maple: the expression is first checked (and
// repaired) with 'mint', then sent to 'maple -q' wrapped in
// latex(extra(...)), and the output is parsed back into a MathArray.
890 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
// Maple statements sent ahead of the actual expression
892 string header = "readlib(latex):\n";
894 // remove the \\it for variable names
895 //"#`latex/csname_font` := `\\it `:"
897 "`latex/csname_font` := ``:\n";
899 // export matrices in (...) instead of [...]
901 "`latex/latex/matrix` := "
902 "subs(`[`=`(`, `]`=`)`,"
903 "eval(`latex/latex/matrix`)):\n";
905 // replace \\cdots with proper '*'
907 "`latex/latex/*` := "
908 "subs(`\\,`=`\\cdot `,"
909 "eval(`latex/latex/*`)):\n";
911 // remove spurious \\noalign{\\medskip} in matrix output
913 "`latex/latex/matrix`:= "
914 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
915 "eval(`latex/latex/matrix`)):\n";
917 //"#`latex/latex/symbol` "
918 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
// Maple statement sent behind the expression
920 string trailer = "quit;";
924 string expr = os.str().c_str();
925 lyxerr << "ar: '" << ar << "'\n";
926 lyxerr << "ms: '" << os.str() << "'\n";
928 for (int i = 0; i < 100; ++i) { // at most 100 attempts
929 // try to fix missing '*' the hard way by using mint
931 // ... > echo "1A;" | mint -i 1 -S -s -q
934 // Probably missing an operator such as * p
936 lyxerr << "checking expr: '" << expr << "'\n";
937 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
939 break; // expression syntax is ok
940 istringstream is(out.c_str());
// only messages that start with "on line" are understood
943 if (line.find("on line") != 0)
944 break; // error message not identified
946 string::size_type pos = line.find('^');
947 if (pos == string::npos || pos < 15)
948 break; // caret position not found
949 pos -= 15; // skip the "on line ..." part
// NOTE(review): 'pos' comes from mint's output and is not compared
// against expr.size() before it is used as an index below -- confirm it
// can never point past the end of 'expr'.
950 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
951 break; // two '*' in a row are definitely bad
952 expr.insert(pos, "*");
// final Maple input: header, latex(extra(expr)); and trailer
955 string full = "latex(" + extra + '(' + expr + "));";
956 string out = captureOutput("maple -q", header + full + trailer);
// turn Maple's LaTeX output back into math insets
962 mathed_parse_cell(res, out);
// Evaluate 'ar' with Octave ('octave -q') and turn the output into a
// MathArray. The first string parameter is unnamed and therefore unused.
967 MathArray pipeThroughOctave(string const &, MathArray const & ar)
972 string expr = os.str().c_str();
975 lyxerr << "pipe: ar: '" << ar << "'\n";
976 lyxerr << "pipe: expr: '" << expr << "'\n";
978 for (int i = 0; i < 100; ++i) { // at most 100 attempts
980 // try to fix missing '*' the hard way
982 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
985 lyxerr << "checking expr: '" << expr << "'\n";
// 2>&1: Octave's error messages are needed in 'out' as well
986 out = captureOutput("octave -q 2>&1", expr);
987 lyxerr << "checking out: '" << out << "'\n";
989 // leave loop if expression syntax is probably ok
990 if (out.find("parse error:") == string::npos)
993 // search line with single caret
994 istringstream is(out.c_str());
998 lyxerr << "skipping line: '" << line << "'\n";
999 if (line.find(">>> ") != string::npos)
1003 // found line with error, next line is the one with caret
1005 string::size_type pos = line.find('^');
1006 lyxerr << "caret line: '" << line << "'\n";
1007 lyxerr << "found caret at pos: '" << pos << "'\n";
1008 if (pos == string::npos || pos < 4)
1009 break; // caret position not found
1010 pos -= 4; // skip the ">>> " part
// NOTE(review): 'pos' comes from Octave's output and is not compared
// against expr.size() before it is used as an index below -- confirm it
// can never point past the end of 'expr'.
1011 if (expr[pos] == '*')
1012 break; // two '*' in a row are definitely bad
1013 expr.insert(pos, "*");
// drop the first 6 characters of the output
// (presumably Octave's "ans = " prefix -- confirm)
1020 out = out.substr(6);
1022 // parse output as matrix or single number
1023 MathAtom at(new MathArrayInset("array", out));
1024 MathArrayInset const * mat = at->asArrayInset();
// a 1x1 result is appended as plain contents ...
1026 if (mat->ncols() == 1 && mat->nrows() == 1)
1027 res.append(mat->cell(0));
// ... anything else is wrapped in "(" ")" delimiters
1029 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1030 res.back().nucleus()->cell(0).push_back(at);
// Dispatch evaluation of 'ar' to an external program selected by 'lang'.
// "octave" and "maple" have dedicated implementations; any other language
// is handled by a script named "extern_" + lang that is looked up in the
// "mathed" directory and fed the normalized expression.
1038 MathArray pipeThroughExtern(string const & lang, string const & extra,
1039 MathArray const & ar)
1041 if (lang == "octave")
1042 return pipeThroughOctave(extra, ar);
1044 if (lang == "maple")
1045 return pipeThroughMaple(extra, ar);
1047 // create normalized expression
1049 NormalStream ns(os);
1050 os << "[" << extra << ' ';
1053 string data = os.str().c_str();
1055 // search external script
1056 string file = LibFileSearch("mathed", "extern_" + lang);
1058 lyxerr << "converter to '" << lang << "' not found\n";
1062 // run external script
1063 string out = captureOutput(file, data);
// parse the script's output back into math insets
1065 mathed_parse_cell(res, out);