1 // This file contains most of the magic that extracts "context
2 // information" from the unstructured layout-oriented stuff in an
7 #include "math_amsarrayinset.h"
8 #include "math_arrayinset.h"
9 #include "math_charinset.h"
10 #include "math_deliminset.h"
11 #include "math_diffinset.h"
12 #include "math_exfuncinset.h"
13 #include "math_exintinset.h"
14 #include "math_fracinset.h"
15 #include "math_matrixinset.h"
16 #include "math_mathmlstream.h"
17 #include "math_numberinset.h"
18 #include "math_scriptinset.h"
19 #include "math_stringinset.h"
20 #include "math_symbolinset.h"
21 #include "math_unknowninset.h"
22 #include "math_parser.h"
25 #include "support/lyxlib.h"
26 #include "support/systemcall.h"
27 #include "support/filetools.h"
32 using std::istringstream;
// Stream-insertion operator for a MathArray. Only the signature is visible in
// this excerpt; the output format is defined by the body, which is not shown.
37 ostream & operator<<(ostream & os, MathArray const & ar)
45 // define a function for tests
// Function type of a predicate over a single inset. It is the type of the
// testOpen/testClose parameters taken by endNestSearch.
46 typedef bool TestItemFunc(MathInset *);
48 // define a function for replacing subexpressions
// Function type of a callback that receives a MathArray and returns a raw
// pointer to the inset that should stand in for it.
49 typedef MathInset * ReplaceArgumentFunc(const MathArray & ar);
53 // try to extract a super/subscript
54 // modify iterator position to point behind the thing
// ar   - array passed by non-const reference (destination of the extraction)
// pos  - current position; taken by reference so the caller's iterator can
//        be moved
// last - end of the range being examined
// Returns bool; presumably true when a script was extracted. The return
// statements are not visible in this excerpt -- confirm against the body.
55 bool extractScript(MathArray & ar,
56 MathArray::iterator & pos, MathArray::iterator last)
58 // nothing to get here
62 // is this a scriptinset?
63 if (!(*pos)->asScriptInset())
66 // it is a scriptinset, use it.
73 // try to extract an "argument" to some function.
74 // returns position behind the argument
// ar   - array passed by non-const reference; it is handed on to
//        extractScript below
// pos  - position at which the argument is expected to start
// last - end of the range being examined
// The fourth parameter is unnamed (default ""), so it cannot be referenced
// inside the function.
75 MathArray::iterator extractArgument(MathArray & ar,
76 MathArray::iterator pos, MathArray::iterator last, string const & = "")
78 // nothing to get here
82 // something delimited _is_ an argument
83 if ((*pos)->asDelimInset()) {
88 // always take the first thing, no matter what it is
91 // go ahead if possible
96 // if the next item is a subscript, it most certainly belongs to the
98 extractScript(ar, pos, last);
102 // but it might be more than that.
103 // FIXME: not implemented
104 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
105 // // always take the first thing, no matter
107 // ar.push_back(*it);
// Takes a const iterator into a MathArray and returns a MathScriptInset
// pointer obtained through asScriptInset() on an element's nucleus.
// NOTE(review): the lines between the visible statements are missing from
// this excerpt, so which element is inspected at each step (and whether the
// iterator is advanced in between) cannot be confirmed here.
115 MathScriptInset const * asScript(MathArray::const_iterator it)
119 if (it->nucleus()->asScriptInset())
124 return it->nucleus()->asScriptInset();
129 // returns sequence of char with same code starting at it up to end
130 // it might be less, though...
// NOTE(review): the line carrying the return type and function name is not
// visible in this excerpt; call sites of the form charSequence(begin, end)
// in this file suggest this is charSequence -- confirm.
132 (MathArray::const_iterator it, MathArray::const_iterator end)
// Append the character of each consecutive char inset to s; the loop stops at
// the first element that is not a char inset, or at end.
135 for (; it != end && (*it)->asCharInset(); ++it)
136 s += (*it)->getChar();
// Replaces runs of char insets in ar by single MathStringInset elements,
// modifying ar in place.
141 void extractStrings(MathArray & ar)
143 //lyxerr << "\nStrings from: " << ar << "\n";
144 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
// a run can only start at a char inset
145 if (!ar[i]->asCharInset())
// collect the characters of the run that starts at index i
147 string s = charSequence(ar.begin() + i, ar.end());
// element i becomes the string inset; the erase call then removes the index
// range given by i + 1 and i + s.size(), i.e. the rest of the run's source
148 ar[i].reset(new MathStringInset(s));
149 ar.erase(i + 1, i + s.size());
151 //lyxerr << "\nStrings to: " << ar << "\n";
// Returns the nucleus of the only element when ar holds exactly one element,
// and a null pointer (0) otherwise.
155 MathInset * singleItem(MathArray & ar)
157 return ar.size() == 1 ? ar.begin()->nucleus() : 0;
// Replaces matrix-like elements of ar by MathMatrixInset objects, in place,
// using two passes over the array.
161 void extractMatrices(MathArray & ar)
163 //lyxerr << "\nMatrices from: " << ar << "\n";
164 // first pass for explicitly delimited stuff
165 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
166 MathDelimInset * del = (*it)->asDelimInset();
// look at cell 0 of the delimiter: it must hold exactly one item, and that
// item must be a grid inset
169 MathInset * arr = singleItem(del->cell(0));
170 if (!arr || !arr->asGridInset())
// the array element is overwritten by a matrix inset built from the grid
172 *it = MathAtom(new MathMatrixInset(*(arr->asGridInset())));
175 // second pass for AMS "pmatrix" etc
176 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
177 MathAMSArrayInset * ams = (*it)->asAMSArrayInset();
180 *it = MathAtom(new MathMatrixInset(*ams));
182 //lyxerr << "\nMatrices to: " << ar << "\n";
186 // convert this inset somehow to a string
// Writes a string form of p into str. Two cases are visible: a one-character
// string built from getChar(), and the text of a string inset.
// Returns bool; presumably true on success -- the conditions and return
// statements are on lines not visible in this excerpt.
187 bool extractString(MathInset * p, string & str)
192 str = string(1, p->getChar());
195 if (p->asStringInset()) {
196 str = p->asStringInset()->str();
203 // convert this inset somehow to a number
// Integer overload. Builds an input stream over the characters that
// charSequence collects from the start of ar; the extraction into i and the
// returned status are on lines not visible in this excerpt.
204 bool extractNumber(MathArray const & ar, int & i)
206 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// Floating-point overload of extractNumber. Builds an input stream over the
// characters that charSequence collects from the start of ar; the extraction
// into d and the returned status are on lines not visible in this excerpt.
212 bool extractNumber(MathArray const & ar, double & d)
214 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// True when extractString succeeds on p and the string it produced is equal
// to str.
220 bool testString(MathInset * p, const string & str)
223 return extractString(p, s) && str == s;
227 // search end of nested sequence
// Walks the range [it, last) with a nesting counter, applying testOpen and
// testClose to the nucleus of each element.
// NOTE(review): how the counter is updated and which iterator is returned is
// on lines not visible in this excerpt.
228 MathArray::iterator endNestSearch(
229 MathArray::iterator it,
230 MathArray::iterator last,
231 TestItemFunc testOpen,
232 TestItemFunc testClose
235 for (int level = 0; it != last; ++it) {
236 if (testOpen(it->nucleus()))
238 if (testClose(it->nucleus()))
247 // replace nested sequences by real insets
// NOTE(review): the line carrying the function name and first parameter is
// not visible in this excerpt; the call replaceNested(ar, testOpenParan,
// testCloseParan, replaceDelims) in this file suggests this is replaceNested
// taking the array as first argument -- confirm.
250 TestItemFunc testOpen,
251 TestItemFunc testClose,
252 ReplaceArgumentFunc replaceArg
255 // use indices rather than iterators for the loop because we are going
256 // to modify the array.
257 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
258 // check whether this is the begin of the sequence
259 MathArray::iterator it = ar.begin() + i;
260 if (!testOpen(it->nucleus()))
263 // search end of sequence
264 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
268 // create a proper inset as replacement
// the callback sees only the elements strictly between the opener and jt
269 MathInset * p = replaceArg(MathArray(it + 1, jt));
271 // replace the original stuff by the new inset
// removes everything after the opener up to and including the element at jt
272 ar.erase(it + 1, jt + 1);
280 // split scripts into separate super- and subscript insets. sub goes in
284 void splitScripts(MathArray & ar)
286 //lyxerr << "\nScripts from: " << ar << "\n";
// index-based loop: the array is modified (insert) inside the loop body
287 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
288 MathArray::iterator it = ar.begin() + i;
290 // is this script inset?
291 MathScriptInset * p = (*it)->asScriptInset();
295 // no problem if we don't have both...
296 if (!p->hasUp() || !p->hasDown())
299 // create extra script inset and move superscript over
300 MathScriptInset * q = new MathScriptInset;
// swap hands p's superscript data to q without copying it
302 q->up().data().swap(p->up().data());
// presumably removes the (now empty) superscript from p -- confirm the
// meaning of the 'true' argument against MathScriptInset::removeScript
303 p->removeScript(true);
305 // insert new inset behind
307 ar.insert(i, MathAtom(q));
309 //lyxerr << "\nScripts to: " << ar << "\n";
// Looks for the character 'e' directly followed by a script inset without a
// subscript and overwrites the 'e' element with a MathExFuncInset named "exp".
317 void extractExps(MathArray & ar)
319 //lyxerr << "\nExps from: " << ar << "\n";
// the bound i + 1 < ar.size() guarantees that it + 1 below is a valid element
321 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
322 MathArray::iterator it = ar.begin() + i;
325 MathCharInset const * p = (*it)->asCharInset();
326 if (!p || p->getChar() != 'e')
329 // we need an exponent but no subscript
330 MathScriptInset * sup = (*(it + 1))->asScriptInset();
331 if (!sup || sup->hasDown())
334 // create a proper exp-inset as replacement
// cell(1) of the script inset is used as the argument; presumably this is
// the superscript cell -- confirm
335 *it = new MathExFuncInset("exp", sup->cell(1));
338 //lyxerr << "\nExps to: " << ar << "\n";
343 // extract det(...) from |matrix|
// Scans ar for delimiter insets and overwrites a matching element with a
// MathExFuncInset named "det" whose argument is cell 0 of the delimiter.
// NOTE(review): the test that selects which delimiters qualify is on lines
// not visible in this excerpt.
345 void extractDets(MathArray & ar)
347 //lyxerr << "\ndet from: " << ar << "\n";
348 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
349 MathDelimInset * del = (*it)->asDelimInset();
354 *it = new MathExFuncInset("det", del->cell(0));
356 //lyxerr << "\ndet to: " << ar << "\n";
// True for the characters '0' through '9' and for '.', i.e. the characters
// accepted as part of a number by the digit-collecting code in this file.
364 bool isDigitOrSimilar(char c)
366 return ('0' <= c && c <= '9') || c == '.';
370 // returns sequence of digits
// NOTE(review): the line carrying the return type and function name is not
// visible in this excerpt; the call digitSequence(begin, end) in this file
// suggests this is digitSequence -- confirm.
372 (MathArray::const_iterator it, MathArray::const_iterator end)
// walk consecutive char insets and append their characters to s
375 for (; it != end && (*it)->asCharInset(); ++it) {
// characters are tested with isDigitOrSimilar; the action taken for a
// non-matching character is on a line not visible here
376 if (!isDigitOrSimilar((*it)->getChar()))
378 s += (*it)->getChar();
// Replaces runs of digit-like char insets in ar by single MathNumberInset
// elements, modifying ar in place.
384 void extractNumbers(MathArray & ar)
386 //lyxerr << "\nNumbers from: " << ar << "\n";
387 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
// a number can only start at a char inset holding a digit or '.'
388 if (!ar[i]->asCharInset())
390 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
// collect the digit run that starts at index i
393 string s = digitSequence(ar.begin() + i, ar.end());
// element i becomes the number inset; the erase call then removes the index
// range given by i + 1 and i + s.size(), i.e. the rest of the run's source
395 ar[i].reset(new MathNumberInset(s));
396 ar.erase(i + 1, i + s.size());
398 //lyxerr << "\nNumbers to: " << ar << "\n";
404 // search delimiters
// Predicate: true when p converts (via testString) to the string "(".
407 bool testOpenParan(MathInset * p)
409 return testString(p, "(");
// Predicate: true when p converts (via testString) to the string ")".
413 bool testCloseParan(MathInset * p)
415 return testString(p, ")");
// Replacement callback: wraps ar in a newly allocated MathDelimInset with
// "(" and ")" as delimiters. The result is returned as a raw pointer.
419 MathInset * replaceDelims(const MathArray & ar)
421 return new MathDelimInset("(", ")", ar);
425 // replace '('...')' sequences by a real MathDelimInset
// Delegates to replaceNested with the parenthesis predicates and the
// replaceDelims callback; ar is modified in place.
426 void extractDelims(MathArray & ar)
428 //lyxerr << "\nDelims from: " << ar << "\n";
429 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
430 //lyxerr << "\nDelims to: " << ar << "\n";
436 // search well-known functions
440 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
441 // assume 'extractDelims' ran before
// ar is modified in place. NOTE(review): several declarations (name, exp) and
// control-flow lines are not visible in this excerpt.
442 void extractFunctions(MathArray & ar)
444 // we need at least two items...
448 //lyxerr << "\nFunctions from: " << ar << "\n";
// the bound i + 1 < ar.size() guarantees that jt = it + 1 is a valid element
449 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
450 MathArray::iterator it = ar.begin() + i;
451 MathArray::iterator jt = it + 1;
455 if ((*it)->asUnknownInset()) {
456 // it certainly is if it is well known...
457 name = (*it)->asUnknownInset()->name();
459 // is this a user defined function?
460 // it is probably not, if it doesn't have a name.
461 if (!extractString((*it).nucleus(), name))
463 // it is not if it has no argument
466 // guess so, if this is followed by
467 // a DelimInset with a single item in the cell
468 MathDelimInset * del = (*jt)->asDelimInset();
469 if (!del || del->cell(0).size() != 1)
471 // fall through into main branch
474 // do we have an exponent like in
475 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
// jt is passed by reference, so extractScript may move it past the script
477 extractScript(exp, jt, ar.end());
479 // create a proper inset as replacement
480 MathExFuncInset * p = new MathExFuncInset(name);
482 // jt points to the "argument". Get hold of this.
// the argument is collected into cell 0 of the new inset; st is the position
// behind the argument
483 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
485 // replace the function name by a real function inset
488 // remove the source of the argument from the array
489 ar.erase(it + 1, st);
491 // re-insert exponent
492 ar.insert(i + 1, exp);
493 //lyxerr << "\nFunctions to: " << ar << "\n";
// True when p is a symbol inset whose name() equals name.
502 bool testSymbol(MathInset * p, string const & name)
504 return p->asSymbolInset() && p->asSymbolInset()->name() == name;
// Predicate: true when p is the symbol named "int".
508 bool testIntSymbol(MathInset * p)
510 return testSymbol(p, "int");
// Predicate: true when p converts (via testString) to the string "d"; used
// as the closing test when searching for the end of an integral.
514 bool testIntDiff(MathInset * p)
516 return testString(p, "d");
520 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
521 // assume 'extractDelims' ran before
// ar is modified in place. Cells of the new inset as filled by the visible
// code: 0 = integrand, 1 = differential, 2 = subscript data, 3 = superscript
// data.
522 void extractIntegrals(MathArray & ar)
524 // we need at least three items...
528 //lyxerr << "\nIntegrals from: " << ar << "\n";
529 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
530 MathArray::iterator it = ar.begin() + i;
532 // is this a integral name?
533 if (!testIntSymbol(it->nucleus()))
// find the 'd' that closes this integral, using the '\int' symbol as the
// opening test and 'd' as the closing test
537 MathArray::iterator jt =
538 endNestSearch(it, ar.end(), testIntSymbol, testIntDiff);
540 // something sensible found?
544 // create a proper inset as replacement
545 MathExIntInset * p = new MathExIntInset("int");
547 // collect subscript if any
548 MathArray::iterator st = it + 1;
550 if (MathScriptInset * sub = (*st)->asScriptInset())
551 if (sub->hasDown()) {
552 p->cell(2) = sub->down().data();
556 // collect superscript if any
558 if (MathScriptInset * sup = (*st)->asScriptInset())
560 p->cell(3) = sup->up().data();
564 // core is the part from behind the scripts to the 'd'
565 p->cell(0) = MathArray(st, jt);
567 // use the "thing" behind the 'd' as differential
568 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
// remove everything after the '\int' element up to tt, the position behind
// the differential
571 ar.erase(it + 1, tt);
574 //lyxerr << "\nIntegrals to: " << ar << "\n";
// Predicate: true when p is the symbol named "sum".
582 bool testSumSymbol(MathInset * p)
584 return testSymbol(p, "sum");
// Predicate over a MathAtom: true when its nucleus converts (via testString)
// to the string "=". Takes the atom by reference so it can be used directly
// with find_if over a MathArray.
588 bool testEqualSign(MathAtom const & at)
590 return testString(at.nucleus(), "=");
595 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
596 // assume 'extractDelims' ran before
// ar is modified in place. Cells of the new inset as filled by the visible
// code: 0 = summand, 1 = summation index, 2 = lower bound, 3 = superscript
// data (upper bound).
597 void extractSums(MathArray & ar)
599 // we need at least two items...
603 //lyxerr << "\nSums from: " << ar << "\n";
604 for (MathArray::size_type i = 0; i + 1< ar.size(); ++i) {
605 MathArray::iterator it = ar.begin() + i;
607 // is this a sum name?
608 if (!testSumSymbol(it->nucleus()))
611 // create a proper inset as replacement
612 MathExIntInset * p = new MathExIntInset("sum");
614 // collect lower bound and summation index
615 MathArray::iterator st = it + 1;
617 if (MathScriptInset * sub = (*st)->asScriptInset())
618 if (sub->hasDown()) {
619 // try to figure out the summation index from the subscript
// NOTE: the local 'ar' and 'it' declared here shadow the function parameter
// and the outer loop iterator within this inner scope
620 MathArray & ar = sub->down().data();
621 MathArray::iterator it =
622 find_if(ar.begin(), ar.end(), &testEqualSign);
623 if (it != ar.end()) {
624 // we found a '=', use everything in front of that as index,
625 // and everything behind as lower index
626 p->cell(1) = MathArray(ar.begin(), it);
627 p->cell(2) = MathArray(it + 1, ar.end());
629 // use everything as summation index, don't use scripts.
635 // collect upper bound
637 if (MathScriptInset * sup = (*st)->asScriptInset())
639 p->cell(3) = sup->up().data();
643 // use some behind the script as core
644 MathArray::iterator tt = extractArgument(p->cell(0), st, ar.end());
// remove everything after the '\sum' element up to tt, the position behind
// the summand
647 ar.erase(it + 1, tt);
650 //lyxerr << "\nSums to: " << ar << "\n";
655 // search differential stuff
658 // tests for 'd' or '\partial'
// NOTE(review): the visible return statement only compares against "d"; no
// test for '\partial' appears in this excerpt.
659 bool testDiffItem(MathAtom const & at)
661 return testString(at.nucleus(), "d");
// True when ar is non-empty and its first element passes testDiffItem.
665 bool testDiffArray(MathArray const & ar)
667 return ar.size() && testDiffItem(ar.front());
// True when p is a fraction inset whose cell 0 and cell 1 both pass
// testDiffArray, i.e. both start with a differential item.
671 bool testDiffFrac(MathInset * p)
673 MathFracInset * f = p->asFracInset();
674 return f && testDiffArray(f->cell(0)) && testDiffArray(f->cell(1));
678 // is this something like ^number?
// Checks that *it is a script inset, converts it to a string with
// extractString and opens an input stream over that string.
// NOTE(review): reading the number into i and the returned status are on
// lines not visible in this excerpt.
679 bool extractDiffExponent(MathArray::iterator it, int & i)
681 if (!(*it)->asScriptInset())
685 if (!extractString((*it).nucleus(), s))
687 istringstream is(s.c_str());
// Looks for "differential fractions" (as recognised by testDiffFrac) in ar
// and builds a MathDiffInset from each; ar is modified in place.
// NOTE(review): several control-flow lines and the statement that stores the
// new inset into the array are not visible in this excerpt.
693 void extractDiff(MathArray & ar)
695 //lyxerr << "\nDiffs from: " << ar << "\n";
696 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
697 MathArray::iterator it = ar.begin() + i;
699 // is this a "differential fraction"?
700 if (!testDiffFrac(it->nucleus()))
703 MathFracInset * f = (*it)->asFracInset();
705 lyxerr << "should not happen\n";
709 // create a proper diff inset
710 MathDiffInset * diff = new MathDiffInset;
712 // collect function, let jt point behind last used item
713 MathArray::iterator jt = it + 1;
// numer is cell 0 of the fraction; its first element is the differential item
715 MathArray & numer = f->cell(0);
716 if (numer.size() > 1 && numer[1]->asScriptInset()) {
717 // this is something like d^n f(x) / d... or d^n / d...
// skip the 'd' and the script: the function starts at index 2 if present
720 if (numer.size() > 2)
721 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
723 jt = extractArgument(diff->cell(0), jt, ar.end());
725 // simply d f(x) / d... or d/d...
// skip only the 'd': the function starts at index 1 if present
726 if (numer.size() > 1)
727 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
729 jt = extractArgument(diff->cell(0), jt, ar.end());
732 // collect denominator parts
733 MathArray & denom = f->cell(1);
734 for (MathArray::iterator dt = denom.begin(); dt != denom.end();) {
// et is the next differential item after dt, or denom.end() if none
736 MathArray::iterator et = find_if(dt + 1, denom.end(), &testDiffItem);
// st is the last element of the part between dt and et
739 MathArray::iterator st = et - 1;
740 MathScriptInset * script = (*st)->asScriptInset();
741 if (script && script->hasUp()) {
742 // things like d.../dx^n
744 if (extractNumber(script->up().data(), mult)) {
745 //lyxerr << "mult: " << mult << endl;
// NOTE: this 'int i' shadows the outer loop index i within the inner loop;
// the derivative (without the trailing script) is added mult times
746 for (int i = 0; i < mult; ++i)
747 diff->addDer(MathArray(dt + 1, st));
751 diff->addDer(MathArray(dt + 1, et));
// remove everything after the fraction element up to jt
757 ar.erase(it + 1, jt);
760 //lyxerr << "\nDiffs to: " << ar << "\n";
// Runs the structure-extraction passes over ar, modifying it in place.
// Only the calls to extractFunctions and extractIntegrals are visible in this
// excerpt; the other passes and their order are on lines not shown.
769 void extractStructure(MathArray & ar)
775 extractFunctions(ar);
777 extractIntegrals(ar);
// Writes the contents of a MathArray to the WriteStream wi.
// NOTE(review): the loop iterates over 'ar' while the parameter is 'dat';
// 'ar' is presumably a local declared on a line not visible here -- confirm.
785 void write(MathArray const & dat, WriteStream & wi)
789 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
// tell the stream whether the current element is the first one
790 wi.firstitem() = (it == ar.begin());
791 MathInset const * p = it->nucleus();
// when another element follows, check via asScript whether a script inset
// is involved; the handling of that case is on lines not visible here
792 if (it + 1 != ar.end()) {
793 if (MathScriptInset const * q = asScript(it)) {
// Calls normalize(os) on every element of ar, in order.
804 void normalize(MathArray const & ar, NormalStream & os)
806 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
807 (*it)->normalize(os);
// Emits a MathArray to an OctaveStream after running extractStructure.
// NOTE(review): 'ar' is presumably a modifiable local copy of 'dat' declared
// on a line not visible here (extractStructure needs a non-const array) --
// confirm. The per-element output calls are also not visible in this excerpt.
811 void octavize(MathArray const & dat, OctaveStream & os)
814 extractStructure(ar);
815 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
816 MathInset const * p = it->nucleus();
// when another element follows, check via asScript whether a script inset
// is involved
817 if (it + 1 != ar.end()) {
818 if (MathScriptInset const * q = asScript(it)) {
// Emits a MathArray to a MapleStream after running extractStructure.
// NOTE(review): 'ar' is presumably a modifiable local copy of 'dat' declared
// on a line not visible here (extractStructure needs a non-const array) --
// confirm. The per-element output calls are also not visible in this excerpt.
829 void maplize(MathArray const & dat, MapleStream & os)
832 extractStructure(ar);
833 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
834 MathInset const * p = it->nucleus();
// when another element follows, check via asScript whether a script inset
// is involved
835 if (it + 1 != ar.end()) {
836 if (MathScriptInset const * q = asScript(it)) {
// Emits a MathArray to a MathematicaStream after running extractStructure.
// NOTE(review): 'ar' is presumably a modifiable local copy of 'dat' declared
// on a line not visible here (extractStructure needs a non-const array) --
// confirm.
847 void mathematicize(MathArray const & dat, MathematicaStream & os)
850 extractStructure(ar);
851 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
852 MathInset const * p = it->nucleus();
853 if (it + 1 != ar.end()) {
// a script inset found by asScript is emitted together with the current
// element p through mathematicize2
854 if (MathScriptInset const * q = asScript(it)) {
855 q->mathematicize2(p, os);
// plain per-element output
860 p->mathematicize(os);
// Emits a MathArray to a MathMLStream after running extractStructure.
// NOTE(review): 'ar' is presumably a modifiable local copy of 'dat' declared
// on a line not visible here (extractStructure needs a non-const array) --
// confirm. The first branch of the size test is also not visible.
865 void mathmlize(MathArray const & dat, MathMLStream & os)
868 extractStructure(ar);
// a single element is streamed directly
871 else if (ar.size() == 1)
872 os << ar.begin()->nucleus();
875 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
876 MathInset const * p = it->nucleus();
877 if (it + 1 != ar.end()) {
// a script inset found by asScript is emitted together with the current
// element p through mathmlize2
878 if (MathScriptInset const * q = asScript(it)) {
879 q->mathmlize2(p, os);
// Runs the shell command cmd with data piped to its standard input and
// returns what the command wrote to standard output.
// The output is captured through a temporary file that is removed afterwards.
// NOTE(review): data and cmd are pasted into the shell command line without
// any escaping; a single quote in data ends the quoted echo argument. Flagged
// for review -- callers should only pass trusted text.
895 string captureOutput(string const & cmd, string const & data)
897 string outfile = lyx::tempName(string(), "mathextern");
// echo 'data' | (cmd) > outfile
898 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
899 lyxerr << "calling: " << full << endl;
// run the command and wait for it to finish before reading the file
901 dummy.startscript(Systemcall::Wait, full);
902 string out = GetFileContents(outfile);
903 lyx::unlink(outfile);
904 lyxerr << "result: '" << out << "'" << endl;
// Sends ar to Maple and parses Maple's LaTeX output back into a MathArray.
// extra is placed in front of the parenthesised expression inside the
// latex(...) call sent to Maple.
// NOTE(review): stream setup, some conditions and the return statement are on
// lines not visible in this excerpt.
909 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
// Maple prelude: load the latex library, then patch its output routines
911 string header = "readlib(latex):\n";
913 // remove the \\it for variable names
914 //"#`latex/csname_font` := `\\it `:"
916 "`latex/csname_font` := ``:\n";
918 // export matrices in (...) instead of [...]
920 "`latex/latex/matrix` := "
921 "subs(`[`=`(`, `]`=`)`,"
922 "eval(`latex/latex/matrix`)):\n";
924 // replace \\cdots with proper '*'
// (the substitution below maps `\,` to `\cdot `)
926 "`latex/latex/*` := "
927 "subs(`\\,`=`\\cdot `,"
928 "eval(`latex/latex/*`)):\n";
930 // remove spurious \\noalign{\\medskip} in matrix output
932 "`latex/latex/matrix`:= "
933 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
934 "eval(`latex/latex/matrix`)):\n";
936 //"#`latex/latex/symbol` "
937 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
939 string trailer = "quit;";
943 string expr = os.str().c_str();
944 lyxerr << "ar: '" << ar << "'\n";
945 lyxerr << "ms: '" << os.str() << "'\n";
// repair loop: each round asks mint to syntax-check expr and, on an
// identified error, inserts one '*' at the reported caret position
947 for (int i = 0; i < 100; ++i) { // at most 100 attempts
948 // try to fix missing '*' the hard way by using mint
950 // ... > echo "1A;" | mint -i 1 -S -s -q
953 // Probably missing an operator such as * p
955 lyxerr << "checking expr: '" << expr << "'\n";
956 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
958 break; // expression syntax is ok
959 istringstream is(out.c_str());
962 if (line.find("on line") != 0)
963 break; // error message not identified
965 string::size_type pos = line.find('^');
// the caret column includes a 15-character prefix that is not part of expr
966 if (pos == string::npos || pos < 15)
967 break; // caret position not found
968 pos -= 15; // skip the "on line ..." part
969 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
970 break; // two '*' in a row are definitely bad
971 expr.insert(pos, "*");
// final call: ask Maple for the LaTeX form of extra(expr)
974 string full = "latex(" + extra + '(' + expr + "));";
975 string out = captureOutput("maple -q", header + full + trailer);
// parse Maple's LaTeX output into the result array
981 mathed_parse_cell(res, out);
// Sends ar to Octave and converts Octave's output into a MathArray.
// The first (string) parameter is unnamed and therefore unused here.
// NOTE(review): stream setup, some conditions and the return statement are on
// lines not visible in this excerpt.
986 MathArray pipeThroughOctave(string const &, MathArray const & ar)
991 string expr = os.str().c_str();
994 lyxerr << "pipe: ar: '" << ar << "'\n";
995 lyxerr << "pipe: expr: '" << expr << "'\n";
// repair loop: each round runs expr through Octave and, on a parse error,
// inserts one '*' at the reported caret position
997 for (int i = 0; i < 100; ++i) { // at most 100 attempts
999 // try to fix missing '*' the hard way
1001 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
1004 lyxerr << "checking expr: '" << expr << "'\n";
// stderr is redirected into stdout so parse errors end up in 'out'
1005 out = captureOutput("octave -q 2>&1", expr);
1006 lyxerr << "checking out: '" << out << "'\n";
1008 // leave loop if expression syntax is probably ok
1009 if (out.find("parse error:") == string::npos)
1012 // search line with single caret
1013 istringstream is(out.c_str());
1017 lyxerr << "skipping line: '" << line << "'\n";
1018 if (line.find(">>> ") != string::npos)
1022 // found line with error, next line is the one with caret
1024 string::size_type pos = line.find('^');
1025 lyxerr << "caret line: '" << line << "'\n";
1026 lyxerr << "found caret at pos: '" << pos << "'\n";
// the caret column includes the 4-character ">>> " prefix
1027 if (pos == string::npos || pos < 4)
1028 break; // caret position not found
1029 pos -= 4; // skip the ">>> " part
1030 if (expr[pos] == '*')
1031 break; // two '*' in a row are definitely bad
1032 expr.insert(pos, "*");
// drop the first 6 characters of Octave's output; presumably a fixed result
// prefix -- confirm against actual Octave output
1039 out = out.substr(6);
1041 // parse output as matrix or single number
1042 MathAtom at(new MathArrayInset("array", out));
1043 MathArrayInset const * mat = at.nucleus()->asArrayInset();
// a 1x1 result is returned as the plain cell contents ...
1045 if (mat->ncols() == 1 && mat->nrows() == 1)
1046 res.push_back(mat->cell(0));
// ... anything else is wrapped in parentheses around the array inset
1048 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1049 res.back()->cell(0).push_back(at);
// Dispatches ar to an external program selected by lang and returns the
// parsed result.
// "octave" and "maple" have dedicated handlers; any other lang is served by a
// script named "extern_" + lang looked up in the "mathed" library directory,
// which is fed a normalized form of the expression.
// NOTE(review): the not-found condition and the return statements are on
// lines not visible in this excerpt.
1057 MathArray pipeThroughExtern(string const & lang, string const & extra,
1058 MathArray const & ar)
1060 if (lang == "octave")
1061 return pipeThroughOctave(extra, ar);
1063 if (lang == "maple")
1064 return pipeThroughMaple(extra, ar);
1066 // create normalized expression
1068 NormalStream ns(os);
// the script input starts with "[" followed by extra and a space
1069 os << "[" << extra << ' ';
1072 string data = os.str().c_str();
1074 // search external script
1075 string file = LibFileSearch("mathed", "extern_" + lang);
1077 lyxerr << "converter to '" << lang << "' not found\n";
1081 // run external script
1082 string out = captureOutput(file, data);
// parse the script's output into the result array
1084 mathed_parse_cell(res, out);