1 // This file contains most of the magic that extracts "context
2 // information" from the unstructured layout-oriented stuff in an
7 #include "math_amsarrayinset.h"
8 #include "math_arrayinset.h"
9 #include "math_charinset.h"
10 #include "math_deliminset.h"
11 #include "math_diffinset.h"
12 #include "math_exfuncinset.h"
13 #include "math_exintinset.h"
14 #include "math_fracinset.h"
15 #include "math_matrixinset.h"
16 #include "math_mathmlstream.h"
17 #include "math_numberinset.h"
18 #include "math_scriptinset.h"
19 #include "math_stringinset.h"
20 #include "math_symbolinset.h"
21 #include "math_unknowninset.h"
22 #include "math_parser.h"
25 #include "support/lyxlib.h"
26 #include "support/systemcall.h"
27 #include "support/filetools.h"
32 using std::istringstream;
// Stream output operator for a MathArray: takes the target ostream and the
// array to print.
37 ostream & operator<<(ostream & os, MathArray const & ar)
45 // Signature of a predicate used for tests on a single item: it receives a
// MathInset pointer and answers with a bool.
46 typedef bool TestItemFunc(MathInset *);
48 // Signature of a function used for replacing subexpressions: it receives
// the subexpression as a MathArray and hands back a MathInset pointer.
49 typedef MathInset * ReplaceArgumentFunc(const MathArray & ar);
53 // Try to extract a super/subscript.
54 // Modifies the iterator position (pos is taken by reference) to point
// behind the thing.
// NOTE(review): the bool result presumably reports whether a script inset
// was found at pos -- confirm against the return statements.
55 bool extractScript(MathArray & ar,
56 MathArray::iterator & pos, MathArray::iterator last)
58 // nothing to get here
62 // is this a scriptinset?
63 if (!(*pos)->asScriptInset())
66 // it is a scriptinset, use it.
73 // Try to extract an "argument" to some function.
74 // Returns the position behind the argument.
// The trailing string parameter is unnamed and defaults to "".
75 MathArray::iterator extractArgument(MathArray & ar,
76 MathArray::iterator pos, MathArray::iterator last, string const & = "")
78 // nothing to get here
82 // something delimited _is_ an argument
83 if ((*pos)->asDelimInset()) {
88 // always take the first thing, no matter what it is
91 // go ahead if possible
96 // if the next item is a subscript, it most certainly belongs to the
98 extractScript(ar, pos, last);
102 // but it might be more than that.
103 // FIXME: not implemented
104 //for (MathArray::iterator it = pos + 1; it != last; ++it) {
105 // // always take the first thing, no matter
107 // ar.push_back(*it);
// Script-inset lookup helper: yields a MathScriptInset const pointer obtained
// through nucleus()->asScriptInset(). Called by write, octavize, maplize and
// mathmlize while walking an array.
115 MathScriptInset const * asScript(MathArray::const_iterator it)
119 if (it->nucleus()->asScriptInset())
124 return it->nucleus()->asScriptInset();
129 // Returns the sequence of chars with the same code starting at it up to end;
130 // it might be less, though...
// The loop stops at end or at the first item that is not a char inset, and
// appends getChar() of every item before that to s.
132 (MathArray::const_iterator it, MathArray::const_iterator end)
135 for (; it != end && (*it)->asCharInset(); ++it)
136 s += (*it)->getChar();
// Collapses runs of char insets into MathStringInsets: the char sequence
// starting at index i is gathered, ar[i] is reset to a MathStringInset
// holding it, and the positions i + 1 up to i + s.size() are erased.
141 void extractStrings(MathArray & ar)
143 //lyxerr << "\nStrings from: " << ar << "\n";
144 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
145 if (!ar[i]->asCharInset())
147 string s = charSequence(ar.begin() + i, ar.end());
148 ar[i].reset(new MathStringInset(s));
149 ar.erase(i + 1, i + s.size());
151 //lyxerr << "\nStrings to: " << ar << "\n";
// Returns the nucleus of the only atom when ar holds exactly one element,
// and 0 otherwise.
155 MathInset * singleItem(MathArray & ar)
157 return ar.size() == 1 ? ar.begin()->nucleus() : 0;
// Turns matrix-like items of ar into MathMatrixInsets in two passes: first
// delim insets whose cell(0) is a single item that is a grid inset, then AMS
// array insets.
161 void extractMatrices(MathArray & ar)
163 //lyxerr << "\nMatrices from: " << ar << "\n";
164 // first pass for explicitly delimited stuff
165 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
166 MathDelimInset * del = (*it)->asDelimInset();
169 MathInset * arr = singleItem(del->cell(0));
170 if (!arr || !arr->asGridInset())
172 *it = MathAtom(new MathMatrixInset(*(arr->asGridInset())));
175 // second pass for AMS "pmatrix" etc
176 for (MathArray::iterator it = ar.begin(); it != ar.end(); ++it) {
177 MathAMSArrayInset * ams = (*it)->asAMSArrayInset();
180 *it = MathAtom(new MathMatrixInset(*ams));
182 //lyxerr << "\nMatrices to: " << ar << "\n";
186 // Convert this inset somehow to a string.
// The text is written to str: either a one-character string built from
// p->getChar(), or the contents of a string inset.
187 bool extractString(MathInset * p, string & str)
192 str = string(1, p->getChar());
195 if (p->asStringInset()) {
196 str = p->asStringInset()->str();
203 // Convert this array somehow to a number (int overload).
// Parsing goes through an istringstream over the char sequence of the array.
204 bool extractNumber(MathArray const & ar, int & i)
206 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// Convert this array somehow to a number (double overload), reading from an
// istringstream over the char sequence of the array.
212 bool extractNumber(MathArray const & ar, double & d)
214 istringstream is(charSequence(ar.begin(), ar.end()).c_str());
// True when extractString succeeds on p and the extracted text equals str.
220 bool testString(MathInset * p, const string & str)
223 return extractString(p, s) && str == s;
227 // Search the end of a nested sequence.
// Walks from it towards last with a nesting level counter that starts at 0,
// applying testOpen and testClose to the nucleus of each item.
// NOTE(review): callers treat the result as the position of the closing
// item; presumably last is returned when none is found -- confirm.
228 MathArray::iterator endNestSearch(
229 MathArray::iterator it,
230 MathArray::iterator last,
231 TestItemFunc testOpen,
232 TestItemFunc testClose
235 for (int level = 0; it != last; ++it) {
236 if (testOpen(it->nucleus()))
238 if (testClose(it->nucleus()))
247 // Replace nested sequences by real insets.
// For an item that satisfies testOpen, the matching end jt is looked up with
// endNestSearch, replaceArg is called with the items strictly between the
// two, and the items from it + 1 through jt are erased.
250 TestItemFunc testOpen,
251 TestItemFunc testClose,
252 ReplaceArgumentFunc replaceArg
255 // use indices rather than iterators for the loop because we are going
256 // to modify the array.
257 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
258 // check whether this is the begin of the sequence
259 MathArray::iterator it = ar.begin() + i;
260 if (!testOpen(it->nucleus()))
263 // search end of sequence
264 MathArray::iterator jt = endNestSearch(it, ar.end(), testOpen, testClose);
268 // create a proper inset as replacement
269 MathInset * p = replaceArg(MathArray(it + 1, jt));
271 // replace the original stuff by the new inset
272 ar.erase(it + 1, jt + 1);
// For a script inset that has both an up and a down part, a new
// MathScriptInset q takes over the superscript data via swap,
// p->removeScript(true) is called, and q is inserted into the array.
280 // split scripts into separate super- and subscript insets. sub goes in
284 void splitScripts(MathArray & ar)
286 //lyxerr << "\nScripts from: " << ar << "\n";
287 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
288 MathArray::iterator it = ar.begin() + i;
290 // is this script inset?
291 MathScriptInset * p = (*it)->asScriptInset();
295 // no problem if we don't have both...
296 if (!p->hasUp() || !p->hasDown())
299 // create extra script inset and move superscript over
300 MathScriptInset * q = new MathScriptInset;
302 q->up().data_.swap(p->up().data_);
303 p->removeScript(true);
305 // insert new inset behind
307 ar.insert(i, MathAtom(q));
309 //lyxerr << "\nScripts to: " << ar << "\n";
// Looks for a char inset 'e' that is directly followed by a script inset
// without a subscript, and builds a MathExFuncInset("exp") whose cell(0) is
// taken from the script's cell(1).
317 void extractExps(MathArray & ar)
319 //lyxerr << "\nExps from: " << ar << "\n";
321 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
322 MathArray::iterator it = ar.begin() + i;
325 MathCharInset const * p = (*it)->asCharInset();
326 if (!p || p->getChar() != 'e')
329 // we need an exponent but no subscript
330 MathScriptInset * sup = (*(it + 1))->asScriptInset();
331 if (!sup || sup->hasDown())
334 // create a proper exp-inset as replacement
335 MathExFuncInset * func = new MathExFuncInset("exp");
336 func->cell(0) = sup->cell(1);
342 //lyxerr << "\nExps to: " << ar << "\n";
// True for the characters '0' through '9' and for '.'.
350 bool isDigitOrSimilar(char c)
352 return ('0' <= c && c <= '9') || c == '.';
356 // Returns a sequence of digits.
// Walks char insets starting at it (up to end), tests each character with
// isDigitOrSimilar and appends accepted characters to s.
358 (MathArray::const_iterator it, MathArray::const_iterator end)
361 for (; it != end && (*it)->asCharInset(); ++it) {
362 if (!isDigitOrSimilar((*it)->getChar()))
364 s += (*it)->getChar();
// Collapses runs of digit-like char insets into MathNumberInsets: the digit
// sequence starting at index i is gathered, ar[i] is reset to a
// MathNumberInset holding it, and the positions i + 1 up to i + s.size()
// are erased.
370 void extractNumbers(MathArray & ar)
372 //lyxerr << "\nNumbers from: " << ar << "\n";
373 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
374 if (!ar[i]->asCharInset())
376 if (!isDigitOrSimilar(ar[i]->asCharInset()->getChar()))
379 string s = digitSequence(ar.begin() + i, ar.end());
381 ar[i].reset(new MathNumberInset(s));
382 ar.erase(i + 1, i + s.size());
384 //lyxerr << "\nNumbers to: " << ar << "\n";
390 // search delimiters
// True when p converts to the string "(".
393 bool testOpenParan(MathInset * p)
395 return testString(p, "(");
// True when p converts to the string ")".
399 bool testCloseParan(MathInset * p)
401 return testString(p, ")");
// Replacement callback passed to replaceNested by extractDelims: allocates a
// MathDelimInset with "(" and ")" as delimiters.
405 MathInset * replaceDelims(const MathArray & ar)
407 MathDelimInset * del = new MathDelimInset("(", ")");
413 // Replace '('...')' sequences by a real MathDelimInset.
// Delegates to replaceNested with the paren tests and replaceDelims.
414 void extractDelims(MathArray & ar)
416 //lyxerr << "\nDelims from: " << ar << "\n";
417 replaceNested(ar, testOpenParan, testCloseParan, replaceDelims);
418 //lyxerr << "\nDelims to: " << ar << "\n";
424 // search well-known functions
428 // replace 'f' '(...)' and 'f' '^n' '(...)' sequences by a real MathExFuncInset
429 // assume 'extractDelims' ran before
// The function name comes from an unknown inset's name() or, failing that,
// from extractString on the item.
430 void extractFunctions(MathArray & ar)
432 // we need at least two items...
436 //lyxerr << "\nFunctions from: " << ar << "\n";
437 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
438 MathArray::iterator it = ar.begin() + i;
439 MathArray::iterator jt = it + 1;
443 if ((*it)->asUnknownInset()) {
444 // it certainly is if it is well known...
445 name = (*it)->asUnknownInset()->name();
447 // is this a user defined function?
448 // it is probably not, if it doesn't have a name.
449 if (!extractString((*it).nucleus(), name))
451 // it is not if it has no argument
454 // guess so, if this is followed by
455 // a DelimInset with a single item in the cell
456 MathDelimInset * del = (*jt)->asDelimInset();
457 if (!del || del->cell(0).size() != 1)
459 // fall through into main branch
462 // do we have an exponent like in
463 // 'sin' '^2' 'x' -> 'sin(x)' '^2'
465 extractScript(exp, jt, ar.end());
467 // create a proper inset as replacement
468 MathExFuncInset * p = new MathExFuncInset(name);
470 // jt points to the "argument". Get hold of this.
471 MathArray::iterator st = extractArgument(p->cell(0), jt, ar.end());
473 // replace the function name by a real function inset
476 // remove the source of the argument from the array
477 ar.erase(it + 1, st);
479 // re-insert exponent
480 ar.insert(i + 1, exp);
481 //lyxerr << "\nFunctions to: " << ar << "\n";
// True when p is a symbol inset whose name() equals name.
490 bool testSymbol(MathInset * p, string const & name)
492 return p->asSymbolInset() && p->asSymbolInset()->name() == name;
// True when p is the symbol named "int".
496 bool testIntSymbol(MathInset * p)
498 return testSymbol(p, "int");
// True when p converts to the string "d"; used by extractIntegrals as the
// closing test for endNestSearch.
502 bool testIntDiff(MathInset * p)
504 return testString(p, "d");
508 // replace '\int' ['_^'] x 'd''x'(...)' sequences by a real MathExIntInset
509 // assume 'extractDelims' ran before
// Cells filled on the new inset: 0 = core, 1 = differential, 2 = subscript,
// 3 = superscript.
510 void extractIntegrals(MathArray & ar)
512 // we need at least three items...
516 //lyxerr << "\nIntegrals from: " << ar << "\n";
517 for (MathArray::size_type i = 0; i + 1 < ar.size(); ++i) {
518 MathArray::iterator it = ar.begin() + i;
520 // is this an integral name?
521 if (!testIntSymbol(it->nucleus()))
525 MathArray::iterator jt =
526 endNestSearch(it, ar.end(), testIntSymbol, testIntDiff);
528 // something sensible found?
532 // create a proper inset as replacement
533 MathExIntInset * p = new MathExIntInset("int");
535 // collect subscript if any
536 MathArray::iterator st = it + 1;
538 if (MathScriptInset * sub = (*st)->asScriptInset())
539 if (sub->hasDown()) {
540 p->cell(2) = sub->down().data_;
544 // collect superscript if any
546 if (MathScriptInset * sup = (*st)->asScriptInset())
548 p->cell(3) = sup->up().data_;
552 // core is the part from behind the scripts to the 'd'
553 p->cell(0) = MathArray(st, jt);
555 // use the "thing" behind the 'd' as differential
556 MathArray::iterator tt = extractArgument(p->cell(1), jt + 1, ar.end());
559 ar.erase(it + 1, tt);
562 //lyxerr << "\nIntegrals to: " << ar << "\n";
// True when p is the symbol named "sum".
570 bool testSumSymbol(MathInset * p)
572 return testSymbol(p, "sum");
// True when the nucleus of the atom converts to the string "="; passed to
// find_if in extractSums.
576 bool testEqualSign(MathAtom const & at)
578 return testString(at.nucleus(), "=");
583 // replace '\sum' ['_^'] f(x) sequences by a real MathExIntInset
584 // assume 'extractDelims' ran before
// Cells filled on the new inset: 0 = core, 1 = summation index, 2 = lower
// bound, 3 = upper bound.
585 void extractSums(MathArray & ar)
587 // we need at least two items...
591 //lyxerr << "\nSums from: " << ar << "\n";
592 for (MathArray::size_type i = 0; i + 1< ar.size(); ++i) {
593 MathArray::iterator it = ar.begin() + i;
595 // is this a sum name?
596 if (!testSumSymbol(it->nucleus()))
599 // create a proper inset as replacement
600 MathExIntInset * p = new MathExIntInset("sum");
602 // collect lower bound and summation index
603 MathArray::iterator st = it + 1;
605 if (MathScriptInset * sub = (*st)->asScriptInset())
606 if (sub->hasDown()) {
607 // try to figure out the summation index from the subscript
// NOTE(review): the 'ar' and 'it' declared on the next lines shadow the
// function parameter 'ar' and the loop-level 'it'.
608 MathArray & ar = sub->down().data_;
609 MathArray::iterator it =
610 find_if(ar.begin(), ar.end(), &testEqualSign);
611 if (it != ar.end()) {
612 // we found a '=', use everything in front of that as index,
613 // and everything behind as lower index
614 p->cell(1) = MathArray(ar.begin(), it);
615 p->cell(2) = MathArray(it + 1, ar.end());
617 // use everything as summation index, don't use scripts.
623 // collect upper bound
625 if (MathScriptInset * sup = (*st)->asScriptInset())
627 p->cell(3) = sup->up().data_;
631 // use some behind the script as core
632 MathArray::iterator tt = extractArgument(p->cell(0), st, ar.end());
635 ar.erase(it + 1, tt);
638 //lyxerr << "\nSums to: " << ar << "\n";
643 // search differential stuff
646 // tests for 'd' or '\partial'
// NOTE(review): the return statement below only checks for the string "d".
647 bool testDiffItem(MathAtom const & at)
649 return testString(at.nucleus(), "d");
// True when ar is non-empty and its first atom passes testDiffItem.
653 bool testDiffArray(MathArray const & ar)
655 return ar.size() && testDiffItem(ar.front());
// True when p is a fraction inset whose cell(0) and cell(1) both pass
// testDiffArray.
659 bool testDiffFrac(MathInset * p)
661 MathFracInset * f = p->asFracInset();
662 return f && testDiffArray(f->cell(0)) && testDiffArray(f->cell(1));
666 // Is this something like ^number?
// Requires a script inset at it, converts it with extractString and feeds
// the text to an istringstream.
// NOTE(review): i presumably receives the parsed number -- confirm.
667 bool extractDiffExponent(MathArray::iterator it, int & i)
669 if (!(*it)->asScriptInset())
673 if (!extractString((*it).nucleus(), s))
675 istringstream is(s.c_str());
// Looks for "differential fractions" (see testDiffFrac) and builds a
// MathDiffInset from each: cell(0) is taken from the numerator and/or the
// argument following the fraction, and the denominator parts are added via
// addDer.
681 void extractDiff(MathArray & ar)
683 //lyxerr << "\nDiffs from: " << ar << "\n";
684 for (MathArray::size_type i = 0; i < ar.size(); ++i) {
685 MathArray::iterator it = ar.begin() + i;
687 // is this a "differential fraction"?
688 if (!testDiffFrac(it->nucleus()))
691 MathFracInset * f = (*it)->asFracInset();
693 lyxerr << "should not happen\n";
697 // create a proper diff inset
698 MathDiffInset * diff = new MathDiffInset;
700 // collect function, let jt point behind last used item
701 MathArray::iterator jt = it + 1;
703 MathArray & numer = f->cell(0);
704 if (numer.size() > 1 && numer[1]->asScriptInset()) {
705 // this is something like d^n f(x) / d... or d^n / d...
708 if (numer.size() > 2)
709 diff->cell(0) = MathArray(numer.begin() + 2, numer.end());
711 jt = extractArgument(diff->cell(0), jt, ar.end());
713 // simply d f(x) / d... or d/d...
714 if (numer.size() > 1)
715 diff->cell(0) = MathArray(numer.begin() + 1, numer.end());
717 jt = extractArgument(diff->cell(0), jt, ar.end());
720 // collect denominator parts
721 MathArray & denom = f->cell(1);
722 for (MathArray::iterator dt = denom.begin(); dt != denom.end();) {
724 MathArray::iterator et = find_if(dt + 1, denom.end(), &testDiffItem);
727 MathArray::iterator st = et - 1;
728 MathScriptInset * script = (*st)->asScriptInset();
729 if (script && script->hasUp()) {
730 // things like d.../dx^n
732 if (extractNumber(script->up().data_, mult)) {
733 //lyxerr << "mult: " << mult << endl;
// add the derivative once per unit of the exponent
// NOTE(review): this 'int i' shadows the outer loop index 'i'.
734 for (int i = 0; i < mult; ++i)
735 diff->addDer(MathArray(dt + 1, st));
739 diff->addDer(MathArray(dt + 1, et));
745 ar.erase(it + 1, jt);
748 //lyxerr << "\nDiffs to: " << ar << "\n";
// Runs the extraction passes over ar, including extractFunctions followed by
// extractIntegrals. Called by octavize, maplize and mathmlize before output.
757 void extractStructure(MathArray & ar)
763 extractFunctions(ar);
764 extractIntegrals(ar);
// Writes the array to the WriteStream. For every atom the stream's
// firstitem() flag is set to whether it is the first atom; an atom that has
// a successor is additionally checked with asScript.
// NOTE(review): 'ar' is a local of the body, presumably a working copy of
// dat -- confirm.
772 void write(MathArray const & dat, WriteStream & wi)
776 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
777 wi.firstitem() = (it == ar.begin());
778 MathInset const * p = it->nucleus();
779 if (it + 1 != ar.end()) {
780 if (MathScriptInset const * q = asScript(it)) {
// Calls normalize(os) on every atom of ar, in order.
791 void normalize(MathArray const & ar, NormalStream & os)
793 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it)
794 (*it)->normalize(os);
// Octave output of an array: runs extractStructure on ar, then walks the
// atoms; an atom that has a successor is checked with asScript.
// NOTE(review): 'ar' is a local of the body, presumably a working copy of
// dat -- confirm.
798 void octavize(MathArray const & dat, OctaveStream & os)
801 extractStructure(ar);
802 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
803 MathInset const * p = it->nucleus();
804 if (it + 1 != ar.end()) {
805 if (MathScriptInset const * q = asScript(it)) {
// Maple output of an array: runs extractStructure on ar, then walks the
// atoms; an atom that has a successor is checked with asScript.
// NOTE(review): 'ar' is a local of the body, presumably a working copy of
// dat -- confirm.
816 void maplize(MathArray const & dat, MapleStream & os)
819 extractStructure(ar);
820 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
821 MathInset const * p = it->nucleus();
822 if (it + 1 != ar.end()) {
823 if (MathScriptInset const * q = asScript(it)) {
// MathML output of an array: runs extractStructure on ar first. An array of
// exactly one atom is streamed directly through its nucleus; otherwise the
// atoms are walked and a script found by asScript is emitted through
// mathmlize2 together with the current nucleus.
// NOTE(review): 'ar' is a local of the body, presumably a working copy of
// dat -- confirm.
834 void mathmlize(MathArray const & dat, MathMLStream & os)
837 extractStructure(ar);
840 else if (ar.size() == 1)
841 os << ar.begin()->nucleus();
844 for (MathArray::const_iterator it = ar.begin(); it != ar.end(); ++it) {
845 MathInset const * p = it->nucleus();
846 if (it + 1 != ar.end()) {
847 if (MathScriptInset const * q = asScript(it)) {
848 q->mathmlize2(p, os);
// Runs cmd in a shell with data echoed into its stdin, redirects stdout to a
// temporary file, reads that file back into a string and removes the file.
// The full command line and the result are logged to lyxerr.
// NOTE(review): data and cmd are spliced into the command line unescaped; a
// single quote inside data ends the quoted echo argument.
864 string captureOutput(string const & cmd, string const & data)
866 string outfile = lyx::tempName(string(), "mathextern");
867 string full = "echo '" + data + "' | (" + cmd + ") > " + outfile;
868 lyxerr << "calling: " << full << endl;
// wait for the script to finish before reading its output file
870 dummy.startscript(Systemcall::Wait, full);
871 string out = GetFileContents(outfile);
872 lyx::unlink(outfile);
873 lyxerr << "result: '" << out << "'" << endl;
// Sends the expression to Maple and parses the LaTeX it prints back.
// Before that the expression is checked with mint, at most 100 times: when
// mint reports a problem with a caret position, a '*' is inserted there and
// the check is repeated. The final command is latex(extra(expr)), prefixed
// by a header that adjusts Maple's LaTeX export and followed by "quit;".
878 MathArray pipeThroughMaple(string const & extra, MathArray const & ar)
880 string header = "readlib(latex):\n";
882 // remove the \\it for variable names
883 //"#`latex/csname_font` := `\\it `:"
885 "`latex/csname_font` := ``:\n";
887 // export matrices in (...) instead of [...]
889 "`latex/latex/matrix` := "
890 "subs(`[`=`(`, `]`=`)`,"
891 "eval(`latex/latex/matrix`)):\n";
893 // replace \\cdots with proper '*'
895 "`latex/latex/*` := "
896 "subs(`\\,`=`\\cdot `,"
897 "eval(`latex/latex/*`)):\n";
899 // remove spurious \\noalign{\\medskip} in matrix output
901 "`latex/latex/matrix`:= "
902 "subs(`\\\\\\\\\\\\noalign{\\\\medskip}` = `\\\\\\\\`,"
903 "eval(`latex/latex/matrix`)):\n";
905 //"#`latex/latex/symbol` "
906 // " := subs((\\'_\\' = \\'`\\_`\\',eval(`latex/latex/symbol`)): ";
908 string trailer = "quit;";
912 string expr = os.str().c_str();
913 lyxerr << "ar: '" << ar << "'\n";
915 for (int i = 0; i < 100; ++i) { // at most 100 attempts
916 // try to fix missing '*' the hard way by using mint
918 // ... > echo "1A;" | mint -i 1 -S -s -q
921 // Probably missing an operator such as * p
923 lyxerr << "checking expr: '" << expr << "'\n";
924 string out = captureOutput("mint -i 1 -S -s -q -q", expr + ";");
926 break; // expression syntax is ok
927 istringstream is(out.c_str());
930 if (line.find("on line") != 0)
931 break; // error message not identified
933 string::size_type pos = line.find('^');
934 if (pos == string::npos || pos < 15)
935 break; // caret position not found
936 pos -= 15; // skip the "on line ..." part
937 if (expr[pos] == '*' || (pos > 0 && expr[pos - 1] == '*'))
938 break; // two '*' in a row are definitely bad
939 expr.insert(pos, "*");
942 string full = "latex(" + extra + '(' + expr + "));";
943 string out = captureOutput("maple -q", header + full + trailer);
// parse Maple's LaTeX output back into a math array
949 mathed_parse_cell(res, out);
// Sends the expression to Octave and converts the output into a math array.
// The expression is run through "octave -q" at most 100 times: when the
// output contains "parse error:", the caret line following the ">>> " line
// is used to insert a '*' at the reported position and the run is repeated.
// The result is parsed as a MathArrayInset; a 1x1 result contributes its
// single cell, anything else is wrapped in a "(" ")" MathDelimInset.
// The first string parameter is unnamed and unused.
954 MathArray pipeThroughOctave(string const &, MathArray const & ar)
959 string expr = os.str().c_str();
962 lyxerr << "pipe: ar: '" << ar << "'\n";
963 lyxerr << "pipe: expr: '" << expr << "'\n";
965 for (int i = 0; i < 100; ++i) { // at most 100 attempts
967 // try to fix missing '*' the hard way
969 // >>> ([[1 2 3 ];[2 3 1 ];[3 1 2 ]])([[1 2 3 ];[2 3 1 ];[3 1 2 ]])
972 lyxerr << "checking expr: '" << expr << "'\n";
973 out = captureOutput("octave -q 2>&1", expr);
974 lyxerr << "checking out: '" << out << "'\n";
976 // leave loop if expression syntax is probably ok
977 if (out.find("parse error:") == string::npos)
980 // search line with single caret
981 istringstream is(out.c_str());
985 lyxerr << "skipping line: '" << line << "'\n";
986 if (line.find(">>> ") != string::npos)
990 // found line with error, next line is the one with caret
992 string::size_type pos = line.find('^');
993 lyxerr << "caret line: '" << line << "'\n";
994 lyxerr << "found caret at pos: '" << pos << "'\n";
995 if (pos == string::npos || pos < 4)
996 break; // caret position not found
997 pos -= 4; // skip the ">>> " part
998 if (expr[pos] == '*')
999 break; // two '*' in a row are definitely bad
1000 expr.insert(pos, "*");
// drop the first six characters of the output
// NOTE(review): substr(6) throws std::out_of_range when out is shorter than
// six characters -- confirm this is guarded.
1007 out = out.substr(6);
1009 // parse output as matrix or single number
1010 MathAtom at(new MathArrayInset("array", out));
1011 MathArrayInset const * mat = at.nucleus()->asArrayInset();
1013 if (mat->ncols() == 1 && mat->nrows() == 1)
1014 res.push_back(mat->cell(0));
1016 res.push_back(MathAtom(new MathDelimInset("(", ")")));
1017 res.back()->cell(0).push_back(at);
// Entry point for piping an array through an external program selected by
// lang. "octave" and "maple" are handled by their dedicated functions; for
// any other language a normalized expression starting with "[" + extra is
// built, the script "extern_" + lang is looked up with LibFileSearch in
// "mathed", and the script's output is parsed with mathed_parse_cell.
1025 MathArray pipeThroughExtern(string const & lang, string const & extra,
1026 MathArray const & ar)
1028 if (lang == "octave")
1029 return pipeThroughOctave(extra, ar);
1031 if (lang == "maple")
1032 return pipeThroughMaple(extra, ar);
1034 // create normalized expression
1036 NormalStream ns(os);
1037 os << "[" << extra << ' ';
1040 string data = os.str().c_str();
1042 // search external script
1043 string file = LibFileSearch("mathed", "extern_" + lang);
1045 lyxerr << "converter to '" << lang << "' not found\n";
1049 // run external script
1050 string out = captureOutput(file, data);
1052 mathed_parse_cell(res, out);