src/support/docstring.cpp

   1 /**
   2  * \file docstring.cpp
   3  * This file is part of LyX, the document processor.
   4  * Licence details can be found in the file COPYING.
   5  *
   6  * \author Georg Baum
   7  *
   8  * Full author contact details are available in file CREDITS.
   9  */
  10
  #include <config.h>

  #include "support/docstring.h"

  #include "support/lassert.h"
  #include "support/lstrings.h"
  #include "support/qstring_helpers.h"
  #include "support/unicode.h"

  #include <QFile>

  #include <iostream>
  #include <iterator>
  #include <locale>
  #include <typeinfo>
  25
  26 using namespace std;
  27
  28
  29 namespace lyx {
  30
  31 docstring const from_ascii(char const * ascii)
  32 {
  33         docstring s;
  34         int n = strlen(ascii);
  35         s.resize(n);
  36         char_type *d = &s[0];
  37         while (--n >= 0) {
  38                 d[n] = ascii[n];
  39                 LASSERT(static_cast<unsigned char>(ascii[n]) < 0x80, /**/);
  40         }
  41         return s;
  42 }
  43
  44
  45 docstring const from_ascii(string const & ascii)
  46 {
  47         int const len = ascii.length();
  48         for (int i = 0; i < len; ++i)
  49                 LASSERT(static_cast<unsigned char>(ascii[i]) < 0x80, /**/);
  50         return docstring(ascii.begin(), ascii.end());
  51 }
  52
  53
  54 string const to_ascii(docstring const & ucs4)
  55 {
  56         int const len = ucs4.length();
  57         string ascii;
  58         ascii.resize(len);
  59         for (int i = 0; i < len; ++i) {
  60                 LASSERT(ucs4[i] < 0x80, /**/);
  61                 ascii[i] = static_cast<char>(ucs4[i]);
  62         }
  63         return ascii;
  64 }
  65
  66
  67 IconvProcessor & utf8ToUcs4()
  68 {
  69         static IconvProcessor iconv(ucs4_codeset, "UTF-8");
  70         return iconv;
  71 }
  72
  73
  74
  75 void utf8_to_ucs4(string const & utf8, docstring & ucs4)
  76 {
  77         size_t n = utf8.size();
  78         // as utf8 is a multi-byte encoding, there would be at most
  79         // n characters:
  80         ucs4.resize(n);
  81         if (n == 0)
  82                 return;
  83
  84         int maxoutsize = n * 4;
  85         // basic_string::data() is not recognized by some old gcc version
  86         // so we use &(ucs4[0]) instead.
  87         char * outbuf = (char *)(&(ucs4[0]));
  88         int bytes = utf8ToUcs4().convert(utf8.c_str(), n, outbuf, maxoutsize);
  89
  90         // adjust to the real converted size
  91         ucs4.resize(bytes/4);
  92 }
  93
  94
  95 docstring const from_utf8(string const & utf8)
  96 {
  97         docstring ucs4;
  98         utf8_to_ucs4(utf8, ucs4);
  99         return ucs4;
 100 }
 101
 102
 103 string const to_utf8(docstring const & ucs4)
 104 {
 105         vector<char> const utf8 = ucs4_to_utf8(ucs4.data(), ucs4.size());
 106         return string(utf8.begin(), utf8.end());
 107 }
 108
 109
 110 docstring const from_local8bit(string const & s)
 111 {
 112         return qstring_to_ucs4(QString::fromLocal8Bit(s.data(), s.length()));
 113 }
 114
 115
 /// Exception type raised by to_local8bit() when a string cannot be
 /// converted. It derives from bad_cast.
 class to_local8bit_failure : public bad_cast {
 public:
         to_local8bit_failure() throw() {}
         virtual ~to_local8bit_failure() throw() {}
         /// Fixed, human readable description of the failure.
         virtual char const * what() const throw()
         {
                 return "A string could not be converted from unicode to the local 8 bit encoding.";
         }
 };
 126
 127
 128 string const to_local8bit(docstring const & s)
 129 {
 130         // This conversion can fail, depending on input.
 131         if (s.empty())
 132                 return string();
 133         QByteArray const local = toqstr(s).toLocal8Bit();
 134         if (local.size() == 0)
 135                 throw to_local8bit_failure();
 136         return string(local.begin(), local.end());
 137 }
 138
 139
 140 docstring const from_filesystem8bit(string const & s)
 141 {
 142         QByteArray const encoded(s.c_str(), s.length());
 143         return qstring_to_ucs4(QFile::decodeName(encoded));
 144 }
 145
 146
 147 string const to_filesystem8bit(docstring const & s)
 148 {
 149         QByteArray const encoded = QFile::encodeName(toqstr(s));
 150         return string(encoded.begin(), encoded.end());
 151 }
 152
 153
 154 docstring const normalize_c(docstring const & s)
 155 {
 156         return qstring_to_ucs4(toqstr(s).normalized(QString::NormalizationForm_C));
 157 }
 158
 159
 160 bool operator==(lyx::docstring const & l, char const * r)
 161 {
 162         lyx::docstring::const_iterator it = l.begin();
 163         lyx::docstring::const_iterator end = l.end();
 164         for (; it != end; ++it, ++r) {
 165                 LASSERT(static_cast<unsigned char>(*r) < 0x80, /**/);
 166                 if (!*r)
 167                         return false;
 168                 if (*it != static_cast<lyx::docstring::value_type>(*r))
 169                         return false;
 170         }
 171         return *r == '\0';
 172 }
 173
 174
 175 lyx::docstring operator+(lyx::docstring const & l, char const * r)
 176 {
 177         lyx::docstring s(l);
 178         for (char const * c = r; *c; ++c) {
 179                 LASSERT(static_cast<unsigned char>(*c) < 0x80, /**/);
 180                 s.push_back(*c);
 181         }
 182         return s;
 183 }
 184
 185
 186 lyx::docstring operator+(char const * l, lyx::docstring const & r)
 187 {
 188         lyx::docstring s;
 189         for (char const * c = l; *c; ++c) {
 190                 LASSERT(static_cast<unsigned char>(*c) < 0x80, /**/);
 191                 s.push_back(*c);
 192         }
 193         s += r;
 194         return s;
 195 }
 196
 197
 198 lyx::docstring operator+(lyx::docstring const & l, char r)
 199 {
 200         LASSERT(static_cast<unsigned char>(r) < 0x80, /**/);
 201         docstring s = l;
 202         s += docstring::value_type(r);
 203         return s;
 204 }
 205
 206
 207 lyx::docstring operator+(char l, lyx::docstring const & r)
 208 {
 209         LASSERT(static_cast<unsigned char>(l) < 0x80, /**/);
 210         return lyx::docstring::value_type(l) + r;
 211 }
 212
 213
 214 lyx::docstring & operator+=(lyx::docstring & l, char const * r)
 215 {
 216         for (char const * c = r; *c; ++c) {
 217                 LASSERT(static_cast<unsigned char>(*c) < 0x80, /**/);
 218                 l.push_back(*c);
 219         }
 220         return l;
 221 }
 222
 223
 224 lyx::docstring & operator+=(lyx::docstring & l, char r)
 225 {
 226         LASSERT(static_cast<unsigned char>(r) < 0x80, /**/);
 227         l.push_back(r);
 228         return l;
 229 }
 230
 231 } // namespace lyx
 232
 233 #if ! defined(USE_WCHAR_T) && defined(__GNUC__)
 234
 235 // gcc does not have proper locale facets for lyx::char_type if
 236 // sizeof(wchar_t) == 2, so we have to implement them on our own.
 237
 238
 239 // We get undefined references to these virtual methods. This looks like
 240 // a bug in gcc. The implementation here does not do anything useful, since
 241 // it is overriden in ascii_ctype_facet.
 242 namespace std {
 243 template<> ctype<lyx::char_type>::~ctype() {}
 244 template<> bool
 245 ctype<lyx::char_type>::do_is(ctype<lyx::char_type>::mask, lyx::char_type) const { return false; }
 246 template<> lyx::char_type const *
 247 ctype<lyx::char_type>::do_is(const lyx::char_type *, const lyx::char_type *, ctype<lyx::char_type>::mask *) const { return 0; }
 248 template<> const lyx::char_type *
 249 ctype<lyx::char_type>::do_scan_is(ctype<lyx::char_type>::mask, const lyx::char_type *, const lyx::char_type *) const { return 0; }
 250 template<> const lyx::char_type *
 251 ctype<lyx::char_type>::do_scan_not(ctype<lyx::char_type>::mask, const lyx::char_type *, const lyx::char_type *) const { return 0; }
 252 template<> lyx::char_type ctype<lyx::char_type>::do_toupper(lyx::char_type) const { return 0; }
 253 template<> const lyx::char_type * ctype<lyx::char_type>::do_toupper(lyx::char_type *, lyx::char_type const *) const { return 0; }
 254 template<> lyx::char_type ctype<lyx::char_type>::do_tolower(lyx::char_type) const { return 0; }
 255 template<> const lyx::char_type * ctype<lyx::char_type>::do_tolower(lyx::char_type *, lyx::char_type const *) const { return 0; }
 256 template<> lyx::char_type ctype<lyx::char_type>::do_widen(char) const { return 0; }
 257 template<> const char *
 258 ctype<lyx::char_type>::do_widen(const char *, const char *, lyx::char_type *) const { return 0; }
 259 template<> char
 260 ctype<lyx::char_type>::do_narrow(const lyx::char_type, char) const { return 0; }
 261 template<> const lyx::char_type *
 262 ctype<lyx::char_type>::do_narrow(const lyx::char_type *, const lyx::char_type *, char, char *) const { return 0; }
 263 }
 264
 265
 266 namespace lyx {
 267
 /// Exception type raised by ascii_ctype_facet when it is handed a
 /// character outside the ASCII range. It derives from bad_cast.
 class ctype_failure : public bad_cast {
 public:
         ctype_failure() throw() {}
         virtual ~ctype_failure() throw() {}
         /// Fixed, human readable description of the failure.
         virtual char const * what() const throw()
         {
                 return "The ctype<lyx::char_type> locale facet does only support ASCII characters on this platform.";
         }
 };
 277
 278
 /// Exception type raised by ascii_num_put_facet when the fill character
 /// is outside the ASCII range. It derives from bad_cast.
 class num_put_failure : public bad_cast {
 public:
         num_put_failure() throw() {}
         virtual ~num_put_failure() throw() {}
         /// Fixed, human readable description of the failure.
         virtual char const * what() const throw()
         {
                 return "The num_put locale facet does only support ASCII characters on this platform.";
         }
 };
 288
 289
 290 /// ctype facet for UCS4 characters. The implementation does only support pure
 291 /// ASCII, since we do not need anything else for now.
 292 /// The code is partly stolen from ctype<wchar_t> from gcc.
 293 class ascii_ctype_facet : public ctype<lyx::char_type>
 294 {
 295 public:
 296         typedef lyx::char_type char_type;
 297         typedef wctype_t wmask_type;
 298         explicit ascii_ctype_facet(size_t refs = 0) : ctype<char_type>(refs)
 299         {
 300                 M_initialize_ctype();
 301         }
 302 protected:
 303         bool       M_narrow_ok;
 304         char       M_narrow[128];
 305         wint_t     M_widen[1 + static_cast<unsigned char>(-1)];
 306         mask       M_bit[16];
 307         wmask_type M_wmask[16];
 308         wmask_type M_convert_to_wmask(const mask m) const
 309         {
 310                 wmask_type ret;
 311                 switch (m) {
 312                         case space:  ret = wctype("space");  break;
 313                         case print:  ret = wctype("print");  break;
 314                         case cntrl:  ret = wctype("cntrl");  break;
 315                         case upper:  ret = wctype("upper");  break;
 316                         case lower:  ret = wctype("lower");  break;
 317                         case alpha:  ret = wctype("alpha");  break;
 318                         case digit:  ret = wctype("digit");  break;
 319                         case punct:  ret = wctype("punct");  break;
 320                         case xdigit: ret = wctype("xdigit"); break;
 321                         case alnum:  ret = wctype("alnum");  break;
 322                         case graph:  ret = wctype("graph");  break;
 323                         default:     ret = wmask_type();
 324                 }
 325                 return ret;
 326         }
 327         void M_initialize_ctype()
 328         {
 329                 wint_t i;
 330                 for (i = 0; i < 128; ++i) {
 331                         const int c = wctob(i);
 332                         if (c == EOF)
 333                                 break;
 334                         else
 335                                 M_narrow[i] = static_cast<char>(c);
 336                 }
 337                 if (i == 128)
 338                         M_narrow_ok = true;
 339                 else
 340                         M_narrow_ok = false;
 341                 for (size_t i = 0; i < sizeof(M_widen) / sizeof(wint_t); ++i)
 342                         M_widen[i] = btowc(i);
 343
 344                 for (size_t i = 0; i <= 15; ++i) {
 345                         M_bit[i] = static_cast<mask>(1 << i);
 346                         M_wmask[i] = M_convert_to_wmask(M_bit[i]);
 347                 }
 348         }
 349         virtual ~ascii_ctype_facet() {}
 350         char_type do_toupper(char_type c) const
 351         {
 352                 if (c >= 0x80)
 353                         throw ctype_failure();
 354                 return toupper(static_cast<int>(c));
 355         }
 356         char_type const * do_toupper(char_type * lo, char_type const * hi) const
 357         {
 358                 while (lo < hi) {
 359                         if (*lo >= 0x80)
 360                                 throw ctype_failure();
 361                         *lo = toupper(static_cast<int>(*lo));
 362                         ++lo;
 363                 }
 364                 return hi;
 365         }
 366         char_type do_tolower(char_type c) const
 367         {
 368                 if (c >= 0x80)
 369                         throw ctype_failure();
 370                 return tolower(c);
 371         }
 372         char_type const * do_tolower(char_type * lo, char_type const * hi) const
 373         {
 374                 while (lo < hi) {
 375                         if (*lo >= 0x80)
 376                                 throw ctype_failure();
 377                         *lo = tolower(*lo);
 378                         ++lo;
 379                 }
 380                 return hi;
 381         }
 382         bool do_is(mask m, char_type c) const
 383         {
 384                 if (c >= 0x80)
 385                         throw ctype_failure();
 386                 // The code below works because c is in the ASCII range.
 387                 // We could not use iswctype() which is designed for a 2byte
 388                 // whar_t without encoding conversion otherwise.
 389                 bool ret = false;
 390                 // Generically, 15 (instead of 10) since we don't know the numerical
 391                 // encoding of the various categories in /usr/include/ctype.h.
 392                 const size_t bitmasksize = 15;
 393                 for (size_t bitcur = 0; bitcur <= bitmasksize; ++bitcur)
 394                         if (m & M_bit[bitcur] &&
 395                             iswctype(static_cast<int>(c), M_wmask[bitcur])) {
 396                                 ret = true;
 397                                 break;
 398                         }
 399                 return ret;
 400         }
 401         char_type const * do_is(char_type const * lo, char_type const * hi, mask * vec) const
 402         {
 403                 for (;lo < hi; ++vec, ++lo) {
 404                         if (*lo >= 0x80)
 405                                 throw ctype_failure();
 406                         // The code below works because c is in the ASCII range.
 407                         // We could not use iswctype() which is designed for a 2byte
 408                         // whar_t without encoding conversion otherwise.
 409                         // Generically, 15 (instead of 10) since we don't know the numerical
 410                         // encoding of the various categories in /usr/include/ctype.h.
 411                         const size_t bitmasksize = 15;
 412                         mask m = 0;
 413                         for (size_t bitcur = 0; bitcur <= bitmasksize; ++bitcur)
 414                                 if (iswctype(static_cast<int>(*lo), M_wmask[bitcur]))
 415                                         m |= M_bit[bitcur];
 416                         *vec = m;
 417                 }
 418                 return hi;
 419         }
 420         char_type const * do_scan_is(mask m, char_type const * lo, char_type const * hi) const
 421         {
 422                 while (lo < hi && !this->do_is(m, *lo))
 423                         ++lo;
 424                 return lo;
 425         }
 426         char_type const * do_scan_not(mask m, char_type const * lo, char_type const * hi) const
 427         {
 428                 while (lo < hi && this->do_is(m, *lo) != 0)
 429                         ++lo;
 430                 return lo;
 431         }
 432         char_type do_widen(char c) const
 433         {
 434                 if (static_cast<unsigned char>(c) < 0x80)
 435                         return c;
 436                 throw ctype_failure();
 437         }
 438         const char* do_widen(const char* lo, const char* hi, char_type* dest) const
 439         {
 440                 while (lo < hi) {
 441                         if (static_cast<unsigned char>(*lo) >= 0x80)
 442                                 throw ctype_failure();
 443                         *dest = *lo;
 444                         ++lo;
 445                         ++dest;
 446                 }
 447                 return hi;
 448         }
 449         char do_narrow(char_type wc, char) const
 450         {
 451                 if (wc < 0x80)
 452                         return static_cast<char>(wc);
 453                 throw ctype_failure();
 454         }
 455         const char_type * do_narrow(const char_type * lo, const char_type * hi, char, char * dest) const
 456         {
 457                 while (lo < hi) {
 458                         if (*lo < 0x80)
 459                                 *dest = static_cast<char>(*lo);
 460                         else
 461                                 throw ctype_failure();
 462                         ++lo;
 463                         ++dest;
 464                 }
 465                 return hi;
 466         }
 467 };
 468
 469
 470 /// Facet for outputting numbers to odocstreams as ascii.
 471 /// Here we simply need defining the virtual do_put functions.
 472 class ascii_num_put_facet : public num_put<lyx::char_type, ostreambuf_iterator<lyx::char_type, char_traits<lyx::char_type> > >
 473 {
 474         typedef ostreambuf_iterator<lyx::char_type, char_traits<lyx::char_type> > iter_type;
 475 public:
 476         ascii_num_put_facet(size_t refs = 0) : num_put<lyx::char_type, iter_type>(refs) {}
 477
 478         /// Facet for converting numbers to ascii strings.
 479         class string_num_put_facet : public num_put<char, basic_string<char>::iterator>
 480         {
 481         public:
 482                 string_num_put_facet() : num_put<char, basic_string<char>::iterator>(1) {}
 483         };
 484
 485 protected:
 486         iter_type
 487         do_put(iter_type oit, ios_base & b, char_type fill, bool v) const
 488         {
 489                 return do_put_helper(oit, b, fill, v);
 490         }
 491
 492         iter_type
 493         do_put(iter_type oit, ios_base & b, char_type fill, long v) const
 494         {
 495                 return do_put_helper(oit, b, fill, v);
 496         }
 497
 498         iter_type
 499         do_put(iter_type oit, ios_base & b, char_type fill, unsigned long v) const
 500         {
 501                 return do_put_helper(oit, b, fill, v);
 502         }
 503
 504 #ifdef _GLIBCXX_USE_LONG_LONG
 505         iter_type
 506         do_put(iter_type oit, ios_base & b, char_type fill, long long v) const
 507         {
 508                 return do_put_helper(oit, b, fill, v);
 509         }
 510
 511         iter_type
 512         do_put(iter_type oit, ios_base & b, char_type fill, unsigned long long v) const
 513         {
 514                 return do_put_helper(oit, b, fill, v);
 515         }
 516 #endif
 517
 518         iter_type
 519         do_put(iter_type oit, ios_base & b, char_type fill, double v) const
 520         {
 521                 return do_put_helper(oit, b, fill, v);
 522         }
 523
 524         iter_type
 525         do_put(iter_type oit, ios_base & b, char_type fill, long double v) const
 526         {
 527                 return do_put_helper(oit, b, fill, v);
 528         }
 529
 530         iter_type
 531         do_put(iter_type oit, ios_base & b, char_type fill, void const * v) const
 532         {
 533                 return do_put_helper(oit, b, fill, v);
 534         }
 535
 536 private:
 537         template <typename ValueType>
 538         iter_type
 539         do_put_helper(iter_type oit, ios_base & b, char_type fill, ValueType v) const
 540         {
 541                 if (fill >= 0x80)
 542                         throw num_put_failure();
 543
 544                 streamsize const sz = b.width() > b.precision() ?
 545                                            b.width() : b.precision();
 546                 // 64 is large enough, unless width or precision are bigger
 547                 streamsize const wd = (sz > 56 ? sz : 56) + 8;
 548                 string s(wd, '\0');
 549                 string_num_put_facet f;
 550                 string::const_iterator cit = s.begin();
 551                 string::const_iterator end =
 552                         f.put(s.begin(), b, fill, v);
 553                 for (; cit != end; ++cit, ++oit)
 554                         *oit = *cit;
 555
 556                 return oit;
 557         }
 558 };
 559
 560
 561 /// Facet for inputting ascii representations of numbers from idocstreams.
 562 /// Here we simply need defining the virtual do_get functions.
 563 class ascii_num_get_facet : public num_get<lyx::char_type, istreambuf_iterator<lyx::char_type, char_traits<lyx::char_type> > >
 564 {
 565         typedef istreambuf_iterator<lyx::char_type, char_traits<lyx::char_type> > iter_type;
 566 public:
 567         ascii_num_get_facet(size_t refs = 0) : num_get<lyx::char_type, iter_type>(refs) {}
 568
 569         /// Facet for converting ascii representation of numbers to a value.
 570         class string_num_get_facet : public num_get<char, basic_string<char>::iterator>
 571         {
 572         public:
 573                 string_num_get_facet() : num_get<char, basic_string<char>::iterator>(1) {}
 574         };
 575
 576         /// Numpunct facet defining the I/O format.
 577         class numpunct_facet : public numpunct<char>
 578         {
 579         public:
 580                 numpunct_facet() : numpunct<char>(1) {}
 581         };
 582
 583 protected:
 584         iter_type
 585         do_get(iter_type iit, iter_type eit, ios_base & b,
 586                 ios_base::iostate & err, bool & v) const
 587         {
 588                 if (b.flags() & ios_base::boolalpha) {
 589                         numpunct_facet p;
 590                         lyx::docstring const truename = from_local8bit(p.truename());
 591                         lyx::docstring const falsename = from_local8bit(p.falsename());
 592                         lyx::docstring s;
 593                         s.resize(16);
 594                         bool ok = true;
 595                         size_t n = 0;
 596                         size_t const tsize = truename.size();
 597                         size_t const fsize = falsename.size();
 598                         for (; iit != eit; ++iit) {
 599                                 s += *iit;
 600                                 ++n;
 601                                 bool true_ok = support::prefixIs(truename, s);
 602                                 bool false_ok = support::prefixIs(falsename, s);
 603                                 if (!true_ok && !false_ok) {
 604                                         ++iit;
 605                                         ok = false;
 606                                         break;
 607                                 }
 608                                 if ((true_ok && n == tsize) ||
 609                                     (false_ok && n == fsize)) {
 610                                         ++iit;
 611                                         break;
 612                                 }
 613                         }
 614                         if (ok) {
 615                                 err = ios_base::goodbit;
 616                                 v = truename == s ? true : false;
 617                         } else
 618                                 err = ios_base::failbit;
 619                         if (iit == eit)
 620                                 err |= ios_base::eofbit;
 621                         return iit;
 622                 } else {
 623                         long l;
 624                         iter_type end = this->do_get(iit, eit, b, err, l);
 625                         if (!(err & ios_base::failbit)) {
 626                                 if (l == 0)
 627                                         v = false;
 628                                 else if (l == 1)
 629                                         v = true;
 630                                 else
 631                                         err |= ios_base::failbit;
 632                         }
 633                         return end;
 634                 }
 635         }
 636
 637         iter_type
 638         do_get(iter_type iit, iter_type eit, ios_base & b,
 639                 ios_base::iostate & err, long & v) const
 640         {
 641                 return do_get_integer(iit, eit, b, err, v);
 642         }
 643
 644         iter_type
 645         do_get(iter_type iit, iter_type eit, ios_base & b,
 646                 ios_base::iostate & err, unsigned short & v) const
 647         {
 648                 return do_get_integer(iit, eit, b, err, v);
 649         }
 650
 651         iter_type
 652         do_get(iter_type iit, iter_type eit, ios_base & b,
 653                 ios_base::iostate & err, unsigned int & v) const
 654         {
 655                 return do_get_integer(iit, eit, b, err, v);
 656         }
 657
 658         iter_type
 659         do_get(iter_type iit, iter_type eit, ios_base & b,
 660                 ios_base::iostate & err, unsigned long & v) const
 661         {
 662                 return do_get_integer(iit, eit, b, err, v);
 663         }
 664
 665 #ifdef _GLIBCXX_USE_LONG_LONG
 666         iter_type
 667         do_get(iter_type iit, iter_type eit, ios_base & b,
 668                 ios_base::iostate & err, long long & v) const
 669         {
 670                 return do_get_integer(iit, eit, b, err, v);
 671         }
 672
 673         iter_type
 674         do_get(iter_type iit, iter_type eit, ios_base & b,
 675                 ios_base::iostate & err, unsigned long long & v) const
 676         {
 677                 return do_get_integer(iit, eit, b, err, v);
 678         }
 679 #endif
 680
 681         iter_type
 682         do_get(iter_type iit, iter_type eit, ios_base & b,
 683                 ios_base::iostate & err, float & v) const
 684         {
 685                 return do_get_float(iit, eit, b, err, v);
 686         }
 687
 688         iter_type
 689         do_get(iter_type iit, iter_type eit, ios_base & b,
 690                 ios_base::iostate & err, double & v) const
 691         {
 692                 return do_get_float(iit, eit, b, err, v);
 693         }
 694
 695         iter_type
 696         do_get(iter_type iit, iter_type eit, ios_base & b,
 697                 ios_base::iostate & err, long double & v) const
 698         {
 699                 return do_get_float(iit, eit, b, err, v);
 700         }
 701
 702         iter_type
 703         do_get(iter_type iit, iter_type eit, ios_base & b,
 704                 ios_base::iostate & err, void * & v) const
 705         {
 706                 unsigned long val;
 707                 iter_type end = do_get_integer(iit, eit, b, err, val);
 708                 if (!(err & ios_base::failbit))
 709                         v = reinterpret_cast<void *>(val);
 710                 return end;
 711         }
 712
 713 private:
 714         template <typename ValueType>
 715         iter_type
 716         do_get_integer(iter_type iit, iter_type eit, ios_base & b,
 717                         ios_base::iostate & err, ValueType & v) const
 718         {
 719                 string s;
 720                 s.reserve(64);
 721                 for (; iit != eit && isNumpunct(*iit); ++iit)
 722                         s += static_cast<char>(*iit);
 723                 // We add another character, not part of the numpunct facet,
 724                 // in order to avoid setting the eofbit in the stream state,
 725                 // which would prevent any further read. The space seems a
 726                 // good choice here.
 727                 s += ' ';
 728                 string_num_get_facet f;
 729                 f.get(s.begin(), s.end(), b, err, v);
 730                 if (iit == eit)
 731                     err |= ios_base::eofbit;
 732
 733                 return iit;
 734         }
 735
 736         bool isNumpunct(lyx::char_type const c) const
 737         {
 738                 /// Only account for the standard numpunct "C" locale facet.
 739                 return c < 0x80 && (c == '-' || c == '+' || isdigit(c)
 740                         || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
 741                         || c == 'x' || c == 'X');
 742         }
 743
 744         template <typename ValueType>
 745         iter_type
 746         do_get_float(iter_type iit, iter_type eit, ios_base & b,
 747                         ios_base::iostate & err, ValueType & v) const
 748         {
 749                 // Gather a string of the form
 750                 // [+-]? [0-9]* .? [0-9]* ([eE] [+-]? [0-9]+)?
 751                 string s;
 752                 s.reserve(64);
 753                 char c;
 754                 numpunct_facet p;
 755                 char const dot = p.decimal_point();
 756                 char const sep = p.thousands_sep();
 757                 // Get an optional sign
 758                 if (iit != eit && (*iit == '-' || *iit == '+')) {
 759                         s += static_cast<char>(*iit);
 760                         ++iit;
 761                 }
 762                 for (; iit != eit && isDigitOrSep(*iit, sep); ++iit)
 763                         s += static_cast<char>(*iit);
 764                 if (iit != eit && *iit == dot) {
 765                         s += dot;
 766                         ++iit;
 767                         for (; iit != eit && isDigitOrSep(*iit, 0); ++iit)
 768                                 s += static_cast<char>(*iit);
 769                         if (iit != eit && (*iit == 'e' || *iit == 'E')) {
 770                                 s += static_cast<char>(*iit);
 771                                 ++iit;
 772                                 for (; iit != eit && isDigitOrSep(*iit, 0); ++iit)
 773                                         s += static_cast<char>(*iit);
 774                         }
 775                 }
 776                 s += '\n';
 777                 string_num_get_facet f;
 778                 f.get(s.begin(), s.end(), b, err, v);
 779                 if (iit == eit)
 780                     err |= ios_base::eofbit;
 781
 782                 return iit;
 783         }
 784
 785         bool isDigitOrSep(lyx::char_type const c, char const sep) const
 786         {
 787                 return (c >= '0' && c <= '9') || (c != 0 && c == sep);
 788         }
 789 };
 790
 791
 792 /// class to add our facets to the global locale
 793 class locale_initializer {
 794 public:
 795         locale_initializer()
 796         {
 797                 locale global;
 798                 locale const loc1(global, new ascii_ctype_facet);
 799                 locale const loc2(loc1, new ascii_num_put_facet);
 800                 locale const loc3(loc2, new ascii_num_get_facet);
 801                 locale::global(loc3);
 802         }
 803 };
 804
 805
 806 namespace {
 807
 808 /// make sure that our facets get used
 809 static locale_initializer initializer;
 810
 811 }
 812 }
 813 #endif