3 * Copyright (c) 1998-2002
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
15 * VERSION: see <boost/version.hpp>
16 * DESCRIPTION: Implements high level class boost::RegEx
20 #define BOOST_REGEX_SOURCE
22 #include <boost/regex.hpp>
23 #include <boost/cregex.hpp>
24 #if !defined(BOOST_NO_STD_STRING)
27 #include <boost/regex/v4/fileiter.hpp>
28 typedef boost::match_flag_type match_flag_type;
32 #pragma warning(disable:4309)
38 #if __BORLANDC__ < 0x530
40 // we need to instantiate the vector classes we use
41 // since declaring a reference to type doesn't seem to
43 std::vector<std::size_t> inst1;
44 std::vector<std::string> inst2;
50 template <class iterator>
51 std::string to_string(iterator i, iterator j)
62 inline std::string to_string(const char* i, const char* j)
64 return std::string(i, j);
81 #ifndef BOOST_REGEX_NO_FILEITER
82 match_results<mapfile::iterator> fm;
86 #ifndef BOOST_REGEX_NO_FILEITER
87 mapfile::iterator fbase;
89 std::map<int, std::string, std::less<int> > strings;
90 std::map<int, std::ptrdiff_t, std::less<int> > positions;
93 RegExData() : e(), m(),
94 #ifndef BOOST_REGEX_NO_FILEITER
97 t(type_copy), pbase(0),
98 #ifndef BOOST_REGEX_NO_FILEITER
101 strings(), positions() {}
104 void RegExData::update()
106 strings.erase(strings.begin(), strings.end());
107 positions.erase(positions.begin(), positions.end());
110 for(unsigned int i = 0; i < m.size(); ++i)
112 if(m[i].matched) strings[i] = std::string(m[i].first, m[i].second);
113 positions[i] = m[i].matched ? m[i].first - pbase : -1;
116 #ifndef BOOST_REGEX_NO_FILEITER
119 for(unsigned int i = 0; i < fm.size(); ++i)
121 if(fm[i].matched) strings[i] = to_string(fm[i].first, fm[i].second);
122 positions[i] = fm[i].matched ? fm[i].first - fbase : -1;
129 void RegExData::clean()
131 #ifndef BOOST_REGEX_NO_FILEITER
132 fbase = mapfile::iterator();
133 fm = match_results<mapfile::iterator>();
141 pdata = new re_detail::RegExData();
144 RegEx::RegEx(const RegEx& o)
146 pdata = new re_detail::RegExData(*(o.pdata));
154 RegEx::RegEx(const char* c, bool icase)
156 pdata = new re_detail::RegExData();
157 SetExpression(c, icase);
160 RegEx::RegEx(const std::string& s, bool icase)
162 pdata = new re_detail::RegExData();
163 SetExpression(s.c_str(), icase);
166 RegEx& RegEx::operator=(const RegEx& o)
172 RegEx& RegEx::operator=(const char* p)
174 SetExpression(p, false);
178 unsigned int RegEx::SetExpression(const char* p, bool icase)
180 boost::uint_fast32_t f = icase ? regex::normal | regex::icase : regex::normal;
181 return pdata->e.set_expression(p, f);
184 unsigned int RegEx::error_code()const
186 return pdata->e.error_code();
190 std::string RegEx::Expression()const
192 return pdata->e.expression();
196 // now matching operators:
198 bool RegEx::Match(const char* p, match_flag_type flags)
200 pdata->t = re_detail::RegExData::type_pc;
205 if(regex_match(p, end, pdata->m, pdata->e, flags))
213 bool RegEx::Search(const char* p, match_flag_type flags)
215 pdata->t = re_detail::RegExData::type_pc;
220 if(regex_search(p, end, pdata->m, pdata->e, flags))
232 pred1(GrepCallback c, RegEx* i) : cb(c), pe(i) {}
233 bool operator()(const cmatch& m)
240 unsigned int RegEx::Grep(GrepCallback cb, const char* p, match_flag_type flags)
242 pdata->t = re_detail::RegExData::type_pc;
247 unsigned int result = regex_grep(re_detail::pred1(cb, this), p, end, pdata->e, flags);
255 std::vector<std::string>& v;
257 pred2(std::vector<std::string>& o, RegEx* e) : v(o), pe(e) {}
258 bool operator()(const cmatch& m)
261 v.push_back(std::string(m[0].first, m[0].second));
265 pred2& operator=(const pred2&);
269 unsigned int RegEx::Grep(std::vector<std::string>& v, const char* p, match_flag_type flags)
271 pdata->t = re_detail::RegExData::type_pc;
276 unsigned int result = regex_grep(re_detail::pred2(v, this), p, end, pdata->e, flags);
284 std::vector<std::size_t>& v;
287 pred3(std::vector<std::size_t>& o, const char* pb, RegEx* p) : v(o), base(pb), pe(p) {}
288 bool operator()(const cmatch& m)
291 v.push_back(static_cast<std::size_t>(m[0].first - base));
295 pred3& operator=(const pred3&);
298 unsigned int RegEx::Grep(std::vector<std::size_t>& v, const char* p, match_flag_type flags)
300 pdata->t = re_detail::RegExData::type_pc;
305 unsigned int result = regex_grep(re_detail::pred3(v, p, this), p, end, pdata->e, flags);
310 #ifndef BOOST_REGEX_NO_FILEITER
318 pred4(GrepFileCallback c, RegEx* i, const char* f) : cb(c), pe(i), file(f), ok(true) {}
319 bool operator()(const match_results<mapfile::iterator>& m)
321 pe->pdata->t = RegExData::type_pf;
// Appends to *pl the entries produced by iterating file_iterator over the
// pattern `files`, and calls itself on per-directory patterns assembled in buf.
// NOTE(review): this view of the function has gaps (the declaration of buf and
// the test on `recurse` are not visible here) — confirm details against the
// complete file.
330 void BuildFileList(std::list<std::string>* pl, const char* files, bool recurse)
332 file_iterator start(files);
336 // go through sub directories:
// Build the directory search pattern in buf; each bounded copy/concatenate
// result is passed to overflow_error_if_not_zero.
338 re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf, MAX_PATH, start.root()));
341 re_detail::overflow_error_if_not_zero(re_detail::strcpy_s(buf, MAX_PATH, "."));
342 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
343 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, "*"));
347 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, directory_iterator::separator()));
348 re_detail::overflow_error_if_not_zero(re_detail::strcat_s(buf, MAX_PATH, "*"));
350 directory_iterator dstart(buf);
351 directory_iterator dend;
353 // now get the file mask bit of "files":
354 const char* ptr = files;
// Step ptr backwards until it reaches the start of `files` or lands on a
// separator (the native one or '/'); presumably ptr was first advanced to the
// end of the string on a line not shown here — TODO confirm.
356 while((ptr != files) && (*ptr != *directory_iterator::separator()) && (*ptr != '/'))--ptr;
// If a separator was found, move past it so ptr addresses the mask itself.
357 if(ptr != files) ++ptr;
359 while(dstart != dend)
// Compose "<directory><separator><mask>" in buf for the nested call.
361 #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
362 (::sprintf_s)(buf, sizeof(buf), "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
// NOTE(review): the fallback below uses std::sprintf with no size limit, while
// the branch above passes sizeof(buf) and the earlier copies pass MAX_PATH; a
// long dstart.path() plus mask looks able to overrun buf — consider a bounded
// formatter here; confirm buf's size against its declaration.
364 (std::sprintf)(buf, "%s%s%s", dstart.path(), directory_iterator::separator(), ptr);
366 BuildFileList(pl, buf, recurse);
// Record the entry the file iterator currently refers to.
372 pl->push_back(*start);
378 unsigned int RegEx::GrepFiles(GrepFileCallback cb, const char* files, bool recurse, match_flag_type flags)
380 unsigned int result = 0;
381 std::list<std::string> file_list;
382 BuildFileList(&file_list, files, recurse);
383 std::list<std::string>::iterator start, end;
384 start = file_list.begin();
385 end = file_list.end();
389 mapfile map((*start).c_str());
390 pdata->t = re_detail::RegExData::type_pf;
391 pdata->fbase = map.begin();
392 re_detail::pred4 pred(cb, this, (*start).c_str());
393 int r = regex_grep(pred, map.begin(), map.end(), pdata->e, flags);
405 unsigned int RegEx::FindFiles(FindFilesCallback cb, const char* files, bool recurse, match_flag_type flags)
407 unsigned int result = 0;
408 std::list<std::string> file_list;
409 BuildFileList(&file_list, files, recurse);
410 std::list<std::string>::iterator start, end;
411 start = file_list.begin();
412 end = file_list.end();
416 mapfile map((*start).c_str());
417 pdata->t = re_detail::RegExData::type_pf;
418 pdata->fbase = map.begin();
420 if(regex_search(map.begin(), map.end(), pdata->fm, pdata->e, flags))
423 if(false == cb((*start).c_str()))
435 #ifdef BOOST_REGEX_V3
436 #define regex_replace regex_merge
439 std::string RegEx::Merge(const std::string& in, const std::string& fmt,
440 bool copy, match_flag_type flags)
443 re_detail::string_out_iterator<std::string> i(result);
444 if(!copy) flags |= format_no_copy;
445 regex_replace(i, in.begin(), in.end(), pdata->e, fmt.c_str(), flags);
449 std::string RegEx::Merge(const char* in, const char* fmt,
450 bool copy, match_flag_type flags)
453 if(!copy) flags |= format_no_copy;
454 re_detail::string_out_iterator<std::string> i(result);
455 regex_replace(i, in, in + std::strlen(in), pdata->e, fmt, flags);
459 std::size_t RegEx::Split(std::vector<std::string>& v,
461 match_flag_type flags,
464 return regex_split(std::back_inserter(v), s, pdata->e, flags, max_count);
470 // now operators for returning what matched in more detail:
472 std::size_t RegEx::Position(int i)const
476 case re_detail::RegExData::type_pc:
477 return pdata->m[i].matched ? pdata->m[i].first - pdata->pbase : RegEx::npos;
478 #ifndef BOOST_REGEX_NO_FILEITER
479 case re_detail::RegExData::type_pf:
480 return pdata->fm[i].matched ? pdata->fm[i].first - pdata->fbase : RegEx::npos;
482 case re_detail::RegExData::type_copy:
484 std::map<int, std::ptrdiff_t, std::less<int> >::iterator pos = pdata->positions.find(i);
485 if(pos == pdata->positions.end())
487 return (*pos).second;
493 std::size_t RegEx::Marks()const
495 return pdata->e.mark_count();
499 std::size_t RegEx::Length(int i)const
503 case re_detail::RegExData::type_pc:
504 return pdata->m[i].matched ? pdata->m[i].second - pdata->m[i].first : RegEx::npos;
505 #ifndef BOOST_REGEX_NO_FILEITER
506 case re_detail::RegExData::type_pf:
507 return pdata->fm[i].matched ? pdata->fm[i].second - pdata->fm[i].first : RegEx::npos;
509 case re_detail::RegExData::type_copy:
511 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
512 if(pos == pdata->strings.end())
514 return (*pos).second.size();
520 bool RegEx::Matched(int i)const
524 case re_detail::RegExData::type_pc:
525 return pdata->m[i].matched;
526 #ifndef BOOST_REGEX_NO_FILEITER
527 case re_detail::RegExData::type_pf:
528 return pdata->fm[i].matched;
530 case re_detail::RegExData::type_copy:
532 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
533 if(pos == pdata->strings.end())
// Produces the text of sub-expression i in `result`, selecting the source
// according to the visible cases on the stored match type: the live pointer
// match (type_pc), the file match (type_pf), or the copied `strings` map
// (type_copy). `result` is presumably declared and returned on lines not
// shown in this view — TODO confirm.
542 std::string RegEx::What(int i)const
547 case re_detail::RegExData::type_pc:
548 if(pdata->m[i].matched)
549 result.assign(pdata->m[i].first, pdata->m[i].second);
// NOTE(review): this type_pf branch reads pdata->m, whereas the type_pf
// branches of Position(), Length() and Matched() all read pdata->fm. It looks
// like this should use pdata->fm (which would also need the
// BOOST_REGEX_NO_FILEITER guard those functions have) — confirm before changing.
551 case re_detail::RegExData::type_pf:
552 if(pdata->m[i].matched)
553 result.assign(to_string(pdata->m[i].first, pdata->m[i].second));
555 case re_detail::RegExData::type_copy:
// Look the index up in the copied strings; result is only assigned when an
// entry for i exists.
557 std::map<int, std::string, std::less<int> >::iterator pos = pdata->strings.find(i);
558 if(pos != pdata->strings.end())
559 result = (*pos).second;
566 const std::size_t RegEx::npos = ~static_cast<std::size_t>(0);
570 #if defined(__BORLANDC__) && (__BORLANDC__ >= 0x550) && (__BORLANDC__ <= 0x551) && !defined(_RWSTD_COMPILE_INSTANTIATE)
572 // this is an ugly hack to work around an ugly problem:
573 // by default this file will produce unresolved externals during
574 // linking unless _RWSTD_COMPILE_INSTANTIATE is defined (Borland bug).
575 // However, if _RWSTD_COMPILE_INSTANTIATE is defined then we get separate
576 // copies of basic_string's static data in the RTL and this DLL; this messes
577 // with basic_string's memory management and results in run-time crashes.
578 // Oh sweet joy of Catch 22....
581 template<> template<>
582 basic_string<char>& BOOST_REGEX_DECL
583 basic_string<char>::replace<const char*>(char* f1, char* f2, const char* i1, const char* i2)
585 unsigned insert_pos = f1 - begin();
586 unsigned remove_len = f2 - f1;
587 unsigned insert_len = i2 - i1;
588 unsigned org_size = size();
589 if(insert_len > remove_len)
591 append(insert_len-remove_len, ' ');
592 std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
593 std::copy(i1, i2, begin() + insert_pos);
597 std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
598 std::copy(i1, i2, begin() + insert_pos);
599 erase(size() + insert_len - remove_len);
603 template<> template<>
604 basic_string<wchar_t>& BOOST_REGEX_DECL
605 basic_string<wchar_t>::replace<const wchar_t*>(wchar_t* f1, wchar_t* f2, const wchar_t* i1, const wchar_t* i2)
607 unsigned insert_pos = f1 - begin();
608 unsigned remove_len = f2 - f1;
609 unsigned insert_len = i2 - i1;
610 unsigned org_size = size();
611 if(insert_len > remove_len)
613 append(insert_len-remove_len, ' ');
614 std::copy_backward(begin() + insert_pos + remove_len, begin() + org_size, end());
615 std::copy(i1, i2, begin() + insert_pos);
619 std::copy(begin() + insert_pos + remove_len, begin() + org_size, begin() + insert_pos + insert_len);
620 std::copy(i1, i2, begin() + insert_pos);
621 erase(size() + insert_len - remove_len);