6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE w32_regex_traits.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Implements w32_regex_traits<char> (and associated helper classes).
19 #define BOOST_REGEX_SOURCE
20 #include <boost/regex/config.hpp>
22 #if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32)
23 #include <boost/regex/regex_traits.hpp>
24 #include <boost/regex/pattern_except.hpp>
26 #define WIN32_LEAN_AND_MEAN
33 #if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
34 #pragma comment(lib, "user32.lib")
37 #ifdef BOOST_NO_STDC_NAMESPACE
43 namespace boost{ namespace re_detail{
45 #ifdef BOOST_NO_ANSI_APIS
// Queries the default ANSI code page of locale `idx` with GetLocaleInfoW
// (LOCALE_IDEFAULTANSICODEPAGE) and converts the returned digit string to a
// UINT with _wtol.
46 UINT get_code_page_for_locale_id(lcid_type idx)
// Receives the code page number as a wide-character string.
48 WCHAR code_page_string[7];
// A return value of 0 from GetLocaleInfoW indicates failure.
49 if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0)
52 return static_cast<UINT>(_wtol(code_page_string));
// Builds the per-locale lookup tables of the narrow-character layer:
//   m_char_map  - syntax / escape type for each unsigned char value;
//   m_lower_map - lower-case equivalent of each char value;
//   m_type_map  - CT_CTYPE1 classification bits of each char value.
// A failure to open the message catalog is reported through
// raise_runtime_error() with "Unable to open message catalog: <name>".
57 void w32_regex_traits_char_layer<char>::init()
59 // we need to start by initialising our syntax map so we know which
60 // character is used for which purpose:
61 std::memset(m_char_map, 0, sizeof(m_char_map));
63 std::string cat_name(w32_regex_traits<char>::get_catalog_name());
66 cat = ::boost::re_detail::w32_cat_open(cat_name);
69 std::string m("Unable to open message catalog: ");
70 std::runtime_error err(m + cat_name);
71 ::boost::re_detail::raise_runtime_error(err);
75 // if we have a valid catalog then load our messages:
// Catalog path: every character of the (possibly localised) message for
// syntax type i is mapped to i; get_default_syntax(i) is the fallback text.
79 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
81 string_type mss = ::boost::re_detail::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i));
82 for(string_type::size_type j = 0; j < mss.size(); ++j)
84 m_char_map[static_cast<unsigned char>(mss[j])] = i;
// Default path: the characters of get_default_syntax(i) are mapped directly.
90 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
92 const char* ptr = get_default_syntax(i);
95 m_char_map[static_cast<unsigned char>(*ptr)] = i;
101 // finish off by calculating our escape types:
103 unsigned char i = 'A';
// Only characters that have no syntax meaning yet (entry still 0) are
// classified here.
106 if(m_char_map[i] == 0)
// NOTE(review): 0x0002u / 0x0001u equal the Windows C1_LOWER / C1_UPPER bit
// values - presumably lower-case letters become escape_type_class and
// upper-case letters escape_type_not_class; confirm against w32_is().
108 if(::boost::re_detail::w32_is(this->m_locale, 0x0002u, (char)i))
109 m_char_map[i] = regex_constants::escape_type_class;
110 else if(::boost::re_detail::w32_is(this->m_locale, 0x0001u, (char)i))
111 m_char_map[i] = regex_constants::escape_type_not_class;
116 // fill in lower case map:
// char_map is the identity table 0 .. (1 << CHAR_BIT) - 1 that is fed to the
// mapping and classification APIs below.
118 char char_map[1 << CHAR_BIT];
119 for(int ii = 0; ii < (1 << CHAR_BIT); ++ii)
120 char_map[ii] = static_cast<char>(ii);
121 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: LCMapStringA lower-cases the whole table straight into
// m_lower_map.
122 int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT);
123 BOOST_ASSERT(r != 0);
// Wide-only build: widen the table with the locale's ANSI code page,
// lower-case it with LCMapStringW, then narrow the result into m_lower_map.
125 UINT code_page = get_code_page_for_locale_id(this->m_locale);
126 BOOST_ASSERT(code_page != 0);
128 WCHAR wide_char_map[1 << CHAR_BIT];
129 int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT);
130 BOOST_ASSERT(conv_r != 0);
132 WCHAR wide_lower_map[1 << CHAR_BIT];
133 int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT);
134 BOOST_ASSERT(r != 0);
// Only the r wide characters produced by LCMapStringW are converted back.
136 conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL);
137 BOOST_ASSERT(conv_r != 0);
139 if(r < (1 << CHAR_BIT))
141 // if we have multibyte characters then not all may have been given
142 // a lower case mapping:
// Entries from index r upwards fall back to the identity mapping.
143 for(int jj = r; jj < (1 << CHAR_BIT); ++jj)
144 this->m_lower_map[jj] = static_cast<char>(jj);
// Finally record the CT_CTYPE1 classification of every table entry in
// m_type_map (ANSI or wide API depending on the build).
147 #ifndef BOOST_NO_ANSI_APIS
148 r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map);
150 r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map);
152 BOOST_ASSERT(0 != r);
// Returns the locale identifier of the current user's default locale, as
// reported by GetUserDefaultLCID().
155 BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale()
157 return ::GetUserDefaultLCID();
// Checks whether the narrow character c carries the C1_LOWER classification
// bit in locale idx.
160 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx)
162 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: classify the single character directly.
164 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Wide-only build: widen c with the locale's ANSI code page first, then
// classify the resulting wide character.
168 UINT code_page = get_code_page_for_locale_id(idx);
173 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
177 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER))
// Checks whether the wide character c carries the C1_LOWER classification bit
// in locale idx, using GetStringTypeExW on that single character.
183 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx)
186 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
190 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where unsigned short is a separate wide character type
// (BOOST_REGEX_HAS_OTHER_WCHAR_T): checks the C1_LOWER bit reported by
// GetStringTypeExW for locale idx.
// NOTE(review): the test reads `c`, not the parameter `ca` - presumably c is
// a wchar_t copy of ca; confirm.
191 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx)
195 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Checks whether the narrow character c carries the C1_UPPER classification
// bit in locale idx.
201 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx)
203 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: classify the single character directly.
205 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Wide-only build: widen c with the locale's ANSI code page first, then
// classify the resulting wide character.
209 UINT code_page = get_code_page_for_locale_id(idx);
214 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
218 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER))
// Checks whether the wide character c carries the C1_UPPER classification bit
// in locale idx, using GetStringTypeExW on that single character.
224 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx)
227 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
231 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where unsigned short is a separate wide character type
// (BOOST_REGEX_HAS_OTHER_WCHAR_T): checks the C1_UPPER bit reported by
// GetStringTypeExW for locale idx.
// NOTE(review): the test reads `c`, not the parameter `ca` - presumably c is
// a wchar_t copy of ca; confirm.
232 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx)
236 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Cleanup function handed to cat_type when a catalog is opened: treats the
// opaque pointer as an HMODULE and releases it with FreeLibrary.
242 void free_module(void* mod)
244 ::FreeLibrary(static_cast<HMODULE>(mod));
// Opens the message catalog `name` by loading it as a module. The resulting
// handle is wrapped in a cat_type that is given free_module as its cleanup
// function.
247 BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name)
249 #ifndef BOOST_NO_ANSI_APIS
250 cat_type result(::LoadLibraryA(name.c_str()), &free_module);
// Wide-only build: convert the name to UTF-16 in a stack buffer (CP_ACP)
// before calling LoadLibraryW.
253 LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) );
// NOTE(review): only name.size() source bytes are converted, so
// MultiByteToWideChar does not write a terminating NUL, and _alloca memory is
// uninitialised - confirm wide_name is terminated before LoadLibraryW reads
// it (passing name.size() + 1 as the source length would include the NUL).
254 if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0)
257 cat_type result(::LoadLibraryW(wide_name), &free_module);
// Fetches a narrow message string from the catalog module `cat` through the
// string-resource API. The locale argument is unnamed and therefore unused.
// `def` is presumably the text returned when the resource is missing - confirm.
262 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def)
264 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: a LoadStringA result of 0 means nothing was loaded.
266 if(0 == ::LoadStringA(
267 static_cast<HMODULE>(cat.get()),
// Wide-only build: load the wide resource (r = characters copied) and narrow
// it with CP_ACP into a stack buffer of (r + 1) * 2 bytes.
277 int r = ::LoadStringW(
278 static_cast<HMODULE>(cat.get()),
286 LPSTR buf = (LPSTR)_alloca( (r + 1) * 2 );
// NOTE(review): exactly r wide characters are converted, so no terminating
// NUL is written to buf, yet std::string(buf) below scans for one - confirm
// the buffer is terminated (or build the string from the returned length).
287 if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, (r + 1) * 2, NULL, NULL) == 0)
290 return std::string(buf);
293 #ifndef BOOST_NO_WREGEX
// Wide-character variant: fetches a message from the catalog module `cat` with
// LoadStringW and returns the loaded buffer as a std::wstring. The locale
// argument is unnamed and therefore unused.
// `def` is presumably returned when LoadStringW yields 0 - confirm.
294 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def)
297 if(0 == ::LoadStringW(
298 static_cast<HMODULE>(cat.get()),
306 return std::wstring(buf);
308 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Variant for builds where unsigned short is a separate wide character type:
// loads the message with LoadStringW into a 256-element unsigned short buffer
// and returns it as a basic_string<unsigned short>. The locale argument is
// unnamed and therefore unused.
// `def` is presumably returned when LoadStringW yields 0 - confirm.
309 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string<unsigned short>& def)
311 unsigned short buf[256];
312 if(0 == ::LoadStringW(
313 static_cast<HMODULE>(cat.get()),
321 return std::basic_string<unsigned short>(buf);
// Produces the locale-specific sort key (LCMAP_SORTKEY) for the narrow
// character range [p1, p2) in locale idx, returned as a byte string.
// Several paths fall back to returning the untransformed input
// std::string(p1, p2). Trailing NUL bytes are stripped from the key.
325 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2)
327 #ifndef BOOST_NO_ANSI_APIS
// First pass: no destination buffer (size 0), so the call only reports the
// number of bytes the sort key needs.
328 int bytes = ::LCMapStringA(
329 idx, // locale identifier
330 LCMAP_SORTKEY, // mapping transformation type
332 static_cast<int>(p2 - p1), // number of characters in source string
333 0, // destination buffer
334 0 // size of destination buffer
337 return std::string(p1, p2);
// Second pass: allocate one byte more than reported and generate the key
// in place.
338 std::string result(++bytes, '\0');
339 bytes = ::LCMapStringA(
340 idx, // locale identifier
341 LCMAP_SORTKEY, // mapping transformation type
343 static_cast<int>(p2 - p1), // number of characters in source string
344 &*result.begin(), // destination buffer
345 bytes // size of destination buffer
// Wide-only build: widen the input with the locale's ANSI code page, then run
// the same two-pass scheme with LCMapStringW.
348 UINT code_page = get_code_page_for_locale_id(idx);
350 return std::string(p1, p2);
352 int src_len = static_cast<int>(p2 - p1);
353 LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 );
354 if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0)
355 return std::string(p1, p2);
357 int bytes = ::LCMapStringW(
358 idx, // locale identifier
359 LCMAP_SORTKEY, // mapping transformation type
360 wide_p1, // source string
361 src_len, // number of characters in source string
362 0, // destination buffer
363 0 // size of destination buffer
366 return std::string(p1, p2);
367 std::string result(++bytes, '\0');
// The destination is the byte buffer of `result`, cast to LPWSTR only to
// satisfy the LCMapStringW signature; `bytes` is its size.
368 bytes = ::LCMapStringW(
369 idx, // locale identifier
370 LCMAP_SORTKEY, // mapping transformation type
371 wide_p1, // source string
372 src_len, // number of characters in source string
373 (LPWSTR)&*result.begin(), // destination buffer
374 bytes // size of destination buffer
// A reported length larger than the buffer means the key cannot be trusted.
377 if(bytes > static_cast<int>(result.size()))
378 return std::string(p1, p2);
// Drop the trailing NUL padding so that only the key bytes remain.
379 while(result.size() && result[result.size()-1] == '\0')
381 result.erase(result.size()-1);
386 #ifndef BOOST_NO_WREGEX
// Produces the locale-specific sort key (LCMAP_SORTKEY) for the wide character
// range [p1, p2) in locale idx. The key is generated into a byte buffer
// (std::string) and then widened byte by byte into the returned wide string.
// Several paths fall back to returning the untransformed input
// std::wstring(p1, p2).
387 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2)
// First pass: no destination buffer (size 0), so the call only reports the
// number of bytes the sort key needs.
389 int bytes = ::LCMapStringW(
390 idx, // locale identifier
391 LCMAP_SORTKEY, // mapping transformation type
393 static_cast<int>(p2 - p1), // number of characters in source string
394 0, // destination buffer
395 0 // size of destination buffer
398 return std::wstring(p1, p2);
// Second pass: allocate one byte more than reported and generate the key into
// the byte buffer.
399 std::string result(++bytes, '\0');
400 bytes = ::LCMapStringW(
401 idx, // locale identifier
402 LCMAP_SORTKEY, // mapping transformation type
404 static_cast<int>(p2 - p1), // number of characters in source string
405 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
406 bytes // size of destination buffer
// A reported length larger than the buffer means the key cannot be trusted.
408 if(bytes > static_cast<int>(result.size()))
409 return std::wstring(p1, p2);
// Drop the trailing NUL padding so that only the key bytes remain.
410 while(result.size() && result[result.size()-1] == L'\0')
412 result.erase(result.size()-1);
// Widen each key byte (as an unsigned value) into one wchar_t of r2.
415 for(std::string::size_type i = 0; i < result.size(); ++i)
416 r2.append(1, static_cast<wchar_t>(static_cast<unsigned char>(result[i])));
419 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Variant of the sort-key transform for builds where unsigned short is a
// separate wide character type. The range [p1, p2) is passed to LCMapStringW
// as LPCWSTR; the key is generated into a byte buffer and widened byte by byte
// into the returned basic_string<unsigned short>.
// Several paths fall back to returning the untransformed input.
420 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2)
// First pass: no destination buffer (size 0), so the call only reports the
// number of bytes the sort key needs.
422 int bytes = ::LCMapStringW(
423 idx, // locale identifier
424 LCMAP_SORTKEY, // mapping transformation type
425 (LPCWSTR)p1, // source string
426 static_cast<int>(p2 - p1), // number of characters in source string
427 0, // destination buffer
428 0 // size of destination buffer
431 return std::basic_string<unsigned short>(p1, p2);
// Second pass: allocate one byte more than reported and generate the key into
// the byte buffer.
432 std::string result(++bytes, '\0');
433 bytes = ::LCMapStringW(
434 idx, // locale identifier
435 LCMAP_SORTKEY, // mapping transformation type
436 (LPCWSTR)p1, // source string
437 static_cast<int>(p2 - p1), // number of characters in source string
438 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
439 bytes // size of destination buffer
// A reported length larger than the buffer means the key cannot be trusted.
441 if(bytes > static_cast<int>(result.size()))
442 return std::basic_string<unsigned short>(p1, p2);
// Drop the trailing NUL padding so that only the key bytes remain.
443 while(result.size() && result[result.size()-1] == L'\0')
445 result.erase(result.size()-1);
// Widen each key byte (as an unsigned value) into one unsigned short of r2.
447 std::basic_string<unsigned short> r2;
448 for(std::string::size_type i = 0; i < result.size(); ++i)
449 r2.append(1, static_cast<unsigned short>(static_cast<unsigned char>(result[i])));
// Lower-cases the single narrow character c for locale idx
// (LCMAP_LOWERCASE).
454 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx)
457 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: map one character into `result` with LCMapStringA.
458 int b = ::LCMapStringA(
459 idx, // locale identifier
460 LCMAP_LOWERCASE, // mapping transformation type
462 1, // number of characters in source string
463 result, // destination buffer
464 1); // size of destination buffer
// Wide-only build: widen c with the locale's ANSI code page, lower-case the
// wide character, then narrow it back into `result` (capacity 2 bytes).
468 UINT code_page = get_code_page_for_locale_id(idx);
473 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
477 int b = ::LCMapStringW(
478 idx, // locale identifier
479 LCMAP_LOWERCASE, // mapping transformation type
480 &wide_c, // source string
481 1, // number of characters in source string
482 &wide_result, // destination buffer
483 1); // size of destination buffer
487 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
493 #ifndef BOOST_NO_WREGEX
// Lower-cases a single wide character for locale idx with
// LCMapStringW(LCMAP_LOWERCASE): one character in, one character of
// destination space.
494 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx)
497 int b = ::LCMapStringW(
498 idx, // locale identifier
499 LCMAP_LOWERCASE, // mapping transformation type
501 1, // number of characters in source string
502 result, // destination buffer
503 1); // size of destination buffer
508 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Lower-casing overload for builds where unsigned short is a separate wide
// character type: the address of c is reinterpreted as a wchar_t source string
// of length 1 for LCMapStringW(LCMAP_LOWERCASE).
509 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx)
512 int b = ::LCMapStringW(
513 idx, // locale identifier
514 LCMAP_LOWERCASE, // mapping transformation type
515 (wchar_t const*)&c, // source string
516 1, // number of characters in source string
517 result, // destination buffer
518 1); // size of destination buffer
// Upper-cases the single narrow character c for locale idx
// (LCMAP_UPPERCASE).
525 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx)
528 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: map one character into `result` with LCMapStringA.
529 int b = ::LCMapStringA(
530 idx, // locale identifier
531 LCMAP_UPPERCASE, // mapping transformation type
533 1, // number of characters in source string
534 result, // destination buffer
535 1); // size of destination buffer
// Wide-only build: widen c with the locale's ANSI code page, upper-case the
// wide character, then narrow it back into `result` (capacity 2 bytes).
539 UINT code_page = get_code_page_for_locale_id(idx);
544 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
548 int b = ::LCMapStringW(
549 idx, // locale identifier
550 LCMAP_UPPERCASE, // mapping transformation type
551 &wide_c, // source string
552 1, // number of characters in source string
553 &wide_result, // destination buffer
554 1); // size of destination buffer
558 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
564 #ifndef BOOST_NO_WREGEX
// Upper-cases a single wide character for locale idx with
// LCMapStringW(LCMAP_UPPERCASE): one character in, one character of
// destination space.
565 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx)
568 int b = ::LCMapStringW(
569 idx, // locale identifier
570 LCMAP_UPPERCASE, // mapping transformation type
572 1, // number of characters in source string
573 result, // destination buffer
574 1); // size of destination buffer
579 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Upper-casing overload for builds where unsigned short is a separate wide
// character type: the address of c is reinterpreted as a wchar_t source string
// of length 1 for LCMapStringW(LCMAP_UPPERCASE).
580 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx)
583 int b = ::LCMapStringW(
584 idx, // locale identifier
585 LCMAP_UPPERCASE, // mapping transformation type
586 (wchar_t const*)&c, // source string
587 1, // number of characters in source string
588 result, // destination buffer
589 1); // size of destination buffer
// Tests the narrow character c against the character-class mask m for locale
// idx. Two checks are visible: the CT_CTYPE1 bits of c ANDed with m and
// mask_base, and the special case '_' when m includes mask_word.
596 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c)
599 #ifndef BOOST_NO_ANSI_APIS
// ANSI build: classify the character directly.
600 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// Wide-only build: widen c with the locale's ANSI code page, then classify the
// wide character.
603 UINT code_page = get_code_page_for_locale_id(idx);
608 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
611 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// The underscore is checked separately for the "word" class.
614 if((m & w32_regex_traits_implementation<char>::mask_word) && (c == '_'))
619 #ifndef BOOST_NO_WREGEX
// Tests the wide character c against the character-class mask m for locale
// idx. Three checks are visible: the CT_CTYPE1 bits of c ANDed with m and
// mask_base, the special case '_' when m includes mask_word, and code points
// above 0xff when m includes mask_unicode.
620 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c)
623 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
625 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
627 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
631 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where unsigned short is a separate wide character type:
// the address of c is reinterpreted as a wchar_t string of length 1 for
// GetStringTypeExW. The visible checks are the CT_CTYPE1 bits ANDed with m and
// mask_base, '_' when m includes mask_word, and code points above 0xff when m
// includes mask_unicode.
632 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, unsigned short c)
635 if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
637 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
639 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))