6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE w32_regex_traits.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Implements w32_regex_traits<char> (and associated helper classes).
19 #define BOOST_REGEX_SOURCE
20 #include <boost/regex/config.hpp>
22 #if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_NO_WIN32_LOCALE)
23 #include <boost/regex/regex_traits.hpp>
24 #include <boost/regex/pattern_except.hpp>
26 #define WIN32_LEAN_AND_MEAN
33 #if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
34 #pragma comment(lib, "user32.lib")
37 #ifdef BOOST_NO_STDC_NAMESPACE
43 namespace boost{ namespace BOOST_REGEX_DETAIL_NS{
45 #ifdef BOOST_NO_ANSI_APIS
// Looks up the default ANSI code page of locale `idx`: asks GetLocaleInfoW
// for LOCALE_IDEFAULTANSICODEPAGE and parses the returned wide-character
// digits with _wtol.
// NOTE(review): the statement executed when GetLocaleInfoW reports failure
// (returns 0) is not visible here; callers assert/check for a zero code page,
// so it presumably returns 0 -- confirm.
46 UINT get_code_page_for_locale_id(lcid_type idx)
// receives the code page number as text; the buffer holds 7 WCHARs
48 WCHAR code_page_string[7];
49 if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0)
52 return static_cast<UINT>(_wtol(code_page_string));
// Builds the per-locale lookup tables of the narrow-character traits layer:
//   1. m_char_map  - maps each byte to a regex syntax type, taken from the
//                    message catalog (w32_cat_get) or from
//                    get_default_syntax(); entries still 0 afterwards are
//                    tested with w32_is() and may be tagged as
//                    escape_type_class / escape_type_not_class;
//   2. m_lower_map - lower-case mapping of all (1 << CHAR_BIT) byte values;
//   3. m_type_map  - CT_CTYPE1 classification of the same byte values.
// Reports "Unable to open message catalog: <name>" as a std::runtime_error
// through raise_runtime_error().
57 void w32_regex_traits_char_layer<char>::init()
59 // we need to start by initialising our syntax map so we know which
60 // character is used for which purpose:
61 std::memset(m_char_map, 0, sizeof(m_char_map));
63 std::string cat_name(w32_regex_traits<char>::get_catalog_name());
66 cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name);
69 std::string m("Unable to open message catalog: ");
70 std::runtime_error err(m + cat_name);
71 ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
75 // if we have a valid catalog then load our messages:
79 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
// the default syntax string is passed as the fallback text for message i
81 string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i));
82 for(string_type::size_type j = 0; j < mss.size(); ++j)
// index through unsigned char so the table index is never negative,
// even where plain char is signed
84 m_char_map[static_cast<unsigned char>(mss[j])] = i;
// same mapping, but taken directly from the built-in default syntax strings
90 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
92 const char* ptr = get_default_syntax(i);
95 m_char_map[static_cast<unsigned char>(*ptr)] = i;
101 // finish off by calculating our escape types:
103 unsigned char i = 'A';
// only characters that have no syntax meaning yet are considered
106 if(m_char_map[i] == 0)
// NOTE(review): 0x0002u / 0x0001u are handed to w32_is() as the class mask;
// they match the Win32 C1_LOWER / C1_UPPER bit values -- confirm and consider
// using the named constants.
108 if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i))
109 m_char_map[i] = regex_constants::escape_type_class;
110 else if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i))
111 m_char_map[i] = regex_constants::escape_type_not_class;
116 // fill in lower case map:
// identity table (char_map[ii] == ii); used as the source string for the
// mapping and classification calls below
118 char char_map[1 << CHAR_BIT];
119 for(int ii = 0; ii < (1 << CHAR_BIT); ++ii)
120 char_map[ii] = static_cast<char>(ii);
121 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: lower-case the whole table in one call, straight into m_lower_map
122 int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT);
123 BOOST_ASSERT(r != 0);
// Wide-character path: convert the table to wide characters using the
// locale's ANSI code page, lower-case it, then convert the result back.
125 UINT code_page = get_code_page_for_locale_id(this->m_locale);
126 BOOST_ASSERT(code_page != 0);
128 WCHAR wide_char_map[1 << CHAR_BIT];
129 int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT);
// NOTE(review): conv_r (the number of wide characters actually produced) is
// only checked for non-zero; the calls below still process all
// 1 << CHAR_BIT entries of wide_char_map -- verify for multi-byte code pages.
130 BOOST_ASSERT(conv_r != 0);
132 WCHAR wide_lower_map[1 << CHAR_BIT];
133 int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT);
134 BOOST_ASSERT(r != 0);
136 conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL);
137 BOOST_ASSERT(conv_r != 0);
139 if(r < (1 << CHAR_BIT))
141 // if we have multibyte characters then not all may have been given
142 // a lower case mapping:
// entries from index r onwards fall back to the identity mapping
143 for(int jj = r; jj < (1 << CHAR_BIT); ++jj)
144 this->m_lower_map[jj] = static_cast<char>(jj);
147 #ifndef BOOST_NO_ANSI_APIS
// classify every byte value (CT_CTYPE1) into m_type_map
148 r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map);
150 r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map);
152 BOOST_ASSERT(0 != r);
// Returns the locale identifier reported by GetUserDefaultLCID().
155 BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale()
157 return ::GetUserDefaultLCID();
// Checks whether the narrow character `c` carries the C1_LOWER bit in the
// CT_CTYPE1 classification of locale `idx`.
160 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx)
162 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: classify the byte directly
164 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Wide path: convert the byte to a wide character with the locale's ANSI
// code page first, then classify the wide character.
168 UINT code_page = get_code_page_for_locale_id(idx);
// a return value of 0 means the conversion failed
173 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
177 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER))
// Wide-character overload: checks whether `c` carries the C1_LOWER bit in
// the CT_CTYPE1 classification of locale `idx` (GetStringTypeExW).
183 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx)
186 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
190 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload of the C1_LOWER test for locale `idx`.
// NOTE(review): the call below uses `c`, not the parameter `ca`; presumably
// `c` is a wchar_t copy of `ca` declared on a line not visible here -- confirm.
191 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx)
195 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Checks whether the narrow character `c` carries the C1_UPPER bit in the
// CT_CTYPE1 classification of locale `idx`.
201 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx)
203 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: classify the byte directly
205 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Wide path: convert the byte to a wide character with the locale's ANSI
// code page first, then classify the wide character.
209 UINT code_page = get_code_page_for_locale_id(idx);
// a return value of 0 means the conversion failed
214 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
218 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER))
// Wide-character overload: checks whether `c` carries the C1_UPPER bit in
// the CT_CTYPE1 classification of locale `idx` (GetStringTypeExW).
224 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx)
227 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
231 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload of the C1_UPPER test for locale `idx`.
// NOTE(review): the call below uses `c`, not the parameter `ca`; presumably
// `c` is a wchar_t copy of `ca` declared on a line not visible here -- confirm.
232 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx)
236 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Releases a module handle with FreeLibrary. `mod` arrives as void* and is
// cast back to HMODULE; w32_cat_open passes this function's address to the
// cat_type it constructs.
242 void free_module(void* mod)
244 ::FreeLibrary(static_cast<HMODULE>(mod));
// Opens the message catalog `name` by loading it as a module
// (LoadLibraryA / LoadLibraryW) and wraps the handle in a cat_type that is
// given free_module as its second constructor argument.
247 BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name)
249 #ifndef BOOST_NO_ANSI_APIS
250 cat_type result(::LoadLibraryA(name.c_str()), &free_module);
// Wide path: convert the name with the system ANSI code page (CP_ACP) into
// an _alloca'd buffer with room for name.size() + 1 wide characters.
253 LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) );
// NOTE(review): the source length passed here is name.size(), which excludes
// the terminating NUL, so per the MultiByteToWideChar documentation the
// output is not NUL-terminated -- yet wide_name is handed to LoadLibraryW
// below, which expects a NUL-terminated string. Passing name.size() + 1
// (or -1) as the source length would copy the terminator too -- confirm and
// fix.
254 if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0)
257 cat_type result(::LoadLibraryW(wide_name), &free_module);
// Fetches string resource `i` from the module held by `cat` as a narrow
// string (LoadStringA, or LoadStringW followed by a CP_ACP conversion).
// The locale parameter is unnamed and therefore unused. `def` is returned
// when the wide-to-narrow conversion fails.
262 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def)
264 #ifndef BOOST_NO_ANSI_APIS
266 if(0 == ::LoadStringA(
267 static_cast<HMODULE>(cat.get()),
// Wide path: r is the value returned by LoadStringW and is used below as
// the number of wide characters to convert.
277 int r = ::LoadStringW(
278 static_cast<HMODULE>(cat.get()),
// first call sizes the narrow buffer (plus one spare byte), second call
// performs the conversion
287 int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL);
288 LPSTR buf = (LPSTR)_alloca(buf_size);
// NOTE(review): exactly r wide characters are converted, i.e. without a
// terminating NUL, and no visible line stores a terminator in the spare
// byte; std::string(buf) below needs a NUL-terminated buffer. Constructing
// the string from (buf, converted_length) or writing buf[buf_size - 1] = 0
// would be safe -- confirm and fix.
289 if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0)
290 return def; // failed conversion.
292 return std::string(buf);
295 #ifndef BOOST_NO_WREGEX
// Wide-string overload: fetches string resource `i` from the module held by
// `cat` with LoadStringW and returns the buffer contents as std::wstring.
// The locale parameter is unnamed and therefore unused.
// NOTE(review): `def` is presumably the value returned when LoadStringW
// yields 0 -- that branch is not visible here; confirm.
296 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def)
299 if(0 == ::LoadStringW(
300 static_cast<HMODULE>(cat.get()),
308 return std::wstring(buf);
310 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload: fetches string resource `i` from the module held
// by `cat` with LoadStringW into a 256-element buffer and returns it as
// std::basic_string<unsigned short>. The locale parameter is unnamed and
// therefore unused.
// NOTE(review): `def` is presumably the value returned when LoadStringW
// yields 0 -- that branch is not visible here; confirm.
311 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string<unsigned short>& def)
313 unsigned short buf[256];
314 if(0 == ::LoadStringW(
315 static_cast<HMODULE>(cat.get()),
323 return std::basic_string<unsigned short>(buf);
// Produces the LCMAP_SORTKEY transformation of the narrow range [p1, p2) for
// locale `idx`, as a byte string with trailing NUL bytes removed.
// Pattern: one LCMapString call with a null destination to obtain the
// required size, then a second call into a std::string of that size + 1.
// Several paths return the untransformed input, std::string(p1, p2), instead.
327 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2)
329 #ifndef BOOST_NO_ANSI_APIS
// sizing call: destination buffer and size are both 0
330 int bytes = ::LCMapStringA(
331 idx, // locale identifier
332 LCMAP_SORTKEY, // mapping transformation type
334 static_cast<int>(p2 - p1), // number of characters in source string
335 0, // destination buffer
336 0 // size of destination buffer
339 return std::string(p1, p2);
// one byte more than the reported size, zero-filled
340 std::string result(++bytes, '\0');
341 bytes = ::LCMapStringA(
342 idx, // locale identifier
343 LCMAP_SORTKEY, // mapping transformation type
345 static_cast<int>(p2 - p1), // number of characters in source string
346 &*result.begin(), // destination buffer
347 bytes // size of destination buffer
// Wide path: convert the input with the locale's ANSI code page, then run
// the same two-call sequence with LCMapStringW.
350 UINT code_page = get_code_page_for_locale_id(idx);
352 return std::string(p1, p2);
354 int src_len = static_cast<int>(p2 - p1);
// NOTE(review): "* 2" hard-codes the WCHAR size; w32_cat_open uses
// sizeof(WCHAR) for the same purpose -- consider aligning.
355 LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 );
// NOTE(review): only failure (0) is checked; the number of wide characters
// actually produced is discarded and src_len is reused as the wide length
// below -- verify for code pages with multi-byte sequences.
356 if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0)
357 return std::string(p1, p2);
359 int bytes = ::LCMapStringW(
360 idx, // locale identifier
361 LCMAP_SORTKEY, // mapping transformation type
362 wide_p1, // source string
363 src_len, // number of characters in source string
364 0, // destination buffer
365 0 // size of destination buffer
368 return std::string(p1, p2);
369 std::string result(++bytes, '\0');
// the result goes into a byte string, so its storage is cast to LPWSTR only
// to satisfy the LCMapStringW signature
370 bytes = ::LCMapStringW(
371 idx, // locale identifier
372 LCMAP_SORTKEY, // mapping transformation type
373 wide_p1, // source string
374 src_len, // number of characters in source string
375 (LPWSTR)&*result.begin(), // destination buffer
376 bytes // size of destination buffer
// a reported size larger than the buffer is treated as failure
379 if(bytes > static_cast<int>(result.size()))
380 return std::string(p1, p2);
// strip the zero padding / terminator bytes from the end of the key
381 while(result.size() && result[result.size()-1] == '\0')
383 result.erase(result.size()-1);
388 #ifndef BOOST_NO_WREGEX
// Wide-character overload: produces the LCMAP_SORTKEY transformation of
// [p1, p2) for locale `idx`. The key is generated as bytes into a
// std::string (sizing call first, then the real call), trailing NUL bytes
// are removed, and every remaining byte is widened to one wchar_t of `r2`.
// Some paths return the untransformed input, std::wstring(p1, p2), instead.
389 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2)
// sizing call: destination buffer and size are both 0
391 int bytes = ::LCMapStringW(
392 idx, // locale identifier
393 LCMAP_SORTKEY, // mapping transformation type
395 static_cast<int>(p2 - p1), // number of characters in source string
396 0, // destination buffer
397 0 // size of destination buffer
400 return std::wstring(p1, p2);
// one byte more than the reported size, zero-filled
401 std::string result(++bytes, '\0');
402 bytes = ::LCMapStringW(
403 idx, // locale identifier
404 LCMAP_SORTKEY, // mapping transformation type
406 static_cast<int>(p2 - p1), // number of characters in source string
407 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
408 bytes // size of destination buffer
// a reported size larger than the buffer is treated as failure
410 if(bytes > static_cast<int>(result.size()))
411 return std::wstring(p1, p2);
// `result` holds chars, so this compares a char against the wide literal L'\0'
412 while(result.size() && result[result.size()-1] == L'\0')
414 result.erase(result.size()-1);
// widen via unsigned char so bytes >= 0x80 do not sign-extend
417 for(std::string::size_type i = 0; i < result.size(); ++i)
418 r2.append(1, static_cast<wchar_t>(static_cast<unsigned char>(result[i])));
421 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload: produces the LCMAP_SORTKEY transformation of
// [p1, p2) for locale `idx`. The input pointer is cast to LPCWSTR, the key
// is generated as bytes into a std::string (sizing call first, then the real
// call), trailing NUL bytes are removed, and every remaining byte is widened
// to one unsigned short of `r2`. Some paths return the untransformed input.
422 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2)
// sizing call: destination buffer and size are both 0
424 int bytes = ::LCMapStringW(
425 idx, // locale identifier
426 LCMAP_SORTKEY, // mapping transformation type
427 (LPCWSTR)p1, // source string
428 static_cast<int>(p2 - p1), // number of characters in source string
429 0, // destination buffer
430 0 // size of destination buffer
433 return std::basic_string<unsigned short>(p1, p2);
// one byte more than the reported size, zero-filled
434 std::string result(++bytes, '\0');
435 bytes = ::LCMapStringW(
436 idx, // locale identifier
437 LCMAP_SORTKEY, // mapping transformation type
438 (LPCWSTR)p1, // source string
439 static_cast<int>(p2 - p1), // number of characters in source string
440 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
441 bytes // size of destination buffer
// a reported size larger than the buffer is treated as failure
443 if(bytes > static_cast<int>(result.size()))
444 return std::basic_string<unsigned short>(p1, p2);
// `result` holds chars, so this compares a char against the wide literal L'\0'
445 while(result.size() && result[result.size()-1] == L'\0')
447 result.erase(result.size()-1);
449 std::basic_string<unsigned short> r2;
// widen via unsigned char so bytes >= 0x80 do not sign-extend
450 for(std::string::size_type i = 0; i < result.size(); ++i)
451 r2.append(1, static_cast<unsigned short>(static_cast<unsigned char>(result[i])));
// Lower-cases the single narrow character `c` for locale `idx` using
// LCMapString with LCMAP_LOWERCASE. In the wide path, `c` itself is returned
// when the lower-cased wide character cannot be converted back.
456 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx)
459 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: map one character into a one-character destination
460 int b = ::LCMapStringA(
461 idx, // locale identifier
462 LCMAP_LOWERCASE, // mapping transformation type
464 1, // number of characters in source string
465 result, // destination buffer
466 1); // size of destination buffer
// Wide path: byte -> wide character (locale's ANSI code page), lower-case
// the wide character, then convert it back to the same code page.
470 UINT code_page = get_code_page_for_locale_id(idx);
475 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
479 int b = ::LCMapStringW(
480 idx, // locale identifier
481 LCMAP_LOWERCASE, // mapping transformation type
482 &wide_c, // source string
483 1, // number of characters in source string
484 &wide_result, // destination buffer
485 1); // size of destination buffer
// the narrow destination is given as 2 bytes
489 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
490 return c; // No single byte lower case equivalent available
495 #ifndef BOOST_NO_WREGEX
// Wide-character overload: lower-cases the single character `c` for locale
// `idx` with LCMapStringW (LCMAP_LOWERCASE), one character in, one
// character of destination space.
496 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx)
499 int b = ::LCMapStringW(
500 idx, // locale identifier
501 LCMAP_LOWERCASE, // mapping transformation type
503 1, // number of characters in source string
504 result, // destination buffer
505 1); // size of destination buffer
510 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload: lower-cases the single character `c` for locale
// `idx` with LCMapStringW (LCMAP_LOWERCASE).
// NOTE(review): &c is reinterpreted as wchar_t const*, which assumes
// unsigned short and wchar_t have the same size -- presumably guaranteed
// wherever BOOST_REGEX_HAS_OTHER_WCHAR_T is defined; confirm.
511 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx)
514 int b = ::LCMapStringW(
515 idx, // locale identifier
516 LCMAP_LOWERCASE, // mapping transformation type
517 (wchar_t const*)&c, // source string
518 1, // number of characters in source string
519 result, // destination buffer
520 1); // size of destination buffer
// Upper-cases the single narrow character `c` for locale `idx` using
// LCMapString with LCMAP_UPPERCASE. In the wide path, `c` itself is returned
// when the upper-cased wide character cannot be converted back.
527 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx)
530 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: map one character into a one-character destination
531 int b = ::LCMapStringA(
532 idx, // locale identifier
533 LCMAP_UPPERCASE, // mapping transformation type
535 1, // number of characters in source string
536 result, // destination buffer
537 1); // size of destination buffer
// Wide path: byte -> wide character (locale's ANSI code page), upper-case
// the wide character, then convert it back to the same code page.
541 UINT code_page = get_code_page_for_locale_id(idx);
546 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
550 int b = ::LCMapStringW(
551 idx, // locale identifier
552 LCMAP_UPPERCASE, // mapping transformation type
553 &wide_c, // source string
554 1, // number of characters in source string
555 &wide_result, // destination buffer
556 1); // size of destination buffer
// the narrow destination is given as 2 bytes
560 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
561 return c; // No single byte upper case equivalent available.
566 #ifndef BOOST_NO_WREGEX
// Wide-character overload: upper-cases the single character `c` for locale
// `idx` with LCMapStringW (LCMAP_UPPERCASE), one character in, one
// character of destination space.
567 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx)
570 int b = ::LCMapStringW(
571 idx, // locale identifier
572 LCMAP_UPPERCASE, // mapping transformation type
574 1, // number of characters in source string
575 result, // destination buffer
576 1); // size of destination buffer
581 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload: upper-cases the single character `c` for locale
// `idx` with LCMapStringW (LCMAP_UPPERCASE).
// NOTE(review): &c is reinterpreted as wchar_t const*, which assumes
// unsigned short and wchar_t have the same size -- presumably guaranteed
// wherever BOOST_REGEX_HAS_OTHER_WCHAR_T is defined; confirm.
582 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx)
585 int b = ::LCMapStringW(
586 idx, // locale identifier
587 LCMAP_UPPERCASE, // mapping transformation type
588 (wchar_t const*)&c, // source string
589 1, // number of characters in source string
590 result, // destination buffer
591 1); // size of destination buffer
// Tests the narrow character `c` against the class mask `m` for locale `idx`.
// Two checks are visible:
//   - the CT_CTYPE1 classification of `c`, intersected with `m` and
//     w32_regex_traits_implementation<char>::mask_base;
//   - the extra "word" rule: '_' matches when `m` contains mask_word.
598 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c)
601 #ifndef BOOST_NO_ANSI_APIS
// ANSI path: classify the byte directly
602 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// Wide path: convert the byte with the locale's ANSI code page first
605 UINT code_page = get_code_page_for_locale_id(idx);
610 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
613 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// underscore is compared separately against mask_word rather than through
// the CT_CTYPE1 bits
616 if((m & w32_regex_traits_implementation<char>::mask_word) && (c == '_'))
621 #ifndef BOOST_NO_WREGEX
// Wide-character overload: tests `c` against the class mask `m` for locale
// `idx`. Three checks are visible:
//   - the CT_CTYPE1 classification (GetStringTypeExW), intersected with `m`
//     and w32_regex_traits_implementation<wchar_t>::mask_base;
//   - '_' when `m` contains mask_word;
//   - any character above 0xff when `m` contains mask_unicode.
622 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c)
625 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
627 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
629 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
633 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// unsigned short overload: tests `c` against the class mask `m` for locale
// `idx`, using the wchar_t traits masks. Three checks are visible:
//   - the CT_CTYPE1 classification (GetStringTypeExW), intersected with `m`
//     and mask_base;
//   - '_' when `m` contains mask_word;
//   - any character above 0xff when `m` contains mask_unicode.
// NOTE(review): &c is reinterpreted as wchar_t const*, which assumes
// unsigned short and wchar_t have the same size -- confirm.
634 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, unsigned short c)
637 if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
639 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
641 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
648 } // BOOST_REGEX_DETAIL_NS