6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
13 * LOCATION: see http://www.boost.org for most recent version.
14 * FILE w32_regex_traits.cpp
15 * VERSION see <boost/version.hpp>
16 * DESCRIPTION: Implements w32_regex_traits<char> (and associated helper classes).
19 #define BOOST_REGEX_SOURCE
20 #include <boost/regex/config.hpp>
22 #if defined(_WIN32) && !defined(BOOST_REGEX_NO_W32) && !defined(BOOST_REGEX_NO_WIN32_LOCALE)
23 #include <boost/regex/regex_traits.hpp>
24 #include <boost/regex/pattern_except.hpp>
26 #ifndef WIN32_LEAN_AND_MEAN
27 # define WIN32_LEAN_AND_MEAN
35 #if defined(_MSC_VER) && !defined(_WIN32_WCE) && !defined(UNDER_CE)
36 #pragma comment(lib, "user32.lib")
39 #ifdef BOOST_NO_STDC_NAMESPACE
45 namespace boost{ namespace BOOST_REGEX_DETAIL_NS{
47 #ifdef BOOST_NO_ANSI_APIS
// Queries LOCALE_IDEFAULTANSICODEPAGE for locale `idx` with GetLocaleInfoW
// and returns the resulting text converted to a UINT via _wtol.
// NOTE(review): this listing has gaps -- the braces and the statement
// executed when GetLocaleInfoW returns 0 are not visible here; confirm
// the failure value against the complete file.
48 UINT get_code_page_for_locale_id(lcid_type idx)
// Receives the code page as a wide string of at most 7 WCHARs.
50 WCHAR code_page_string[7];
51 if (::GetLocaleInfoW(idx, LOCALE_IDEFAULTANSICODEPAGE, code_page_string, 7) == 0)
54 return static_cast<UINT>(_wtol(code_page_string));
// Initialises the lookup tables of the narrow-character traits layer for
// this->m_locale: m_char_map (syntax / escape type per character),
// m_lower_map (lower-case mapping) and m_type_map (CT_CTYPE1 classification).
// NOTE(review): this listing has gaps (braces, some declarations, several
// conditions and the #else/#endif lines are not visible), so the exact
// branch structure must be confirmed against the complete file.
59 void w32_regex_traits_char_layer<char>::init()
61 // we need to start by initialising our syntax map so we know which
62 // character is used for which purpose:
63 std::memset(m_char_map, 0, sizeof(m_char_map));
// Name of the message catalog, as reported by the traits class.
65 std::string cat_name(w32_regex_traits<char>::get_catalog_name());
68 cat = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_open(cat_name);
// Error path: the catalog name is appended to the message and the
// resulting std::runtime_error is handed to raise_runtime_error.
71 std::string m("Unable to open message catalog: ");
72 std::runtime_error err(m + cat_name);
73 ::boost::BOOST_REGEX_DETAIL_NS::raise_runtime_error(err);
77 // if we have a valid catalog then load our messages:
// For each syntax type i the string is fetched with w32_cat_get (passing
// get_default_syntax(i) as the default) and every character in that string
// is mapped to i.
81 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
83 string_type mss = ::boost::BOOST_REGEX_DETAIL_NS::w32_cat_get(cat, this->m_locale, i, get_default_syntax(i));
84 for(string_type::size_type j = 0; j < mss.size(); ++j)
86 m_char_map[static_cast<unsigned char>(mss[j])] = i;
// Second loop over the same range that uses get_default_syntax(i) directly
// instead of the catalog (presumably the no-catalog branch -- confirm).
92 for(regex_constants::syntax_type i = 1; i < regex_constants::syntax_max; ++i)
94 const char* ptr = get_default_syntax(i);
97 m_char_map[static_cast<unsigned char>(*ptr)] = i;
103 // finish off by calculating our escape types:
// Scan starts at 'A'; only characters that still have no entry
// (m_char_map[i] == 0) receive an escape type.
105 unsigned char i = 'A';
108 if(m_char_map[i] == 0)
// NOTE(review): 0x0002u and 0x0001u are raw masks passed to w32_is --
// presumably the lower-case and upper-case classes; confirm against the
// mask definitions, which are not part of this listing.
110 if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0002u, (char)i))
111 m_char_map[i] = regex_constants::escape_type_class;
112 else if(::boost::BOOST_REGEX_DETAIL_NS::w32_is(this->m_locale, 0x0001u, (char)i))
113 m_char_map[i] = regex_constants::escape_type_not_class;
118 // fill in lower case map:
// Identity table holding every value representable in a char
// (1 << CHAR_BIT entries); it is the input to the mapping calls below.
120 char char_map[1 << CHAR_BIT];
121 for(int ii = 0; ii < (1 << CHAR_BIT); ++ii)
122 char_map[ii] = static_cast<char>(ii);
123 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: lower-case the whole table straight into m_lower_map.
124 int r = ::LCMapStringA(this->m_locale, LCMAP_LOWERCASE, char_map, 1 << CHAR_BIT, this->m_lower_map, 1 << CHAR_BIT);
125 BOOST_ASSERT(r != 0);
// The following lines use only wide APIs (presumably the #else branch):
// widen the table with the locale's code page, lower-case it with
// LCMapStringW, then narrow the result back into m_lower_map.
127 UINT code_page = get_code_page_for_locale_id(this->m_locale);
128 BOOST_ASSERT(code_page != 0);
130 WCHAR wide_char_map[1 << CHAR_BIT];
131 int conv_r = ::MultiByteToWideChar(code_page, 0, char_map, 1 << CHAR_BIT, wide_char_map, 1 << CHAR_BIT);
132 BOOST_ASSERT(conv_r != 0);
134 WCHAR wide_lower_map[1 << CHAR_BIT];
135 int r = ::LCMapStringW(this->m_locale, LCMAP_LOWERCASE, wide_char_map, 1 << CHAR_BIT, wide_lower_map, 1 << CHAR_BIT);
136 BOOST_ASSERT(r != 0);
138 conv_r = ::WideCharToMultiByte(code_page, 0, wide_lower_map, r, this->m_lower_map, 1 << CHAR_BIT, NULL, NULL);
139 BOOST_ASSERT(conv_r != 0);
// r is the count returned by the LCMapString call; entries from r upwards
// are set to the identity mapping.
141 if(r < (1 << CHAR_BIT))
143 // if we have multibyte characters then not all may have been given
144 // a lower case mapping:
145 for(int jj = r; jj < (1 << CHAR_BIT); ++jj)
146 this->m_lower_map[jj] = static_cast<char>(jj);
// Fill m_type_map with the CT_CTYPE1 classification of every table entry,
// using the A or W variant of GetStringTypeEx depending on the build.
149 #ifndef BOOST_NO_ANSI_APIS
150 r = ::GetStringTypeExA(this->m_locale, CT_CTYPE1, char_map, 1 << CHAR_BIT, this->m_type_map);
152 r = ::GetStringTypeExW(this->m_locale, CT_CTYPE1, wide_char_map, 1 << CHAR_BIT, this->m_type_map);
154 BOOST_ASSERT(0 != r);
// Returns the locale identifier reported by ::GetUserDefaultLCID().
157 BOOST_REGEX_DECL lcid_type BOOST_REGEX_CALL w32_get_default_locale()
159 return ::GetUserDefaultLCID();
// Tests whether the narrow character `c` has the C1_LOWER bit set in its
// CT_CTYPE1 classification for locale `idx`.
// NOTE(review): the declarations of `mask` / `wide_c`, the return
// statements and the #else/#endif lines are not visible in this listing.
162 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(char c, lcid_type idx)
164 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: classify the character directly.
166 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Wide-only path: widen `c` using the locale's code page, then classify
// the wide character.
170 UINT code_page = get_code_page_for_locale_id(idx);
175 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
179 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_LOWER))
// Wide-character overload: tests the C1_LOWER bit of `c`'s CT_CTYPE1
// classification for locale `idx` using GetStringTypeExW.
// NOTE(review): the `mask` declaration and return statements are not
// visible in this listing.
185 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(wchar_t c, lcid_type idx)
188 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
192 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type (BOOST_REGEX_HAS_OTHER_WCHAR_T): tests the C1_LOWER bit for locale `idx`.
// NOTE(review): the parameter is `ca` but the call uses `c`; the line that
// defines `c` (presumably a wchar_t copy of `ca`) is not visible here.
193 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_lower(unsigned short ca, lcid_type idx)
197 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_LOWER))
// Tests whether the narrow character `c` has the C1_UPPER bit set in its
// CT_CTYPE1 classification for locale `idx`.
// NOTE(review): the declarations of `mask` / `wide_c`, the return
// statements and the #else/#endif lines are not visible in this listing.
203 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(char c, lcid_type idx)
205 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: classify the character directly.
207 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Wide-only path: widen `c` using the locale's code page, then classify
// the wide character.
211 UINT code_page = get_code_page_for_locale_id(idx);
216 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
220 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & C1_UPPER))
// Wide-character overload: tests the C1_UPPER bit of `c`'s CT_CTYPE1
// classification for locale `idx` using GetStringTypeExW.
// NOTE(review): the `mask` declaration and return statements are not
// visible in this listing.
226 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(wchar_t c, lcid_type idx)
229 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
233 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type (BOOST_REGEX_HAS_OTHER_WCHAR_T): tests the C1_UPPER bit for locale `idx`.
// NOTE(review): the parameter is `ca` but the call uses `c`; the line that
// defines `c` (presumably a wchar_t copy of `ca`) is not visible here.
234 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is_upper(unsigned short ca, lcid_type idx)
238 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & C1_UPPER))
// Releases a module handle with ::FreeLibrary. `mod` is cast to HMODULE;
// w32_cat_open passes this function's address when building a cat_type.
244 void free_module(void* mod)
246 ::FreeLibrary(static_cast<HMODULE>(mod));
249 BOOST_REGEX_DECL cat_type BOOST_REGEX_CALL w32_cat_open(const std::string& name)
251 #ifndef BOOST_NO_ANSI_APIS
252 cat_type result(::LoadLibraryA(name.c_str()), &free_module);
255 LPWSTR wide_name = (LPWSTR)_alloca( (name.size() + 1) * sizeof(WCHAR) );
256 if (::MultiByteToWideChar(CP_ACP, 0, name.c_str(), name.size(), wide_name, name.size() + 1) == 0)
259 cat_type result(::LoadLibraryW(wide_name), &free_module);
264 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::string& def)
266 #ifndef BOOST_NO_ANSI_APIS
268 if(0 == ::LoadStringA(
269 static_cast<HMODULE>(cat.get()),
279 int r = ::LoadStringW(
280 static_cast<HMODULE>(cat.get()),
289 int buf_size = 1 + ::WideCharToMultiByte(CP_ACP, 0, wbuf, r, NULL, 0, NULL, NULL);
290 LPSTR buf = (LPSTR)_alloca(buf_size);
291 if (::WideCharToMultiByte(CP_ACP, 0, wbuf, r, buf, buf_size, NULL, NULL) == 0)
292 return def; // failed conversion.
294 return std::string(buf);
297 #ifndef BOOST_NO_WREGEX
// Wide-string overload: loads string resource `i` from catalog `cat` with
// LoadStringW and returns it as a std::wstring built from `buf`.
// The unnamed lcid_type parameter is not used in the visible lines.
// NOTE(review): the `buf` declaration, the remaining LoadStringW arguments
// and the branch taken when LoadStringW returns 0 (presumably `return def;`)
// are not visible in this listing.
298 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::wstring& def)
301 if(0 == ::LoadStringW(
302 static_cast<HMODULE>(cat.get()),
310 return std::wstring(buf);
312 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type: loads string resource `i` from catalog `cat` with LoadStringW and
// returns it as a basic_string<unsigned short> built from `buf`.
// NOTE(review): the remaining LoadStringW arguments and the branch taken
// when it returns 0 (presumably `return def;`) are not visible here.
313 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_cat_get(const cat_type& cat, lcid_type, int i, const std::basic_string<unsigned short>& def)
// Fixed-size receive buffer of 256 code units.
315 unsigned short buf[256];
316 if(0 == ::LoadStringW(
317 static_cast<HMODULE>(cat.get()),
325 return std::basic_string<unsigned short>(buf);
// Maps the narrow character range [p1, p2) through LCMapString with the
// LCMAP_SORTKEY transformation for locale `idx` and returns the result as a
// std::string with trailing '\0' characters removed. Several early exits
// return the untransformed text std::string(p1, p2) instead.
// NOTE(review): this listing has gaps -- the source-string argument of the
// ANSI calls, the conditions guarding most early returns, the closing
// parentheses and the #else/#endif lines are not visible here.
329 BOOST_REGEX_DECL std::string BOOST_REGEX_CALL w32_transform(lcid_type idx, const char* p1, const char* p2)
331 #ifndef BOOST_NO_ANSI_APIS
// First call passes a null destination of size 0 and keeps the return
// value in `bytes` (used below to size the result buffer).
332 int bytes = ::LCMapStringA(
333 idx, // locale identifier
334 LCMAP_SORTKEY, // mapping transformation type
336 static_cast<int>(p2 - p1), // number of characters in source string
337 0, // destination buffer
338 0 // size of destination buffer
341 return std::string(p1, p2);
// Result buffer is one element larger than the reported size (++bytes).
342 std::string result(++bytes, '\0');
// Second call writes into `result`.
343 bytes = ::LCMapStringA(
344 idx, // locale identifier
345 LCMAP_SORTKEY, // mapping transformation type
347 static_cast<int>(p2 - p1), // number of characters in source string
348 &*result.begin(), // destination buffer
349 bytes // size of destination buffer
// Wide-only path: widen the input with the locale's code page first.
352 UINT code_page = get_code_page_for_locale_id(idx);
354 return std::string(p1, p2);
356 int src_len = static_cast<int>(p2 - p1);
// Stack buffer of (src_len + 1) two-byte units for the widened input.
357 LPWSTR wide_p1 = (LPWSTR)_alloca( (src_len + 1) * 2 );
358 if(::MultiByteToWideChar(code_page, 0, p1, src_len, wide_p1, src_len + 1) == 0)
359 return std::string(p1, p2);
// Same two-call pattern as above, using LCMapStringW on the widened input.
361 int bytes = ::LCMapStringW(
362 idx, // locale identifier
363 LCMAP_SORTKEY, // mapping transformation type
364 wide_p1, // source string
365 src_len, // number of characters in source string
366 0, // destination buffer
367 0 // size of destination buffer
370 return std::string(p1, p2);
371 std::string result(++bytes, '\0');
372 bytes = ::LCMapStringW(
373 idx, // locale identifier
374 LCMAP_SORTKEY, // mapping transformation type
375 wide_p1, // source string
376 src_len, // number of characters in source string
// The std::string's storage is passed as the destination through an
// LPWSTR cast; `bytes` is given as its size.
377 (LPWSTR)&*result.begin(), // destination buffer
378 bytes // size of destination buffer
// Give up and return the input if the call reports more than fits.
381 if(bytes > static_cast<int>(result.size()))
382 return std::string(p1, p2);
// Strip trailing NUL characters from the result.
383 while(result.size() && result[result.size()-1] == '\0')
385 result.erase(result.size()-1);
390 #ifndef BOOST_NO_WREGEX
// Wide-character overload: maps [p1, p2) through LCMapStringW with
// LCMAP_SORTKEY for locale `idx`. The output is collected in a std::string
// of bytes, trailing NULs are stripped, and each byte is then widened
// (via unsigned char) into the wide string `r2`.
// NOTE(review): the source-string argument, the conditions guarding the
// first early return, the declaration of `r2` and the final return are not
// visible in this listing.
391 BOOST_REGEX_DECL std::wstring BOOST_REGEX_CALL w32_transform(lcid_type idx, const wchar_t* p1, const wchar_t* p2)
// First call passes a null destination of size 0 and keeps the return
// value in `bytes`.
393 int bytes = ::LCMapStringW(
394 idx, // locale identifier
395 LCMAP_SORTKEY, // mapping transformation type
397 static_cast<int>(p2 - p1), // number of characters in source string
398 0, // destination buffer
399 0 // size of destination buffer
402 return std::wstring(p1, p2);
// Byte buffer, one element larger than the reported size.
403 std::string result(++bytes, '\0');
404 bytes = ::LCMapStringW(
405 idx, // locale identifier
406 LCMAP_SORTKEY, // mapping transformation type
408 static_cast<int>(p2 - p1), // number of characters in source string
409 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
410 bytes // size of destination buffer
// Give up and return the input if the call reports more than fits.
412 if(bytes > static_cast<int>(result.size()))
413 return std::wstring(p1, p2);
// Strip trailing NUL bytes.
414 while(result.size() && result[result.size()-1] == L'\0')
416 result.erase(result.size()-1);
// Widen byte by byte; the unsigned char cast keeps each value in 0..255.
419 for(std::string::size_type i = 0; i < result.size(); ++i)
420 r2.append(1, static_cast<wchar_t>(static_cast<unsigned char>(result[i])));
423 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type: maps [p1, p2) through LCMapStringW with LCMAP_SORTKEY for locale
// `idx`. The output is collected in a std::string of bytes, trailing NULs
// are stripped, and each byte is widened (via unsigned char) into `r2`.
// NOTE(review): the condition guarding the first early return and the final
// return statement are not visible in this listing.
424 BOOST_REGEX_DECL std::basic_string<unsigned short> BOOST_REGEX_CALL w32_transform(lcid_type idx, const unsigned short* p1, const unsigned short* p2)
// First call passes a null destination of size 0 and keeps the return
// value in `bytes`.
426 int bytes = ::LCMapStringW(
427 idx, // locale identifier
428 LCMAP_SORTKEY, // mapping transformation type
429 (LPCWSTR)p1, // source string
430 static_cast<int>(p2 - p1), // number of characters in source string
431 0, // destination buffer
432 0 // size of destination buffer
435 return std::basic_string<unsigned short>(p1, p2);
// Byte buffer, one element larger than the reported size.
436 std::string result(++bytes, '\0');
437 bytes = ::LCMapStringW(
438 idx, // locale identifier
439 LCMAP_SORTKEY, // mapping transformation type
440 (LPCWSTR)p1, // source string
441 static_cast<int>(p2 - p1), // number of characters in source string
442 reinterpret_cast<wchar_t*>(&*result.begin()), // destination buffer *of bytes*
443 bytes // size of destination buffer
// Give up and return the input if the call reports more than fits.
445 if(bytes > static_cast<int>(result.size()))
446 return std::basic_string<unsigned short>(p1, p2);
// Strip trailing NUL bytes.
447 while(result.size() && result[result.size()-1] == L'\0')
449 result.erase(result.size()-1);
// Widen byte by byte; the unsigned char cast keeps each value in 0..255.
451 std::basic_string<unsigned short> r2;
452 for(std::string::size_type i = 0; i < result.size(); ++i)
453 r2.append(1, static_cast<unsigned short>(static_cast<unsigned char>(result[i])));
// Lower-cases the single narrow character `c` for locale `idx` using
// LCMapString with LCMAP_LOWERCASE.
// NOTE(review): the declarations of `result`, `wide_c` and `wide_result`,
// the source argument of the ANSI call, the handling of `b` and the final
// return are not visible in this listing.
458 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_tolower(char c, lcid_type idx)
461 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: map one character into `result`.
462 int b = ::LCMapStringA(
463 idx, // locale identifier
464 LCMAP_LOWERCASE, // mapping transformation type
466 1, // number of characters in source string
467 result, // destination buffer
468 1); // size of destination buffer
// Wide-only path: widen `c` with the locale's code page, lower-case the
// wide character, then narrow it back (destination size 2).
472 UINT code_page = get_code_page_for_locale_id(idx);
477 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
481 int b = ::LCMapStringW(
482 idx, // locale identifier
483 LCMAP_LOWERCASE, // mapping transformation type
484 &wide_c, // source string
485 1, // number of characters in source string
486 &wide_result, // destination buffer
487 1); // size of destination buffer
491 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
492 return c; // No single byte lower case equivalent available
497 #ifndef BOOST_NO_WREGEX
// Wide-character overload: lower-cases `c` for locale `idx` with
// LCMapStringW / LCMAP_LOWERCASE, writing one character into `result`.
// NOTE(review): the `result` declaration, the source argument, the handling
// of `b` and the return statement are not visible in this listing.
498 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_tolower(wchar_t c, lcid_type idx)
501 int b = ::LCMapStringW(
502 idx, // locale identifier
503 LCMAP_LOWERCASE, // mapping transformation type
505 1, // number of characters in source string
506 result, // destination buffer
507 1); // size of destination buffer
512 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type: lower-cases `c` for locale `idx` with LCMapStringW /
// LCMAP_LOWERCASE; `&c` is passed to the API through a wchar_t const* cast.
// NOTE(review): the `result` declaration, the handling of `b` and the
// return statement are not visible in this listing.
513 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_tolower(unsigned short c, lcid_type idx)
516 int b = ::LCMapStringW(
517 idx, // locale identifier
518 LCMAP_LOWERCASE, // mapping transformation type
519 (wchar_t const*)&c, // source string
520 1, // number of characters in source string
521 result, // destination buffer
522 1); // size of destination buffer
// Upper-cases the single narrow character `c` for locale `idx` using
// LCMapString with LCMAP_UPPERCASE.
// NOTE(review): the declarations of `result`, `wide_c` and `wide_result`,
// the source argument of the ANSI call, the handling of `b` and the final
// return are not visible in this listing.
529 BOOST_REGEX_DECL char BOOST_REGEX_CALL w32_toupper(char c, lcid_type idx)
532 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: map one character into `result`.
533 int b = ::LCMapStringA(
534 idx, // locale identifier
535 LCMAP_UPPERCASE, // mapping transformation type
537 1, // number of characters in source string
538 result, // destination buffer
539 1); // size of destination buffer
// Wide-only path: widen `c` with the locale's code page, upper-case the
// wide character, then narrow it back (destination size 2).
543 UINT code_page = get_code_page_for_locale_id(idx);
548 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
552 int b = ::LCMapStringW(
553 idx, // locale identifier
554 LCMAP_UPPERCASE, // mapping transformation type
555 &wide_c, // source string
556 1, // number of characters in source string
557 &wide_result, // destination buffer
558 1); // size of destination buffer
562 if (::WideCharToMultiByte(code_page, 0, &wide_result, 1, result, 2, NULL, NULL) == 0)
563 return c; // No single byte upper case equivalent available.
568 #ifndef BOOST_NO_WREGEX
// Wide-character overload: upper-cases `c` for locale `idx` with
// LCMapStringW / LCMAP_UPPERCASE, writing one character into `result`.
// NOTE(review): the `result` declaration, the source argument, the handling
// of `b` and the return statement are not visible in this listing.
569 BOOST_REGEX_DECL wchar_t BOOST_REGEX_CALL w32_toupper(wchar_t c, lcid_type idx)
572 int b = ::LCMapStringW(
573 idx, // locale identifier
574 LCMAP_UPPERCASE, // mapping transformation type
576 1, // number of characters in source string
577 result, // destination buffer
578 1); // size of destination buffer
583 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type: upper-cases `c` for locale `idx` with LCMapStringW /
// LCMAP_UPPERCASE; `&c` is passed to the API through a wchar_t const* cast.
// NOTE(review): the `result` declaration, the handling of `b` and the
// return statement are not visible in this listing.
584 BOOST_REGEX_DECL unsigned short BOOST_REGEX_CALL w32_toupper(unsigned short c, lcid_type idx)
587 int b = ::LCMapStringW(
588 idx, // locale identifier
589 LCMAP_UPPERCASE, // mapping transformation type
590 (wchar_t const*)&c, // source string
591 1, // number of characters in source string
592 result, // destination buffer
593 1); // size of destination buffer
// Tests whether narrow character `c` belongs to any class selected by mask
// `m` for locale `idx`. The CT_CTYPE1 classification of `c` is ANDed with
// `m` and w32_regex_traits_implementation<char>::mask_base; in addition,
// '_' is tested separately when `m` contains mask_word.
// NOTE(review): the declarations of `mask` / `wide_c`, the return
// statements and the #else/#endif lines are not visible in this listing.
600 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, char c)
603 #ifndef BOOST_NO_ANSI_APIS
// ANSI API available: classify the character directly.
604 if(::GetStringTypeExA(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// Wide-only path: widen `c` with the locale's code page, then classify.
607 UINT code_page = get_code_page_for_locale_id(idx);
612 if (::MultiByteToWideChar(code_page, 0, &c, 1, &wide_c, 1) == 0)
615 if(::GetStringTypeExW(idx, CT_CTYPE1, &wide_c, 1, &mask) && (mask & m & w32_regex_traits_implementation<char>::mask_base))
// Underscore check for the "word" class.
618 if((m & w32_regex_traits_implementation<char>::mask_word) && (c == '_'))
623 #ifndef BOOST_NO_WREGEX
// Wide-character overload: tests whether `c` belongs to any class selected
// by mask `m` for locale `idx`. Three checks are visible: the CT_CTYPE1
// classification ANDed with `m` and mask_base, '_' when `m` contains
// mask_word, and c > 0xff when `m` contains mask_unicode.
// NOTE(review): the `mask` declaration and the return statements are not
// visible in this listing.
624 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, wchar_t c)
627 if(::GetStringTypeExW(idx, CT_CTYPE1, &c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
629 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
631 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
635 #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
// Overload for builds where `unsigned short` is a distinct wide character
// type: same three checks as the wchar_t overload (CT_CTYPE1 classification
// ANDed with `m` and mask_base, '_' for mask_word, c > 0xff for
// mask_unicode), with `&c` passed to the API through a wchar_t const* cast.
// NOTE(review): the `mask` declaration and the return statements are not
// visible in this listing.
636 BOOST_REGEX_DECL bool BOOST_REGEX_CALL w32_is(lcid_type idx, boost::uint32_t m, unsigned short c)
639 if(::GetStringTypeExW(idx, CT_CTYPE1, (wchar_t const*)&c, 1, &mask) && (mask & m & w32_regex_traits_implementation<wchar_t>::mask_base))
641 if((m & w32_regex_traits_implementation<wchar_t>::mask_word) && (c == '_'))
643 if((m & w32_regex_traits_implementation<wchar_t>::mask_unicode) && (c > 0xff))
650 } // BOOST_REGEX_DETAIL_NS