From 5cc2983bee196f3c54e0486bd4e07bda04970747 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 5 Mar 2023 16:01:12 +0100
Subject: [PATCH 01/27] Test UTF-8 build variant with ASAN too

To avoid adding yet another build, enable UTF-8 and ASAN for the
existing C++20 build.
---
 .github/workflows/ci.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 962899a759..9f872ec731 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -108,10 +108,11 @@ jobs:
             skip_samples: true
             use_asan: true
             use_xvfb: true
-          - name: Ubuntu 22.04 wxGTK C++20
+          - name: Ubuntu 22.04 wxGTK UTF-8 C++20
             runner: ubuntu-22.04
-            configure_flags: --with-cxx=20
-            skip_samples: true
+            configure_flags: --with-cxx=20 --enable-utf8 --enable-utf8only
+            use_asan: true
+            use_xvfb: true
           - name: Ubuntu 18.04 wxX11
             runner: ubuntu-latest
             container: ubuntu:18.04

From 0f0ac398002ab5ca8a9a58f93c1fe4d777a0b915 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 5 Mar 2023 16:10:53 +0100
Subject: [PATCH 02/27] Don't claim that wxUSE_UNICODE_UTF8 is Unix-specific

It can be used under MSW too.
---
 build/cmake/options.cmake | 6 ++----
 configure                 | 4 ++--
 configure.in              | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/build/cmake/options.cmake b/build/cmake/options.cmake
index 96f3c504c4..8c20872c2b 100644
--- a/build/cmake/options.cmake
+++ b/build/cmake/options.cmake
@@ -86,10 +86,8 @@ wx_option(wxUSE_STL "use standard C++ classes for everything" OFF)
 set(wxTHIRD_PARTY_LIBRARIES ${wxTHIRD_PARTY_LIBRARIES} wxUSE_STL "use C++ STL classes")
 wx_dependent_option(wxUSE_STD_CONTAINERS "use standard C++ container classes" ON "wxUSE_STL" OFF)
 
-if(NOT WIN32)
-    wx_option(wxUSE_UNICODE_UTF8 "use UTF-8 representation for strings (Unix only)" OFF)
-    wx_dependent_option(wxUSE_UTF8_LOCALE_ONLY "only support UTF-8 locales in UTF-8 build (Unix only)" ON "wxUSE_UNICODE_UTF8" OFF)
-endif()
+wx_option(wxUSE_UNICODE_UTF8 "use UTF-8 representation for strings" OFF)
+wx_dependent_option(wxUSE_UTF8_LOCALE_ONLY "only support UTF-8 locales in UTF-8 build" ON "wxUSE_UNICODE_UTF8" OFF)
 
 if(NOT WIN32)
     wx_option(wxUSE_VISIBILITY "use of ELF symbols visibility")
diff --git a/configure b/configure
index 5882f47a69..3bd652daeb 100755
--- a/configure
+++ b/configure
@@ -2086,8 +2086,8 @@ Optional Features:
   --disable-std_iostreams  disable use of standard C++ stream classes
  --enable-std_string_conv_in_wxstring     provide implicit conversion to std::string in wxString
  --disable-unsafe_conv_in_wxstring        disable unsafe implicit conversions in wxString
-  --enable-utf8           use UTF-8 representation for strings (Unix only)
-  --enable-utf8only       only support UTF-8 locales in UTF-8 build (Unix only)
+  --enable-utf8           use UTF-8 representation for strings
+  --enable-utf8only       only support UTF-8 locales in UTF-8 build
   --enable-extended_rtti  use extended RTTI (XTI)
   --disable-optimise      compile without optimisations
   --enable-profile        create code with profiling information
diff --git a/configure.in b/configure.in
index 74aa7bad06..7222e989b2 100644
--- a/configure.in
+++ b/configure.in
@@ -649,8 +649,8 @@ WX_ARG_ENABLE(std_containers,[  --enable-std_containers use standard C++ contain
 WX_ARG_DISABLE(std_iostreams,[  --disable-std_iostreams  disable use of standard C++ stream classes], wxUSE_STD_IOSTREAM)
 WX_ARG_ENABLE(std_string_conv_in_wxstring, [ --enable-std_string_conv_in_wxstring     provide implicit conversion to std::string in wxString], wxUSE_STD_STRING_CONV_IN_WXSTRING)
 WX_ARG_DISABLE(unsafe_conv_in_wxstring,     [ --disable-unsafe_conv_in_wxstring        disable unsafe implicit conversions in wxString], wxUSE_UNSAFE_WXSTRING_CONV)
-WX_ARG_ENABLE_PARAM(utf8,    [  --enable-utf8           use UTF-8 representation for strings (Unix only)], wxUSE_UNICODE_UTF8)
-WX_ARG_ENABLE(utf8only,      [  --enable-utf8only       only support UTF-8 locales in UTF-8 build (Unix only)], wxUSE_UNICODE_UTF8_LOCALE)
+WX_ARG_ENABLE_PARAM(utf8,    [  --enable-utf8           use UTF-8 representation for strings], wxUSE_UNICODE_UTF8)
+WX_ARG_ENABLE(utf8only,      [  --enable-utf8only       only support UTF-8 locales in UTF-8 build], wxUSE_UNICODE_UTF8_LOCALE)
 WX_ARG_ENABLE(extended_rtti, [  --enable-extended_rtti  use extended RTTI (XTI)], wxUSE_EXTENDED_RTTI)
 
 WX_ARG_DISABLE(optimise,   [  --disable-optimise      compile without optimisations], wxUSE_OPTIMISE)

From 0677d493dfac7a6b15d87adfc6e0f6e076f9e4b1 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 5 Mar 2023 16:23:50 +0100
Subject: [PATCH 03/27] Add wxUSE_UNICODE_UTF8 to wx/setup.h

This option can also be used under MSW, so move it to the common setup.h
instead of having it in setup.h.in only.

Also do the same thing for wxUSE_UTF8_LOCALE_ONLY, even if it's less
clear if this one is really useful in non-Unix environment.
---
 build/cmake/setup.h.in     |  8 ++++---
 include/wx/android/setup.h | 48 ++++++++++++++++++++++++++++++++++++++
 include/wx/gtk/setup.h     | 48 ++++++++++++++++++++++++++++++++++++++
 include/wx/msw/setup.h     | 48 ++++++++++++++++++++++++++++++++++++++
 include/wx/osx/setup.h     | 48 ++++++++++++++++++++++++++++++++++++++
 include/wx/setup_inc.h     | 48 ++++++++++++++++++++++++++++++++++++++
 include/wx/univ/setup.h    | 48 ++++++++++++++++++++++++++++++++++++++
 setup.h.in                 |  8 ++++---
 setup.h_vms                |  7 +++---
 9 files changed, 302 insertions(+), 9 deletions(-)

diff --git a/build/cmake/setup.h.in b/build/cmake/setup.h.in
index c36aa1ff0b..3fb7112a52 100644
--- a/build/cmake/setup.h.in
+++ b/build/cmake/setup.h.in
@@ -119,6 +119,11 @@
 #cmakedefine01 wxUSE_REPRODUCIBLE_BUILD
 
 
+#cmakedefine01 wxUSE_UNICODE_UTF8
+
+#cmakedefine01 wxUSE_UTF8_LOCALE_ONLY
+
+
 
 #cmakedefine01 wxUSE_ON_FATAL_EXCEPTION
 
@@ -591,9 +596,6 @@
 #cmakedefine01 wxUSE_SELECT_DISPATCHER
 #cmakedefine01 wxUSE_EPOLL_DISPATCHER
 
-#cmakedefine01 wxUSE_UNICODE_UTF8
-#cmakedefine01 wxUSE_UTF8_LOCALE_ONLY
-
 /*
    Use GStreamer for Unix.
 
diff --git a/include/wx/android/setup.h b/include/wx/android/setup.h
index 336f80fcbc..9b8b458969 100644
--- a/include/wx/android/setup.h
+++ b/include/wx/android/setup.h
@@ -77,6 +77,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/include/wx/gtk/setup.h b/include/wx/gtk/setup.h
index 210944074a..fd554c0aba 100644
--- a/include/wx/gtk/setup.h
+++ b/include/wx/gtk/setup.h
@@ -78,6 +78,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/include/wx/msw/setup.h b/include/wx/msw/setup.h
index d72f9de5d8..112f62f7ec 100644
--- a/include/wx/msw/setup.h
+++ b/include/wx/msw/setup.h
@@ -78,6 +78,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/include/wx/osx/setup.h b/include/wx/osx/setup.h
index 9be31964d4..f5619fad6f 100644
--- a/include/wx/osx/setup.h
+++ b/include/wx/osx/setup.h
@@ -84,6 +84,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/include/wx/setup_inc.h b/include/wx/setup_inc.h
index b5515be609..63198eb5bc 100644
--- a/include/wx/setup_inc.h
+++ b/include/wx/setup_inc.h
@@ -74,6 +74,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/include/wx/univ/setup.h b/include/wx/univ/setup.h
index e04243c292..442cd570e1 100644
--- a/include/wx/univ/setup.h
+++ b/include/wx/univ/setup.h
@@ -77,6 +77,54 @@
 // Recommended setting: 0
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
+// ----------------------------------------------------------------------------
+// wxString encoding settings
+// ----------------------------------------------------------------------------
+
+// If set to 1, wxString uses UTF-8 internally instead of UTF-32 (Unix) or
+// UTF-16 (MSW).
+//
+// This option can be set to 1 if you want to avoid the overhead of converting
+// between wchar_t encoding (UTF-32 or UTF-16) used by wxString by default and
+// UTF-8, i.e. it makes functions such as wxString::FromUTF8() and utf8_str()
+// much more efficient and constant time, as they don't perform any conversion
+// any longer, which is especially interesting in wxGTK where these functions
+// are used every time a GTK function is called. But this is compensated by
+// making all the non-UTF-8 functions less efficient, notably requiring a
+// conversion when passing any string to Win32 API.
+//
+// Moreover, accessing strings by character index becomes, in general, an O(N)
+// iteration, where N is the index, so only enable this option if you don't use
+// index access for arbitrary characters (unless it is done inside a loop
+// consecutively for all characters as this special access pattern is optimized
+// by caching the last accessed index -- but using iterators, or a range for
+// loop, is still better even in this case), as otherwise you may observe
+// significant slowdown in your program performance.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 for optimization purposes and if
+// you're sure that you're not using loops using indices to iterate over
+// strings in your code.
+#define wxUSE_UNICODE_UTF8 0
+
+// If set to 1, assume that all narrow strings use UTF-8.
+//
+// By default, wxWidgets assumes that all "char*" strings use the encoding of
+// the current locale, which is commonly, but not always, UTF-8 under Unix but
+// rarely UTF-8 under MSW. This option tells the library that all strings
+// always use UTF-8, avoiding the need to perform any conversions between them
+// and wxString internal representation when wxUSE_UNICODE_UTF8 is set to 1.
+//
+// In fact, using this option only makes sense when wxUSE_UNICODE_UTF8==1 and
+// it must not be enabled without the other option.
+//
+// Default is 0
+//
+// Recommended setting: 0 but can be set to 1 if your program is always run in
+// a UTF-8 locale.
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 // ----------------------------------------------------------------------------
 // debugging settings
 // ----------------------------------------------------------------------------
diff --git a/setup.h.in b/setup.h.in
index f61b356607..41321542dc 100644
--- a/setup.h.in
+++ b/setup.h.in
@@ -122,6 +122,11 @@
 #define wxUSE_REPRODUCIBLE_BUILD 0
 
 
+#define wxUSE_UNICODE_UTF8 0
+
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
+
 
 #define wxUSE_ON_FATAL_EXCEPTION 0
 
@@ -594,9 +599,6 @@
 #define wxUSE_SELECT_DISPATCHER 0
 #define wxUSE_EPOLL_DISPATCHER 0
 
-#define wxUSE_UNICODE_UTF8 0
-#define wxUSE_UTF8_LOCALE_ONLY 0
-
 /*
    Use GStreamer for Unix.
 
diff --git a/setup.h_vms b/setup.h_vms
index 5eb0ff9c56..7b8dea5026 100644
--- a/setup.h_vms
+++ b/setup.h_vms
@@ -169,6 +169,10 @@ typedef pid_t GPid;
 
 #define wxUSE_REPRODUCIBLE_BUILD 1
 
+#define wxUSE_UNICODE_UTF8 0
+
+#define wxUSE_UTF8_LOCALE_ONLY 0
+
 #define wxUSE_EXCEPTIONS 1
 
 #define wxUSE_EXTENDED_RTTI 0
@@ -652,9 +656,6 @@ typedef pid_t GPid;
 #define wxUSE_SELECT_DISPATCHER 1
 #define wxUSE_EPOLL_DISPATCHER 0
 
-#define wxUSE_UNICODE_UTF8 0
-#define wxUSE_UTF8_LOCALE_ONLY 0
-
 /*
    Use GStreamer for Unix.
 

From e733e28e7aff859f6b4ae7b1277574cb2410bb8f Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 5 Mar 2023 16:27:00 +0100
Subject: [PATCH 04/27] Add a build using wxUSE_UNICODE_UTF8 to Appveyor CI

---
 appveyor.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/appveyor.yml b/appveyor.yml
index e09662e27b..0c1175e651 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -34,6 +34,11 @@ environment:
     ARCH: x64
     wxUSE_STL: 1
     APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022
+  - TOOLSET: msbuild
+    CONFIGURATION: Debug
+    ARCH: x64
+    wxUSE_UNICODE_UTF8: 1
+    APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2022
   - TOOLSET: nmake
     VS: '14.0'
     BUILD: debug
@@ -67,11 +72,13 @@ before_build:
 - ps: |
     $env:PATH = $env:PATH -replace "C:\\Program Files\\Git\\usr\\bin",""
     if (-not (Test-Path env:wxUSE_STL)) { $env:wxUSE_STL = '0' }
+    if (-not (Test-Path env:wxUSE_UNICODE_UTF8)) { $env:wxUSE_UNICODE_UTF8 = '0' }
     if (-not (Test-Path env:wxUSE_WEBVIEW_EDGE)) { $env:wxUSE_WEBVIEW_EDGE = '0' }
     if (($env:TOOLSET -ne "msys2") -and ($env:TOOLSET -ne "cygwin")) {
       $txt = gc include\wx\msw\setup.h
       Write-Output $txt |
       %{$_ -replace "define wxUSE_STL 0", "define wxUSE_STL $env:wxUSE_STL"} |
+      %{$_ -replace "define wxUSE_UNICODE_UTF8 0", "define wxUSE_UNICODE_UTF8 $env:wxUSE_UNICODE_UTF8"} |
       %{$_ -replace "define wxUSE_WEBVIEW_EDGE 0", "define wxUSE_WEBVIEW_EDGE $env:wxUSE_WEBVIEW_EDGE"} |
       sc include\wx\msw\setup.h
     }

From ad29bdc6e40b4f404114445b77ca21cd363cc805 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 5 Mar 2023 16:34:40 +0100
Subject: [PATCH 05/27] Change one of the MSW CI builds to use UTF-8

Cover a previously unused build configuration in the CI: UTF-8 with MSVS
under MSW.
---
 .github/workflows/ci_msw.yml | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci_msw.yml b/.github/workflows/ci_msw.yml
index 541d9a5ed2..84ba52172f 100644
--- a/.github/workflows/ci_msw.yml
+++ b/.github/workflows/ci_msw.yml
@@ -67,6 +67,7 @@ jobs:
           - configuration: 'DLL Debug'
             platform: 'x64'
             vsversion: 2022
+            use_utf8: true
           - configuration: 'Debug'
             platform: 'Win32'
             vsversion: 2019
@@ -81,14 +82,18 @@ jobs:
         with:
           submodules: 'recursive'
 
-      - name: Configure to use STL
-        if: matrix.use_stl
+      - name: Configure build options
         working-directory: include/wx/msw
         run: |
-            $txt = Get-Content setup.h
-            Write-Output $txt |
-            %{$_ -replace "define wxUSE_STL 0", "define wxUSE_STL 1"} |
-            Set-Content setup.h
+            $use_stl = "${{ matrix.use_stl }}" ? 1 : 0
+            $use_utf8 = "${{ matrix.use_utf8 }}" ? 1 : 0
+            if ( $use_stl -or $use_utf8 ) {
+              $txt = Get-Content setup.h
+              Write-Output $txt |
+              %{$_ -replace "define wxUSE_STL 0", "define wxUSE_STL $use_stl"} |
+              %{$_ -replace "define wxUSE_UNICODE_UTF8 0", "define wxUSE_UNICODE_UTF8 $use_utf8"} |
+              Set-Content setup.h
+            }
 
       - name: Add MSBuild to PATH
         uses: microsoft/setup-msbuild@v1.1.3

From 73ad17db4ec9a48e64ed40f5020918ec9a05a8a5 Mon Sep 17 00:00:00 2001
From: Ian Day <ian.day@cambrionix.com>
Date: Wed, 1 Mar 2023 14:26:05 +0000
Subject: [PATCH 06/27] Fix dereferencing invalid iterator in wxString in UTF-8
 build

Fix iterator going past the end of the string in PosLenToImpl(): it must
not be incremented once it has reached end(), as the end iterator can't
be dereferenced.

Closes #23305.
---
 src/common/string.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/string.cpp b/src/common/string.cpp
index 64b7ae6be2..009cd2abea 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -229,7 +229,7 @@ void wxString::PosLenToImpl(size_t pos, size_t len,
             // going beyond the end of the string, just as std::string does
             const const_iterator e(end());
             const_iterator i(b);
-            while ( len && i <= e )
+            while ( len && i < e )
             {
                 ++i;
                 --len;

From eb4e75f8c3b52c1a648c8af7febd94e83b0af657 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 6 Mar 2023 00:28:24 +0100
Subject: [PATCH 07/27] Document that range for can be used for wxString
 iteration

This is simpler and better than using iterators.
---
 docs/doxygen/overviews/string.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/doxygen/overviews/string.h b/docs/doxygen/overviews/string.h
index 569411b16b..1d92ec1929 100644
--- a/docs/doxygen/overviews/string.h
+++ b/docs/doxygen/overviews/string.h
@@ -296,6 +296,18 @@ for (i = s.begin(); i != s.end(); ++i)
 }
 @endcode
 
+or, even simpler, range for loop:
+@code
+wxString s = "hello";
+for ( auto c : s )
+{
+    // do something with "c"
+}
+@endcode
+
+@note wxString iterators have unusual proxy-like semantics and can be used to
+    modify the string even when @e not using references, i.e. with just @c
+    auto, as in the example above.
 
 
 @section overview_string_related String Related Functions and Classes

From 1869f9ec6f342442d728a542403b7161dd63be90 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 6 Mar 2023 00:28:45 +0100
Subject: [PATCH 08/27] Use range for instead of iterators in unit test

No real changes, just simplify the code and ensure that iterating over a
string using range for loop works as expected.
---
 tests/strings/unicode.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/strings/unicode.cpp b/tests/strings/unicode.cpp
index 5d059cadc0..3fcd87dd09 100644
--- a/tests/strings/unicode.cpp
+++ b/tests/strings/unicode.cpp
@@ -441,16 +441,16 @@ void UnicodeTestCase::Iteration()
     // verify the string was decoded correctly:
     {
         size_t idx = 0;
-        for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
+        for ( auto c : text )
         {
-            CPPUNIT_ASSERT( *i == textUTF16[idx] );
+            CPPUNIT_ASSERT( c == textUTF16[idx++] );
         }
     }
 
     // overwrite the string with something that is shorter in UTF-8:
     {
-        for ( wxString::iterator i = text.begin(); i != text.end(); ++i )
-            *i = 'x';
+        for ( auto c : text )
+            c = 'x';
     }
 
     // restore the original text now:
@@ -459,9 +459,9 @@ void UnicodeTestCase::Iteration()
         wxString::const_iterator end2 = text.end();
 
         size_t idx = 0;
-        for ( wxString::iterator i = text.begin(); i != text.end(); ++i, ++idx )
+        for ( auto c : text )
         {
-            *i = textUTF16[idx];
+            c = textUTF16[idx++];
 
             CPPUNIT_ASSERT( end1 == text.end() );
             CPPUNIT_ASSERT( end2 == text.end() );
@@ -474,9 +474,9 @@ void UnicodeTestCase::Iteration()
     // and verify it again:
     {
         size_t idx = 0;
-        for ( wxString::const_iterator i = text.begin(); i != text.end(); ++i, ++idx )
+        for ( auto c : text )
         {
-            CPPUNIT_ASSERT( *i == textUTF16[idx] );
+            CPPUNIT_ASSERT( c == textUTF16[idx++] );
         }
     }
 }

From 52e5561ca539da3ce467996c7bb9b038229acb98 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Fri, 24 Mar 2023 19:39:24 +0100
Subject: [PATCH 09/27] Fix wxRegKey compilation in UTF-8 build

Use a macro to return a valid pointer to a wide string even in UTF-8
build, where we don't have a permanent wchar_t buffer to return a
pointer to.
---
 src/msw/registry.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/msw/registry.cpp b/src/msw/registry.cpp
index 4e022de4ca..0a6a7fb696 100644
--- a/src/msw/registry.cpp
+++ b/src/msw/registry.cpp
@@ -108,10 +108,13 @@ GetMSWAccessFlags(wxRegKey::AccessMode mode, wxRegKey::WOW64ViewMode viewMode);
 static wxString GetFullName(const wxRegKey *pKey);
 static wxString GetFullName(const wxRegKey *pKey, const wxString& szValue);
 
-// returns "value" argument of wxRegKey methods converted into a value that can
-// be passed to win32 registry functions; specifically, converts empty string
-// to nullptr
-static inline const wxChar *RegValueStr(const wxString& szValue);
+// Returns a (wide char) pointer to the string contents or null for an empty
+// string. The argument is evaluated twice, so it must not have side effects.
+//
+// Unfortunately this needs to be a macro to ensure that the temporary buffer
+// returned by t_str() in UTF-8 build lives long enough.
+#define RegValueStr(szValue) \
+    ((szValue).empty() ? nullptr : static_cast<const wchar_t*>((szValue).t_str()))
 
 // Return the user-readable name of the given REG_XXX type constant.
 static wxString GetTypeString(DWORD dwType)
@@ -1589,9 +1592,4 @@ inline void RemoveTrailingSeparator(wxString& str)
     str.Truncate(str.Len() - 1);
 }
 
-inline const wxChar *RegValueStr(const wxString& szValue)
-{
-    return szValue.empty() ? nullptr : szValue.t_str();
-}
-
 #endif // wxUSE_REGKEY

From 133fd7faa131a36073fba2dfeeba005eaf024859 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Sun, 26 Mar 2023 17:51:56 +0100
Subject: [PATCH 10/27] Fix wxString::GetCache() compilation in UTF-8 DLL build
 with MSVS

Don't declare static Cache variable inside wxString declaration itself
because it is implicitly DLL-exported, due to the use of
__declspec(dllexport) for the entire wxString class, but thread-specific
variables can't be exported, so this resulted in a compilation error.

Avoid this by using a static thread-specific variable inside GetCache(),
which had to be moved out of line.
---
 include/wx/string.h   | 4 ++--
 src/common/string.cpp | 7 ++++++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index b5cc276113..b434f9f8f0 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -447,8 +447,8 @@ private:
       unsigned lastUsed;
   };
 
-  static wxTHREAD_SPECIFIC_DECL Cache ms_cache;
-  static Cache& GetCache() { return ms_cache; }
+  // Implemented out of line because per-thread variable can't be DLL exported.
+  static Cache& GetCache();
 
   static Cache::Element *GetCacheBegin() { return GetCache().cached; }
   static Cache::Element *GetCacheEnd() { return GetCacheBegin() + Cache::SIZE; }
diff --git a/src/common/string.cpp b/src/common/string.cpp
index 009cd2abea..1e0b893a3f 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -75,7 +75,12 @@ const wxStringCharType WXDLLIMPEXP_BASE *wxEmptyStringImpl = "";
 const wxChar WXDLLIMPEXP_BASE *wxEmptyString = wxT("");
 #if wxUSE_STRING_POS_CACHE
 
-wxTHREAD_SPECIFIC_DECL wxString::Cache wxString::ms_cache;
+/* static */
+wxString::Cache& wxString::GetCache()
+{
+    static wxTHREAD_SPECIFIC_DECL Cache s_cache;
+    return s_cache;
+}
 
 // gdb seems to be unable to display thread-local variables correctly, at least
 // not my 6.4.98 version under amd64, so provide this debugging helper to do it

From 7fba58c74398d8824f88cccfc7efb539668050e9 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 16:31:29 +0100
Subject: [PATCH 11/27] Fix a comment mentioning the now removed ANSI build

Speaking of ANSI build was confusing, so don't.
---
 include/wx/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index b434f9f8f0..46e95d7351 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -1685,7 +1685,7 @@ public:
     // conversions with (possible) format conversions: have to return a
     // buffer with temporary data
     //
-    // the functions defined (in either Unicode or ANSI) mode are mb_str() to
+    // All builds of the library define the same functions: mb_str() to
     // return an ANSI (multibyte) string, wc_str() to return a wide string and
     // fn_str() to return a string which should be used with the OS APIs
     // accepting the file names. The return value is always the same, but the

From d8cf6d03dde5efe9e3f10080c91a838638534c40 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 16:25:01 +0100
Subject: [PATCH 12/27] Use the length of the buffer instead of recomputing it
 again

Micro optimization: use the already known buffer length instead of
calling wcslen() to compute it again.
---
 include/wx/string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index 46e95d7351..45c23f3bd0 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -3645,7 +3645,7 @@ struct wxStringAsBufHelper<wchar_t>
     {
         wxScopedWCharBuffer wbuf(s.wc_str());
         if ( len )
-            *len = wxWcslen(wbuf);
+            *len = wbuf.length();
         return wbuf;
     }
 };

From aea45196ab8225e54df1828699b44b31ba48ecb9 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 16:19:24 +0100
Subject: [PATCH 13/27] Fix using wxStringOutputStream with surrogates in UTF-8
 build

Under MSW, where wchar_t uses UTF-16, using wxString::length() was
wrong, as it could be smaller than the actual length of the wide
character string, e.g. 1 instead of 2 for a string containing a single
character outside of the BMP, such as U+2070D used in the
wxStringOutputStream::Tell unit test, which is encoded as a surrogate
pair in UTF-16.

This makes this test pass under MSW too now.
---
 src/common/sstream.cpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/common/sstream.cpp b/src/common/sstream.cpp
index fe5c9ecde1..4558c334c6 100644
--- a/src/common/sstream.cpp
+++ b/src/common/sstream.cpp
@@ -127,10 +127,23 @@ wxStringOutputStream::wxStringOutputStream(wxString *pString, wxMBConv& conv)
     // length anyhow in this case.
 #if wxUSE_UNICODE_UTF8
     if ( conv.IsUTF8() )
+    {
         m_pos = m_str->utf8_length();
+    }
     else
-#endif // wxUSE_UNICODE_UTF8
-        m_pos = m_conv.FromWChar(nullptr, 0, m_str->wc_str(), m_str->length());
+    {
+        // Note that we can't just use wxString::length() because it may return
+        // a different value from the buffer length when wchar_t uses UTF-16
+        // (i.e. MSW) and the string contains any surrogates.
+        const wxScopedWCharBuffer wbuf(m_str->wc_str());
+        m_pos = m_conv.FromWChar(nullptr, 0, wbuf.data(), wbuf.length());
+    }
+#else // !wxUSE_UNICODE_UTF8
+    // When using wchar_t for internal representation, the string length and
+    // the length of the buffer returned by wc_str() are one and the same, so
+    // we can avoid creating a temporary buffer, unlike in UTF-8 case above.
+    m_pos = m_conv.FromWChar(nullptr, 0, m_str->wc_str(), m_str->length());
+#endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
 }
 
 // ----------------------------------------------------------------------------

From fab541a8ff8e62914c6dee905558ec3270d51432 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 16:29:14 +0100
Subject: [PATCH 14/27] Fix another surrogate-related bug in UTF-8 build in
 wxString

This is similar to the fix in the previous commit and is needed for the
same reason.
---
 include/wx/string.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/wx/string.h b/include/wx/string.h
index 45c23f3bd0..8d182e744e 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -987,7 +987,17 @@ public:
       // This is logically equivalent to strlen(str.mb_str()) but avoids
       // actually converting the string to multibyte and just computes the
       // length that it would have after conversion.
+
+      // Note that in UTF-8 build we need to use the actual wide character
+      // buffer length and not the string length, as it may be different when
+      // using surrogates, but in wchar_t build they're the same by definition
+      // and we can avoid creating an extra buffer.
+#if wxUSE_UNICODE_UTF8
+      const wxScopedWCharBuffer wbuf(str.wc_str());
+      const size_t ofs = wxConvLibc.FromWChar(nullptr, 0, wbuf.data(), wbuf.length());
+#else // wxUSE_UNICODE_WCHAR
       const size_t ofs = wxConvLibc.FromWChar(nullptr, 0, str.wc_str(), str.length());
+#endif
       return ofs == wxCONV_FAILED ? 0 : static_cast<ptrdiff_t>(ofs);
   }
 

From b1a30e96aec39abb65b844dd015085136d300fd4 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 18:51:42 +0200
Subject: [PATCH 15/27] Fix using dangling pointer in iterator dtor in UTF-8
 builds

Destroying an iterator with a lifetime greater than that of the
associated string resulted in an invalid memory access due to using the
linked list of string iterators in the iterator dtor.

Fix this by clearing all the associated iterators when the string itself
is destroyed.

This fixes ASAN errors in wxDateTime::ParseDateTime() where, after
successfully parsing the date, the "endTime" const_iterator was
destroyed later than the "timestr" string it was associated with.
---
 include/wx/string.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index 8d182e744e..81f564801e 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -274,6 +274,7 @@ public:
     ~wxStringIteratorNode()
         { clear(); }
 
+    inline void clear();
     inline void set(const wxString *str, wxStringImpl::const_iterator *citer)
         { clear(); DoSet(str, citer, nullptr); }
     inline void set(const wxString *str, wxStringImpl::iterator *iter)
@@ -285,7 +286,6 @@ public:
     wxStringIteratorNode *m_prev{nullptr}, *m_next{nullptr};
 
 private:
-    inline void clear();
     inline void DoSet(const wxString *str,
                       wxStringImpl::const_iterator *citer,
                       wxStringImpl::iterator *iter);
@@ -1213,16 +1213,24 @@ public:
     { assign(std::move(str), nLength); }
 
 
-#if wxUSE_STRING_POS_CACHE
+#if wxUSE_UNICODE_UTF8
   ~wxString()
   {
+#if wxUSE_STRING_POS_CACHE
       // we need to invalidate our cache entry as another string could be
       // recreated at the same address (unlikely, but still possible, with the
       // heap-allocated strings but perfectly common with stack-allocated ones)
       InvalidateCache();
-  }
 #endif // wxUSE_STRING_POS_CACHE
 
+      // We also need to clear any still existing iterators pointing into this
+      // string, as otherwise clearing them later, when they're destroyed,
+      // would try to use a dangling string pointer stored in them.
+      while ( m_iterators.ptr )
+          m_iterators.ptr->clear();
+  }
+#endif // wxUSE_UNICODE_UTF8
+
   #if wxUSE_UNICODE_WCHAR
     wxString(const std::wstring& str) : m_impl(str) {}
     wxString(std::wstring&& str) noexcept : m_impl(std::move(str)) {}

From f9c109957ed286e12bc15e93f9a8da0a7b78f7f1 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 20:04:31 +0200
Subject: [PATCH 16/27] Don't assert when using "%c" with invalid char in UTF-8
 build

An existing unit test failed in UTF-8 build because using "%c" with an
invalid character triggered an assert when trying to encode as UTF-8
the (wchar_t)-1 value that vswprintf() had put into the returned buffer.

Fix this by not using the buffer contents at all if the function failed.
Note that although using the buffer contents after a failure seems to be
harmless in wchar_t build, it was still useless there, so this fix is
not made specific to UTF-8 build.
---
 src/common/string.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/common/string.cpp b/src/common/string.cpp
index 1e0b893a3f..2c8c91b754 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1852,6 +1852,13 @@ static int DoStringPrintfV(wxString& str,
         // options.
         if ( len < 0 )
         {
+            // When vswprintf() returns an error, it can leave invalid bytes in
+            // the buffer, e.g. using "%c" with an invalid character results in
+            // U+FFFFFFFF in the buffer, which would trigger an assert when we
+            // try to copy it back to wxString as UTF-8 in "tmp" buffer dtor,
+            // so ensure we don't try to do it.
+            buf[0] = L'\0';
+
             // assume it only returns error if there is not enough space, but
             // as we don't know how much we need, double the current size of
             // the buffer

From 2b5dbd1ec5ebecec28aec17ec76e6070b27f8024 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 20:50:25 +0200
Subject: [PATCH 17/27] Get rid of CppUnit boilerplate in wxString unit test

No real changes, just remove the completely useless StringTestCase and
use Catch macros directly instead of CPPUNIT_ASSERT wrappers.
---
 tests/strings/strings.cpp | 763 ++++++++++++++++----------------------
 1 file changed, 324 insertions(+), 439 deletions(-)

diff --git a/tests/strings/strings.cpp b/tests/strings/strings.cpp
index 84569f043f..69a77329ea 100644
--- a/tests/strings/strings.cpp
+++ b/tests/strings/strings.cpp
@@ -20,104 +20,10 @@
 #include "wx/private/localeset.h"
 
 // ----------------------------------------------------------------------------
-// test class
+// tests
 // ----------------------------------------------------------------------------
 
-class StringTestCase : public CppUnit::TestCase
-{
-public:
-    StringTestCase();
-
-private:
-    CPPUNIT_TEST_SUITE( StringTestCase );
-        CPPUNIT_TEST( String );
-        CPPUNIT_TEST( PChar );
-        CPPUNIT_TEST( Format );
-        CPPUNIT_TEST( FormatUnicode );
-        CPPUNIT_TEST( Constructors );
-        CPPUNIT_TEST( StaticConstructors );
-        CPPUNIT_TEST( Extraction );
-        CPPUNIT_TEST( Trim );
-        CPPUNIT_TEST( Find );
-        CPPUNIT_TEST( Replace );
-        CPPUNIT_TEST( Match );
-        CPPUNIT_TEST( CaseChanges );
-        CPPUNIT_TEST( Compare );
-        CPPUNIT_TEST( CompareNoCase );
-        CPPUNIT_TEST( Contains );
-        CPPUNIT_TEST( ToInt );
-        CPPUNIT_TEST( ToUInt );
-        CPPUNIT_TEST( ToLong );
-        CPPUNIT_TEST( ToULong );
-#ifdef wxLongLong_t
-        CPPUNIT_TEST( ToLongLong );
-        CPPUNIT_TEST( ToULongLong );
-#endif // wxLongLong_t
-        CPPUNIT_TEST( ToDouble );
-        CPPUNIT_TEST( FromDouble );
-        CPPUNIT_TEST( StringBuf );
-        CPPUNIT_TEST( UTF8Buf );
-        CPPUNIT_TEST( CStrDataTernaryOperator );
-        CPPUNIT_TEST( CStrDataOperators );
-        CPPUNIT_TEST( CStrDataImplicitConversion );
-        CPPUNIT_TEST( ExplicitConversion );
-        CPPUNIT_TEST( IndexedAccess );
-        CPPUNIT_TEST( BeforeAndAfter );
-        CPPUNIT_TEST( ScopedBuffers );
-        CPPUNIT_TEST( SupplementaryUniChar );
-    CPPUNIT_TEST_SUITE_END();
-
-    void String();
-    void PChar();
-    void Format();
-    void FormatUnicode();
-    void Constructors();
-    void StaticConstructors();
-    void Extraction();
-    void Trim();
-    void Find();
-    void Replace();
-    void Match();
-    void CaseChanges();
-    void Compare();
-    void CompareNoCase();
-    void Contains();
-    void ToInt();
-    void ToUInt();
-    void ToLong();
-    void ToULong();
-#ifdef wxLongLong_t
-    void ToLongLong();
-    void ToULongLong();
-#endif // wxLongLong_t
-    void ToDouble();
-    void FromDouble();
-    void StringBuf();
-    void UTF8Buf();
-    void CStrDataTernaryOperator();
-    void DoCStrDataTernaryOperator(bool cond);
-    void CStrDataOperators();
-    void CStrDataImplicitConversion();
-    void ExplicitConversion();
-    void IndexedAccess();
-    void BeforeAndAfter();
-    void ScopedBuffers();
-    void SupplementaryUniChar();
-
-    wxDECLARE_NO_COPY_CLASS(StringTestCase);
-};
-
-// register in the unnamed registry so that these tests are run by default
-CPPUNIT_TEST_SUITE_REGISTRATION( StringTestCase );
-
-// also include in its own registry so that these tests can be run alone
-CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( StringTestCase, "StringTestCase" );
-
-StringTestCase::StringTestCase()
-{
-}
-
-void StringTestCase::String()
+TEST_CASE("StringAssign", "[wxString]")
 {
     wxString a, b, c;
 
@@ -133,11 +39,11 @@ void StringTestCase::String()
         a += b;
         a += c;
         c = wxT("Hello world! What's up?");
-        CPPUNIT_ASSERT( c != a );
+        CHECK( c != a );
     }
 }
 
-void StringTestCase::PChar()
+TEST_CASE("StringPChar", "[wxString]")
 {
     wxChar a [128];
     wxChar b [128];
@@ -151,17 +57,17 @@ void StringTestCase::PChar()
         wxStrcat (a, b);
         wxStrcat (a, c);
         wxStrcpy (c, wxT("Hello world! What's up?"));
-        CPPUNIT_ASSERT( wxStrcmp (c, a) != 0 );
+        CHECK( wxStrcmp (c, a) != 0 );
     }
 }
 
-void StringTestCase::Format()
+TEST_CASE("StringFormat", "[wxString]")
 {
     wxString s1,s2;
     s1.Printf(wxT("%03d"), 18);
-    CPPUNIT_ASSERT( s1 == wxString::Format(wxT("%03d"), 18) );
+    CHECK( s1 == wxString::Format(wxT("%03d"), 18) );
     s2.Printf(wxT("Number 18: %s\n"), s1.c_str());
-    CPPUNIT_ASSERT( s2 == wxString::Format(wxT("Number 18: %s\n"), s1.c_str()) );
+    CHECK( s2 == wxString::Format(wxT("Number 18: %s\n"), s1.c_str()) );
 
     static const size_t lengths[] = { 1, 512, 1024, 1025, 2048, 4096, 4097 };
     for ( size_t n = 0; n < WXSIZEOF(lengths); n++ )
@@ -169,37 +75,24 @@ void StringTestCase::Format()
         const size_t len = lengths[n];
 
         wxString s(wxT('Z'), len);
-        CPPUNIT_ASSERT_EQUAL( len, wxString::Format(wxT("%s"), s.c_str()).length());
+        CHECK( wxString::Format(wxT("%s"), s.c_str()).length() == len );
     }
 
     // wxString::Format() should not modify errno
     errno = 1234;
     wxString::Format("abc %d %d", 1, 1);
-    CPPUNIT_ASSERT_EQUAL(1234, errno);
+    CHECK( errno == 1234 );
 
     // Positional parameters tests:
-    CPPUNIT_ASSERT_EQUAL
-    (
-        "two one",
-        wxString::Format(wxT("%2$s %1$s"), wxT("one"), wxT("two"))
-    );
-
-    CPPUNIT_ASSERT_EQUAL
-    (
-        "hello hello",
-        wxString::Format("%1$s %1$s", "hello")
-    );
-
-    CPPUNIT_ASSERT_EQUAL
-    (
-        "4 world hello world 3",
-        wxString::Format("%4$d %2$s %1$s %2$s %3$d", "hello", "world", 3, 4)
-    );
+    CHECK(wxString::Format(wxT("%2$s %1$s"), wxT("one"), wxT("two")) == "two one");
+    CHECK(wxString::Format("%1$s %1$s", "hello") == "hello hello");
+    CHECK(wxString::Format("%4$d %2$s %1$s %2$s %3$d", "hello", "world", 3, 4)
+            == "4 world hello world 3");
 
     CHECK( wxString::Format("%1$o %1$d %1$x", 20) == "24 20 14" );
 }
 
-void StringTestCase::FormatUnicode()
+TEST_CASE("StringFormatUnicode", "[wxString]")
 {
     // At least under FreeBSD vsnprintf(), used by wxString::Format(), doesn't
     // work with Unicode strings unless a UTF-8 locale is used, so set it.
@@ -212,24 +105,24 @@ void StringTestCase::FormatUnicode()
     wxString s = wxString::Format(fmt, 1, 1);
     wxString expected(fmt);
     expected.Replace("%i", "1");
-    CPPUNIT_ASSERT_EQUAL( expected, s );
+    CHECK( s == expected );
 }
 
-void StringTestCase::Constructors()
+TEST_CASE("StringConstructors", "[wxString]")
 {
-    CPPUNIT_ASSERT_EQUAL( "", wxString('Z', 0) );
-    CPPUNIT_ASSERT_EQUAL( "Z", wxString('Z') );
-    CPPUNIT_ASSERT_EQUAL( "ZZZZ", wxString('Z', 4) );
-    CPPUNIT_ASSERT_EQUAL( "Hell", wxString("Hello", 4) );
-    CPPUNIT_ASSERT_EQUAL( "Hello", wxString("Hello", 5) );
+    CHECK( wxString('Z', 0) == "" );
+    CHECK( wxString('Z') == "Z" );
+    CHECK( wxString('Z', 4) == "ZZZZ" );
+    CHECK( wxString("Hello", 4) == "Hell" );
+    CHECK( wxString("Hello", 5) == "Hello" );
 
-    CPPUNIT_ASSERT_EQUAL( L"", wxString(L'Z', 0) );
-    CPPUNIT_ASSERT_EQUAL( L"Z", wxString(L'Z') );
-    CPPUNIT_ASSERT_EQUAL( L"ZZZZ", wxString(L'Z', 4) );
-    CPPUNIT_ASSERT_EQUAL( L"Hell", wxString(L"Hello", 4) );
-    CPPUNIT_ASSERT_EQUAL( L"Hello", wxString(L"Hello", 5) );
+    CHECK( wxString(L'Z', 0) == L"" );
+    CHECK( wxString(L'Z') == L"Z" );
+    CHECK( wxString(L'Z', 4) == L"ZZZZ" );
+    CHECK( wxString(L"Hello", 4) == L"Hell" );
+    CHECK( wxString(L"Hello", 5) == L"Hello" );
 
-    CPPUNIT_ASSERT_EQUAL( 0, wxString(wxString(), 17).length() );
+    CHECK( wxString(wxString(), 17).length() == 0 );
 
 #if wxUSE_UNICODE_UTF8
     // This string has 3 characters (<h>, <e'> and <l>), not 4 when using UTF-8
@@ -237,8 +130,8 @@ void StringTestCase::Constructors()
     if ( wxConvLibc.IsUTF8() )
     {
         wxString s3("h\xc3\xa9llo", 4);
-        CPPUNIT_ASSERT_EQUAL( 3, s3.length() );
-        CPPUNIT_ASSERT_EQUAL( 'l', (char)s3[2] );
+        CHECK( s3.length() == 3 );
+        CHECK( (char)s3[2] == 'l' );
     }
 #endif // wxUSE_UNICODE_UTF8
 
@@ -246,60 +139,60 @@ void StringTestCase::Constructors()
     static const char *s = "?really!";
     const char *start = wxStrchr(s, 'r');
     const char *end = wxStrchr(s, '!');
-    CPPUNIT_ASSERT_EQUAL( "really", wxString(start, end) );
+    CHECK( wxString(start, end) == "really" );
 
     // test if creating string from null C pointer works:
-    CPPUNIT_ASSERT_EQUAL( "", wxString((const char *)nullptr) );
+    CHECK( wxString((const char *)nullptr) == "" );
 }
 
-void StringTestCase::StaticConstructors()
+TEST_CASE("StringStaticConstructors", "[wxString]")
 {
-    CPPUNIT_ASSERT_EQUAL( "", wxString::FromAscii("") );
-    CPPUNIT_ASSERT_EQUAL( "", wxString::FromAscii("Hello", 0) );
-    CPPUNIT_ASSERT_EQUAL( "Hell", wxString::FromAscii("Hello", 4) );
-    CPPUNIT_ASSERT_EQUAL( "Hello", wxString::FromAscii("Hello", 5) );
-    CPPUNIT_ASSERT_EQUAL( "Hello", wxString::FromAscii("Hello") );
+    CHECK( wxString::FromAscii("") == "" );
+    CHECK( wxString::FromAscii("Hello", 0) == "" );
+    CHECK( wxString::FromAscii("Hello", 4) == "Hell" );
+    CHECK( wxString::FromAscii("Hello", 5) == "Hello" );
+    CHECK( wxString::FromAscii("Hello") == "Hello" );
 
     // FIXME: this doesn't work currently but should!
-    //CPPUNIT_ASSERT_EQUAL( 1, wxString::FromAscii("", 1).length() );
+    //CHECK( wxString::FromAscii("", 1).length() == 1 );
 
 
-    CPPUNIT_ASSERT_EQUAL( "", wxString::FromUTF8("") );
-    CPPUNIT_ASSERT_EQUAL( "", wxString::FromUTF8("Hello", 0) );
-    CPPUNIT_ASSERT_EQUAL( "Hell", wxString::FromUTF8("Hello", 4) );
-    CPPUNIT_ASSERT_EQUAL( "Hello", wxString::FromUTF8("Hello", 5) );
-    CPPUNIT_ASSERT_EQUAL( "Hello", wxString::FromUTF8("Hello") );
+    CHECK( wxString::FromUTF8("") == "" );
+    CHECK( wxString::FromUTF8("Hello", 0) == "" );
+    CHECK( wxString::FromUTF8("Hello", 4) == "Hell" );
+    CHECK( wxString::FromUTF8("Hello", 5) == "Hello" );
+    CHECK( wxString::FromUTF8("Hello") == "Hello" );
 
-    CPPUNIT_ASSERT_EQUAL( 2, wxString::FromUTF8("h\xc3\xa9llo", 3).length() );
+    CHECK( wxString::FromUTF8("h\xc3\xa9llo", 3).length() == 2 );
 
 
-    //CPPUNIT_ASSERT_EQUAL( 1, wxString::FromUTF8("", 1).length() );
+    //CHECK( wxString::FromUTF8("", 1).length() == 1 );
 }
 
-void StringTestCase::Extraction()
+TEST_CASE("StringExtraction", "[wxString]")
 {
     wxString s(wxT("Hello, world!"));
 
-    CPPUNIT_ASSERT( wxStrcmp( s.c_str() , wxT("Hello, world!") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s.Left(5).c_str() , wxT("Hello") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s.Right(6).c_str() , wxT("world!") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s(3, 5).c_str() , wxT("lo, w") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s.Mid(3).c_str() , wxT("lo, world!") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s.substr(3, 5).c_str() , wxT("lo, w") ) == 0 );
-    CPPUNIT_ASSERT( wxStrcmp( s.substr(3).c_str() , wxT("lo, world!") ) == 0 );
+    CHECK( wxStrcmp( s.c_str() , wxT("Hello, world!") ) == 0 );
+    CHECK( wxStrcmp( s.Left(5).c_str() , wxT("Hello") ) == 0 );
+    CHECK( wxStrcmp( s.Right(6).c_str() , wxT("world!") ) == 0 );
+    CHECK( wxStrcmp( s(3, 5).c_str() , wxT("lo, w") ) == 0 );
+    CHECK( wxStrcmp( s.Mid(3).c_str() , wxT("lo, world!") ) == 0 );
+    CHECK( wxStrcmp( s.substr(3, 5).c_str() , wxT("lo, w") ) == 0 );
+    CHECK( wxStrcmp( s.substr(3).c_str() , wxT("lo, world!") ) == 0 );
 
     static const char *germanUTF8 = "Oberfl\303\244che";
     wxString strUnicode(wxString::FromUTF8(germanUTF8));
 
-    CPPUNIT_ASSERT( strUnicode.Mid(0, 10) == strUnicode );
-    CPPUNIT_ASSERT( strUnicode.Mid(7, 2) == "ch" );
+    CHECK( strUnicode.Mid(0, 10) == strUnicode );
+    CHECK( strUnicode.Mid(7, 2) == "ch" );
 
     wxString rest;
 
     #define TEST_STARTS_WITH(prefix, correct_rest, result)                    \
-        CPPUNIT_ASSERT_EQUAL(result, s.StartsWith(prefix, &rest));            \
+        CHECK(s.StartsWith(prefix, &rest) == result);                         \
         if ( result )                                                         \
-            CPPUNIT_ASSERT_EQUAL(correct_rest, rest)
+            CHECK(rest == correct_rest)
 
     TEST_STARTS_WITH( wxT("Hello"),           wxT(", world!"),      true  );
     TEST_STARTS_WITH( wxT("Hello, "),         wxT("world!"),        true  );
@@ -312,13 +205,13 @@ void StringTestCase::Extraction()
     #undef TEST_STARTS_WITH
 
     rest = "Hello world";
-    CPPUNIT_ASSERT( rest.StartsWith("Hello ", &rest) );
-    CPPUNIT_ASSERT_EQUAL("world", rest);
+    CHECK( rest.StartsWith("Hello ", &rest) );
+    CHECK( rest == "world" );
 
     #define TEST_ENDS_WITH(suffix, correct_rest, result)                      \
-        CPPUNIT_ASSERT_EQUAL(result, s.EndsWith(suffix, &rest));              \
+        CHECK(s.EndsWith(suffix, &rest) == result);                           \
         if ( result )                                                         \
-            CPPUNIT_ASSERT_EQUAL(correct_rest, rest)
+            CHECK(rest == correct_rest)
 
     TEST_ENDS_WITH( wxT(""),                 wxT("Hello, world!"), true  );
     TEST_ENDS_WITH( wxT("!"),                wxT("Hello, world"),  true  );
@@ -333,10 +226,10 @@ void StringTestCase::Extraction()
     #undef TEST_ENDS_WITH
 }
 
-void StringTestCase::Trim()
+TEST_CASE("StringTrim", "[wxString]")
 {
     #define TEST_TRIM( str , dir , result )  \
-        CPPUNIT_ASSERT( wxString(str).Trim(dir) == result )
+        CHECK( wxString(str).Trim(dir) == result )
 
     TEST_TRIM( wxT("  Test  "),  true, wxT("  Test") );
     TEST_TRIM( wxT("    "),      true, wxT("")       );
@@ -351,10 +244,10 @@ void StringTestCase::Trim()
     #undef TEST_TRIM
 }
 
-void StringTestCase::Find()
+TEST_CASE("StringFind", "[wxString]")
 {
     #define TEST_FIND( str , start , result )  \
-        CPPUNIT_ASSERT( wxString(str).find(wxT("ell"), start) == result );
+        CHECK( wxString(str).find(wxT("ell"), start) == result );
 
     TEST_FIND( wxT("Well, hello world"),  0, 1              );
     TEST_FIND( wxT("Well, hello world"),  6, 7              );
@@ -363,13 +256,13 @@ void StringTestCase::Find()
     #undef TEST_FIND
 }
 
-void StringTestCase::Replace()
+TEST_CASE("StringReplace", "[wxString]")
 {
     #define TEST_REPLACE( original , pos , len , replacement , result ) \
         { \
             wxString s = original; \
             s.replace( pos , len , replacement ); \
-            CPPUNIT_ASSERT_EQUAL( result, s ); \
+            CHECK( s == result ); \
         }
 
     TEST_REPLACE( wxT("012-AWORD-XYZ"), 4, 5, wxT("BWORD"),  wxT("012-BWORD-XYZ") );
@@ -383,7 +276,7 @@ void StringTestCase::Replace()
         { \
             wxString s(o,olen); \
             s.replace( pos , len , replacement ); \
-            CPPUNIT_ASSERT_EQUAL( wxString(r,rlen), s ); \
+            CHECK( s == wxString(r, rlen) ); \
         }
 
     TEST_NULLCHARREPLACE( wxT("null\0char"), 9, 5, 1, wxT("d"),
@@ -393,7 +286,7 @@ void StringTestCase::Replace()
         { \
             wxString s(o,olen); \
             s.Replace( olds, news, all ); \
-            CPPUNIT_ASSERT_EQUAL( wxString(r,rlen), s ); \
+            CHECK( s == wxString(r, rlen) ); \
         }
 
     TEST_WXREPLACE( wxT("null\0char"), 9, wxT("c"), wxT("de"), true,
@@ -413,10 +306,10 @@ void StringTestCase::Replace()
     #undef TEST_REPLACE
 }
 
-void StringTestCase::Match()
+TEST_CASE("StringMatch", "[wxString]")
 {
     #define TEST_MATCH( s1 , s2 , result ) \
-        CPPUNIT_ASSERT( wxString(s1).Matches(s2) == result )
+        CHECK( wxString(s1).Matches(s2) == result )
 
     TEST_MATCH( "foobar",       "foo*",        true  );
     TEST_MATCH( "foobar",       "*oo*",        true  );
@@ -434,7 +327,7 @@ void StringTestCase::Match()
 }
 
 
-void StringTestCase::CaseChanges()
+TEST_CASE("StringCaseChanges", "[wxString]")
 {
     wxString s1(wxT("Hello!"));
     wxString s1u(s1);
@@ -442,28 +335,28 @@ void StringTestCase::CaseChanges()
     s1u.MakeUpper();
     s1l.MakeLower();
 
-    CPPUNIT_ASSERT_EQUAL( wxT("HELLO!"), s1u );
-    CPPUNIT_ASSERT_EQUAL( wxT("hello!"), s1l );
+    CHECK( s1u == wxT("HELLO!") );
+    CHECK( s1l == wxT("hello!") );
 
     wxString s2u, s2l;
     s2u.MakeUpper();
     s2l.MakeLower();
 
-    CPPUNIT_ASSERT_EQUAL( "", s2u );
-    CPPUNIT_ASSERT_EQUAL( "", s2l );
+    CHECK( s2u == "" );
+    CHECK( s2l == "" );
 
 
     wxString s3("good bye");
-    CPPUNIT_ASSERT_EQUAL( "Good bye", s3.Capitalize() );
+    CHECK( s3.Capitalize() == "Good bye" );
     s3.MakeCapitalized();
-    CPPUNIT_ASSERT_EQUAL( "Good bye", s3 );
+    CHECK( s3 == "Good bye" );
 
-    CPPUNIT_ASSERT_EQUAL( "Abc", wxString("ABC").Capitalize() );
+    CHECK( wxString("ABC").Capitalize() == "Abc" );
 
-    CPPUNIT_ASSERT_EQUAL( "", wxString().Capitalize() );
+    CHECK( wxString().Capitalize() == "" );
 }
 
-void StringTestCase::Compare()
+TEST_CASE("StringCompare", "[wxString]")
 {
     wxString s1 = wxT("AHH");
     wxString eq = wxT("AHH");
@@ -472,31 +365,31 @@ void StringTestCase::Compare()
     wxString neq3 = wxT("AHHH");
     wxString neq4 = wxT("AhH");
 
-    CPPUNIT_ASSERT( s1 == eq );
-    CPPUNIT_ASSERT( s1 != neq1 );
-    CPPUNIT_ASSERT( s1 != neq2 );
-    CPPUNIT_ASSERT( s1 != neq3 );
-    CPPUNIT_ASSERT( s1 != neq4 );
+    CHECK( s1 == eq );
+    CHECK( s1 != neq1 );
+    CHECK( s1 != neq2 );
+    CHECK( s1 != neq3 );
+    CHECK( s1 != neq4 );
 
-    CPPUNIT_ASSERT( s1 == wxT("AHH") );
-    CPPUNIT_ASSERT( s1 != wxT("no") );
-    CPPUNIT_ASSERT( s1 < wxT("AZ") );
-    CPPUNIT_ASSERT( s1 <= wxT("AZ") );
-    CPPUNIT_ASSERT( s1 <= wxT("AHH") );
-    CPPUNIT_ASSERT( s1 > wxT("AA") );
-    CPPUNIT_ASSERT( s1 >= wxT("AA") );
-    CPPUNIT_ASSERT( s1 >= wxT("AHH") );
+    CHECK( s1 == wxT("AHH") );
+    CHECK( s1 != wxT("no") );
+    CHECK( s1 < wxT("AZ") );
+    CHECK( s1 <= wxT("AZ") );
+    CHECK( s1 <= wxT("AHH") );
+    CHECK( s1 > wxT("AA") );
+    CHECK( s1 >= wxT("AA") );
+    CHECK( s1 >= wxT("AHH") );
 
     // test comparison with C strings in Unicode build (must work in ANSI as
     // well, of course):
-    CPPUNIT_ASSERT( s1 == "AHH" );
-    CPPUNIT_ASSERT( s1 != "no" );
-    CPPUNIT_ASSERT( s1 < "AZ" );
-    CPPUNIT_ASSERT( s1 <= "AZ" );
-    CPPUNIT_ASSERT( s1 <= "AHH" );
-    CPPUNIT_ASSERT( s1 > "AA" );
-    CPPUNIT_ASSERT( s1 >= "AA" );
-    CPPUNIT_ASSERT( s1 >= "AHH" );
+    CHECK( s1 == "AHH" );
+    CHECK( s1 != "no" );
+    CHECK( s1 < "AZ" );
+    CHECK( s1 <= "AZ" );
+    CHECK( s1 <= "AHH" );
+    CHECK( s1 > "AA" );
+    CHECK( s1 >= "AA" );
+    CHECK( s1 >= "AHH" );
 
 //    wxString _s1 = wxT("A\0HH");
 //    wxString _eq = wxT("A\0HH");
@@ -511,18 +404,18 @@ void StringTestCase::Compare()
     neq3.insert(1,1,'\0');
     neq4.insert(1,1,'\0');
 
-    CPPUNIT_ASSERT( s1 == eq );
-    CPPUNIT_ASSERT( s1 != neq1 );
-    CPPUNIT_ASSERT( s1 != neq2 );
-    CPPUNIT_ASSERT( s1 != neq3 );
-    CPPUNIT_ASSERT( s1 != neq4 );
+    CHECK( s1 == eq );
+    CHECK( s1 != neq1 );
+    CHECK( s1 != neq2 );
+    CHECK( s1 != neq3 );
+    CHECK( s1 != neq4 );
 
-    CPPUNIT_ASSERT( wxString("\n").Cmp(" ") < 0 );
-    CPPUNIT_ASSERT( wxString("'").Cmp("!") > 0 );
-    CPPUNIT_ASSERT( wxString("!").Cmp("z") < 0 );
+    CHECK( wxString("\n").Cmp(" ") < 0 );
+    CHECK( wxString("'").Cmp("!") > 0 );
+    CHECK( wxString("!").Cmp("z") < 0 );
 }
 
-void StringTestCase::CompareNoCase()
+TEST_CASE("StringCompareNoCase", "[wxString]")
 {
     wxString s1 = wxT("AHH");
     wxString eq = wxT("AHH");
@@ -532,16 +425,16 @@ void StringTestCase::CompareNoCase()
     wxString neq2 = wxT("AH");
     wxString neq3 = wxT("AHHH");
 
-    #define CPPUNIT_CNCEQ_ASSERT(s1, s2) CPPUNIT_ASSERT( s1.CmpNoCase(s2) == 0)
-    #define CPPUNIT_CNCNEQ_ASSERT(s1, s2) CPPUNIT_ASSERT( s1.CmpNoCase(s2) != 0)
+    #define CHECK_EQ_NO_CASE(s1, s2) CHECK( s1.CmpNoCase(s2) == 0)
+    #define CHECK_NEQ_NO_CASE(s1, s2) CHECK( s1.CmpNoCase(s2) != 0)
 
-    CPPUNIT_CNCEQ_ASSERT( s1, eq );
-    CPPUNIT_CNCEQ_ASSERT( s1, eq2 );
-    CPPUNIT_CNCEQ_ASSERT( s1, eq3 );
+    CHECK_EQ_NO_CASE( s1, eq );
+    CHECK_EQ_NO_CASE( s1, eq2 );
+    CHECK_EQ_NO_CASE( s1, eq3 );
 
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq );
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq2 );
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq3 );
+    CHECK_NEQ_NO_CASE( s1, neq );
+    CHECK_NEQ_NO_CASE( s1, neq2 );
+    CHECK_NEQ_NO_CASE( s1, neq3 );
 
 
 //    wxString _s1 = wxT("A\0HH");
@@ -560,20 +453,20 @@ void StringTestCase::CompareNoCase()
     neq2.insert(1,1,'\0');
     neq3.insert(1,1,'\0');
 
-    CPPUNIT_CNCEQ_ASSERT( s1, eq );
-    CPPUNIT_CNCEQ_ASSERT( s1, eq2 );
-    CPPUNIT_CNCEQ_ASSERT( s1, eq3 );
+    CHECK_EQ_NO_CASE( s1, eq );
+    CHECK_EQ_NO_CASE( s1, eq2 );
+    CHECK_EQ_NO_CASE( s1, eq3 );
 
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq );
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq2 );
-    CPPUNIT_CNCNEQ_ASSERT( s1, neq3 );
+    CHECK_NEQ_NO_CASE( s1, neq );
+    CHECK_NEQ_NO_CASE( s1, neq2 );
+    CHECK_NEQ_NO_CASE( s1, neq3 );
 
-    CPPUNIT_ASSERT( wxString("\n").CmpNoCase(" ") < 0 );
-    CPPUNIT_ASSERT( wxString("'").CmpNoCase("!") > 0);
-    CPPUNIT_ASSERT( wxString("!").Cmp("Z") < 0 );
+    CHECK( wxString("\n").CmpNoCase(" ") < 0 );
+    CHECK( wxString("'").CmpNoCase("!") > 0);
+    CHECK( wxString("!").Cmp("Z") < 0 );
 }
 
-void StringTestCase::Contains()
+TEST_CASE("StringContains", "[wxString]")
 {
     static const struct ContainsData
     {
@@ -596,7 +489,7 @@ void StringTestCase::Contains()
     for ( size_t n = 0; n < WXSIZEOF(containsData); n++ )
     {
         const ContainsData& cd = containsData[n];
-        CPPUNIT_ASSERT_EQUAL( cd.contains, wxString(cd.hay).Contains(cd.needle) );
+        CHECK( wxString(cd.hay).Contains(cd.needle) == cd.contains );
     }
 }
 
@@ -709,7 +602,7 @@ static const struct ToLongData
 
 wxGCC_WARNING_RESTORE(missing-field-initializers)
 
-void StringTestCase::ToInt()
+TEST_CASE("StringToInt", "[wxString]")
 {
     int i;
     for (size_t n = 0; n < WXSIZEOF(intData); n++)
@@ -719,26 +612,25 @@ void StringTestCase::ToInt()
         if (id.flags & (Number_Unsigned))
             continue;
 
-        CPPUNIT_ASSERT_EQUAL(id.IsOk(),
-            wxString(id.str).ToInt(&i, id.base));
+        CHECK(id.IsOk() == wxString(id.str).ToInt(&i, id.base));
 
         if (id.IsOk())
-            CPPUNIT_ASSERT_EQUAL(id.IValue(), i);
+            CHECK( i == id.IValue() );
     }
 
     // special case: check that the output is not modified if the parsing
     // failed completely
     i = 17;
-    CPPUNIT_ASSERT(!wxString("foo").ToInt(&i));
-    CPPUNIT_ASSERT_EQUAL(17, i);
+    CHECK(!wxString("foo").ToInt(&i));
+    CHECK( i == 17 );
 
     // also check that it is modified if we did parse something successfully in
     // the beginning of the string
-    CPPUNIT_ASSERT(!wxString("9 cats").ToInt(&i));
-    CPPUNIT_ASSERT_EQUAL(9, i);
+    CHECK(!wxString("9 cats").ToInt(&i));
+    CHECK( i == 9 );
 }
 
-void StringTestCase::ToUInt()
+TEST_CASE("StringToUInt", "[wxString]")
 {
     unsigned int i;
     for (size_t n = 0; n < WXSIZEOF(intData); n++)
@@ -748,26 +640,25 @@ void StringTestCase::ToUInt()
         if (id.flags & (Number_Signed))
             continue;
 
-        CPPUNIT_ASSERT_EQUAL(id.IsOk(),
-            wxString(id.str).ToUInt(&i, id.base));
+        CHECK(id.IsOk() == wxString(id.str).ToUInt(&i, id.base));
 
         if (id.IsOk())
-            CPPUNIT_ASSERT_EQUAL(id.UIValue(), i);
+            CHECK( i == id.UIValue() );
     }
 
     // special case: check that the output is not modified if the parsing
     // failed completely
     i = 17;
-    CPPUNIT_ASSERT(!wxString("foo").ToUInt(&i));
-    CPPUNIT_ASSERT_EQUAL(17, i);
+    CHECK(!wxString("foo").ToUInt(&i));
+    CHECK( i == 17 );
 
     // also check that it is modified if we did parse something successfully in
     // the beginning of the string
-    CPPUNIT_ASSERT(!wxString("9 cats").ToUInt(&i));
-    CPPUNIT_ASSERT_EQUAL(9, i);
+    CHECK(!wxString("9 cats").ToUInt(&i));
+    CHECK( i == 9 );
 }
 
-void StringTestCase::ToLong()
+TEST_CASE("StringToLong", "[wxString]")
 {
     long l;
     for ( size_t n = 0; n < WXSIZEOF(longData); n++ )
@@ -780,30 +671,28 @@ void StringTestCase::ToLong()
         // NOTE: unless you're using some exotic locale, ToCLong and ToLong
         //       should behave the same for our test data set:
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToCLong(&l, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToCLong(&l, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.LValue(), l );
+            CHECK( l == ld.LValue() );
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToLong(&l, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToLong(&l, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.LValue(), l );
+            CHECK( l == ld.LValue() );
     }
 
     // special case: check that the output is not modified if the parsing
     // failed completely
     l = 17;
-    CPPUNIT_ASSERT( !wxString("foo").ToLong(&l) );
-    CPPUNIT_ASSERT_EQUAL( 17, l );
+    CHECK( !wxString("foo").ToLong(&l) );
+    CHECK( l == 17 );
 
     // also check that it is modified if we did parse something successfully in
     // the beginning of the string
-    CPPUNIT_ASSERT( !wxString("9 cats").ToLong(&l) );
-    CPPUNIT_ASSERT_EQUAL( 9, l );
+    CHECK( !wxString("9 cats").ToLong(&l) );
+    CHECK( l == 9 );
 }
 
-void StringTestCase::ToULong()
+TEST_CASE("StringToULong", "[wxString]")
 {
     unsigned long ul;
     for ( size_t n = 0; n < WXSIZEOF(longData); n++ )
@@ -816,21 +705,19 @@ void StringTestCase::ToULong()
         // NOTE: unless you're using some exotic locale, ToCLong and ToLong
         //       should behave the same for our test data set:
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToCULong(&ul, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToCULong(&ul, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.ULValue(), ul );
+            CHECK( ul == ld.ULValue() );
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToULong(&ul, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToULong(&ul, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.ULValue(), ul );
+            CHECK( ul == ld.ULValue() );
     }
 }
 
 #ifdef wxLongLong_t
 
-void StringTestCase::ToLongLong()
+TEST_CASE("StringToLongLong", "[wxString]")
 {
     wxLongLong_t l;
     for ( size_t n = 0; n < WXSIZEOF(longData); n++ )
@@ -840,14 +727,13 @@ void StringTestCase::ToLongLong()
         if ( ld.flags & (Number_Long | Number_Unsigned) )
             continue;
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToLongLong(&l, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToLongLong(&l, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.LLValue(), l );
+            CHECK( l == ld.LLValue() );
     }
 }
 
-void StringTestCase::ToULongLong()
+TEST_CASE("StringToULongLong", "[wxString]")
 {
     wxULongLong_t ul;
     for ( size_t n = 0; n < WXSIZEOF(longData); n++ )
@@ -857,16 +743,15 @@ void StringTestCase::ToULongLong()
         if ( ld.flags & (Number_Long | Number_Signed) )
             continue;
 
-        CPPUNIT_ASSERT_EQUAL( ld.IsOk(),
-                              wxString(ld.str).ToULongLong(&ul, ld.base) );
+        CHECK( ld.IsOk() == wxString(ld.str).ToULongLong(&ul, ld.base) );
         if ( ld.IsOk() )
-            CPPUNIT_ASSERT_EQUAL( ld.ULLValue(), ul );
+            CHECK( ul == ld.ULLValue() );
     }
 }
 
 #endif // wxLongLong_t
 
-void StringTestCase::ToDouble()
+TEST_CASE("StringToDouble", "[wxString]")
 {
     double d;
     static const struct ToDoubleData
@@ -896,9 +781,9 @@ void StringTestCase::ToDouble()
     for ( n = 0; n < WXSIZEOF(doubleData); n++ )
     {
         const ToDoubleData& ld = doubleData[n];
-        CPPUNIT_ASSERT_EQUAL( ld.ok, wxString(ld.str).ToCDouble(&d) );
+        CHECK( wxString(ld.str).ToCDouble(&d) == ld.ok );
         if ( ld.ok )
-            CPPUNIT_ASSERT_EQUAL( ld.value, d );
+            CHECK( d == ld.value );
     }
 
 
@@ -912,7 +797,7 @@ void StringTestCase::ToDouble()
     wxLocale locale;
 
     // don't load default catalog, it may be unavailable:
-    CPPUNIT_ASSERT( locale.Init(wxLANGUAGE_FRENCH, wxLOCALE_DONT_LOAD_DEFAULT) );
+    CHECK( locale.Init(wxLANGUAGE_FRENCH, wxLOCALE_DONT_LOAD_DEFAULT) );
 
     static const struct ToDoubleData doubleData2[] =
     {
@@ -933,13 +818,13 @@ void StringTestCase::ToDouble()
     for ( n = 0; n < WXSIZEOF(doubleData2); n++ )
     {
         const ToDoubleData& ld = doubleData2[n];
-        CPPUNIT_ASSERT_EQUAL( ld.ok, wxString(ld.str).ToDouble(&d) );
+        CHECK( wxString(ld.str).ToDouble(&d) == ld.ok );
         if ( ld.ok )
-            CPPUNIT_ASSERT_EQUAL( ld.value, d );
+            CHECK( d == ld.value );
     }
 }
 
-void StringTestCase::FromDouble()
+TEST_CASE("StringFromDouble", "[wxString]")
 {
     static const struct FromDoubleTestData
     {
@@ -964,14 +849,14 @@ void StringTestCase::FromDouble()
     for ( unsigned n = 0; n < WXSIZEOF(testData); n++ )
     {
         const FromDoubleTestData& td = testData[n];
-        CPPUNIT_ASSERT_EQUAL( td.str, wxString::FromCDouble(td.value, td.prec) );
+        CHECK( wxString::FromCDouble(td.value, td.prec) == td.str );
     }
 
     if ( !wxLocale::IsAvailable(wxLANGUAGE_FRENCH) )
         return;
 
     wxLocale locale;
-    CPPUNIT_ASSERT( locale.Init(wxLANGUAGE_FRENCH, wxLOCALE_DONT_LOAD_DEFAULT) );
+    CHECK( locale.Init(wxLANGUAGE_FRENCH, wxLOCALE_DONT_LOAD_DEFAULT) );
 
     for ( unsigned m = 0; m < WXSIZEOF(testData); m++ )
     {
@@ -979,37 +864,37 @@ void StringTestCase::FromDouble()
 
         wxString str(td.str);
         str.Replace(".", ",");
-        CPPUNIT_ASSERT_EQUAL( str, wxString::FromDouble(td.value, td.prec) );
+        CHECK( wxString::FromDouble(td.value, td.prec) == str );
     }
 }
 
-void StringTestCase::StringBuf()
+TEST_CASE("StringStringBuf", "[wxString]")
 {
     // check that buffer can be used to write into the string
     wxString s;
     wxStrcpy(wxStringBuffer(s, 10), wxT("foo"));
 
-    CPPUNIT_ASSERT_EQUAL(3, s.length());
-    CPPUNIT_ASSERT(wxT('f') == s[0u]);
-    CPPUNIT_ASSERT(wxT('o') == s[1]);
-    CPPUNIT_ASSERT(wxT('o') == s[2]);
+    CHECK( s.length() == 3 );
+    CHECK(wxT('f') == s[0u]);
+    CHECK(wxT('o') == s[1]);
+    CHECK(wxT('o') == s[2]);
 
     {
         // also check that the buffer initially contains the original string
         // contents
         wxStringBuffer buf(s, 10);
-        CPPUNIT_ASSERT_EQUAL( wxT('f'), buf[0] );
-        CPPUNIT_ASSERT_EQUAL( wxT('o'), buf[1] );
-        CPPUNIT_ASSERT_EQUAL( wxT('o'), buf[2] );
-        CPPUNIT_ASSERT_EQUAL( wxT('\0'), buf[3] );
+        CHECK( buf[0] == wxT('f') );
+        CHECK( buf[1] == wxT('o') );
+        CHECK( buf[2] == wxT('o') );
+        CHECK( buf[3] == wxT('\0') );
     }
 
     {
         wxStringBufferLength buf(s, 10);
-        CPPUNIT_ASSERT_EQUAL( wxT('f'), buf[0] );
-        CPPUNIT_ASSERT_EQUAL( wxT('o'), buf[1] );
-        CPPUNIT_ASSERT_EQUAL( wxT('o'), buf[2] );
-        CPPUNIT_ASSERT_EQUAL( wxT('\0'), buf[3] );
+        CHECK( buf[0] == wxT('f') );
+        CHECK( buf[1] == wxT('o') );
+        CHECK( buf[2] == wxT('o') );
+        CHECK( buf[3] == wxT('\0') );
 
         // and check that it can be used to write only the specified number of
         // characters to the string
@@ -1017,11 +902,11 @@ void StringTestCase::StringBuf()
         buf.SetLength(4);
     }
 
-    CPPUNIT_ASSERT_EQUAL(4, s.length());
-    CPPUNIT_ASSERT(wxT('b') == s[0u]);
-    CPPUNIT_ASSERT(wxT('a') == s[1]);
-    CPPUNIT_ASSERT(wxT('r') == s[2]);
-    CPPUNIT_ASSERT(wxT('r') == s[3]);
+    CHECK( s.length() == 4 );
+    CHECK(wxT('b') == s[0u]);
+    CHECK(wxT('a') == s[1]);
+    CHECK(wxT('r') == s[2]);
+    CHECK(wxT('r') == s[3]);
 
     // check that creating buffer of length smaller than string works, i.e. at
     // least doesn't crash (it would if we naively copied the entire original
@@ -1029,7 +914,7 @@ void StringTestCase::StringBuf()
     *wxStringBuffer(s, 1) = '!';
 }
 
-void StringTestCase::UTF8Buf()
+TEST_CASE("StringUTF8Buf", "[wxString]")
 {
     // "czech" in Czech ("cestina"):
     static const char *textUTF8 = "\304\215e\305\241tina";
@@ -1037,30 +922,23 @@ void StringTestCase::UTF8Buf()
 
     wxString s;
     wxStrcpy(wxUTF8StringBuffer(s, 9), textUTF8);
-    CPPUNIT_ASSERT(s == textUTF16);
+    CHECK(s == textUTF16);
 
     {
         wxUTF8StringBufferLength buf(s, 20);
         wxStrcpy(buf, textUTF8);
         buf.SetLength(5);
     }
-    CPPUNIT_ASSERT(s == wxString(textUTF16, 0, 3));
+    CHECK(s == wxString(textUTF16, 0, 3));
 }
 
 
-
-void StringTestCase::CStrDataTernaryOperator()
-{
-    DoCStrDataTernaryOperator(true);
-    DoCStrDataTernaryOperator(false);
-}
-
 template<typename T> bool CheckStr(const wxString& expected, T s)
 {
     return expected == wxString(s);
 }
 
-void StringTestCase::DoCStrDataTernaryOperator(bool cond)
+void DoCStrDataTernaryOperator(bool cond)
 {
     // test compilation of wxCStrData when used with operator?: (the asserts
     // are not very important, we're testing if the code compiles at all):
@@ -1074,39 +952,46 @@ void StringTestCase::DoCStrDataTernaryOperator(bool cond)
     wxCLANG_WARNING_SUPPRESS(c++11-compat-deprecated-writable-strings)
 
     const wchar_t *wcStr = L"foo";
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? s.c_str() : wcStr)) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? s.c_str() : L"foo")) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? wcStr : s.c_str())) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? L"foo" : s.c_str())) );
+    CHECK( CheckStr(s, (cond ? s.c_str() : wcStr)) );
+    CHECK( CheckStr(s, (cond ? s.c_str() : L"foo")) );
+    CHECK( CheckStr(s, (cond ? wcStr : s.c_str())) );
+    CHECK( CheckStr(s, (cond ? L"foo" : s.c_str())) );
 
     const char *mbStr = "foo";
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? s.c_str() : mbStr)) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? s.c_str() : "foo")) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? mbStr : s.c_str())) );
-    CPPUNIT_ASSERT( CheckStr(s, (cond ? "foo" : s.c_str())) );
+    CHECK( CheckStr(s, (cond ? s.c_str() : mbStr)) );
+    CHECK( CheckStr(s, (cond ? s.c_str() : "foo")) );
+    CHECK( CheckStr(s, (cond ? mbStr : s.c_str())) );
+    CHECK( CheckStr(s, (cond ? "foo" : s.c_str())) );
 
     wxGCC_WARNING_RESTORE(write-strings)
     wxCLANG_WARNING_RESTORE(c++11-compat-deprecated-writable-strings)
 
     wxString empty("");
-    CPPUNIT_ASSERT( CheckStr(empty, (cond ? empty.c_str() : wxEmptyString)) );
-    CPPUNIT_ASSERT( CheckStr(empty, (cond ? wxEmptyString : empty.c_str())) );
+    CHECK( CheckStr(empty, (cond ? empty.c_str() : wxEmptyString)) );
+    CHECK( CheckStr(empty, (cond ? wxEmptyString : empty.c_str())) );
 }
 
-void StringTestCase::CStrDataOperators()
+TEST_CASE("StringCStrDataTernaryOperator", "[wxString]")
+{
+    DoCStrDataTernaryOperator(true);
+    DoCStrDataTernaryOperator(false);
+}
+
+
+TEST_CASE("StringCStrDataOperators", "[wxString]")
 {
     wxString s("hello");
 
-    CPPUNIT_ASSERT( s.c_str()[0] == 'h' );
-    CPPUNIT_ASSERT( s.c_str()[1] == 'e' );
+    CHECK( s.c_str()[0] == 'h' );
+    CHECK( s.c_str()[1] == 'e' );
 
     // IMPORTANT: at least with the CRT coming with MSVC++ 2008 trying to access
     //            the final character results in an assert failure (with debug CRT)
-    //CPPUNIT_ASSERT( s.c_str()[5] == '\0' );
+    //CHECK( s.c_str()[5] == '\0' );
 
-    CPPUNIT_ASSERT( *s.c_str() == 'h' );
-    CPPUNIT_ASSERT( *(s.c_str() + 2) == 'l' );
-    //CPPUNIT_ASSERT( *(s.c_str() + 5) == '\0' );
+    CHECK( *s.c_str() == 'h' );
+    CHECK( *(s.c_str() + 2) == 'l' );
+    //CHECK( *(s.c_str() + 5) == '\0' );
 }
 
 bool CheckStrChar(const wxString& expected, char *s)
@@ -1118,48 +1003,48 @@ bool CheckStrConstChar(const wxString& expected, const char *s)
 bool CheckStrConstWChar(const wxString& expected, const wchar_t *s)
     { return CheckStr(expected, s); }
 
-void StringTestCase::CStrDataImplicitConversion()
+TEST_CASE("StringCStrDataImplicitConversion", "[wxString]")
 {
     wxString s("foo");
 
-    CPPUNIT_ASSERT( CheckStrConstWChar(s, s.c_str()) );
-    CPPUNIT_ASSERT( CheckStrConstChar(s, s.c_str()) );
+    CHECK( CheckStrConstWChar(s, s.c_str()) );
+    CHECK( CheckStrConstChar(s, s.c_str()) );
 
     // implicit conversion of wxString is not available in STL build
 #if !wxUSE_STL
-    CPPUNIT_ASSERT( CheckStrConstWChar(s, s) );
+    CHECK( CheckStrConstWChar(s, s) );
 #if wxUSE_UNSAFE_WXSTRING_CONV
-    CPPUNIT_ASSERT( CheckStrConstChar(s, s) );
+    CHECK( CheckStrConstChar(s, s) );
 #endif
 #endif
 }
 
-void StringTestCase::ExplicitConversion()
+TEST_CASE("StringExplicitConversion", "[wxString]")
 {
     wxString s("foo");
 
-    CPPUNIT_ASSERT( CheckStr(s, s.mb_str()) );
-    CPPUNIT_ASSERT( CheckStrConstChar(s, s.mb_str()) );
-    CPPUNIT_ASSERT( CheckStrChar(s, s.char_str()) );
+    CHECK( CheckStr(s, s.mb_str()) );
+    CHECK( CheckStrConstChar(s, s.mb_str()) );
+    CHECK( CheckStrChar(s, s.char_str()) );
 
-    CPPUNIT_ASSERT( CheckStr(s, s.wc_str()) );
-    CPPUNIT_ASSERT( CheckStrConstWChar(s, s.wc_str()) );
-    CPPUNIT_ASSERT( CheckStrWChar(s, s.wchar_str()) );
+    CHECK( CheckStr(s, s.wc_str()) );
+    CHECK( CheckStrConstWChar(s, s.wc_str()) );
+    CHECK( CheckStrWChar(s, s.wchar_str()) );
 }
 
-void StringTestCase::IndexedAccess()
+TEST_CASE("StringIndexedAccess", "[wxString]")
 {
     wxString s("bar");
-    CPPUNIT_ASSERT_EQUAL( 'r', (char)s[2] );
+    CHECK( (char)s[2] == 'r' );
 
     // this tests for a possible bug in UTF-8 based wxString implementation:
     // the 3rd character of the underlying byte string is going to change, but
     // the 3rd character of wxString should remain the same
     s[0] = L'\xe9';
-    CPPUNIT_ASSERT_EQUAL( 'r', (char)s[2] );
+    CHECK( (char)s[2] == 'r' );
 }
 
-void StringTestCase::BeforeAndAfter()
+TEST_CASE("StringBeforeAndAfter", "[wxString]")
 {
     // Construct a string with 2 equal signs in it by concatenating its three
     // parts: before the first "=", in between the two "="s and after the last
@@ -1173,33 +1058,33 @@ void StringTestCase::BeforeAndAfter()
 
     wxString r;
 
-    CPPUNIT_ASSERT_EQUAL( FIRST_PART, s.BeforeFirst('=', &r) );
-    CPPUNIT_ASSERT_EQUAL( MIDDLE_PART wxT("=") LAST_PART, r );
+    CHECK( s.BeforeFirst('=', &r) == FIRST_PART );
+    CHECK( r == MIDDLE_PART wxT("=") LAST_PART );
 
-    CPPUNIT_ASSERT_EQUAL( s, s.BeforeFirst('!', &r) );
-    CPPUNIT_ASSERT_EQUAL( "", r );
+    CHECK( s.BeforeFirst('!', &r) == s );
+    CHECK( r == "" );
 
 
-    CPPUNIT_ASSERT_EQUAL( FIRST_PART wxT("=") MIDDLE_PART, s.BeforeLast('=', &r) );
-    CPPUNIT_ASSERT_EQUAL( LAST_PART, r );
+    CHECK( s.BeforeLast('=', &r) == FIRST_PART wxT("=") MIDDLE_PART );
+    CHECK( r == LAST_PART );
 
-    CPPUNIT_ASSERT_EQUAL( "", s.BeforeLast('!', &r) );
-    CPPUNIT_ASSERT_EQUAL( s, r );
+    CHECK( s.BeforeLast('!', &r) == "" );
+    CHECK( r == s );
 
 
-    CPPUNIT_ASSERT_EQUAL( MIDDLE_PART wxT("=") LAST_PART, s.AfterFirst('=') );
-    CPPUNIT_ASSERT_EQUAL( "", s.AfterFirst('!') );
+    CHECK( s.AfterFirst('=') == MIDDLE_PART wxT("=") LAST_PART );
+    CHECK( s.AfterFirst('!') == "" );
 
 
-    CPPUNIT_ASSERT_EQUAL( LAST_PART, s.AfterLast('=') );
-    CPPUNIT_ASSERT_EQUAL( s, s.AfterLast('!') );
+    CHECK( s.AfterLast('=') == LAST_PART );
+    CHECK( s.AfterLast('!') == s );
 
     #undef LAST_PART
     #undef MIDDLE_PART
     #undef FIRST_PART
 }
 
-void StringTestCase::ScopedBuffers()
+TEST_CASE("StringScopedBuffers", "[wxString]")
 {
     // wxString relies on efficient buffers, verify they work as they should
 
@@ -1207,49 +1092,49 @@ void StringTestCase::ScopedBuffers()
 
     // non-owned buffer points to the string passed to it
     wxScopedCharBuffer sbuf = wxScopedCharBuffer::CreateNonOwned(literal);
-    CPPUNIT_ASSERT( sbuf.data() == literal );
+    CHECK( sbuf.data() == literal );
 
     // a copy of scoped non-owned buffer still points to the same string
     wxScopedCharBuffer sbuf2(sbuf);
-    CPPUNIT_ASSERT( sbuf.data() == sbuf2.data() );
+    CHECK( sbuf.data() == sbuf2.data() );
 
     // but assigning it to wxCharBuffer makes a full copy
     wxCharBuffer buf(sbuf);
-    CPPUNIT_ASSERT( buf.data() != literal );
-    CPPUNIT_ASSERT_EQUAL( std::string(literal), buf.data() );
+    CHECK( buf.data() != literal );
+    CHECK( buf.data() == std::string(literal) );
 
     wxCharBuffer buf2 = sbuf;
-    CPPUNIT_ASSERT( buf2.data() != literal );
-    CPPUNIT_ASSERT_EQUAL( std::string(literal), buf.data() );
+    CHECK( buf2.data() != literal );
+    CHECK( buf.data() == std::string(literal) );
 
     // Check that extending the buffer keeps it NUL-terminated.
     size_t len = 10;
 
     wxCharBuffer buf3(len);
-    CPPUNIT_ASSERT_EQUAL('\0', buf3.data()[len]);
+    CHECK( buf3.data()[len] == '\0' );
 
     wxCharBuffer buf4;
     buf4.extend(len);
-    CPPUNIT_ASSERT_EQUAL('\0', buf4.data()[len]);
+    CHECK( buf4.data()[len] == '\0' );
 
     wxCharBuffer buf5(5);
     buf5.extend(len);
-    CPPUNIT_ASSERT_EQUAL('\0', buf5.data()[len]);
+    CHECK( buf5.data()[len] == '\0' );
 }
 
-void StringTestCase::SupplementaryUniChar()
+TEST_CASE("StringSupplementaryUniChar", "[wxString]")
 {
     // Test wxString(wxUniChar ch, size_t nRepeat = 1),
     // which is implemented upon assign(size_t n, wxUniChar ch).
     {
         wxString s(wxUniChar(0x12345));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(2, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD808, s[0].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDF45, s[1].GetValue());
+        CHECK( s.length() == 2 );
+        CHECK( s[0].GetValue() == 0xD808 );
+        CHECK( s[1].GetValue() == 0xDF45 );
 #else
-        CPPUNIT_ASSERT_EQUAL(1, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x12345, s[0].GetValue());
+        CHECK( s.length() == 1 );
+        CHECK( s[0].GetValue() == 0x12345 );
 #endif
     }
 
@@ -1258,12 +1143,12 @@ void StringTestCase::SupplementaryUniChar()
         wxString s;
         s = wxUniChar(0x23456);
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(2, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD84D, s[0].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDC56, s[1].GetValue());
+        CHECK( s.length() == 2 );
+        CHECK( s[0].GetValue() == 0xD84D );
+        CHECK( s[1].GetValue() == 0xDC56 );
 #else
-        CPPUNIT_ASSERT_EQUAL(1, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x23456, s[0].GetValue());
+        CHECK( s.length() == 1 );
+        CHECK( s[0].GetValue() == 0x23456 );
 #endif
     }
 
@@ -1272,12 +1157,12 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = "A";
         s += wxUniChar(0x34567);
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(3, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+        CHECK( s.length() == 3 );
+        CHECK( s[1].GetValue() == 0xD891 );
+        CHECK( s[2].GetValue() == 0xDD67 );
 #else
-        CPPUNIT_ASSERT_EQUAL(2, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+        CHECK( s.length() == 2 );
+        CHECK( s[1].GetValue() == 0x34567 );
 #endif
     }
 
@@ -1287,12 +1172,12 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = "A";
         s << wxUniChar(0x45678);
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(3, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
+        CHECK( s.length() == 3 );
+        CHECK( s[1].GetValue() == 0xD8D5 );
+        CHECK( s[2].GetValue() == 0xDE78 );
 #else
-        CPPUNIT_ASSERT_EQUAL(2, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
+        CHECK( s.length() == 2 );
+        CHECK( s[1].GetValue() == 0x45678 );
 #endif
     }
 
@@ -1301,15 +1186,15 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = L"\x3042\x208\x3059";
         s.insert(1, 2, wxUniChar(0x12345));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(7, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD808, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDF45, s[2].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xD808, s[3].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDF45, s[4].GetValue());
+        CHECK( s.length() == 7 );
+        CHECK( s[1].GetValue() == 0xD808 );
+        CHECK( s[2].GetValue() == 0xDF45 );
+        CHECK( s[3].GetValue() == 0xD808 );
+        CHECK( s[4].GetValue() == 0xDF45 );
 #else
-        CPPUNIT_ASSERT_EQUAL(5, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x12345, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0x12345, s[2].GetValue());
+        CHECK( s.length() == 5 );
+        CHECK( s[1].GetValue() == 0x12345 );
+        CHECK( s[2].GetValue() == 0x12345 );
 #endif
     }
 
@@ -1318,12 +1203,12 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = L"\x3042\x208\x3059";
         s.insert(s.begin() + 1, wxUniChar(0x23456));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(5, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD84D, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDC56, s[2].GetValue());
+        CHECK( s.length() == 5 );
+        CHECK( s[1].GetValue() == 0xD84D );
+        CHECK( s[2].GetValue() == 0xDC56 );
 #else
-        CPPUNIT_ASSERT_EQUAL(4, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x23456, s[1].GetValue());
+        CHECK( s.length() == 4 );
+        CHECK( s[1].GetValue() == 0x23456 );
 #endif
     }
 
@@ -1332,12 +1217,12 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = L"\x3042\x208\x3059";
         s.insert(s.begin() + 1, 2, wxUniChar(0x34567));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(7, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
+        CHECK( s.length() == 7 );
+        CHECK( s[1].GetValue() == 0xD891 );
+        CHECK( s[2].GetValue() == 0xDD67 );
 #else
-        CPPUNIT_ASSERT_EQUAL(5, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
+        CHECK( s.length() == 5 );
+        CHECK( s[1].GetValue() == 0x34567 );
 #endif
     }
 
@@ -1346,15 +1231,15 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = L"\x3042\x208\x3059";
         s.replace(1, 2, 2, wxUniChar(0x45678));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(5, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDE78, s[2].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xD8D5, s[3].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDE78, s[4].GetValue());
+        CHECK( s.length() == 5 );
+        CHECK( s[1].GetValue() == 0xD8D5 );
+        CHECK( s[2].GetValue() == 0xDE78 );
+        CHECK( s[3].GetValue() == 0xD8D5 );
+        CHECK( s[4].GetValue() == 0xDE78 );
 #else
-        CPPUNIT_ASSERT_EQUAL(3, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x45678, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0x45678, s[2].GetValue());
+        CHECK( s.length() == 3 );
+        CHECK( s[1].GetValue() == 0x45678 );
+        CHECK( s[2].GetValue() == 0x45678 );
 #endif
     }
 
@@ -1363,15 +1248,15 @@ void StringTestCase::SupplementaryUniChar()
         wxString s = L"\x3042\x208\x3059";
         s.replace(s.begin() + 1, s.end(), 2, wxUniChar(0x34567));
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(5, s.length());
-        CPPUNIT_ASSERT_EQUAL(0xD891, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDD67, s[2].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xD891, s[3].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0xDD67, s[4].GetValue());
+        CHECK( s.length() == 5 );
+        CHECK( s[1].GetValue() == 0xD891 );
+        CHECK( s[2].GetValue() == 0xDD67 );
+        CHECK( s[3].GetValue() == 0xD891 );
+        CHECK( s[4].GetValue() == 0xDD67 );
 #else
-        CPPUNIT_ASSERT_EQUAL(3, s.length());
-        CPPUNIT_ASSERT_EQUAL(0x34567, s[1].GetValue());
-        CPPUNIT_ASSERT_EQUAL(0x34567, s[2].GetValue());
+        CHECK( s.length() == 3 );
+        CHECK( s[1].GetValue() == 0x34567 );
+        CHECK( s[2].GetValue() == 0x34567 );
 #endif
     }
 
@@ -1384,17 +1269,17 @@ void StringTestCase::SupplementaryUniChar()
         s += wxUniChar(0x12345);
         s += "y";
 #if wxUSE_UNICODE_UTF16
-        CPPUNIT_ASSERT_EQUAL(8, s.length());
-        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
-        CPPUNIT_ASSERT_EQUAL(5, s.find(wxUniChar(0x12345), 3));
-        CPPUNIT_ASSERT_EQUAL(5, s.rfind(wxUniChar(0x12345)));
-        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 4));
+        CHECK( s.length() == 8 );
+        CHECK( s.find(wxUniChar(0x12345)) == 2 );
+        CHECK( s.find(wxUniChar(0x12345), 3) == 5 );
+        CHECK( s.rfind(wxUniChar(0x12345)) == 5 );
+        CHECK( s.rfind(wxUniChar(0x12345), 4) == 2 );
 #else
-        CPPUNIT_ASSERT_EQUAL(6, s.length());
-        CPPUNIT_ASSERT_EQUAL(2, s.find(wxUniChar(0x12345)));
-        CPPUNIT_ASSERT_EQUAL(4, s.find(wxUniChar(0x12345), 3));
-        CPPUNIT_ASSERT_EQUAL(4, s.rfind(wxUniChar(0x12345)));
-        CPPUNIT_ASSERT_EQUAL(2, s.rfind(wxUniChar(0x12345), 3));
+        CHECK( s.length() == 6 );
+        CHECK( s.find(wxUniChar(0x12345)) == 2 );
+        CHECK( s.find(wxUniChar(0x12345), 3) == 4 );
+        CHECK( s.rfind(wxUniChar(0x12345)) == 4 );
+        CHECK( s.rfind(wxUniChar(0x12345), 3) == 2 );
 #endif
     }
 

From 92649ca2e6f982d80314eb8dd758941a6894910a Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Mon, 27 Mar 2023 21:26:24 +0200
Subject: [PATCH 18/27] Fix formatting string_view in UTF-8 build

Don't use string_view data directly, as this doesn't respect its length
and would use the entire rest of the string this view is based on.

Instead, make a copy of just the part corresponding to the view to
ensure that it is NUL-terminated and also use a temporary buffer to hold
it to ensure that it lives long enough.
---
 include/wx/strvararg.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/wx/strvararg.h b/include/wx/strvararg.h
index 9161e87d59..4193079847 100644
--- a/include/wx/strvararg.h
+++ b/include/wx/strvararg.h
@@ -824,11 +824,19 @@ struct wxArgNormalizerUtf8<const std::string&>
 #ifdef __cpp_lib_string_view
 template<>
 struct wxArgNormalizerUtf8<const std::string_view&>
-    : public wxArgNormalizerUtf8<const char*>
 {
     wxArgNormalizerUtf8(const std::string_view& v,
                         const wxFormatString *fmt, unsigned index)
-        : wxArgNormalizerUtf8<const char*>(v.data(), fmt, index) {}
+        : m_str{v}
+    {
+        wxASSERT_ARG_TYPE( fmt, index, wxFormatString::Arg_String );
+    }
+
+    const char* get() const { return m_str.c_str(); }
+
+    // We need to store this string to ensure that we use a NUL-terminated
+    // buffer, i.e. we can't use string_view data directly.
+    const std::string m_str;
 };
 #endif // __cpp_lib_string_view
 

From 378f3860d2276548e7cba018888df3100315a773 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 00:36:49 +0100
Subject: [PATCH 19/27] Allow setting locale for the tests

This is especially useful to set a UTF-8 locale on startup to force
using UTF-8-specific code in the UTF-8 build.
---
 tests/test.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test.cpp b/tests/test.cpp
index 591c052b6c..aa8c28d023 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -633,6 +633,13 @@ bool TestApp::OnInit()
     cout << " as " << wxGetUserId()
          << std::endl;
 
+    // Optionally allow executing the tests in the locale specified by the
+    // standard environment variable, this is especially useful to use UTF-8
+    // for all tests by just setting WX_TEST_LOCALE=C.
+    wxString testLoc;
+    if ( wxGetEnv(wxASCII_STR("WX_TEST_LOCALE"), &testLoc) )
+        wxSetlocale(LC_ALL, testLoc);
+
 #if wxUSE_GUI
     // create a parent window to be used as parent for the GUI controls
     new wxTestableFrame();

From 1ac57942ae8d2f37559de543996b2aef2832a1a8 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 12:47:26 +0100
Subject: [PATCH 20/27] Use range-for loop over wxString in
 Catch::StringMaker<wxString>

No changes, just simplify the code by using range-for.
---
 include/wx/catch_cppunit.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/wx/catch_cppunit.h b/include/wx/catch_cppunit.h
index f688077bd5..dc8e64024a 100644
--- a/include/wx/catch_cppunit.h
+++ b/include/wx/catch_cppunit.h
@@ -90,14 +90,12 @@ namespace Catch
         {
             std::string s;
             s.reserve(wxs.length());
-            for ( wxString::const_iterator i = wxs.begin();
-                  i != wxs.end();
-                  ++i )
+            for ( auto c : wxs )
             {
-                if ( !iswprint(*i) )
-                    s += wxString::Format(wxASCII_STR("\\u%04X"), *i).ToAscii();
+                if ( !iswprint(c) )
+                    s += wxString::Format(wxASCII_STR("\\u%04X"), c).ToAscii();
                 else
-                    s += *i;
+                    s += c;
             }
 
             return s;

From a098fb1ed65820ad6b1961363a29459c02c45f66 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 12:48:04 +0100
Subject: [PATCH 21/27] Make Catch::StringMaker<wxString> more robust

Avoid asserts which could happen when converting a non-ASCII character
to char by representing such characters as hex escapes too, as we
already did for non-printable characters.

This gives correct error messages for expressions involving strings
instead of "unexpected exception" due to a failing assert in wxUniChar
char conversion operator.
---
 include/wx/catch_cppunit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/wx/catch_cppunit.h b/include/wx/catch_cppunit.h
index dc8e64024a..c9b52dc157 100644
--- a/include/wx/catch_cppunit.h
+++ b/include/wx/catch_cppunit.h
@@ -92,7 +92,7 @@ namespace Catch
             s.reserve(wxs.length());
             for ( auto c : wxs )
             {
-                if ( !iswprint(c) )
+                if ( c >= 128 || !iswprint(c) )
                     s += wxString::Format(wxASCII_STR("\\u%04X"), c).ToAscii();
                 else
                     s += c;

From e4e3e7eb88de3b267709a891729b234d58b62e72 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 13:44:30 +0100
Subject: [PATCH 22/27] Put quotes around strings in
 Catch::StringMaker<wxString>

This makes the output in case of test failure more readable, especially
if one of the strings is empty.
---
 include/wx/catch_cppunit.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/wx/catch_cppunit.h b/include/wx/catch_cppunit.h
index c9b52dc157..b58a7b5e7a 100644
--- a/include/wx/catch_cppunit.h
+++ b/include/wx/catch_cppunit.h
@@ -89,7 +89,8 @@ namespace Catch
         static std::string convert(const wxString& wxs)
         {
             std::string s;
-            s.reserve(wxs.length());
+            s.reserve(wxs.length() + 2);
+            s += '"';
             for ( auto c : wxs )
             {
                 if ( c >= 128 || !iswprint(c) )
@@ -97,6 +98,7 @@ namespace Catch
                 else
                     s += c;
             }
+            s += '"';
 
             return s;
         }

From a1d289fe3ea74aa1c713e0f02f5fd5f83810af58 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 15:49:00 +0100
Subject: [PATCH 23/27] Fix recognizing locales using UTF-8 charset

Do not assume that C locale uses UTF-8, as this is simply not true and
none of the CRT functions handle UTF-8 correctly with this locale.

Do recognize locales explicitly using UTF-8 charset as being in UTF-8.
On most Unix systems (including Linux), it didn't really matter that we
didn't do it, because we used nl_langinfo() there, but it does matter a
lot with MSVC under MSW whose CRT supports UTF-8 now, but UTF-8
functions were not used there -- do use them now.
---
 src/common/wxcrt.cpp | 48 ++++++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/src/common/wxcrt.cpp b/src/common/wxcrt.cpp
index bf00eecc86..1e8e03a6b1 100644
--- a/src/common/wxcrt.cpp
+++ b/src/common/wxcrt.cpp
@@ -1041,6 +1041,19 @@ char *strdup(const char *s)
 bool wxLocaleIsUtf8 = false; // the safer setting if not known
 #endif
 
+static bool wxIsCharsetUtf8(const char* charset)
+{
+    if ( strcmp(charset, "UTF-8") == 0 ||
+         strcmp(charset, "utf-8") == 0 ||
+         strcmp(charset, "UTF8") == 0 ||
+         strcmp(charset, "utf8") == 0 )
+    {
+        return true;
+    }
+
+    return false;
+}
+
 static bool wxIsLocaleUtf8()
 {
     // NB: we intentionally don't use wxLocale::GetSystemEncodingName(),
@@ -1051,31 +1064,28 @@ static bool wxIsLocaleUtf8()
     // GNU libc provides current character set this way (this conforms to
     // Unix98)
     const char *charset = nl_langinfo(CODESET);
-    if ( charset )
-    {
-        // "UTF-8" is used by modern glibc versions, but test other variants
-        // as well, just in case:
-        if ( strcmp(charset, "UTF-8") == 0 ||
-             strcmp(charset, "utf-8") == 0 ||
-             strcmp(charset, "UTF8") == 0 ||
-             strcmp(charset, "utf8") == 0 )
-        {
-            return true;
-        }
-    }
+    if ( charset && wxIsCharsetUtf8(charset) )
+        return true;
 #endif // HAVE_LANGINFO_H
 
-    // check if we're running under the "C" locale: it is 7bit subset
-    // of UTF-8, so it can be safely used with the UTF-8 build:
+    // check LC_CTYPE string: this also works with (sufficiently recent) MSVC
+    // and on any other system without nl_langinfo()
     const char *lc_ctype = setlocale(LC_CTYPE, nullptr);
-    if ( lc_ctype &&
-         (strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
+    if ( lc_ctype )
     {
-        return true;
+        // check if we're running under the "C" locale: it is 7bit subset
+        // of UTF-8, so it can be safely used with the UTF-8 build:
+        if ( (strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
+            return true;
+
+        // any other locale can also use UTF-8 encoding if it's explicitly
+        // specified
+        const char* charset = strrchr(lc_ctype, '.');
+        if ( charset && wxIsCharsetUtf8(charset + 1) )
+            return true;
     }
 
-    // we don't know what charset libc is using, so assume the worst
-    // to be safe:
+    // by default assume that we don't use UTF-8
     return false;
 }
 

From 488950f724047835dd444d2beb2f8b0a6d9e4fe2 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 16:50:11 +0100
Subject: [PATCH 24/27] Implement wxString::Shrink() in terms of
 shrink_to_fit()

Now that we use C++11 there is no need to have our own Shrink()
implementation when we can just use the standard function.

Also mention that it's the same as shrink_to_fit() in Shrink()
documentation.

No real changes, just simplify the code and make it more efficient.
---
 include/wx/string.h   | 5 ++---
 interface/wx/string.h | 5 ++++-
 src/common/string.cpp | 8 --------
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index 81f564801e..e07f559ba1 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -1368,7 +1368,7 @@ public:
   size_type capacity() const { return m_impl.capacity(); }
   void reserve(size_t sz) { m_impl.reserve(sz); }
 
-  void shrink_to_fit() { Shrink(); }
+  void shrink_to_fit() { m_impl.shrink_to_fit(); }
 
   void resize(size_t nSize, wxUniChar ch = wxT('\0'))
   {
@@ -2241,8 +2241,7 @@ public:
     // only works if the data of this string is not shared
   bool Alloc(size_t nLen) { reserve(nLen); return capacity() >= nLen; }
     // minimize the string's memory
-    // only works if the data of this string is not shared
-  bool Shrink();
+  bool Shrink() { shrink_to_fit(); return true; }
 
   // wxWidgets version 1 compatibility functions
 
diff --git a/interface/wx/string.h b/interface/wx/string.h
index 3aecdc6596..e872d25a54 100644
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -1422,7 +1422,7 @@ public:
         wxStringBuffer and wxStringBufferLength classes may be very useful when working
         with some external API which requires the caller to provide a writable buffer.
 
-        See also the reserve() and resize() STL-like functions.
+        See also the reserve(), resize() and shrink_to_fit() STL-like functions.
     */
     ///@{
 
@@ -1468,6 +1468,9 @@ public:
     /**
         Minimizes the string's memory.
 
+        Please note that this method does the same thing as the standard
+        shrink_to_fit() one and shouldn't be used in new code.
+
         This can be useful after a call to Alloc() if too much memory were
         preallocated.
 
diff --git a/src/common/string.cpp b/src/common/string.cpp
index 2c8c91b754..600ad6a943 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -514,14 +514,6 @@ const char *wxString::AsChar(const wxMBConv& conv) const
     return m_convertedToChar.m_str;
 }
 
-// shrink to minimal size (releasing extra memory)
-bool wxString::Shrink()
-{
-  wxString tmp(begin(), end());
-  swap(tmp);
-  return true;
-}
-
 // ---------------------------------------------------------------------------
 // data access
 // ---------------------------------------------------------------------------

From 5a9e433524020dc6b653183bdd338bb3647902a9 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 17:09:46 +0100
Subject: [PATCH 25/27] Fix handling non-ASCII format strings in UTF-8 build

Constructing wxString from "char*" format string was wrong as it
interpreted it in the current locale encoding which could be different
from UTF-8.
---
 src/common/string.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/common/string.cpp b/src/common/string.cpp
index 600ad6a943..a8706e854b 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1753,7 +1753,7 @@ int wxString::DoPrintfUtf8(const char *format, ...)
     va_list argptr;
     va_start(argptr, format);
 
-    int iLen = PrintfV(format, argptr);
+    int iLen = PrintfV(wxString::FromUTF8(format), argptr);
 
     va_end(argptr);
 

From ae13c2592e675030f755b23448796e0c7dd7d45f Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 17:10:35 +0100
Subject: [PATCH 26/27] Reimplement wxUTF8StringBuffer correctly and more
 efficiently

This buffer class can avoid copying strings entirely in the UTF-8 build,
as it can write directly to the storage provided by the underlying
std::string.

It also needs to keep the contents in UTF-8 instead of converting them
from the current locale encoding, which was at best useless, and to
ensure that the contents are valid UTF-8. The latter is necessary with at
least MSVC, as its CRT can return invalid UTF-8 strings even when using
this encoding.

This finally fixes "PrintfError" unit test under MSW.
---
 include/wx/string.h   | 87 ++++++++++++++++++++++++++++++++++++++++++-
 src/common/string.cpp |  8 +---
 2 files changed, 87 insertions(+), 8 deletions(-)

diff --git a/include/wx/string.h b/include/wx/string.h
index e07f559ba1..6b6b0af3ae 100644
--- a/include/wx/string.h
+++ b/include/wx/string.h
@@ -3557,6 +3557,8 @@ private:
 
   friend class WXDLLIMPEXP_FWD_BASE wxStringIteratorNode;
   friend class WXDLLIMPEXP_FWD_BASE wxUniCharRef;
+  friend class wxUTF8StringBuffer;
+  friend class wxUTF8StringBufferLength;
 #endif // wxUSE_UNICODE_UTF8
 
   friend class WXDLLIMPEXP_FWD_BASE wxCStrData;
@@ -3802,8 +3804,89 @@ typedef wxStringInternalBufferLength          wxStringBufferLength;
 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
 
 #if wxUSE_UNICODE_UTF8
-typedef wxStringInternalBuffer                wxUTF8StringBuffer;
-typedef wxStringInternalBufferLength          wxUTF8StringBufferLength;
+
+// Special implementation of buffer classes for UTF-8 build which exploit the
+// fact that we can write directly to std::string used by wxString, avoiding an
+// extra copy which could be significant for long strings.
+
+namespace wxPrivate
+{
+
+class wxUTF8StringBufferBase
+{
+public:
+    using CharType = char;
+
+    ~wxUTF8StringBufferBase()
+    {
+        // This class works only with UTF-8 strings, so we need to check if the
+        // string has valid contents. Note that it isn't an error if it
+        // doesn't, as it can happen that the function we use this buffer with
+        // (e.g. vsnprintf()) writes something invalid into the provided buffer
+        // in some cases.
+        if ( !wxStringOperations::IsValidUtf8String(m_str.c_str()) )
+            m_str.clear();
+    }
+
+    operator char*() const { return const_cast<char*>(m_str.c_str()); }
+
+protected:
+    explicit wxUTF8StringBufferBase(std::string& str, size_t size)
+        : m_str{str}
+    {
+        m_str.resize(size);
+    }
+
+    std::string& m_str;
+
+    wxDECLARE_NO_COPY_CLASS(wxUTF8StringBufferBase);
+};
+
+} // wxPrivate
+
+class wxUTF8StringBuffer : public wxPrivate::wxUTF8StringBufferBase
+{
+public:
+    wxUTF8StringBuffer(wxString& str, size_t size)
+        : wxPrivate::wxUTF8StringBufferBase{str.m_impl, size}
+    {
+    }
+
+    ~wxUTF8StringBuffer()
+    {
+        // This class works only with NUL-terminated strings, so we need to
+        // resize the string to have the correct length.
+        m_str.resize(strlen(m_str.c_str()));
+    }
+
+private:
+    wxDECLARE_NO_COPY_CLASS(wxUTF8StringBuffer);
+};
+
+class wxUTF8StringBufferLength : public wxPrivate::wxUTF8StringBufferBase
+{
+public:
+    wxUTF8StringBufferLength(wxString& str, size_t size)
+        : wxPrivate::wxUTF8StringBufferBase{str.m_impl, size}
+    {
+    }
+
+    ~wxUTF8StringBufferLength()
+    {
+        wxASSERT_MSG( m_lenSet, "forgot to call SetLength()" );
+
+        m_str.resize(m_len);
+    }
+
+    void SetLength(size_t length) { m_len = length; m_lenSet = true; }
+
+protected:
+    size_t m_len = 0;
+    bool m_lenSet = false;
+
+    wxDECLARE_NO_COPY_CLASS(wxUTF8StringBufferLength);
+};
+
 #else // wxUSE_UNICODE_WCHAR
 
 // Note about inlined dtors in the classes below: this is done not for
diff --git a/src/common/string.cpp b/src/common/string.cpp
index a8706e854b..88668b7802 100644
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@@ -1899,16 +1899,12 @@ static int DoStringPrintfV(wxString& str,
 
 int wxString::PrintfV(const wxString& format, va_list argptr)
 {
-#if wxUSE_UNICODE_UTF8
-    typedef wxStringTypeBuffer<char> Utf8Buffer;
-#endif
-
 #if wxUSE_UTF8_LOCALE_ONLY
-    return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
+    return DoStringPrintfV<wxUTF8StringBuffer>(*this, format, argptr);
 #else
     #if wxUSE_UNICODE_UTF8
     if ( wxLocaleIsUtf8 )
-        return DoStringPrintfV<Utf8Buffer>(*this, format, argptr);
+        return DoStringPrintfV<wxUTF8StringBuffer>(*this, format, argptr);
     else
         // wxChar* version
         return DoStringPrintfV<wxStringBuffer>(*this, format, argptr);

From f1f612ea1a179948eb34f68fac40584315ff8aaa Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Tue, 28 Mar 2023 18:00:06 +0100
Subject: [PATCH 27/27] Use correct path for the test on AppVeyor CI in debug
 builds

Don't hardcode the path used by the DLL release configuration.
---
 build/tools/appveyor-test.bat | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/build/tools/appveyor-test.bat b/build/tools/appveyor-test.bat
index b3aa9f60a0..20b4a3ac00 100755
--- a/build/tools/appveyor-test.bat
+++ b/build/tools/appveyor-test.bat
@@ -8,9 +8,12 @@ goto %TOOLSET%
 
 :msbuild
 PATH=C:\projects\wxwidgets\lib\vc_x64_dll;%PATH%
-.\vc_x64_mswudll\test.exe
+if "%CONFIGURATION%"=="DLL Release" set suffix=dll
+if "%CONFIGURATION%"=="DLL Debug" set suffix=ddll
+if "%CONFIGURATION%"=="Debug" set suffix=d
+.\vc_x64_mswu%suffix%\test.exe
 if %errorlevel% NEQ 0 goto :error
-.\vc_x64_mswudll\test_gui.exe
+.\vc_x64_mswu%suffix%\test_gui.exe
 goto :eof
 
 :nmake