From b52728a62ae45a4f88126c5f4f6fafb04534daf5 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 18:20:38 +0200
Subject: [PATCH 1/9] Fix memory leak of wxClipboard data on exit

When wxClipboard is destroyed as part of the program shutdown,
gdk_selection_owner_get() doesn't return our clipboard widget as owner
any more, so we don't reset the owner when Clear() is called and hence
never free the data.

Do it explicitly if we don't have clipboard ownership in Clear() any
longer to avoid memory leaks -- even though they are mostly harmless (as
they happen only once, on exit), they still show up in the reports of
LSAN and similar tools.
---
 src/gtk/clipbrd.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/gtk/clipbrd.cpp b/src/gtk/clipbrd.cpp
index a3be262f7e..91afdcd344 100644
--- a/src/gtk/clipbrd.cpp
+++ b/src/gtk/clipbrd.cpp
@@ -587,6 +587,15 @@ void wxClipboard::Clear()
         // it will free our data
         SetSelectionOwner(false);
     }
+    else
+    {
+        // We need to free our data directly to avoid leaking memory.
+        delete m_dataPrimary;
+        m_dataPrimary = nullptr;
+
+        delete m_dataClipboard;
+        m_dataClipboard = nullptr;
+    }
 
     m_targetRequested = nullptr;
     m_formatSupported = false;

From 1cf83980a2295b2c35bdd455dab54f57fe5597f9 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 18:38:27 +0100
Subject: [PATCH 2/9] Refactor wxHTMLDataObject to keep wxMSW-specific parts
 together

Also add a link to the official CF_HTML format documentation.

No real changes, just prepare for further ones.
---
 src/common/dobjcmn.cpp | 116 ++++++++++++++++++++++++++---------------
 1 file changed, 74 insertions(+), 42 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index df53042af8..22cb01ddf5 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -429,38 +429,26 @@ bool wxTextDataObject::SetData(size_t len, const void *buf)
 // wxHTMLDataObject
 // ----------------------------------------------------------------------------
 
-size_t wxHTMLDataObject::GetDataSize() const
-{
-    // Ensure that the temporary string returned by GetHTML() is kept alive for
-    // as long as we need it here.
-    const wxString& htmlStr = GetHTML();
-    const wxScopedCharBuffer buffer(htmlStr.utf8_str());
-
-    size_t size = buffer.length();
-
 #ifdef __WXMSW__
-    // On Windows we need to add some stuff to the string to satisfy
-    // its clipboard format requirements.
-    size += 400;
-#endif
 
-    return size;
+// Helper functions for MSW CF_HTML format, see MSDN for more information:
+//
+// https://learn.microsoft.com/en-us/windows/win32/dataxchg/html-clipboard-format
+namespace wxMSWClip
+{
+
+// Return the extra size needed by HTML data in addition to the length of the
+// HTML fragment itself.
+int GetExtraDataSize()
+{
+    // This more than covers the extra contents added by FillFromHTML() below.
+    return 400;
 }
 
-bool wxHTMLDataObject::GetDataHere(void *buf) const
+// Wrap HTML data with the extra information needed by CF_HTML and copy
+// everything into the provided buffer assumed to be of sufficient size.
+void FillFromHTML(char* buffer, const char* html)
 {
-    if ( !buf )
-        return false;
-
-    // Windows and Mac always use UTF-8, and docs suggest GTK does as well.
-    const wxString& htmlStr = GetHTML();
-    const wxScopedCharBuffer html(htmlStr.utf8_str());
-    if ( !html )
-        return false;
-
-    char* const buffer = static_cast<char*>(buf);
-
-#ifdef __WXMSW__
     // add the extra info that the MSW clipboard format requires.
 
         // Create a template string for the HTML header...
@@ -501,24 +489,12 @@ bool wxHTMLDataObject::GetDataHere(void *buf) const
     ptr = strstr(buffer, "EndFragment");
     sprintf(ptr+12, "%08u", (unsigned)(strstr(buffer, "<!--EndFrag") - buffer));
     *(ptr+12+8) = '\r';
-#else
-    strcpy(buffer, html);
-#endif // __WXMSW__
-
-    return true;
 }
 
-bool wxHTMLDataObject::SetData(size_t WXUNUSED(len), const void *buf)
+// Extract just the HTML fragment part from CF_HTML data, modifying the
+// provided string in place.
+void ExtractHTML(wxString& html)
 {
-    if ( buf == nullptr )
-        return false;
-
-    // Windows and Mac always use UTF-8, and docs suggest GTK does as well.
-    wxString html = wxString::FromUTF8(static_cast<const char*>(buf));
-
-#ifdef __WXMSW__
-    // To be consistent with other platforms, we only add the Fragment part
-    // of the Windows HTML clipboard format to the data object.
     int fragmentStart = html.rfind("StartFragment");
     int fragmentEnd = html.rfind("EndFragment");
 
@@ -530,6 +506,62 @@ bool wxHTMLDataObject::SetData(size_t WXUNUSED(len), const void *buf)
         if (startCommentEnd != wxNOT_FOUND && endCommentStart != wxNOT_FOUND)
             html = html.Mid(startCommentEnd, endCommentStart - startCommentEnd);
     }
+}
+
+} // anonymous namespace
+
+#endif // __WXMSW__
+
+size_t wxHTMLDataObject::GetDataSize() const
+{
+    // Ensure that the temporary string returned by GetHTML() is kept alive for
+    // as long as we need it here.
+    const wxString& htmlStr = GetHTML();
+    const wxScopedCharBuffer buffer(htmlStr.utf8_str());
+
+    size_t size = buffer.length();
+
+#ifdef __WXMSW__
+    size += wxMSWClip::GetExtraDataSize();
+#endif
+
+    return size;
+}
+
+bool wxHTMLDataObject::GetDataHere(void *buf) const
+{
+    if ( !buf )
+        return false;
+
+    // Windows and Mac always use UTF-8, and docs suggest GTK does as well.
+    const wxString& htmlStr = GetHTML();
+    const wxScopedCharBuffer html(htmlStr.utf8_str());
+    if ( !html )
+        return false;
+
+    char* const buffer = static_cast<char*>(buf);
+
+#ifdef __WXMSW__
+    wxMSWClip::FillFromHTML(buffer, html);
+#else
+    memcpy(buffer, html, html.length());
+#endif // __WXMSW__
+
+    return true;
+}
+
+bool wxHTMLDataObject::SetData(size_t len, const void *buf)
+{
+    if ( buf == nullptr )
+        return false;
+
+    // Windows and Mac always use UTF-8, and docs suggest GTK does as well.
+    wxString html = wxString::FromUTF8(static_cast<const char*>(buf), len);
+
+#ifdef __WXMSW__
+    // To be consistent with other platforms, we only add the Fragment part
+    // of the Windows HTML clipboard format to the data object.
+    wxMSWClip::ExtractHTML(html);
 #endif // __WXMSW__
 
     SetHTML( html );

From 649843a646b01877c2d7dd80c048b30f60d14455 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 19:03:28 +0100
Subject: [PATCH 3/9] Use symbolic constants in CF_HTML-related code

No real changes, just use constants instead of a bunch of hardcoded
strings and numbers.
---
 src/common/dobjcmn.cpp | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index 22cb01ddf5..f8853499a4 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -437,6 +437,18 @@ bool wxTextDataObject::SetData(size_t len, const void *buf)
 namespace wxMSWClip
 {
 
+const char* const START_HTML_HEADER = "StartHTML:";
+const size_t START_HTML_HEADER_LEN = strlen(START_HTML_HEADER);
+
+const char* const END_HTML_HEADER = "EndHTML:";
+const size_t END_HTML_HEADER_LEN = strlen(END_HTML_HEADER);
+
+const char* const START_FRAGMENT_HEADER = "StartFragment:";
+const size_t START_FRAGMENT_HEADER_LEN = strlen(START_FRAGMENT_HEADER);
+
+const char* const END_FRAGMENT_HEADER = "EndFragment:";
+const size_t END_FRAGMENT_HEADER_LEN = strlen(END_FRAGMENT_HEADER);
+
 // Return the extra size needed by HTML data in addition to the length of the
 // HTML fragment itself.
 int GetExtraDataSize()
@@ -474,21 +486,21 @@ void FillFromHTML(char* buffer, const char* html)
     // necessary header information. Note, wsprintf() truncates the
     // string when you overwrite it so you follow up with code to replace
     // the 0 appended at the end with a '\r'...
-    char *ptr = strstr(buffer, "StartHTML");
-    sprintf(ptr+10, "%08u", (unsigned)(strstr(buffer, "<html>") - buffer));
-    *(ptr+10+8) = '\r';
+    char *ptr = strstr(buffer, START_HTML_HEADER);
+    sprintf(ptr+START_HTML_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<html>") - buffer));
+    *(ptr+START_HTML_HEADER_LEN+8) = '\r';
 
-    ptr = strstr(buffer, "EndHTML");
-    sprintf(ptr+8, "%08u", (unsigned)strlen(buffer));
-    *(ptr+8+8) = '\r';
+    ptr = strstr(buffer, END_HTML_HEADER);
+    sprintf(ptr+END_HTML_HEADER_LEN, "%08u", (unsigned)strlen(buffer));
+    *(ptr+END_HTML_HEADER_LEN+8) = '\r';
 
-    ptr = strstr(buffer, "StartFragment");
-    sprintf(ptr+14, "%08u", (unsigned)(strstr(buffer, "<!--StartFrag") - buffer));
-    *(ptr+14+8) = '\r';
+    ptr = strstr(buffer, START_FRAGMENT_HEADER);
+    sprintf(ptr+START_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--StartFrag") - buffer));
+    *(ptr+START_FRAGMENT_HEADER_LEN+8) = '\r';
 
-    ptr = strstr(buffer, "EndFragment");
-    sprintf(ptr+12, "%08u", (unsigned)(strstr(buffer, "<!--EndFrag") - buffer));
-    *(ptr+12+8) = '\r';
+    ptr = strstr(buffer, END_FRAGMENT_HEADER);
+    sprintf(ptr+END_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--EndFrag") - buffer));
+    *(ptr+END_FRAGMENT_HEADER_LEN+8) = '\r';
 }
 
 // Extract just the HTML fragment part from CF_HTML data, modifying the

From ede59ecb16f3b994dc3bce1ec055bfeed933c12f Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 19:06:01 +0100
Subject: [PATCH 4/9] Add a symbolic constant for the offsets lengths too

Get rid of the last hardcoded number in CF_HTML code.
---
 src/common/dobjcmn.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index f8853499a4..69beb10814 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -486,21 +486,23 @@ void FillFromHTML(char* buffer, const char* html)
     // necessary header information. Note, wsprintf() truncates the
     // string when you overwrite it so you follow up with code to replace
     // the 0 appended at the end with a '\r'...
+    const size_t OFFSET_LEN = 8; // All offsets are formatted using 8 digits.
+
     char *ptr = strstr(buffer, START_HTML_HEADER);
     sprintf(ptr+START_HTML_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<html>") - buffer));
-    *(ptr+START_HTML_HEADER_LEN+8) = '\r';
+    *(ptr+START_HTML_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, END_HTML_HEADER);
     sprintf(ptr+END_HTML_HEADER_LEN, "%08u", (unsigned)strlen(buffer));
-    *(ptr+END_HTML_HEADER_LEN+8) = '\r';
+    *(ptr+END_HTML_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, START_FRAGMENT_HEADER);
     sprintf(ptr+START_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--StartFrag") - buffer));
-    *(ptr+START_FRAGMENT_HEADER_LEN+8) = '\r';
+    *(ptr+START_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, END_FRAGMENT_HEADER);
     sprintf(ptr+END_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--EndFrag") - buffer));
-    *(ptr+END_FRAGMENT_HEADER_LEN+8) = '\r';
+    *(ptr+END_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
 }
 
 // Extract just the HTML fragment part from CF_HTML data, modifying the

From 93729d88755d63c8ccaa5ec282c4df9fe7ae341d Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 19:50:46 +0100
Subject: [PATCH 5/9] Remove extra new lines from HTML copied to clipboard

No other program does it and having these extra new lines means that our
own GetData() returns extra new lines compared to the value passed to
SetData().
---
 src/common/dobjcmn.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index 69beb10814..a086e3282e 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -471,11 +471,11 @@ void FillFromHTML(char* buffer, const char* html)
         "StartFragment:00000000\r\n"
         "EndFragment:00000000\r\n"
         "<html><body>\r\n"
-        "<!--StartFragment -->\r\n");
+        "<!--StartFragment -->");
 
     // Append the HTML...
     strcat(buffer, html);
-    strcat(buffer, "\r\n");
+
     // Finish up the HTML format...
     strcat(buffer,
         "<!--EndFragment-->\r\n"

From 355db874bc290b11401ec55c8c2adca18c185630 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 20:03:10 +0100
Subject: [PATCH 6/9] Fix offsets in HTML copied to clipboard

Notably, use the correct value for the StartFragment: header, which must
contain the start of the HTML fragment data and not the start of the
"<!--StartFragment-->" comment.

Also make this code simpler and more efficient by remembering the
offsets as we're creating the string instead of using strstr() to find
them later.
---
 src/common/dobjcmn.cpp | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index a086e3282e..4639fe5ef7 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -469,39 +469,49 @@ void FillFromHTML(char* buffer, const char* html)
         "StartHTML:00000000\r\n"
         "EndHTML:00000000\r\n"
         "StartFragment:00000000\r\n"
-        "EndFragment:00000000\r\n"
+        "EndFragment:00000000\r\n");
+
+    const size_t startHTML = strlen(buffer);
+
+    strcat(buffer,
         "<html><body>\r\n"
         "<!--StartFragment -->");
 
+    const size_t startFragment = strlen(buffer);
+
     // Append the HTML...
     strcat(buffer, html);
 
+    const size_t endFragment = strlen(buffer);
+
     // Finish up the HTML format...
     strcat(buffer,
         "<!--EndFragment-->\r\n"
         "</body>\r\n"
         "</html>");
 
-    // Now go back, calculate all the lengths, and write out the
-    // necessary header information. Note, wsprintf() truncates the
-    // string when you overwrite it so you follow up with code to replace
-    // the 0 appended at the end with a '\r'...
+    const size_t endHTML = strlen(buffer);
+
+    // Now go back and write out the necessary header information.
+    //
+    // Note, wsprintf() truncates the string when you overwrite it so you
+    // follow up with code to replace the 0 appended at the end with a '\r'.
     const size_t OFFSET_LEN = 8; // All offsets are formatted using 8 digits.
 
     char *ptr = strstr(buffer, START_HTML_HEADER);
-    sprintf(ptr+START_HTML_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<html>") - buffer));
+    sprintf(ptr+START_HTML_HEADER_LEN, "%08zu", startHTML);
     *(ptr+START_HTML_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, END_HTML_HEADER);
-    sprintf(ptr+END_HTML_HEADER_LEN, "%08u", (unsigned)strlen(buffer));
+    sprintf(ptr+END_HTML_HEADER_LEN, "%08zu", endHTML);
     *(ptr+END_HTML_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, START_FRAGMENT_HEADER);
-    sprintf(ptr+START_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--StartFrag") - buffer));
+    sprintf(ptr+START_FRAGMENT_HEADER_LEN, "%08zu", startFragment);
     *(ptr+START_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
 
     ptr = strstr(buffer, END_FRAGMENT_HEADER);
-    sprintf(ptr+END_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "<!--EndFrag") - buffer));
+    sprintf(ptr+END_FRAGMENT_HEADER_LEN, "%08zu", endFragment);
     *(ptr+END_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
 }
 

From c7d414bbed45fc11f04d4a7c417420d7a131ad8a Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 20:10:13 +0100
Subject: [PATCH 7/9] Make HTML pasting code more robust and efficient

Use StartFragment and EndFragment headers values in order to extract the
HTML fragment from the entire CF_HTML string, instead of searching for
"<!--StartFragment-->" and "<!--EndFragment-->" comments which could be
wrong (e.g. if a StartFragment comment actually appeared inside the HTML
fragment) and less efficient too.

Also add a simple pseudo-test, disabled by default, which allows viewing
the clipboard contents if HTML is available on it.
---
 src/common/dobjcmn.cpp  | 50 ++++++++++++++++++++++++++++-------------
 tests/misc/guifuncs.cpp | 12 ++++++++++
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index 4639fe5ef7..f6614d8048 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -437,6 +437,9 @@ bool wxTextDataObject::SetData(size_t len, const void *buf)
 namespace wxMSWClip
 {
 
+const char* const VERSION_HEADER = "Version:";
+const size_t VERSION_HEADER_LEN = strlen(VERSION_HEADER);
+
 const char* const START_HTML_HEADER = "StartHTML:";
 const size_t START_HTML_HEADER_LEN = strlen(START_HTML_HEADER);
 
@@ -515,21 +518,37 @@ void FillFromHTML(char* buffer, const char* html)
     *(ptr+END_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
 }
 
-// Extract just the HTML fragment part from CF_HTML data, modifying the
-// provided string in place.
-void ExtractHTML(wxString& html)
+// Extract just the HTML fragment part from CF_HTML data.
+wxString ExtractHTML(const char* buffer, size_t len)
 {
-    int fragmentStart = html.rfind("StartFragment");
-    int fragmentEnd = html.rfind("EndFragment");
-
-    if (fragmentStart != wxNOT_FOUND && fragmentEnd != wxNOT_FOUND)
+    // Sanity check.
+    if ( len < VERSION_HEADER_LEN ||
+            wxCRT_StrnicmpA(buffer, VERSION_HEADER, VERSION_HEADER_LEN) != 0 )
     {
-        int startCommentEnd = html.find("-->", fragmentStart) + 3;
-        int endCommentStart = html.rfind("<!--", fragmentEnd);
-
-        if (startCommentEnd != wxNOT_FOUND && endCommentStart != wxNOT_FOUND)
-            html = html.Mid(startCommentEnd, endCommentStart - startCommentEnd);
+        // This doesn't look like CF_HTML at all, don't do anything.
+        return wxString();
     }
+
+    const char* ptr = strstr(buffer, START_FRAGMENT_HEADER);
+    if ( !ptr )
+        return wxString();
+
+    ptr += START_FRAGMENT_HEADER_LEN;
+
+    const int start = atoi(ptr);
+    if ( start < 0 || (unsigned)start >= len )
+        return wxString();
+
+    ptr = strstr(ptr, END_FRAGMENT_HEADER);
+    if ( !ptr )
+        return wxString();
+
+    ptr += END_FRAGMENT_HEADER_LEN;
+    const int end = atoi(ptr);
+    if ( end < 0 || end < start || (unsigned)end >= len )
+        return wxString();
+
+    return wxString::FromUTF8(buffer + start, end - start);
 }
 
 } // anonymous namespace
@@ -579,13 +598,14 @@ bool wxHTMLDataObject::SetData(size_t len, const void *buf)
     if ( buf == nullptr )
         return false;
 
-    // Windows and Mac always use UTF-8, and docs suggest GTK does as well.
-    wxString html = wxString::FromUTF8(static_cast<const char*>(buf), len);
+    const char* const buffer = static_cast<const char*>(buf);
 
 #ifdef __WXMSW__
     // To be consistent with other platforms, we only add the Fragment part
     // of the Windows HTML clipboard format to the data object.
-    wxMSWClip::ExtractHTML(html);
+    wxString html = wxMSWClip::ExtractHTML(buffer, len);
+#else
+    wxString html = wxString::FromUTF8(buffer, len);
 #endif // __WXMSW__
 
     SetHTML( html );
diff --git a/tests/misc/guifuncs.cpp b/tests/misc/guifuncs.cpp
index ed6016f5c9..ba4c8d6246 100644
--- a/tests/misc/guifuncs.cpp
+++ b/tests/misc/guifuncs.cpp
@@ -92,6 +92,18 @@ TEST_CASE("GUI::URLDataObject", "[guifuncs][clipboard]")
     CHECK( dobj2.GetURL() == url );
 }
 
+// This disabled by default test allows to check that we retrieve HTML data
+// from the system clipboard correctly.
+TEST_CASE("GUI::ShowHTML", "[.]")
+{
+    wxClipboardLocker lockClip;
+
+    wxHTMLDataObject dobj;
+    REQUIRE( wxTheClipboard->GetData(dobj) );
+
+    WARN("Clipboard contents:\n---start---\n" << dobj.GetHTML() << "\n---end--");
+}
+
 TEST_CASE("GUI::DataFormatCompare", "[guifuncs][dataformat]")
 {
     const wxDataFormat df(wxDF_TEXT);

From e4bec185051cd17db06f463b7e7639e11e10ab77 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 20:19:08 +0100
Subject: [PATCH 8/9] Compute exact length needed for CF_HTML contents

Instead of just adding 400 and leaving unused and uninitialized data in
the clipboard data, compute exactly the size of the data we need.

Also replace multiple strlen() calls that are not really needed because
we already know the various lengths.

Still NUL-terminate clipboard data because everybody else does it, even
if it's not totally clear if it is required.
---
 src/common/dobjcmn.cpp | 75 +++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 33 deletions(-)

diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp
index f6614d8048..04d058dbf2 100644
--- a/src/common/dobjcmn.cpp
+++ b/src/common/dobjcmn.cpp
@@ -452,48 +452,57 @@ const size_t START_FRAGMENT_HEADER_LEN = strlen(START_FRAGMENT_HEADER);
 const char* const END_FRAGMENT_HEADER = "EndFragment:";
 const size_t END_FRAGMENT_HEADER_LEN = strlen(END_FRAGMENT_HEADER);
 
-// Return the extra size needed by HTML data in addition to the length of the
-// HTML fragment itself.
-int GetExtraDataSize()
-{
-    // This more than covers the extra contents added by FillFromHTML() below.
-    return 400;
-}
-
-// Wrap HTML data with the extra information needed by CF_HTML and copy
-// everything into the provided buffer assumed to be of sufficient size.
-void FillFromHTML(char* buffer, const char* html)
-{
-    // add the extra info that the MSW clipboard format requires.
-
-        // Create a template string for the HTML header...
-    strcpy(buffer,
+const char* const CF_HTML_PREAMBLE =
         "Version:0.9\r\n"
         "StartHTML:00000000\r\n"
         "EndHTML:00000000\r\n"
         "StartFragment:00000000\r\n"
-        "EndFragment:00000000\r\n");
+        "EndFragment:00000000\r\n"
+        ;
+const size_t CF_HTML_PREAMBLE_LEN = strlen(CF_HTML_PREAMBLE);
 
-    const size_t startHTML = strlen(buffer);
-
-    strcat(buffer,
+const char* const CF_HTML_WRAP_START =
         "<html><body>\r\n"
-        "<!--StartFragment -->");
+        "<!--StartFragment -->"
+        ;
+const size_t CF_HTML_WRAP_START_LEN = strlen(CF_HTML_WRAP_START);
 
-    const size_t startFragment = strlen(buffer);
-
-    // Append the HTML...
-    strcat(buffer, html);
-
-    const size_t endFragment = strlen(buffer);
-
-    // Finish up the HTML format...
-    strcat(buffer,
+const char* const CF_HTML_WRAP_END =
         "<!--EndFragment-->\r\n"
         "</body>\r\n"
-        "</html>");
+        "</html>"
+        ;
+const size_t CF_HTML_WRAP_END_LEN = strlen(CF_HTML_WRAP_END);
 
-    const size_t endHTML = strlen(buffer);
+
+// Return the extra size needed by HTML data in addition to the length of the
+// HTML fragment itself.
+int GetExtraDataSize()
+{
+    // +1 is for the trailing NUL.
+    return CF_HTML_PREAMBLE_LEN + CF_HTML_WRAP_START_LEN + CF_HTML_WRAP_END_LEN + 1;
+}
+
+// Wrap HTML data with the extra information needed by CF_HTML and copy
+// everything into the provided buffer assumed to be of sufficient size.
+void FillFromHTML(char* buffer, const char* html, size_t lenHTML)
+{
+    // add the extra info that the MSW clipboard format requires.
+
+        // Create a template string for the HTML header...
+    strcpy(buffer, CF_HTML_PREAMBLE);
+    const size_t startHTML = CF_HTML_PREAMBLE_LEN;
+
+    strcat(buffer, CF_HTML_WRAP_START);
+    const size_t startFragment = startHTML + CF_HTML_WRAP_START_LEN;
+
+    // Append the HTML...
+    strncat(buffer, html, lenHTML);
+    const size_t endFragment = startFragment + lenHTML;
+
+    // Finish up the HTML format...
+    strcat(buffer, CF_HTML_WRAP_END);
+    const size_t endHTML = endFragment + CF_HTML_WRAP_END_LEN;
 
     // Now go back and write out the necessary header information.
     //
@@ -585,7 +594,7 @@ bool wxHTMLDataObject::GetDataHere(void *buf) const
     char* const buffer = static_cast<char*>(buf);
 
 #ifdef __WXMSW__
-    wxMSWClip::FillFromHTML(buffer, html);
+    wxMSWClip::FillFromHTML(buffer, html, html.length());
 #else
     memcpy(buffer, html, html.length());
 #endif // __WXMSW__

From 72c5691d2f318997f9d99eefcfcdce33250cbfa0 Mon Sep 17 00:00:00 2001
From: Vadim Zeitlin <vadim@wxwidgets.org>
Date: Wed, 21 Jun 2023 16:30:24 +0200
Subject: [PATCH 9/9] Fix buffer overrun in wxHTMLDataObject under non-MSW
 platforms

Using strcpy() in GetDataHere() added an extra NUL at the end which
didn't fit into the buffer of the size returned by GetDataSize(). This
could also have been fixed by returning an extra byte from the latter
function, but as the string doesn't need to be NUL-terminated,
apparently, just use memcpy() with the correct number of bytes instead.

Also, because the string is not necessarily NUL-terminated, use the
provided length in wxHTMLDataObject::SetData() instead of relying on the
buffer being NUL-terminated and reading uninitialized memory beyond its
size.

Add a unit test confirming that there are no more ASAN errors when using
this class.

Closes #23660.

Co-Authored-By: mcorino <martin@corino.nl>
---
 tests/misc/guifuncs.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tests/misc/guifuncs.cpp b/tests/misc/guifuncs.cpp
index ba4c8d6246..22a736636b 100644
--- a/tests/misc/guifuncs.cpp
+++ b/tests/misc/guifuncs.cpp
@@ -92,6 +92,22 @@ TEST_CASE("GUI::URLDataObject", "[guifuncs][clipboard]")
     CHECK( dobj2.GetURL() == url );
 }
 
+TEST_CASE("GUI::HTMLDataObject", "[guifuncs][clipboard]")
+{
+    const wxString text("<h1>Hello clipboard!</h1>");
+
+    wxHTMLDataObject* const dobj = new wxHTMLDataObject(text);
+    CHECK( dobj->GetHTML() == text );
+
+    wxClipboardLocker lockClip;
+    CHECK( wxTheClipboard->SetData(dobj) );
+    wxTheClipboard->Flush();
+
+    wxHTMLDataObject dobj2;
+    REQUIRE( wxTheClipboard->GetData(dobj2) );
+    CHECK( dobj2.GetHTML() == text );
+}
+
 // This disabled by default test allows to check that we retrieve HTML data
 // from the system clipboard correctly.
 TEST_CASE("GUI::ShowHTML", "[.]")