From b52728a62ae45a4f88126c5f4f6fafb04534daf5 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 21 Jun 2023 18:20:38 +0200 Subject: [PATCH 1/9] Fix memory leak of wxClipboard data on exit When wxClipboard is destroyed as part of the program shutdown, gdk_selection_owner_get() doesn't return our clipboard widget as owner any more, so we don't reset the owner when Clear() is called and hence never free the data. Do it explicitly if we don't have clipboard ownership in Clear() any longer to avoid memory leaks -- even though they are mostly harmless (as they happen only once, on exit), they still show up in LSAN and similar tools reports. --- src/gtk/clipbrd.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/gtk/clipbrd.cpp b/src/gtk/clipbrd.cpp index a3be262f7e..91afdcd344 100644 --- a/src/gtk/clipbrd.cpp +++ b/src/gtk/clipbrd.cpp @@ -587,6 +587,15 @@ void wxClipboard::Clear() // it will free our data SetSelectionOwner(false); } + else + { + // We need to free our data directly to avoid leaking memory. + delete m_dataPrimary; + m_dataPrimary = nullptr; + + delete m_dataClipboard; + m_dataClipboard = nullptr; + } m_targetRequested = nullptr; m_formatSupported = false; From 1cf83980a2295b2c35bdd455dab54f57fe5597f9 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 21 Jun 2023 18:38:27 +0100 Subject: [PATCH 2/9] Refactor wxHTMLDataObject to keep wxMSW-specific parts together Also add a link to the official CF_HTML format documentation. No real changes, just prepare for further ones. --- src/common/dobjcmn.cpp | 116 ++++++++++++++++++++++++++--------------- 1 file changed, 74 insertions(+), 42 deletions(-) diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp index df53042af8..22cb01ddf5 100644 --- a/src/common/dobjcmn.cpp +++ b/src/common/dobjcmn.cpp @@ -429,38 +429,26 @@ bool wxTextDataObject::SetData(size_t len, const void *buf) // wxHTMLDataObject // ---------------------------------------------------------------------------- -size_t wxHTMLDataObject::GetDataSize() const -{ - // Ensure that the temporary string returned by GetHTML() is kept alive for - // as long as we need it here. - const wxString& htmlStr = GetHTML(); - const wxScopedCharBuffer buffer(htmlStr.utf8_str()); - - size_t size = buffer.length(); - #ifdef __WXMSW__ - // On Windows we need to add some stuff to the string to satisfy - // its clipboard format requirements. - size += 400; -#endif - return size; +// Helper functions for MSW CF_HTML format, see MSDN for more information: +// +// https://learn.microsoft.com/en-us/windows/win32/dataxchg/html-clipboard-format +namespace wxMSWClip +{ + +// Return the extra size needed by HTML data in addition to the length of the +// HTML fragment itself. +int GetExtraDataSize() +{ + // This more than covers the extra contents added by FillFromHTML() below. + return 400; } -bool wxHTMLDataObject::GetDataHere(void *buf) const +// Wrap HTML data with the extra information needed by CF_HTML and copy +// everything into the provided buffer assumed to be of sufficient size. +void FillFromHTML(char* buffer, const char* html) { - if ( !buf ) - return false; - - // Windows and Mac always use UTF-8, and docs suggest GTK does as well. - const wxString& htmlStr = GetHTML(); - const wxScopedCharBuffer html(htmlStr.utf8_str()); - if ( !html ) - return false; - - char* const buffer = static_cast(buf); - -#ifdef __WXMSW__ // add the extra info that the MSW clipboard format requires. // Create a template string for the HTML header... @@ -501,24 +489,12 @@ bool wxHTMLDataObject::GetDataHere(void *buf) const ptr = strstr(buffer, "EndFragment"); sprintf(ptr+12, "%08u", (unsigned)(strstr(buffer, "\r\n"); + ""); // Append the HTML... strcat(buffer, html); - strcat(buffer, "\r\n"); + // Finish up the HTML format... strcat(buffer, "\r\n" From 355db874bc290b11401ec55c8c2adca18c185630 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 21 Jun 2023 20:03:10 +0100 Subject: [PATCH 6/9] Fix offsets in HTML copied to clipboard Notable use the correct value for StartFragment: header which must contain the start of the HTML fragment data and not the start of "<--StartFragment-->" comment. Also make this code simpler and more efficient by remembering the offsets as we're creating the string instead of using strstr() to find them later. --- src/common/dobjcmn.cpp | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp index a086e3282e..4639fe5ef7 100644 --- a/src/common/dobjcmn.cpp +++ b/src/common/dobjcmn.cpp @@ -469,39 +469,49 @@ void FillFromHTML(char* buffer, const char* html) "StartHTML:00000000\r\n" "EndHTML:00000000\r\n" "StartFragment:00000000\r\n" - "EndFragment:00000000\r\n" + "EndFragment:00000000\r\n"); + + const size_t startHTML = strlen(buffer); + + strcat(buffer, "\r\n" ""); + const size_t startFragment = strlen(buffer); + // Append the HTML... strcat(buffer, html); + const size_t endFragment = strlen(buffer); + // Finish up the HTML format... strcat(buffer, "\r\n" "\r\n" ""); - // Now go back, calculate all the lengths, and write out the - // necessary header information. Note, wsprintf() truncates the - // string when you overwrite it so you follow up with code to replace - // the 0 appended at the end with a '\r'... + const size_t endHTML = strlen(buffer); + + // Now go back and write out the necessary header information. + // + // Note, wsprintf() truncates the string when you overwrite it so you + // follow up with code to replace the 0 appended at the end with a '\r'. const size_t OFFSET_LEN = 8; // All offsets are formatted using 8 digits. char *ptr = strstr(buffer, START_HTML_HEADER); - sprintf(ptr+START_HTML_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "") - buffer)); + sprintf(ptr+START_HTML_HEADER_LEN, "%08zu", startHTML); *(ptr+START_HTML_HEADER_LEN+OFFSET_LEN) = '\r'; ptr = strstr(buffer, END_HTML_HEADER); - sprintf(ptr+END_HTML_HEADER_LEN, "%08u", (unsigned)strlen(buffer)); + sprintf(ptr+END_HTML_HEADER_LEN, "%08zu", endHTML); *(ptr+END_HTML_HEADER_LEN+OFFSET_LEN) = '\r'; ptr = strstr(buffer, START_FRAGMENT_HEADER); - sprintf(ptr+START_FRAGMENT_HEADER_LEN, "%08u", (unsigned)(strstr(buffer, "" and "" comments which could be wrong (e.g. if a StartFragment comment actually appeared inside the HTML fragment) and less efficient too. Also add a simple pseudo-test, disabled by default, allowing to view the clipboard contents if HTML is available on it. --- src/common/dobjcmn.cpp | 50 ++++++++++++++++++++++++++++------------- tests/misc/guifuncs.cpp | 12 ++++++++++ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/common/dobjcmn.cpp b/src/common/dobjcmn.cpp index 4639fe5ef7..f6614d8048 100644 --- a/src/common/dobjcmn.cpp +++ b/src/common/dobjcmn.cpp @@ -437,6 +437,9 @@ bool wxTextDataObject::SetData(size_t len, const void *buf) namespace wxMSWClip { +const char* const VERSION_HEADER = "Version:"; +const size_t VERSION_HEADER_LEN = strlen(VERSION_HEADER); + const char* const START_HTML_HEADER = "StartHTML:"; const size_t START_HTML_HEADER_LEN = strlen(START_HTML_HEADER); @@ -515,21 +518,37 @@ void FillFromHTML(char* buffer, const char* html) *(ptr+END_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r'; } -// Extract just the HTML fragment part from CF_HTML data, modifying the -// provided string in place. -void ExtractHTML(wxString& html) +// Extract just the HTML fragment part from CF_HTML data. +wxString ExtractHTML(const char* buffer, size_t len) { - int fragmentStart = html.rfind("StartFragment"); - int fragmentEnd = html.rfind("EndFragment"); - - if (fragmentStart != wxNOT_FOUND && fragmentEnd != wxNOT_FOUND) + // Sanity check. + if ( len < VERSION_HEADER_LEN || + wxCRT_StrnicmpA(buffer, VERSION_HEADER, VERSION_HEADER_LEN) != 0 ) { - int startCommentEnd = html.find("-->", fragmentStart) + 3; - int endCommentStart = html.rfind(""); + "" + ; +const size_t CF_HTML_WRAP_START_LEN = strlen(CF_HTML_WRAP_START); - const size_t startFragment = strlen(buffer); - - // Append the HTML... - strcat(buffer, html); - - const size_t endFragment = strlen(buffer); - - // Finish up the HTML format... - strcat(buffer, +const char* const CF_HTML_WRAP_END = "\r\n" "\r\n" - ""); + "" + ; +const size_t CF_HTML_WRAP_END_LEN = strlen(CF_HTML_WRAP_END); - const size_t endHTML = strlen(buffer); + +// Return the extra size needed by HTML data in addition to the length of the +// HTML fragment itself. +int GetExtraDataSize() +{ + // +1 is for the trailing NUL. + return CF_HTML_PREAMBLE_LEN + CF_HTML_WRAP_START_LEN + CF_HTML_WRAP_END_LEN + 1; +} + +// Wrap HTML data with the extra information needed by CF_HTML and copy +// everything into the provided buffer assumed to be of sufficient size. +void FillFromHTML(char* buffer, const char* html, size_t lenHTML) +{ + // add the extra info that the MSW clipboard format requires. + + // Create a template string for the HTML header... + strcpy(buffer, CF_HTML_PREAMBLE); + const size_t startHTML = CF_HTML_PREAMBLE_LEN; + + strcat(buffer, CF_HTML_WRAP_START); + const size_t startFragment = startHTML + CF_HTML_WRAP_START_LEN; + + // Append the HTML... + strncat(buffer, html, lenHTML); + const size_t endFragment = startFragment + lenHTML; + + // Finish up the HTML format... + strcat(buffer, CF_HTML_WRAP_END); + const size_t endHTML = endFragment + CF_HTML_WRAP_END_LEN; // Now go back and write out the necessary header information. // @@ -585,7 +594,7 @@ bool wxHTMLDataObject::GetDataHere(void *buf) const char* const buffer = static_cast(buf); #ifdef __WXMSW__ - wxMSWClip::FillFromHTML(buffer, html); + wxMSWClip::FillFromHTML(buffer, html, html.length()); #else memcpy(buffer, html, html.length()); #endif // __WXMSW__ From 72c5691d2f318997f9d99eefcfcdce33250cbfa0 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Wed, 21 Jun 2023 16:30:24 +0200 Subject: [PATCH 9/9] Fix buffer overrun in wxHTMLDataObject under non-MSW platforms Using strcpy() in GetDataHere() added an extra NUL at the end which didn't fit into the buffer of the size returned by GetDataSize(). This could have been also fixed by returning an extra byte from the latter function, but as the string doesn't need to be NUL-terminated, apparently, just use memcpy() with the correct number of bytes instead. Also, because the string is not necessarily NUL-terminated, use the provided length in wxHTMLDataObject::SetData() instead of relying on the buffer being NUL-terminated and reading uninitialized memory beyond its size. Add a unit test confirming that there are no more ASAN errors when using this class. Closes #23660. Co-Authored-By: mcorino --- tests/misc/guifuncs.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/misc/guifuncs.cpp b/tests/misc/guifuncs.cpp index ba4c8d6246..22a736636b 100644 --- a/tests/misc/guifuncs.cpp +++ b/tests/misc/guifuncs.cpp @@ -92,6 +92,22 @@ TEST_CASE("GUI::URLDataObject", "[guifuncs][clipboard]") CHECK( dobj2.GetURL() == url ); } +TEST_CASE("GUI::HTMLDataObject", "[guifuncs][clipboard]") +{ + const wxString text("

Hello clipboard!

"); + + wxHTMLDataObject* const dobj = new wxHTMLDataObject(text); + CHECK( dobj->GetHTML() == text ); + + wxClipboardLocker lockClip; + CHECK( wxTheClipboard->SetData(dobj) ); + wxTheClipboard->Flush(); + + wxHTMLDataObject dobj2; + REQUIRE( wxTheClipboard->GetData(dobj2) ); + CHECK( dobj2.GetHTML() == text ); +} + // This disabled by default test allows to check that we retrieve HTML data // from the system clipboard correctly. TEST_CASE("GUI::ShowHTML", "[.]")