Make HTML pasting code more robust and efficient

Use the values of the StartFragment and EndFragment headers to extract the
HTML fragment from the entire CF_HTML string, instead of searching for the
"<!--StartFragment-->" and "<!--EndFragment-->" comments, which could give
wrong results (e.g. if a StartFragment comment actually appeared inside the
HTML fragment) and was also less efficient.

Also add a simple pseudo-test, disabled by default, which shows the
clipboard contents if HTML is available on it.
This commit is contained in:
Vadim Zeitlin 2023-06-21 20:10:13 +01:00
parent 355db874bc
commit c7d414bbed
2 changed files with 47 additions and 15 deletions

View file

@ -437,6 +437,9 @@ bool wxTextDataObject::SetData(size_t len, const void *buf)
namespace wxMSWClip
{
// Header with which CF_HTML data is expected to start: it is used as a sanity
// check before trying to parse the data. The length is computed once here and
// doesn't include the terminating NUL.
const char* const VERSION_HEADER = "Version:";
const size_t VERSION_HEADER_LEN = strlen(VERSION_HEADER);
// NOTE(review): presumably the name of the CF_HTML header giving the offset of
// the start of the HTML part of the data -- confirm against the code using it.
const char* const START_HTML_HEADER = "StartHTML:";
const size_t START_HTML_HEADER_LEN = strlen(START_HTML_HEADER);
@ -515,21 +518,37 @@ void FillFromHTML(char* buffer, const char* html)
*(ptr+END_FRAGMENT_HEADER_LEN+OFFSET_LEN) = '\r';
}
// Extract just the HTML fragment part from CF_HTML data, modifying the
// provided string in place.
void ExtractHTML(wxString& html)
// Extract just the HTML fragment part from CF_HTML data.
wxString ExtractHTML(const char* buffer, size_t len)
{
int fragmentStart = html.rfind("StartFragment");
int fragmentEnd = html.rfind("EndFragment");
if (fragmentStart != wxNOT_FOUND && fragmentEnd != wxNOT_FOUND)
// Sanity check.
if ( len < VERSION_HEADER_LEN ||
wxCRT_StrnicmpA(buffer, VERSION_HEADER, VERSION_HEADER_LEN) != 0 )
{
int startCommentEnd = html.find("-->", fragmentStart) + 3;
int endCommentStart = html.rfind("<!--", fragmentEnd);
if (startCommentEnd != wxNOT_FOUND && endCommentStart != wxNOT_FOUND)
html = html.Mid(startCommentEnd, endCommentStart - startCommentEnd);
// This doesn't look like CF_HTML at all, don't do anything.
return wxString();
}
const char* ptr = strstr(buffer, START_FRAGMENT_HEADER);
if ( !ptr )
return wxString();
ptr += START_FRAGMENT_HEADER_LEN;
const int start = atoi(ptr);
if ( start < 0 || (unsigned)start >= len )
return wxString();
ptr = strstr(ptr, END_FRAGMENT_HEADER);
if ( !ptr )
return wxString();
ptr += END_FRAGMENT_HEADER_LEN;
const int end = atoi(ptr);
if ( end < 0 || end < start || (unsigned)end >= len )
return wxString();
return wxString::FromUTF8(buffer + start, end - start);
}
} // anonymous namespace
@ -579,13 +598,14 @@ bool wxHTMLDataObject::SetData(size_t len, const void *buf)
if ( buf == nullptr )
return false;
// Windows and Mac always use UTF-8, and docs suggest GTK does as well.
wxString html = wxString::FromUTF8(static_cast<const char*>(buf), len);
const char* const buffer = static_cast<const char*>(buf);
#ifdef __WXMSW__
// To be consistent with other platforms, we only add the Fragment part
// of the Windows HTML clipboard format to the data object.
wxMSWClip::ExtractHTML(html);
wxString html = wxMSWClip::ExtractHTML(buffer, len);
#else
wxString html = wxString::FromUTF8(buffer, len);
#endif // __WXMSW__
SetHTML( html );

View file

@ -92,6 +92,18 @@ TEST_CASE("GUI::URLDataObject", "[guifuncs][clipboard]")
CHECK( dobj2.GetURL() == url );
}
// This test is disabled by default and must be run explicitly: it shows the
// HTML currently on the system clipboard, allowing to check manually that we
// retrieve it correctly.
TEST_CASE("GUI::ShowHTML", "[.]")
{
    // Fail immediately, rather than in GetData() below, if the clipboard
    // couldn't be opened.
    wxClipboardLocker lockClip;
    REQUIRE( !!lockClip );

    wxHTMLDataObject dobj;

    // This requires HTML data to be available on the clipboard.
    REQUIRE( wxTheClipboard->GetData(dobj) );

    WARN("Clipboard contents:\n---start---\n" << dobj.GetHTML() << "\n---end---");
}
TEST_CASE("GUI::DataFormatCompare", "[guifuncs][dataformat]")
{
const wxDataFormat df(wxDF_TEXT);