⚠ This page is served via a proxy. Original site: https://github.com
This service does not collect credentials or authentication data.
Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -4760,6 +4760,16 @@ function esc_textarea( $text ) {
function esc_xml( $text ) {
$safe_text = wp_check_invalid_utf8( $text );

// Strip invalid XML characters.
$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true );
if ( $is_utf8 ) {
$safe_text = preg_replace(
'/[^\x{9}\x{A}\x{D}\x{20}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u',
'',
$safe_text
);
}

$cdata_regex = '\<\!\[CDATA\[.*?\]\]\>';
$regex = <<<EOF
/
Expand Down
93 changes: 93 additions & 0 deletions tests/phpunit/tests/formatting/escXml.php
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,97 @@ public function data_ignores_cdata_sections() {
),
);
}

/**
* Test that invalid XML control characters are stripped.
*
* @dataProvider data_strips_invalid_xml_characters
*
* @param string $source The source string containing invalid XML characters.
* @param string $expected The expected string with invalid characters removed.
*/
public function test_strips_invalid_xml_characters( $source, $expected ) {
	// esc_xml() only runs its invalid-character stripping when the blog charset is UTF-8.
	update_option( 'blog_charset', 'UTF-8' );

	$this->assertSame( $expected, esc_xml( $source ) );
}

/**
* Data provider for `test_strips_invalid_xml_characters()`.
*
* @return array {
* @type string $source The source string containing invalid XML characters.
* @type string $expected The expected string with invalid characters removed.
* }
*/
public function data_strips_invalid_xml_characters() {
	/*
	 * Each data set is array( $source, $expected ).
	 *
	 * Data sets are named so that a failure identifies the offending case
	 * instead of reporting only a numeric index.
	 */
	return array(
		// Characters below 0x20 other than tab, LF and CR are invalid in XML.
		'vertical tab (0x0B)'                  => array(
			"This contains a vertical tab\x0Bcharacter",
			'This contains a vertical tabcharacter',
		),
		'file separator (0x1C)'                => array(
			"File separator\x1Ctest",
			'File separatortest',
		),
		'NULL byte (0x00)'                     => array(
			"Text with\x00null byte",
			'Text withnull byte',
		),
		'bell character (0x07)'                => array(
			"Bell\x07character",
			'Bellcharacter',
		),
		'multiple invalid characters'          => array(
			"Multiple\x00invalid\x0B\x1Ccharacters\x07here",
			'Multipleinvalidcharactershere',
		),
		// U+FFFE (bytes EF BF BE) lies just above the allowed U+E000-U+FFFD range.
		'noncharacter U+FFFE'                  => array(
			"Noncharacter\xEF\xBF\xBEtest",
			'Noncharactertest',
		),
		// Tab (0x09), LF (0x0A) and CR (0x0D) are the only allowed control characters.
		'valid control characters'             => array(
			"Tab\tlinefeed\ncarriage return\rtest",
			"Tab\tlinefeed\ncarriage return\rtest",
		),
		'mix of valid and invalid characters'  => array(
			"Valid\ttab but\x0Binvalid vertical tab",
			"Valid\ttab butinvalid vertical tab",
		),
		'text without invalid characters'      => array(
			'Normal text with spaces and punctuation!',
			'Normal text with spaces and punctuation!',
		),
		'Unicode characters in the BMP'        => array(
			'Unicode: café, naïve, 日本語',
			'Unicode: café, naïve, 日本語',
		),
		// U+1F600 (bytes F0 9F 98 80) falls in the allowed U+10000-U+10FFFF range.
		'supplementary plane character'        => array(
			"Emoji \xF0\x9F\x98\x80 test",
			"Emoji \xF0\x9F\x98\x80 test",
		),
	);
}

/**
* Test that invalid XML characters outside a CDATA section are stripped and the CDATA section is left intact.
*/
public function test_strips_invalid_xml_characters_outside_cdata() {
	update_option( 'blog_charset', 'UTF-8' );

	// Invalid characters (0x0B, 0x1C, 0x00) sit on either side of the CDATA section.
	$input = "Text\x0Bwith<![CDATA[valid <content>]]>and\x1Cmore\x00invalid";

	$this->assertSame(
		'Textwith<![CDATA[valid <content>]]>andmoreinvalid',
		esc_xml( $input )
	);
}

/**
* Test that the function works correctly when charset is not UTF-8.
*/
public function test_non_utf8_charset_skips_invalid_character_stripping() {
	// With a non-UTF-8 blog charset, esc_xml() bypasses its invalid-character stripping.
	update_option( 'blog_charset', 'ISO-8859-1' );

	$source = "Test\x0Btext";
	$actual = esc_xml( $source );

	$this->assertIsString( $actual );

	/*
	 * Checking the type alone would still pass if the vertical tab were stripped,
	 * so assert that the character survives.
	 */
	$this->assertStringContainsString( "\x0B", $actual );
}
}
Loading