diff --git a/src/PHPCensor/Helper/CommandExecutor.php b/src/PHPCensor/Helper/CommandExecutor.php
index 77e05069..31ccb6b9 100644
--- a/src/PHPCensor/Helper/CommandExecutor.php
+++ b/src/PHPCensor/Helper/CommandExecutor.php
@@ -176,13 +176,16 @@ class CommandExecutor implements CommandExecutorInterface
      */
     public function replaceIllegalCharacters($utf8String)
     {
+        // The substitute character is process-wide mbstring state: remember the
+        // current setting so it can be put back once the conversion is done.
+        $previousSubstitute = mb_substitute_character();
+        mb_substitute_character(0xFFFD); // is '�'
+        $legalUtf8String = mb_convert_encoding($utf8String, 'utf8', 'utf8');
+        mb_substitute_character($previousSubstitute);
         $regexp = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]' .
-            '|[\x00-\x7F][\x80-\xBF]+' .
-            '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*' .
-            '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})' .
-            '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';
+            '|[^\x{0}-\x{ffff}]/u'; // more than 3 byte UTF-8 sequences (unsupported in mysql)
 
-        return preg_replace($regexp, '�', $utf8String);
+        return preg_replace($regexp, '�', $legalUtf8String);
     }
 
     /**
diff --git a/tests/PHPCensor/Helper/CommandExecutorTest.php b/tests/PHPCensor/Helper/CommandExecutorTest.php
index 831d4599..e5dea453 100644
--- a/tests/PHPCensor/Helper/CommandExecutorTest.php
+++ b/tests/PHPCensor/Helper/CommandExecutorTest.php
@@ -86,24 +86,24 @@ EOD;
     public function testReplaceIllegalCharacters()
     {
         $this->assertEquals(
-            \Normalizer::normalize("start � end"),
-            \Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
+            "start � end",
+            $this->testedExecutor->replaceIllegalCharacters(
                 "start \xf0\x9c\x83\x96 end"
-            ))
+            )
         );
 
         $this->assertEquals(
-            \Normalizer::normalize("start � end"),
-            \Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
+            "start � end",
+            $this->testedExecutor->replaceIllegalCharacters(
                 "start \xF0\x9C\x83\x96 end"
-            ))
+            )
         );
 
         $this->assertEquals(
-            \Normalizer::normalize("start 123_X08�_X00�_Xa�_5432 end"),
-            \Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
+            "start 123_X08�_X00�_Xa4�_5432 end",
+            $this->testedExecutor->replaceIllegalCharacters(
                 "start 123_X08\x08_X00\x00_Xa4\xa4_5432 end"
-            ))
+            )
         );
     }
 }