Merge pull request #149 from SimonHeimberg/invalidCharactersUtf8

Replace characters that are invalid for MySQL
This commit is contained in:
Dmitry Khomutov 2018-02-16 15:49:22 +07:00 committed by GitHub
commit b82c581e4e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 13 additions and 14 deletions

View file

@ -176,13 +176,12 @@ class CommandExecutor implements CommandExecutorInterface
*/
public function replaceIllegalCharacters($utf8String)
{
    // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, written as explicit bytes
    // so the literal cannot be damaged when this file is re-encoded.
    $replacement = "\xEF\xBF\xBD";

    // mb_substitute_character() changes a process-wide mbstring setting, so
    // remember the current value and put it back after the conversion.
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character(0xFFFD);
    // Converting UTF-8 to UTF-8 swaps every malformed byte sequence for the
    // substitute character configured above.
    $legalUtf8String = mb_convert_encoding($utf8String, 'UTF-8', 'UTF-8');
    mb_substitute_character($previousSubstitute);

    // NOTE(review): the control-character class stops at \x19, so \x1A-\x1F
    // pass through unchanged, and \x10 is already inside \x0E-\x19 - confirm
    // whether \x0E-\x1F was intended.
    $regexp = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]' . // selected ASCII control characters
        '|[^\x{0}-\x{ffff}]/u'; // more than 3 byte UTF-8 sequences (unsupported in mysql)

    $cleanedString = preg_replace($regexp, $replacement, $legalUtf8String);

    // preg_replace() returns null when PCRE fails; fall back to the
    // mbstring-sanitised text instead of handing null to the caller.
    return (null === $cleanedString) ? $legalUtf8String : $cleanedString;
}
/**

View file

@ -86,24 +86,24 @@ EOD;
/**
 * Checks that replaceIllegalCharacters() substitutes U+FFFD for 4-byte UTF-8
 * sequences, for the control characters \x08 and \x00, and for a stray
 * non-UTF-8 byte, while leaving the surrounding text untouched.
 */
public function testReplaceIllegalCharacters()
{
    // UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, written as explicit bytes
    // so the expectation does not depend on the encoding of this file.
    $replacement = "\xEF\xBF\xBD";

    // A 4-byte UTF-8 sequence collapses into a single replacement character.
    $this->assertSame(
        "start {$replacement} end",
        $this->testedExecutor->replaceIllegalCharacters(
            "start \xf0\x9c\x83\x96 end"
        )
    );

    // Same bytes as above, written with upper-case hex escapes.
    $this->assertSame(
        "start {$replacement} end",
        $this->testedExecutor->replaceIllegalCharacters(
            "start \xF0\x9C\x83\x96 end"
        )
    );

    // Control characters and a lone invalid byte are each replaced
    // individually; the preceding ASCII characters (including the "4") stay.
    $this->assertSame(
        "start 123_X08{$replacement}_X00{$replacement}_Xa4{$replacement}_5432 end",
        $this->testedExecutor->replaceIllegalCharacters(
            "start 123_X08\x08_X00\x00_Xa4\xa4_5432 end"
        )
    );
}
}