Merge pull request #149 from SimonHeimberg/invalidCharactersUtf8
replace characters invalid for mysql
This commit is contained in:
commit
b82c581e4e
2 changed files with 13 additions and 14 deletions
|
|
@ -176,13 +176,12 @@ class CommandExecutor implements CommandExecutorInterface
|
|||
*/
|
||||
public function replaceIllegalCharacters($utf8String)
|
||||
{
|
||||
mb_substitute_character(0xFFFD); // is '<27>'
|
||||
$legalUtf8String = mb_convert_encoding($utf8String, 'utf8', 'utf8');
|
||||
$regexp = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]' .
|
||||
'|[\x00-\x7F][\x80-\xBF]+' .
|
||||
'|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*' .
|
||||
'|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})' .
|
||||
'|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';
|
||||
'|[^\x{0}-\x{ffff}]/u'; // more than 3 byte UTF-8 sequences (unsupported in mysql)
|
||||
|
||||
return preg_replace($regexp, '<27>', $utf8String);
|
||||
return preg_replace($regexp, '<27>', $legalUtf8String);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -86,24 +86,24 @@ EOD;
|
|||
public function testReplaceIllegalCharacters()
|
||||
{
|
||||
$this->assertEquals(
|
||||
\Normalizer::normalize("start <20> end"),
|
||||
\Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
|
||||
"start <20> end",
|
||||
$this->testedExecutor->replaceIllegalCharacters(
|
||||
"start \xf0\x9c\x83\x96 end"
|
||||
))
|
||||
)
|
||||
);
|
||||
|
||||
$this->assertEquals(
|
||||
\Normalizer::normalize("start <20> end"),
|
||||
\Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
|
||||
"start <20> end",
|
||||
$this->testedExecutor->replaceIllegalCharacters(
|
||||
"start \xF0\x9C\x83\x96 end"
|
||||
))
|
||||
)
|
||||
);
|
||||
|
||||
$this->assertEquals(
|
||||
\Normalizer::normalize("start 123_X08<30>_X00<30>_Xa<EFBFBD>_5432 end"),
|
||||
\Normalizer::normalize($this->testedExecutor->replaceIllegalCharacters(
|
||||
"start 123_X08<30>_X00<30>_Xa4<EFBFBD>_5432 end",
|
||||
$this->testedExecutor->replaceIllegalCharacters(
|
||||
"start 123_X08\x08_X00\x00_Xa4\xa4_5432 end"
|
||||
))
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue