From 48e81efb866a3810f7e9506d2e21cd6d0d2fd962 Mon Sep 17 00:00:00 2001
From: SimonHeimberg
Date: Sat, 3 Feb 2018 13:35:58 +0100
Subject: [PATCH] Fixed build log for invalid encoded utf8 characters. Issue #145.

---
 src/PHPCensor/Helper/CommandExecutor.php | 54 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/src/PHPCensor/Helper/CommandExecutor.php b/src/PHPCensor/Helper/CommandExecutor.php
index 1a2c5bcf..d1e366ff 100644
--- a/src/PHPCensor/Helper/CommandExecutor.php
+++ b/src/PHPCensor/Helper/CommandExecutor.php
@@ -98,26 +98,22 @@ class CommandExecutor implements CommandExecutorInterface
         $pipes = [];
         $process = proc_open($command, $descriptorSpec, $pipes, $this->buildPath, null);
 
-        $this->lastOutput = '';
-        $this->lastError = '';
+        $lastOutput = '';
+        $lastError = '';
 
         if (is_resource($process)) {
             fclose($pipes[0]);
 
-            list($this->lastOutput, $this->lastError) = $this->readAlternating([$pipes[1], $pipes[2]]);
+            list($lastOutput, $lastError) = $this->readAlternating([$pipes[1], $pipes[2]]);
 
             $status = proc_close($process);
+
+            $lastOutput = self::replaceIllegalCharacters($lastOutput);
+            $lastError = self::replaceIllegalCharacters($lastError);
         }
 
-        $regexp = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]' .
-            '|[\x00-\x7F][\x80-\xBF]+' .
-            '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*' .
-            '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})' .
-            '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';
-        $this->lastOutput = preg_replace($regexp, '�', $this->lastOutput);
-        $this->lastError = preg_replace($regexp, '�', $this->lastError);
-
-        $this->lastOutput = array_filter(explode(PHP_EOL, $this->lastOutput));
+        $this->lastOutput = array_filter(explode(PHP_EOL, $lastOutput));
+        $this->lastError = $lastError;
 
         $shouldOutput = ($this->logExecOutput && ($this->verbose || $status != 0));
 
@@ -171,6 +167,40 @@ class CommandExecutor implements CommandExecutorInterface
         return $outputs;
     }
 
+    /**
+     * Replaces invalid UTF-8 byte sequences and selected control characters
+     * with U+FFFD (the Unicode replacement character).
+     *
+     * @param string $utf8String Text that is expected to be UTF-8 encoded.
+     *
+     * @return string The cleaned text.
+     */
+    private static function replaceIllegalCharacters($utf8String)
+    {
+        // chr() wraps its argument modulo 256, so chr(65533) would emit the lone
+        // byte 0xFD; spell out the UTF-8 encoding of U+FFFD instead.
+        $substCharCode = 0xFFFD;
+        $substChar = "\xEF\xBF\xBD";
+
+        // The substitute character is a global mbstring setting: restore it after use.
+        $previousSubstChar = mb_substitute_character();
+        mb_substitute_character($substCharCode);
+        $legalUtf8String = mb_convert_encoding($utf8String, 'utf8', 'utf8');
+        mb_substitute_character($previousSubstChar);
+
+        // NOTE(review): \x10 and the \x0E-\x19 range look like a decimal/hex mix-up;
+        // kept byte-for-byte from the previous inline pattern - confirm the intent.
+        $regexp = '/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]' .
+            '|[\x00-\x7F][\x80-\xBF]+' .
+            '|([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*' .
+            '|[\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})' .
+            '|[\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))|(?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/S';
+        $cleanUtf8String = preg_replace($regexp, $substChar, $legalUtf8String);
+
+        // preg_replace() returns null on a PCRE error; fall back to the mbstring result.
+        return (null === $cleanUtf8String) ? $legalUtf8String : $cleanUtf8String;
+    }
+
     /**
      * Returns the output from the last command run.
      *