/dev/null", $output, $return_var); return $return_var === 0; }

    /**
     * Returns the given rip log converted to UTF-8.
     *
     * Steps, in order:
     *  1. A log containing "Log created by: whipper" is returned untouched.
     *  2. A UTF-16LE or UTF-16BE byte-order mark triggers conversion from that
     *     encoding (the BOM is dropped); a UTF-8 BOM is simply stripped.
     *  3. Otherwise, if a Chardet instance could be created, its analysis of
     *     $logPath decides: a non-UTF-8 charset with confidence above 0.7 is
     *     converted from that charset, one with confidence above 0.3 is
     *     converted from ISO-8859-1, anything else is returned as-is.
     *
     * When Chardet cannot be constructed (it throws \RuntimeException), only
     * steps 1 and 2 apply and a log without a BOM is returned unchanged.
     *
     * @param string $log     Raw bytes of the log.
     * @param string $logPath Path handed to Chardet::analyze().
     *                        NOTE(review): presumably the file $log was read
     *                        from - confirm against callers.
     *
     * @return string The log, re-encoded to UTF-8 where a conversion applied.
     *
     * @throws \RuntimeException If a charset detected with confidence above 0.7
     *                           cannot be converted by iconv.
     */
    public static function decodeEncoding(string $log, string $logPath): string
    {
        try {
            $chardet = new Chardet();
        } catch (\RuntimeException $exc) {
            // No detector available: only the BOM handling below can apply.
            $chardet = null;
        }
        /** @var Chardet|null $chardet */

        // Whipper uses UTF-8 so we don't need to bother checking, especially as
        // chardet could falsely detect such a log as a different encoding.
        if (strpos($log, "Log created by: whipper") !== false) {
            return $log;
        }

        // To parse the log we want UTF-8. EAC should by default write UTF-16 and
        // XLD UTF-8, but people sometimes open a log and re-save it as something
        // else (like Windows-1251), which is what chardet is used to detect.
        //
        // strncmp() compares raw byte prefixes and is safe for logs shorter than
        // the BOM (including an empty log), unlike indexing $log[0..2] directly.
        if (strncmp($log, "\xFF\xFE", 2) === 0) {
            return mb_convert_encoding(substr($log, 2), 'UTF-8', 'UTF-16LE');
        }
        if (strncmp($log, "\xFE\xFF", 2) === 0) {
            return mb_convert_encoding(substr($log, 2), 'UTF-8', 'UTF-16BE');
        }
        if (strncmp($log, "\xEF\xBB\xBF", 3) === 0) {
            return substr($log, 3);
        }

        if ($chardet === null) {
            return $log;
        }

        $results = $chardet->analyze($logPath);
        $charset = $results['charset'];
        if ($charset === 'utf-8') {
            return $log;
        }

        if ($results['confidence'] > 0.7) {
            // iconv reports an unknown charset or undecodable input via a
            // notice/warning plus a false return; the false return is handled
            // explicitly below, so the diagnostic is suppressed on purpose.
            $decoded = @iconv($charset, 'UTF-8', $log);

            // Depending on the iconv implementation, Mac encodings are spelled
            // either with or without a dash (maccentraleurope vs
            // mac-centraleurope), so retry once with the other spelling.
            if ($decoded === false && strncmp($charset, 'mac', 3) === 0) {
                $alias = strncmp($charset, 'mac-', 4) === 0
                    ? 'mac' . substr($charset, 4)
                    : 'mac-' . substr($charset, 3);
                $decoded = @iconv($alias, 'UTF-8', $log);
            }

            if ($decoded === false) {
                throw new \RuntimeException('Could not properly decode log encoding');
            }

            return $decoded;
        }

        if ($results['confidence'] > 0.3) {
            // With poor confidence in the detected charset, fall back to a
            // generic ISO-8859-1, which covers a decent range of what people
            // inadvertently re-encode a log into.
            return iconv('ISO-8859-1', 'UTF-8', $log);
        }

        return $log;
    }
}