You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

66 line
2.9 KiB

  1. <?php
  2. namespace OrpheusNET\Logchecker;
  /**
   * Static helpers used by the log checker: probing for an executable on the
   * PATH and normalising the raw bytes of a rip log to UTF-8.
   */
  class Util
  {
      /**
       * Checks whether a command can be resolved by the system shell.
       *
       * Runs `where` on Windows and `command -v` everywhere else, and reports
       * success when that lookup exits with status 0.
       *
       * @param string $cmd Name of the command to look up. It is passed to the
       *                    shell as a single escaped argument.
       *
       * @return bool True when the lookup command exits with status 0.
       */
      public static function commandExists(string $cmd): bool
      {
          $isWindows = strncasecmp(PHP_OS, 'win', 3) === 0;
          $where = $isWindows ? 'where' : 'command -v';
          // "/dev/null" does not exist for cmd.exe; its null device is "NUL".
          $nullDevice = $isWindows ? 'NUL' : '/dev/null';

          $output = [];
          $returnVar = 1;
          // Escape the argument so shell metacharacters in $cmd (";", "&&", "|", ...)
          // cannot run extra commands or fake a successful exit status.
          exec($where . ' ' . escapeshellarg($cmd) . ' 2>' . $nullDevice, $output, $returnVar);

          return $returnVar === 0;
      }

      /**
       * Converts the contents of a log to UTF-8.
       *
       * Resolution order:
       *  1. Empty input and whipper logs are returned untouched.
       *  2. A UTF-16LE / UTF-16BE / UTF-8 byte-order mark decides the encoding
       *     and is stripped from the result.
       *  3. Otherwise Chardet analyses the file at $logPath and the log is
       *     converted with iconv depending on the reported confidence.
       *
       * @param string $log     Raw bytes of the log.
       * @param string $logPath Path handed to Chardet::analyze() when no BOM
       *                        identifies the encoding.
       *
       * @return string The log re-encoded as UTF-8, or the input unchanged when
       *                no conversion applies.
       *
       * @throws \RuntimeException When iconv cannot convert the log from the
       *                           detected encoding.
       */
      public static function decodeEncoding(string $log, string $logPath): string
      {
          // Nothing to decode; also keeps the byte checks below off an empty string.
          if ($log === '') {
              return $log;
          }

          // Whipper uses UTF-8 so we don't need to bother checking, especially as it's
          // possible a log may be falsely detected as a different encoding by chardet
          if (strpos($log, "Log created by: whipper") !== false) {
              return $log;
          }

          // To parse the log, we want to deal with the log in UTF-8. EAC by default should
          // always output to UTF-16 and XLD to UTF-8, but sometimes people view the log and
          // re-encode them to something else (like Windows-1251), and we need to use chardet
          // to detect this so we can then convert it to UTF-8.
          //
          // strncmp() is safe on inputs shorter than the BOM, unlike direct offset reads.
          // The (string) casts cover PHP 7, where substr() returns false when the
          // input consists of nothing but the BOM.
          if (strncmp($log, "\xFF\xFE", 2) === 0) {
              return (string) mb_convert_encoding((string) substr($log, 2), 'UTF-8', 'UTF-16LE');
          }
          if (strncmp($log, "\xFE\xFF", 2) === 0) {
              return (string) mb_convert_encoding((string) substr($log, 2), 'UTF-8', 'UTF-16BE');
          }
          if (strncmp($log, "\xEF\xBB\xBF", 3) === 0) {
              return (string) substr($log, 3);
          }

          // Only build the detector once we know we actually need it.
          try {
              $chardet = new Chardet();
          } catch (\RuntimeException $exc) {
              // No detector available: leave the log as it is.
              return $log;
          }

          $results = $chardet->analyze($logPath);
          $charset = $results['charset'];
          $confidence = $results['confidence'];

          if ($charset === 'utf-8') {
              return $log;
          }

          if ($confidence > 0.7) {
              // iconv raises a notice on unknown charsets / invalid input; the false
              // return value is checked explicitly instead.
              $decoded = @iconv($charset, 'UTF-8', $log);
              // depending on your iconv version, some encodings may be represented
              // with prefix of mac-* or mac (like maccentraleurope vs mac-centraleurope)
              if ($decoded === false && substr($charset, 0, 3) === 'mac') {
                  $decoded = @iconv('mac-' . substr($charset, 3), 'UTF-8', $log);
              }
              if ($decoded === false) {
                  throw new \RuntimeException('Could not properly decode log encoding');
              }
              return $decoded;
          }

          if ($confidence > 0.3) {
              // If we've got a poor confidence on our decoding, we just use a generic
              // ISO-8859-1 as that covers a decent range of things that people would
              // inadvertently re-encode a log into.
              $decoded = iconv('ISO-8859-1', 'UTF-8', $log);
              if ($decoded === false) {
                  throw new \RuntimeException('Could not properly decode log encoding');
              }
              return $decoded;
          }

          // Confidence too low to trust any guess.
          return $log;
      }
  }