You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

mediainfo.py 9.5 KiB

3 jaren geleden
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. import logging
  2. import os
  3. # Third-party modules
  4. from pymediainfo import MediaInfo
  5. import torrent_parser as tp
  6. from pyunpack import Archive
  7. from pathlib import Path
  8. import tempfile
  9. logger = logging.getLogger('main.' + __name__)
  10. def get_mediainfo(torrentfilename, media, media_roots):
  11. """
  12. Get filename(s) of video files in the torrent and run mediainfo and capture the output, extract if DVD found (Blurays not yet supported)
  13. then set the appropriate fields for the upload
  14. :param torrentfilename: str filename of torrent to parse from collate()
  15. :param media: str Validated media from collate()
  16. :param media_roots: Sanitised MediaDirectories from cfg for use by get_media_location()
  17. :return: mediainfo, releasedataout
  18. mediainfo: Mediainfo text output of the file(s)
  19. releaseadtaout: Fields gathered from mediainfo for SM upload
  20. """
  21. torrentmetadata = tp.parse_torrent_file(torrentfilename)
  22. torrentname = torrentmetadata['info']['name'] # Directory if >1 file, otherwise it is filename
  23. # print(torrentmetadata)
  24. mediainfosall = ""
  25. releasedataout = {}
  26. releasedataout['duration'] = 0
  27. # TODO Need to cleanup the logic to create an overall filename list to parse instead of the 3-way duplication we currently have
  28. if 'files' in torrentmetadata['info'].keys(): # Multiple files
  29. directory = torrentname
  30. logger.info(f'According torrent metadata the dir is {directory}')
  31. file_path = get_media_location(directory, True, media_roots)
  32. logger.info(f'Path to dir: {file_path}')
  33. for file in torrentmetadata['info']['files']:
  34. if len(torrentmetadata['info']['files']) == 1: # This might never happen, it could be just info.name if so
  35. filename = os.path.join(*file['path'])
  36. else:
  37. releasedataout['multiplefiles'] = True
  38. filename = os.path.join(*[file_path, *file['path']]) # Each file in the directory of source data for the torrent
  39. mediainfosall += str(MediaInfo.parse(filename, text=True))
  40. releasedataout['duration'] += get_mediainfo_duration(filename)
  41. # Get biggest file and mediainfo on this to set the fields for the release
  42. maxfile = max(torrentmetadata['info']['files'], key=lambda x: x['length']) # returns {'length': int, 'path': [str]} of largest file
  43. fileforsmfields = Path(*[file_path, *maxfile['path']]) # Assume the largest file is the main file that should populate SM upload fields
  44. else: # Single file
  45. releasedataout['multiplefiles'] = False
  46. filename = torrentname
  47. file_path = get_media_location(filename, False, media_roots)
  48. logger.debug(f'Filename for mediainfo: {file_path}')
  49. mediainfosall += str(MediaInfo.parse(file_path, text=True))
  50. releasedataout['duration'] += get_mediainfo_duration(file_path)
  51. fileforsmfields = file_path
  52. if fileforsmfields.suffix == '.iso' and media == 'DVD':
  53. # If DVD, extract the ISO and run mediainfo against appropriate files, if BR we skip as pyunpack (patool/7z) cannot extract them
  54. releasedataout['container'] = 'ISO'
  55. logger.info(f'Extracting ISO {fileforsmfields} to obtain mediainfo on it...')
  56. isovideoextensions = ('.vob', '.m2ts')
  57. tempdir = tempfile.TemporaryDirectory()
  58. Archive(fileforsmfields).extractall(tempdir.name)
  59. dir_files = []
  60. for root, subFolder, files in os.walk(tempdir.name):
  61. for item in files:
  62. filenamewithpath = os.path.join(root, item)
  63. dir_files.append(filenamewithpath)
  64. if list(filter(filenamewithpath.lower().endswith,
  65. isovideoextensions)): # Only gather mediainfo for DVD video files (BR when supported)
  66. mediainfosall += str(MediaInfo.parse(filenamewithpath, text=True))
  67. releasedataout['duration'] += get_mediainfo_duration(filenamewithpath)
  68. filesize = lambda f: os.path.getsize(f)
  69. fileforsmfields = sorted(dir_files, key=filesize)[-1] # Assume the largest file is the main file that should populate SM upload fields
  70. # Now we have decided which file will have its mediainfo parsed for SM fields, parse its mediainfo
  71. mediainforeleasedata = MediaInfo.parse(fileforsmfields)
  72. # Remove path to file in case it reveals usernames etc.
  73. replacement = str(Path(file_path).parent)
  74. mediainfosall = mediainfosall.replace(replacement, '')
  75. if Path(fileforsmfields).suffix == '.iso' and media == 'DVD':
  76. tempdir.cleanup()
  77. for track in mediainforeleasedata.tracks:
  78. if track.track_type == 'General':
  79. # releasedataout['language'] = track.audio_language_list # Will need to check if this is reliable
  80. if 'container' not in releasedataout: # Not an ISO, only set container if we do not already know its an ISO
  81. releasedataout['container'] = track.file_extension.upper()
  82. else: # We have ISO - get category data based Mediainfo if we have it
  83. if track.file_extension.upper() == 'VOB':
  84. releasedataout['category'] = 'DVD'
  85. elif track.file_extension.upper() == 'M2TS': # Not used yet as we cannot handle Bluray / UDF
  86. releasedataout['category'] = 'Bluray'
  87. if track.track_type == 'Video':
  88. validatecodec = {
  89. "MPEG Video": "MPEG-2",
  90. "AVC": "h264",
  91. "HEVC": "h265",
  92. "MPEG-4 Visual": "DivX", # MPEG-4 Part 2 / h263 , usually xvid / divx
  93. }
  94. for old, new in validatecodec.items():
  95. if track.format == old:
  96. releasedataout['codec'] = new
  97. standardresolutions = {
  98. "3840": "1920",
  99. "1920": "1080",
  100. "1280": "720",
  101. "720": "480",
  102. }
  103. for width, height in standardresolutions.items():
  104. if str(track.width) == width and str(track.height) == height:
  105. releasedataout['ressel'] = height
  106. if 'ressel' in releasedataout.keys(): # Known resolution type, try to determine if interlaced
  107. if track.scan_type == "Interlaced" or track.scan_type == "MBAFF":
  108. releasedataout['ressel'] += "i"
  109. else:
  110. releasedataout['ressel'] += "p" # Sometimes a Progressive encode has no field set
  111. else: # Custom resolution
  112. releasedataout['ressel'] = 'Other'
  113. releasedataout['resolution'] = str(track.width) + "x" + str(track.height)
  114. if track.track_type == 'Audio' or track.track_type == 'Audio #1': # Handle multiple audio streams, we just get data from the first for now
  115. if track.format in ["AAC", "DTS", "PCM", "AC3"]:
  116. releasedataout['audioformat'] = track.format
  117. elif track.format == "AC-3":
  118. releasedataout['audioformat'] = "AC3"
  119. elif track.format == "MPEG Audio" and track.format_profile == "Layer 3":
  120. releasedataout['audioformat'] = "MP3"
  121. elif track.format == "MPEG Audio" and track.format_profile == "Layer 2":
  122. releasedataout['audioformat'] = "MP2"
  123. logger.debug(f'Mediainfo interpreted data: {releasedataout}')
  124. return mediainfosall, releasedataout
  125. def get_mediainfo_duration(filename):
  126. """
  127. Get duration in mediainfo for filename
  128. :param filename:
  129. :return: float ms
  130. """
  131. mediainfo_for_duration = MediaInfo.parse(filename)
  132. for track in mediainfo_for_duration.tracks:
  133. if track.track_type == 'General':
  134. if track.duration is None:
  135. return 0
  136. else:
  137. logger.info(f'Mediainfo duration: {filename} {track.duration}')
  138. return float(track.duration) # time in ms
  139. def get_media_location(media_name, directory, media_roots):
  140. """
  141. Find the location of the directory or file of the source data for getmediainfo()
  142. :param media_name: str name of the file or directory
  143. :param directory: boolean true if dir, false if file
  144. :param fall_back_file: str fall back search cor
  145. :param media_roots: Sanitised MediaDirectories from cfg
  146. :return: full path to file/dir
  147. """
  148. # Find the file/dir and stop on the first hit, hopefully OS-side disk cache will mean this will not take too long
  149. media_location = None
  150. logger.info(f'Searching for {media_name}...')
  151. for media_dir_search in media_roots:
  152. for dirname, dirnames, filenames in os.walk(media_dir_search):
  153. if directory is True:
  154. for subdirname in dirnames:
  155. if subdirname == media_name:
  156. media_location = os.path.join(dirname, subdirname)
  157. return Path(media_dir_search, media_location)
  158. else:
  159. for filename in filenames:
  160. if filename == media_name:
  161. media_location = os.path.join(dirname, filename)
  162. return Path(media_dir_search, media_location)
  163. if media_location is None:
  164. media_not_found_error_msg = f'Mediainfo error - file/directory not found: {media_name} in any of the MediaDirectories specified: {media_roots}'
  165. logger.error(media_not_found_error_msg)
  166. raise RuntimeError(media_not_found_error_msg)