SrtParser.cs 3.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.IO;
  5. using System.Text.RegularExpressions;
  6. using System.Threading;
  7. using MediaBrowser.Model.Extensions;
  8. using MediaBrowser.Model.MediaInfo;
  9. using Microsoft.Extensions.Logging;
  10. namespace MediaBrowser.MediaEncoding.Subtitles
  11. {
  /// <summary>
  /// Reads an SRT subtitle stream and converts every cue it finds into a
  /// <see cref="SubtitleTrackEvent"/>.
  /// </summary>
  public class SrtParser : ISubtitleParser
  {
      /// <summary>
      /// Characters that end the end-timestamp on a timing line; anything after
      /// the first of them is ignored.
      /// </summary>
      private static readonly char[] _timestampTerminators = { ' ', '\t' };

      /// <summary>
      /// Splits a timing line at the <c>--&gt;</c> arrow, swallowing the spaces
      /// and tabs directly around it.
      /// </summary>
      private static readonly Regex _timingSeparator = new Regex(
          @"[\t ]*-->[\t ]*",
          RegexOptions.Compiled);

      /// <summary>
      /// Matches a brace-delimited block of backslash-prefixed tags
      /// (for example <c>{\an8}</c>), which is stripped from the cue text.
      /// </summary>
      private static readonly Regex _overrideBlock = new Regex(
          @"\{(?:\\\d?[\w.-]+(?:\([^\)]*\)|&H?[0-9A-Fa-f]+&|))+\}",
          RegexOptions.IgnoreCase | RegexOptions.Compiled);

      /// <summary>
      /// Matches an already HTML-escaped <c>font</c>, <c>b</c>, <c>u</c>,
      /// <c>i</c> or <c>s</c> tag (opening or closing, with optional
      /// attributes) so that it can be turned back into real markup.
      /// </summary>
      private static readonly Regex _allowedTag = new Regex(
          "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;",
          RegexOptions.IgnoreCase | RegexOptions.Compiled);

      private readonly ILogger _logger;

      /// <summary>
      /// Initializes a new instance of the <see cref="SrtParser"/> class.
      /// </summary>
      /// <param name="logger">Logger that receives a warning for every timing line that cannot be recognized.</param>
      public SrtParser(ILogger logger)
      {
          _logger = logger;
      }

      /// <summary>
      /// Parses the given stream as SRT text.
      /// </summary>
      /// <param name="stream">
      /// The stream to read. It is wrapped in a <see cref="StreamReader"/> that is
      /// disposed before this method returns.
      /// </param>
      /// <param name="cancellationToken">Checked once at the start of every cue.</param>
      /// <returns>A track whose <c>TrackEvents</c> holds the parsed cues in file order.</returns>
      /// <exception cref="OperationCanceledException">Cancellation was requested while parsing.</exception>
      public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
      {
          var trackInfo = new SubtitleTrackInfo();
          var trackEvents = new List<SubtitleTrackEvent>();

          using (var reader = new StreamReader(stream))
          {
              string line;
              while ((line = reader.ReadLine()) != null)
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  // Blank lines between cues carry no information.
                  if (string.IsNullOrWhiteSpace(line))
                  {
                      continue;
                  }

                  // The first non-blank line of a cue is taken verbatim as its id.
                  var subEvent = new SubtitleTrackEvent { Id = line };

                  // The line after the id must be the timing line.
                  line = reader.ReadLine();
                  if (string.IsNullOrWhiteSpace(line))
                  {
                      continue;
                  }

                  var time = _timingSeparator.Split(line);
                  if (time.Length < 2)
                  {
                      // This occurs when subtitle text has an empty line as part of the text:
                      // the text loop stopped at that empty line and the rest of the text is
                      // now being read as if it were a new cue.
                      // TODO: adjust the termination condition in ReadCueText to resolve this.
                      _logger.LogWarning("Unrecognized line in srt: {0}", line);
                      continue;
                  }

                  subEvent.StartPositionTicks = GetTicks(time[0]);
                  subEvent.EndPositionTicks = GetTicks(GetEndTimestamp(time[1]));
                  subEvent.Text = CleanText(ReadCueText(reader));

                  trackEvents.Add(subEvent);
              }
          }

          trackInfo.TrackEvents = trackEvents.ToArray();
          return trackInfo;
      }

      /// <summary>
      /// Cuts the right-hand side of a timing line down to the timestamp itself,
      /// dropping anything that follows the first space or tab.
      /// </summary>
      /// <param name="value">The text found after the <c>--&gt;</c> arrow.</param>
      /// <returns>The end timestamp without any trailing data.</returns>
      private static string GetEndTimestamp(string value)
      {
          var idx = value.IndexOfAny(_timestampTerminators);
          return idx > 0 ? value.Substring(0, idx) : value;
      }

      /// <summary>
      /// Reads lines up to (and consuming) the next empty line or the end of the
      /// input and joins them with <c>ParserValues.NewLine</c>.
      /// </summary>
      /// <param name="reader">Reader positioned just after a timing line.</param>
      /// <returns>The joined cue text; empty when no text line was read.</returns>
      private static string ReadCueText(TextReader reader)
      {
          var lines = new List<string>();

          // Only a truly empty line ends the text; a whitespace-only line is kept.
          string line;
          while (!string.IsNullOrEmpty(line = reader.ReadLine()))
          {
              lines.Add(line);
          }

          return string.Join(ParserValues.NewLine, lines);
      }

      /// <summary>
      /// Normalizes raw cue text: converts <c>\N</c> markers to line breaks, strips
      /// brace-delimited tag blocks, HTML-escapes angle brackets and finally
      /// restores the small set of formatting tags that is allowed through.
      /// </summary>
      /// <param name="text">The joined cue text.</param>
      /// <returns>The cleaned text.</returns>
      private static string CleanText(string text)
      {
          text = text.Replace(@"\N", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);
          text = _overrideBlock.Replace(text, string.Empty);

          // Escape every angle bracket first, then un-escape only the allowed tags.
          text = text.Replace("<", "&lt;", StringComparison.Ordinal);
          text = text.Replace(">", "&gt;", StringComparison.Ordinal);
          return _allowedTag.Replace(text, "<$1$3$7>");
      }

      /// <summary>
      /// Converts a <c>hh:mm:ss.fff</c> or <c>hh:mm:ss,fff</c> timestamp to ticks.
      /// </summary>
      /// <param name="time">The timestamp; surrounding whitespace is ignored.</param>
      /// <returns>The tick count, or 0 when the text matches neither format.</returns>
      private static long GetTicks(string time)
      {
          // TryParseExact requires an exact match, so surrounding whitespace must go first.
          var trimmed = time.Trim();

          if (TimeSpan.TryParseExact(trimmed, @"hh\:mm\:ss\.fff", CultureInfo.InvariantCulture, out var span)
              || TimeSpan.TryParseExact(trimmed, @"hh\:mm\:ss\,fff", CultureInfo.InvariantCulture, out span))
          {
              return span.Ticks;
          }

          return 0;
      }
  }
  83. }