using N_m3u8DL_RE.Common.Entity;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;

// NOTE(review): the reviewed text of this file had every markup-like "<...>" span removed
// (generic type arguments, parts of string/regex literals, and one chunk of ExtractSub).
// Those spans were reconstructed from how the surrounding code uses them; each reconstructed
// literal is flagged with NOTE(review) below and should be confirmed against version control.
namespace Mp4SubtitleParser
{
    /// <summary>
    /// One subtitle cue collected from a TTML &lt;p&gt; element: its timing, region and content.
    /// </summary>
    class SubEntity
    {
        /// <summary>Value of the cue's <c>begin</c> attribute.</summary>
        public string Begin { get; set; }

        /// <summary>Value of the cue's <c>end</c> attribute; extended when a split cue is merged.</summary>
        public string End { get; set; }

        /// <summary>Value of the cue's <c>region</c> attribute.</summary>
        public string Region { get; set; }

        /// <summary>Content elements of the cue, in document order.</summary>
        public List<XmlElement> Contents { get; set; } = new List<XmlElement>();

        /// <summary>Outer XML of each entry in <see cref="Contents"/>; used for equality checks.</summary>
        public List<string> ContentStrings { get; set; } = new List<string>();

        /// <summary>
        /// Two cues are equal when timing, region and the sequence of content strings all match.
        /// </summary>
        public override bool Equals(object? obj)
        {
            return obj is SubEntity entity
                && Begin == entity.Begin
                && End == entity.End
                && Region == entity.Region
                && ContentStrings.SequenceEqual(entity.ContentStrings);
        }

        /// <summary>
        /// Hashes the same data <see cref="Equals(object?)"/> compares. The content strings are
        /// hashed element by element: hashing the list reference would give equal cues
        /// different hash codes.
        /// </summary>
        public override int GetHashCode()
        {
            var hash = new HashCode();
            hash.Add(Begin);
            hash.Add(End);
            hash.Add(Region);
            foreach (var content in ContentStrings)
            {
                hash.Add(content);
            }
            return hash.ToHashCode();
        }
    }

    /// <summary>
    /// Helpers that read TTML subtitle documents (plain files or MP4 "mdat" payloads)
    /// and convert them into a <see cref="WebVttSub"/>.
    /// </summary>
    public partial class MP4TtmlUtil
    {
        // NOTE(review): the leading "<p.*?>" was reconstructed. Group 1 is the inner content of a <p>.
        [GeneratedRegex("<p.*?>(.+?)<\\/p>")]
        private static partial Regex LabelFixRegex();

        // NOTE(review): only the leading "\\" of this pattern survived; the rest was reconstructed.
        // Matches one complete <tt>...</tt> document.
        [GeneratedRegex("\\<tt[\\s\\S]*?\\<\\/tt\\>")]
        private static partial Regex MultiElementsFixRegex();

        // NOTE(review): the opening-tag part was reconstructed. The code reads group 1 as the
        // image id and group 2 as the Base64 payload.
        [GeneratedRegex("\\<smpte:image.*xml:id=\\\"(.*?)\\\".*\\>([\\s\\S]*?)<\\/smpte:image>")]
        private static partial Regex ImageRegex();

        /// <summary>
        /// Parses an MP4 init segment and reports whether an "stpp" box was found under
        /// moov/trak/mdia/minf/stbl/stsd.
        /// </summary>
        /// <param name="data">Raw bytes of the init segment.</param>
        /// <returns><c>true</c> if the "stpp" callback was invoked during parsing.</returns>
        public static bool CheckInit(byte[] data)
        {
            bool sawSTPP = false;

            //parse init
            new MP4Parser()
                .Box("moov", MP4Parser.Children)
                .Box("trak", MP4Parser.Children)
                .Box("mdia", MP4Parser.Children)
                .Box("minf", MP4Parser.Children)
                .Box("stbl", MP4Parser.Children)
                .FullBox("stsd", MP4Parser.SampleDescription)
                .Box("stpp", (box) =>
                {
                    sawSTPP = true;
                })
                .Parse(data);

            return sawSTPP;
        }

        /// <summary>
        /// Adds <c>segTimeMs * index</c> milliseconds to the <c>begin</c>/<c>end</c> attributes of
        /// every &lt;p&gt; under the first body/div of a TTML document.
        /// </summary>
        /// <param name="xmlSrc">TTML document text.</param>
        /// <param name="segTimeMs">Per-segment offset in milliseconds.</param>
        /// <param name="index">Segment index the offset is multiplied by.</param>
        /// <returns>
        /// The shifted document, or <paramref name="xmlSrc"/> unchanged when it fails the
        /// marker check or has no body/div.
        /// </returns>
        /// <exception cref="FormatException">A begin/end value is not in "HH:mm:ss.fff" form.</exception>
        private static string ShiftTime(string xmlSrc, long segTimeMs, int index)
        {
            string Add(string xmlTime)
            {
                var dt = DateTime.ParseExact(xmlTime, "HH:mm:ss.fff", System.Globalization.CultureInfo.InvariantCulture);
                var ts = TimeSpan.FromMilliseconds(dt.TimeOfDay.TotalMilliseconds + segTimeMs * index);
                // TotalHours rather than Hours: Hours wraps at 24 and would drop whole days
                // once the shifted time reaches 24h.
                return string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0:00}:{1:00}:{2:00}.{3:000}",
                    (int)ts.TotalHours, ts.Minutes, ts.Seconds, ts.Milliseconds);
            }

            // NOTE(review): both marker literals were reconstructed (the reviewed text showed an
            // always-true Contains("")) - confirm.
            if (!xmlSrc.Contains("<tt") || !xmlSrc.Contains("<head>"))
            {
                return xmlSrc;
            }

            var xmlDoc = new XmlDocument();
            xmlDoc.LoadXml(xmlSrc);

            // DocumentElement is the root element even if comments or whitespace follow it.
            var ttNode = xmlDoc.DocumentElement!;
            var nsMgr = new XmlNamespaceManager(xmlDoc.NameTable);
            nsMgr.AddNamespace("ns", ttNode.GetAttribute("xmlns"));

            var bodyNode = ttNode.SelectSingleNode("ns:body", nsMgr);
            if (bodyNode is null)
            {
                return xmlSrc;
            }

            var div = bodyNode.SelectSingleNode("ns:div", nsMgr);
            if (div is null)
            {
                // Nothing to shift; same guard ExtractSub applies to this node.
                return xmlSrc;
            }

            //Parse <p> label
            foreach (XmlElement p in div.SelectNodes("ns:p", nsMgr)!)
            {
                var begin = p.GetAttribute("begin");
                var end = p.GetAttribute("end");
                p.SetAttribute("begin", Add(begin));
                p.SetAttribute("end", Add(end));
            }

            return xmlDoc.OuterXml;
        }

        /// <summary>
        /// Concatenates the trimmed text nodes directly under <paramref name="node"/>, emitting a
        /// line break for each direct child element named "br". Other children are ignored.
        /// </summary>
        private static string GetTextFromElement(XmlElement node)
        {
            var sb = new StringBuilder();
            foreach (XmlNode item in node.ChildNodes)
            {
                if (item.NodeType == XmlNodeType.Text)
                {
                    sb.Append(item.InnerText.Trim());
                }
                else if (item.NodeType == XmlNodeType.Element && item.Name == "br")
                {
                    sb.AppendLine();
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Splits text that may contain several concatenated documents into the individual
        /// matches of <c>MultiElementsFixRegex</c>.
        /// </summary>
        /// <returns>The matched documents in order; an empty list when nothing matches.</returns>
        public static List<string> SplitMultipleRootElements(string xml)
        {
            return MultiElementsFixRegex().Matches(xml).Select(m => m.Value).ToList();
        }

        /// <summary>
        /// Reads each MP4 segment file, decodes every "mdat" payload as UTF-8, splits it into
        /// documents and converts all of them into one <see cref="WebVttSub"/>.
        /// </summary>
        /// <param name="items">Paths of the segment files, in playback order.</param>
        /// <param name="segTimeMs">
        /// When non-zero, documents of the n-th file (0-based) are shifted by <c>segTimeMs * n</c>.
        /// </param>
        /// <param name="baseTimestamp">Passed through to <c>WebVttSub.Parse</c>.</param>
        public static WebVttSub ExtractFromMp4s(IEnumerable<string> items, long segTimeMs, long baseTimestamp = 0L)
        {
            //read ttmls
            var xmls = new List<string>();
            int segIndex = 0;
            foreach (var item in items)
            {
                var dataSeg = File.ReadAllBytes(item);

                //parse media
                new MP4Parser()
                    .Box("mdat", MP4Parser.AllData((data) =>
                    {
                        // Every mdat payload is appended, so a file with multiple mdats
                        // contributes all of its documents.
                        foreach (var xml in SplitMultipleRootElements(Encoding.UTF8.GetString(data)))
                        {
                            xmls.Add(segTimeMs != 0 ? ShiftTime(xml, segTimeMs, segIndex) : xml);
                        }
                    }))
                    .Parse(dataSeg, /* partialOkay= */ false);

                segIndex++;
            }

            return ExtractSub(xmls, baseTimestamp);
        }

        /// <summary>
        /// Reads each TTML file and converts all of them into one <see cref="WebVttSub"/>.
        /// </summary>
        /// <param name="items">Paths of the TTML files, in playback order.</param>
        /// <param name="segTimeMs">
        /// When non-zero, the n-th file (0-based) is shifted by <c>segTimeMs * n</c>.
        /// </param>
        /// <param name="baseTimestamp">Passed through to <c>WebVttSub.Parse</c>.</param>
        public static WebVttSub ExtractFromTTMLs(IEnumerable<string> items, long segTimeMs, long baseTimestamp = 0L)
        {
            //read ttmls
            var xmls = new List<string>();
            int segIndex = 0;
            foreach (var item in items)
            {
                var xml = File.ReadAllText(item);
                xmls.Add(segTimeMs != 0 ? ShiftTime(xml, segTimeMs, segIndex) : xml);
                segIndex++;
            }

            return ExtractSub(xmls, baseTimestamp);
        }

        /// <summary>Wraps <paramref name="text"/> in a detached &lt;span&gt; element.</summary>
        private static XmlElement CreateTextSpan(string text)
        {
            var span = new XmlDocument().CreateElement("span");
            span.InnerText = text;
            return span;
        }

        /// <summary>
        /// Parses the TTML documents, merges cues that were split across documents, and renders
        /// the result as WebVTT text handed to <c>WebVttSub.Parse</c>.
        /// </summary>
        /// <param name="xmls">TTML documents in playback order.</param>
        /// <param name="baseTimestamp">Passed through to <c>WebVttSub.Parse</c>.</param>
        private static WebVttSub ExtractSub(List<string> xmls, long baseTimestamp)
        {
            //parsing
            var xmlDoc = new XmlDocument();
            var finalSubs = new List<SubEntity>();
            XmlNamespaceManager? nsMgr = null;
            var regex = LabelFixRegex();

            foreach (var xmlContent in xmls)
            {
                // NOTE(review): from this guard down to the LoadXml($"<p>...</p>") probe the
                // reviewed text was missing; it was reconstructed from the surviving catch block
                // and its use of `m` and `xmlContentFix` - confirm.
                if (!xmlContent.Contains("<tt"))
                {
                    continue;
                }

                //fix non-standard xml
                var xmlContentFix = xmlContent;
                foreach (Match m in regex.Matches(xmlContent))
                {
                    var inner = m.Groups[1].Value;
                    try
                    {
                        // Probe: does the paragraph content parse as XML on its own?
                        new XmlDocument().LoadXml($"<p>{inner}</p>");
                    }
                    catch (XmlException)
                    {
                        // It does not, so escape it and let the whole document load.
                        xmlContentFix = xmlContentFix.Replace(inner, System.Web.HttpUtility.HtmlEncode(inner));
                    }
                }

                xmlDoc.LoadXml(xmlContentFix);

                // DocumentElement is the root element even if comments or whitespace follow it.
                var ttNode = xmlDoc.DocumentElement!;
                if (nsMgr is null)
                {
                    // Built once from the first document and reused for all later ones.
                    nsMgr = new XmlNamespaceManager(xmlDoc.NameTable);
                    nsMgr.AddNamespace("ns", ttNode.GetAttribute("xmlns"));
                }

                var bodyNode = ttNode.SelectSingleNode("ns:body", nsMgr);
                if (bodyNode is null)
                {
                    continue;
                }

                var div = bodyNode.SelectSingleNode("ns:div", nsMgr);
                if (div is null)
                {
                    continue;
                }

                //PNG Subs
                var imageDic = new Dictionary<string, string>(); //id, Base64
                foreach (Match img in ImageRegex().Matches(xmlDoc.InnerXml))
                {
                    // TryAdd keeps the first payload; Add would throw on a repeated id.
                    imageDic.TryAdd(img.Groups[1].Value, img.Groups[2].Value);
                }

                //Parse <p> label
                foreach (XmlElement p in div.SelectNodes("ns:p", nsMgr)!)
                {
                    var begin = p.GetAttribute("begin");
                    var end = p.GetAttribute("end");
                    var region = p.GetAttribute("region");
                    var bgImg = p.GetAttribute("smpte:backgroundImage");
                    var sub = new SubEntity
                    {
                        Begin = begin,
                        End = end,
                        Region = region
                    };

                    if (string.IsNullOrEmpty(bgImg))
                    {
                        //Collect <span>
                        foreach (XmlNode node in p.ChildNodes)
                        {
                            if (node.NodeType == XmlNodeType.Element)
                            {
                                var span = (XmlElement)node;
                                if (string.IsNullOrEmpty(span.InnerText))
                                {
                                    continue;
                                }
                                sub.Contents.Add(span);
                                sub.ContentStrings.Add(span.OuterXml);
                            }
                            else if (node.NodeType == XmlNodeType.Text)
                            {
                                // Bare text is wrapped so every entry in Contents is an element.
                                var span = CreateTextSpan(node.Value!);
                                sub.Contents.Add(span);
                                sub.ContentStrings.Add(span.OuterXml);
                            }
                        }
                    }
                    else
                    {
                        var id = bgImg.Replace("#", "");
                        if (imageDic.TryGetValue(id, out var base64))
                        {
                            var span = CreateTextSpan($"Base64::{base64}");
                            sub.Contents.Add(span);
                            sub.ContentStrings.Add(span.OuterXml);
                        }
                    }

                    //Skip empty lines
                    if (sub.ContentStrings.Count == 0)
                    {
                        continue;
                    }

                    //Check if one <p> has been splitted
                    var index = finalSubs.FindLastIndex(s =>
                        s.End == begin
                        && s.Region == region
                        && s.ContentStrings.SequenceEqual(sub.ContentStrings));

                    if (index != -1)
                    {
                        //Extend <p> duration
                        finalSubs[index].End = sub.End;
                    }
                    else if (!finalSubs.Contains(sub))
                    {
                        finalSubs.Add(sub);
                    }
                }
            }

            // Group rendered lines by cue timing; lines sharing a timing are joined with CRLF.
            var dic = new Dictionary<string, string>();
            foreach (var sub in finalSubs)
            {
                var key = $"{sub.Begin} --> {sub.End}";
                foreach (var item in sub.Contents)
                {
                    var fontStyle = item.GetAttribute("tts:fontStyle");
                    var text = GetTextFromElement(item);
                    if (fontStyle == "italic" || fontStyle == "oblique")
                    {
                        // NOTE(review): the <i> wrapper was reconstructed (the reviewed text had
                        // identical italic and plain branches) - confirm.
                        text = $"<i>{text}</i>";
                    }

                    dic[key] = dic.TryGetValue(key, out var existing)
                        ? $"{existing}\r\n{text}"
                        : text;
                }
            }

            var vtt = new StringBuilder();
            vtt.AppendLine("WEBVTT");
            foreach (var item in dic)
            {
                vtt.AppendLine(item.Key);
                vtt.AppendLine(item.Value);
                vtt.AppendLine();
            }

            return WebVttSub.Parse(vtt.ToString(), baseTimestamp);
        }
    }
}