using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using SnowSoftware.Platform.Dis.Recognition.Extensions;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;

namespace SnowSoftware.Platform.Dis.Recognition.SoftwareRecognition
{
    // TODO: hoist?
    // TODO: should PatternRepo be versioned ala. RuleTree?

    /// <summary>
    /// Holds many compiled "like"-style patterns packed into one pinned <c>char</c> array and
    /// matches strings against the slice of that array identified by a <see cref="PatternHandle"/>.
    /// </summary>
    /// <remarks>
    /// Pattern characters below <c>' '</c> are control markers (see the constants on this class);
    /// every other character is compared literally and ordinally.
    /// </remarks>
    public class PatternRepo
    {
        /// <summary>Marker matching any run of characters; rendered as <c>%</c> by <see cref="GetPattern"/>.</summary>
        internal const char WildcardMulti = (char)16;

        /// <summary>Marker matching a single character; rendered as <c>_</c> by <see cref="GetPattern"/>.</summary>
        internal const char WildcardSingle = (char)17;

        /// <summary>Marker that both opens and closes a character group (<c>[...]</c>).</summary>
        internal const char Group = (char)18;

        /// <summary>Marker that both opens and closes a negated character group (<c>[^...]</c>).</summary>
        internal const char NotGroup = (char)19;

        private readonly char[] _patterns;
        private readonly GCHandle _handle;
        private unsafe readonly char* _ptr;

        /// <summary>
        /// Creates a repository over <paramref name="patterns"/> and pins the array for the
        /// lifetime of this instance.
        /// </summary>
        /// <param name="patterns">The packed pattern data; must not be null.</param>
        /// <exception cref="PanicException">Thrown when <paramref name="patterns"/> is null.</exception>
        public PatternRepo(char[] patterns)
        {
            if (patterns == null)
            {
                throw new PanicException("patterns cannot be null");
            }

            _patterns = patterns;

            // We do this to avoid bounds checking on the '_patterns' array (1.13x speed up on a test of 1,280,000 recognized software rows)
            // We need to pin it so the GC doesn't move the memory around
            _handle = GCHandle.Alloc(_patterns, GCHandleType.Pinned);
            unsafe
            {
                _ptr = (char*)_handle.AddrOfPinnedObject();
            }
        }

        /// <summary>Releases the pin on the pattern array.</summary>
        ~PatternRepo()
        {
            // The finalizer also runs when the constructor threw before the handle was allocated.
            // Freeing an unallocated GCHandle throws, and an exception escaping a finalizer
            // terminates the process, so only free a handle that was actually allocated.
            if (_handle.IsAllocated)
            {
                _handle.Free();
            }
        }

        /// <summary>
        /// Reads the pattern data from <paramref name="stream"/> and wraps it in a new repository.
        /// </summary>
        /// <param name="stream">The stream to read the char array from.</param>
        /// <returns>
        /// The repository and a null error on success; a null repository and an error message
        /// when the array could not be read.
        /// </returns>
        public static (PatternRepo PatternRepo, string Error) ReadFromStream(Stream stream)
        {
            var ok = stream.TryReadArray<char>(out var patterns);
            if (!ok)
            {
                return (null, "unable to read pattern-repo data");
            }

            return (new PatternRepo(patterns), null);
        }

        /// <summary>Writes the raw pattern array to <paramref name="stream"/>.</summary>
        /// <param name="stream">The stream to write to.</param>
        public void WriteToStream(Stream stream)
        {
            stream.WriteArray(_patterns);
        }

        /// <summary>
        /// Tests whether <paramref name="str"/> matches the pattern identified by <paramref name="handle"/>.
        /// </summary>
        /// <param name="handle">Handle locating the pattern inside this repository.</param>
        /// <param name="str">The string to test.</param>
        /// <returns>
        /// <c>true</c> when the handle is the "any" handle, when the handle is empty and
        /// <paramref name="str"/> is empty, or when the pattern matches the string; otherwise <c>false</c>.
        /// </returns>
        /// <exception cref="PanicException">Thrown when the pattern contains an unknown control character.</exception>
        public bool Match(PatternHandle handle, string str)
        {
            if (handle.IsAny())
            {
                return true;
            }

            if (handle.IsEmpty())
            {
                return str.Length == 0;
            }

            var result = MatchCore(handle, str);

            // MatchCore works on a raw pointer into the pinned array and does not otherwise touch
            // 'this'. Without this call the GC could finalize the instance (and release the pin)
            // while the pointer is still being read.
            GC.KeepAlive(this);
            return result;
        }

        /// <summary>
        /// Matches <paramref name="str"/> against a non-empty, non-"any" pattern by consuming the
        /// pattern from the front and/or the back until the two cursors meet.
        /// </summary>
        private unsafe bool MatchCore(PatternHandle handle, string str)
        {
            char* pPtr = _ptr;
            fixed (char* sPtr = str)
            {
                var firstPatIndex = handle.Index;
                var lastPatIndex = handle.Index + handle.Size - 1;
                const int firstStrIndex = 0;
                int lastStrIndex = str.Length - 1;

                // Forward (f*) and backward (b*) cursors into the pattern (pi) and the string (si).
                var fpi = firstPatIndex;
                var bpi = lastPatIndex;
                var fsi = firstStrIndex;
                var bsi = lastStrIndex;

                var startsWildcard = GetChar(pPtr, firstPatIndex) == WildcardMulti;
                var endsWildcard = GetChar(pPtr, lastPatIndex) == WildcardMulti;

                // Scan only forward when just the end is a wildcard, only backward when just the
                // start is a wildcard, and in both directions in every other case.
                var fDo = !startsWildcard && endsWildcard;
                var bDo = !endsWildcard && startsWildcard;
                if (fDo == bDo)
                {
                    fDo = true;
                    bDo = true;
                }

                // Once a multi-wildcard has been passed in a direction, a failed segment may be
                // retried one string position further on instead of failing the whole match.
                var fSweep = startsWildcard;
                var bSweep = endsWildcard;

                while (true)
                {
                    if (fDo)
                    {
                        while (true)
                        {
                            var match = true;
                            var pi = fpi;
                            var si = fsi;
                            if (GetChar(pPtr, pi) == WildcardMulti)
                            {
                                if (pi == lastPatIndex)
                                {
                                    return true;
                                }

                                pi++;
                                fSweep = true;
                            }

                            // Match one wildcard-free segment of the pattern.
                            while (true)
                            {
                                if (pi > lastPatIndex)
                                {
                                    break;
                                }

                                var pCh = GetChar(pPtr, pi);
                                if (pCh == WildcardMulti)
                                {
                                    break;
                                }

                                if (si > lastStrIndex)
                                {
                                    return false;
                                }

                                var sCh = GetChar(sPtr, si);
                                if (pCh >= ' ')
                                {
                                    if (sCh != pCh)
                                    {
                                        match = false;
                                    }
                                }
                                else
                                {
                                    switch (pCh)
                                    {
                                        case WildcardSingle:
                                            // A surrogate consumes one extra UTF-16 unit.
                                            if (char.IsSurrogate(sCh))
                                            {
                                                si++;
                                            }

                                            break;
                                        case Group:
                                            if (!HandleGroup(pPtr, sCh, ref pi, +1))
                                            {
                                                match = false;
                                            }

                                            break;
                                        case NotGroup:
                                            if (!HandleNotGroup(pPtr, sCh, ref pi, +1))
                                            {
                                                match = false;
                                            }

                                            break;
                                        default:
                                            throw new PanicException("unknown special like character");
                                    }
                                }

                                if (!match)
                                {
                                    break;
                                }

                                pi++;
                                si++;
                            }

                            if (!match)
                            {
                                if (fSweep)
                                {
                                    fsi++;
                                }
                                else
                                {
                                    return false;
                                }
                            }
                            else
                            {
                                fpi = pi;
                                fsi = si;
                                if (fpi >= bpi)
                                {
                                    // The cursors met. If the whole pattern was consumed and it does not
                                    // end with a wildcard, the whole string must have been consumed too.
                                    return fpi - 1 != lastPatIndex || endsWildcard || fsi - 1 == lastStrIndex;
                                }

                                break;
                            }
                        }
                    }

                    if (bDo)
                    {
                        while (true)
                        {
                            var match = true;
                            var pi = bpi;
                            var si = bsi;
                            if (GetChar(pPtr, pi) == WildcardMulti)
                            {
                                if (pi == firstPatIndex)
                                {
                                    return true;
                                }

                                pi--;
                                bSweep = true;
                            }

                            // Match one wildcard-free segment of the pattern, right to left.
                            while (true)
                            {
                                if (pi < firstPatIndex)
                                {
                                    break;
                                }

                                var pCh = GetChar(pPtr, pi);
                                if (pCh == WildcardMulti)
                                {
                                    break;
                                }

                                if (si < firstStrIndex)
                                {
                                    return false;
                                }

                                var sCh = GetChar(sPtr, si);
                                if (pCh >= ' ')
                                {
                                    if (sCh != pCh)
                                    {
                                        match = false;
                                    }
                                }
                                else
                                {
                                    switch (pCh)
                                    {
                                        case WildcardSingle:
                                            // A surrogate consumes one extra UTF-16 unit.
                                            if (char.IsSurrogate(sCh))
                                            {
                                                si--;
                                            }

                                            break;
                                        case Group:
                                            if (!HandleGroup(pPtr, sCh, ref pi, -1))
                                            {
                                                match = false;
                                            }

                                            break;
                                        case NotGroup:
                                            if (!HandleNotGroup(pPtr, sCh, ref pi, -1))
                                            {
                                                match = false;
                                            }

                                            break;
                                        default:
                                            throw new PanicException("unknown special like character");
                                    }
                                }

                                if (!match)
                                {
                                    break;
                                }

                                pi--;
                                si--;
                            }

                            if (!match)
                            {
                                if (bSweep)
                                {
                                    bsi--;
                                }
                                else
                                {
                                    return false;
                                }
                            }
                            else
                            {
                                bpi = pi;
                                bsi = si;
                                if (bpi <= fpi)
                                {
                                    // Mirror of the forward termination check.
                                    return bpi + 1 != firstPatIndex || startsWildcard || bsi + 1 == firstStrIndex;
                                }

                                break;
                            }
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Get the pattern for the specified handle ( for debugging and error handling )
        /// </summary>
        /// <param name="handle">The pattern handle to this repository</param>
        /// <returns>
        /// <c>"[empty]"</c> for an empty handle, <c>"%"</c> for the "any" handle, otherwise the
        /// pattern with its control markers rendered as <c>%</c>, <c>_</c>, <c>[...]</c> and <c>[^...]</c>.
        /// </returns>
        public unsafe string GetPattern(PatternHandle handle)
        {
            if (handle.IsEmpty())
            {
                return "[empty]";
            }

            if (handle.IsAny())
            {
                return "%";
            }

            var patternSpan = new Span<char>(_ptr + handle.Index, handle.Size);
            var builder = new StringBuilder();
            var isInGroup = false;
            foreach (var c in patternSpan)
            {
                switch (c)
                {
                    case Group:
                    case NotGroup:
                    {
                        // The same marker opens and closes a group, so toggle on every occurrence.
                        isInGroup = !isInGroup;
                        if (isInGroup)
                        {
                            builder.Append('[');
                            if (c == NotGroup)
                            {
                                builder.Append('^');
                            }
                        }
                        else
                        {
                            builder.Append(']');
                        }

                        break;
                    }
                    case WildcardMulti:
                        builder.Append('%');
                        break;
                    case WildcardSingle:
                        builder.Append('_');
                        break;
                    default:
                        builder.Append(c);
                        break;
                }
            }

            // The span points into the pinned array; keep this instance (and therefore the pin)
            // alive until the span has been fully read.
            GC.KeepAlive(this);
            return builder.ToString();
        }

        /// <summary>
        /// Consumes a character group starting at the opening <see cref="Group"/> marker at
        /// <paramref name="pi"/>, stepping by <paramref name="dir"/> until the closing marker.
        /// </summary>
        /// <returns>
        /// <c>true</c> when any group member equals <paramref name="sCh"/> or is <see cref="WildcardSingle"/>.
        /// On return <paramref name="pi"/> is the index of the closing marker.
        /// </returns>
        /// <remarks>
        /// NOTE(review): there is no bounds check here, so this presumably relies on every stored
        /// pattern having a closing marker - confirm against the pattern compiler.
        /// </remarks>
        private static unsafe bool HandleGroup(char* pPtr, char sCh, ref int pi, int dir)
        {
            var match = false;
            while (true)
            {
                pi += dir;
                var pCh = GetChar(pPtr, pi);
                if (pCh == Group)
                {
                    break;
                }

                if (pCh == sCh || pCh == WildcardSingle)
                {
                    match = true;
                }
            }

            return match;
        }

        /// <summary>
        /// Consumes a negated character group starting at the opening <see cref="NotGroup"/> marker
        /// at <paramref name="pi"/>, stepping by <paramref name="dir"/> until the closing marker.
        /// </summary>
        /// <returns>
        /// <c>false</c> when any group member equals <paramref name="sCh"/> or is <see cref="WildcardSingle"/>;
        /// otherwise <c>true</c>. On return <paramref name="pi"/> is the index of the closing marker.
        /// </returns>
        /// <remarks>
        /// NOTE(review): there is no bounds check here, so this presumably relies on every stored
        /// pattern having a closing marker - confirm against the pattern compiler.
        /// </remarks>
        private static unsafe bool HandleNotGroup(char* pPtr, char sCh, ref int pi, int dir)
        {
            var match = true;
            while (true)
            {
                pi += dir;
                var pCh = GetChar(pPtr, pi);
                if (pCh == NotGroup)
                {
                    break;
                }

                if (pCh == sCh || pCh == WildcardSingle)
                {
                    match = false;
                }
            }

            return match;
        }

        /// <summary>Reads the char at offset <paramref name="i"/> from <paramref name="ptr"/> without bounds checking.</summary>
        private static unsafe char GetChar(char* ptr, int i) => *(ptr + i);
    }
}