#region Disclaimer / License // Copyright (C) 2012, Jackie Ng // http://trac.osgeo.org/mapguide/wiki/maestro, jumpinjackie@gmail.com // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // // Original code by Michael Potter, made available under Public Domain // // http://www.codeproject.com/Articles/6943/A-Generic-Reusable-Diff-Algorithm-in-C-II/ #endregion using System; using System.Collections.Generic; using System.Collections.Specialized; using System.Linq; using System.Text; using System.Collections; namespace OSGeo.MapGuide.MaestroAPI.Resource.Comparison { /// /// Defines a list of difference /// public interface IDiffList { /// /// Gets the number of instances /// /// int Count(); /// /// Gets a diff at the specified index /// /// /// IComparable GetByIndex(int index); } internal enum DiffStatus { Matched = 1, NoMatch = -1, Unknown = -2 } internal class DiffState { private const int BAD_INDEX = -1; private int _startIndex; private int _length; public int StartIndex { get { return _startIndex; } } public int EndIndex { get { return ((_startIndex + _length) - 1); } } public int Length { get { int len; if (_length > 0) { len = _length; } else { if (_length == 0) { len = 1; } else { len = 0; } } return len; } } public DiffStatus Status { get { DiffStatus stat; if (_length > 0) { stat = DiffStatus.Matched; } else { switch (_length) { case -1: stat = DiffStatus.NoMatch; break; default: System.Diagnostics.Debug.Assert(_length == -2, "Invalid status: _length < -2"); //NOXLATE stat = DiffStatus.Unknown; break; } } return stat; } } public DiffState() { SetToUnkown(); } protected void SetToUnkown() { _startIndex = BAD_INDEX; _length = (int)DiffStatus.Unknown; } public void SetMatch(int start, int length) { System.Diagnostics.Debug.Assert(length > 0, "Length must be greater than zero"); //NOXLATE System.Diagnostics.Debug.Assert(start >= 0, "Start must be greater than or equal to zero"); //NOXLATE _startIndex = start; _length = length; } public void SetNoMatch() { _startIndex = BAD_INDEX; _length = (int)DiffStatus.NoMatch; } public bool HasValidLength(int newStart, int newEnd, int maxPossibleDestLength) { if (_length > 0) //have unlocked match { if ((maxPossibleDestLength < _length) || ((_startIndex < newStart) || (EndIndex > newEnd))) { SetToUnkown(); } } return (_length != (int)DiffStatus.Unknown); } } internal class DiffStateList { #if USE_HASH_TABLE private Hashtable _table; #else private DiffState[] _array; #endif public DiffStateList(int destCount) { #if USE_HASH_TABLE _table = new Hashtable(Math.Max(9,destCount/10)); #else _array = new DiffState[destCount]; #endif } public DiffState GetByIndex(int index) { #if USE_HASH_TABLE DiffState retval = (DiffState)_table[index]; if (retval == null) { retval = new DiffState(); _table.Add(index,retval); } #else DiffState retval = _array[index]; if (retval == null) { retval = new DiffState(); _array[index] = retval; } #endif return retval; } } /// /// Defines the status of a diff result span /// public enum DiffResultSpanStatus { /// /// /// NoChange, /// /// /// Replace, /// /// /// DeleteSource, /// /// /// AddDestination } /// /// Defines a diff result span /// public class DiffResultSpan : IComparable { private const int BAD_INDEX = -1; private int _destIndex; private int _sourceIndex; private int _length; private DiffResultSpanStatus _status; /// /// The destination index /// public int DestIndex { get { return _destIndex; } } /// /// The source index /// public int SourceIndex { get { return _sourceIndex; } } /// /// Gets the length of this span /// public int Length { get { return _length; } } /// /// Gets the status of this span /// public DiffResultSpanStatus Status { get { return _status; } } /// /// Initializes this instance /// /// /// /// /// protected DiffResultSpan( DiffResultSpanStatus status, int destIndex, int sourceIndex, int length) { _status = status; _destIndex = destIndex; _sourceIndex = sourceIndex; _length = length; } internal static DiffResultSpan CreateNoChange(int destIndex, int sourceIndex, int length) { return new DiffResultSpan(DiffResultSpanStatus.NoChange, destIndex, sourceIndex, length); } internal static DiffResultSpan CreateReplace(int destIndex, int sourceIndex, int length) { return new DiffResultSpan(DiffResultSpanStatus.Replace, destIndex, sourceIndex, length); } internal static DiffResultSpan CreateDeleteSource(int sourceIndex, int length) { return new DiffResultSpan(DiffResultSpanStatus.DeleteSource, BAD_INDEX, sourceIndex, length); } internal static DiffResultSpan CreateAddDestination(int destIndex, int length) { return new DiffResultSpan(DiffResultSpanStatus.AddDestination, destIndex, BAD_INDEX, length); } internal void AddLength(int i) { _length += i; } /// /// Returns a string that represents the current object /// /// public override string ToString() { return string.Format("{0} (Dest: {1},Source: {2}) {3}", //NOXLATE _status.ToString(), _destIndex.ToString(), _sourceIndex.ToString(), _length.ToString()); } #region IComparable Members /// /// Compares this instance against the specified object /// /// /// public int CompareTo(object obj) { return _destIndex.CompareTo(((DiffResultSpan)obj)._destIndex); } #endregion } /// /// Controls the level of perfection required when computing the differences /// public enum DiffEngineLevel { /// /// Fast, but imperfect /// FastImperfect, /// /// A balanced trade between speed and perfection /// Medium, /// /// Slow, but perfect /// SlowPerfect } /// /// Computes the differences between two sources /// public class DiffEngine { private IDiffList _source; private IDiffList _dest; private List _matchList; private DiffEngineLevel _level; private DiffStateList _stateList; /// /// Initializes a new instance /// public DiffEngine() { _source = null; _dest = null; _matchList = null; _stateList = null; _level = DiffEngineLevel.FastImperfect; } private int GetSourceMatchLength(int destIndex, int sourceIndex, int maxLength) { int matchCount; for (matchCount = 0; matchCount < maxLength; matchCount++) { if (_dest.GetByIndex(destIndex + matchCount).CompareTo(_source.GetByIndex(sourceIndex + matchCount)) != 0) { break; } } return matchCount; } private void GetLongestSourceMatch(DiffState curItem, int destIndex, int destEnd, int sourceStart, int sourceEnd) { int maxDestLength = (destEnd - destIndex) + 1; int curLength = 0; int curBestLength = 0; int curBestIndex = -1; int maxLength = 0; for (int sourceIndex = sourceStart; sourceIndex <= sourceEnd; sourceIndex++) { maxLength = Math.Min(maxDestLength, (sourceEnd - sourceIndex) + 1); if (maxLength <= curBestLength) { //No chance to find a longer one any more break; } curLength = GetSourceMatchLength(destIndex, sourceIndex, maxLength); if (curLength > curBestLength) { //This is the best match so far curBestIndex = sourceIndex; curBestLength = curLength; } //jump over the match sourceIndex += curBestLength; } //DiffState cur = _stateList.GetByIndex(destIndex); if (curBestIndex == -1) { curItem.SetNoMatch(); } else { curItem.SetMatch(curBestIndex, curBestLength); } } private void ProcessRange(int destStart, int destEnd, int sourceStart, int sourceEnd) { int curBestIndex = -1; int curBestLength = -1; int maxPossibleDestLength = 0; DiffState curItem = null; DiffState bestItem = null; for (int destIndex = destStart; destIndex <= destEnd; destIndex++) { maxPossibleDestLength = (destEnd - destIndex) + 1; if (maxPossibleDestLength <= curBestLength) { //we won't find a longer one even if we looked break; } curItem = _stateList.GetByIndex(destIndex); if (!curItem.HasValidLength(sourceStart, sourceEnd, maxPossibleDestLength)) { //recalc new best length since it isn't valid or has never been done. GetLongestSourceMatch(curItem, destIndex, destEnd, sourceStart, sourceEnd); } if (curItem.Status == DiffStatus.Matched) { switch (_level) { case DiffEngineLevel.FastImperfect: if (curItem.Length > curBestLength) { //this is longest match so far curBestIndex = destIndex; curBestLength = curItem.Length; bestItem = curItem; } //Jump over the match destIndex += curItem.Length - 1; break; case DiffEngineLevel.Medium: if (curItem.Length > curBestLength) { //this is longest match so far curBestIndex = destIndex; curBestLength = curItem.Length; bestItem = curItem; //Jump over the match destIndex += curItem.Length - 1; } break; default: if (curItem.Length > curBestLength) { //this is longest match so far curBestIndex = destIndex; curBestLength = curItem.Length; bestItem = curItem; } break; } } } if (curBestIndex < 0) { //we are done - there are no matches in this span } else { int sourceIndex = bestItem.StartIndex; _matchList.Add(DiffResultSpan.CreateNoChange(curBestIndex, sourceIndex, curBestLength)); if (destStart < curBestIndex) { //Still have more lower destination data if (sourceStart < sourceIndex) { //Still have more lower source data // Recursive call to process lower indexes ProcessRange(destStart, curBestIndex - 1, sourceStart, sourceIndex - 1); } } int upperDestStart = curBestIndex + curBestLength; int upperSourceStart = sourceIndex + curBestLength; if (destEnd > upperDestStart) { //we still have more upper dest data if (sourceEnd > upperSourceStart) { //set still have more upper source data // Recursive call to process upper indexes ProcessRange(upperDestStart, destEnd, upperSourceStart, sourceEnd); } } } } /// /// Performs the difference computation using the specified level of control /// /// /// /// /// The total execution time in seconds public double ProcessDiff(IDiffList source, IDiffList destination, DiffEngineLevel level) { _level = level; return ProcessDiff(source, destination); } /// /// Performs the difference computation /// /// /// /// The total execution time in seconds public double ProcessDiff(IDiffList source, IDiffList destination) { DateTime dt = DateTime.Now; _source = source; _dest = destination; _matchList = new List(); int dcount = _dest.Count(); int scount = _source.Count(); if ((dcount > 0) && (scount > 0)) { _stateList = new DiffStateList(dcount); ProcessRange(0, dcount - 1, 0, scount - 1); } TimeSpan ts = DateTime.Now - dt; return ts.TotalSeconds; } private bool AddChanges( List report, int curDest, int nextDest, int curSource, int nextSource) { bool retval = false; int diffDest = nextDest - curDest; int diffSource = nextSource - curSource; int minDiff = 0; if (diffDest > 0) { if (diffSource > 0) { minDiff = Math.Min(diffDest, diffSource); report.Add(DiffResultSpan.CreateReplace(curDest, curSource, minDiff)); if (diffDest > diffSource) { curDest += minDiff; report.Add(DiffResultSpan.CreateAddDestination(curDest, diffDest - diffSource)); } else { if (diffSource > diffDest) { curSource += minDiff; report.Add(DiffResultSpan.CreateDeleteSource(curSource, diffSource - diffDest)); } } } else { report.Add(DiffResultSpan.CreateAddDestination(curDest, diffDest)); } retval = true; } else { if (diffSource > 0) { report.Add(DiffResultSpan.CreateDeleteSource(curSource, diffSource)); retval = true; } } return retval; } /// /// Returns the result of the difference computation /// /// public List DiffReport() { var retval = new List(); int dcount = _dest.Count(); int scount = _source.Count(); //Deal with the special case of empty files if (dcount == 0) { if (scount > 0) { retval.Add(DiffResultSpan.CreateDeleteSource(0, scount)); } return retval; } else { if (scount == 0) { retval.Add(DiffResultSpan.CreateAddDestination(0, dcount)); return retval; } } _matchList.Sort(); int curDest = 0; int curSource = 0; DiffResultSpan last = null; //Process each match record foreach (DiffResultSpan drs in _matchList) { if ((!AddChanges(retval, curDest, drs.DestIndex, curSource, drs.SourceIndex)) && (last != null)) { last.AddLength(drs.Length); } else { retval.Add(drs); } curDest = drs.DestIndex + drs.Length; curSource = drs.SourceIndex + drs.Length; last = drs; } //Process any tail end data AddChanges(retval, curDest, dcount, curSource, scount); return retval; } } }