You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

329 lines
12 KiB
Plaintext

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/**
* 文本差异化比对工具,用于比对两段文本之间所存在的差异;
* 对HTML文本的比对中会完全忽略标签内的差异将<...>
* 中的内容当做一个整体的单词;
* 注1现有版本还未对执行效率进行优化
* 注2原始算法来自于网络:http://www.rohland.co.za/index.php/2009/10/31/csharp-html-diff-algorithm/
* 注3效率真低啊..........╮(╯▽╰)╭
* @author zhoujq | http://weibo.com/buxiaodewa
*/
(function () {
var root = self;
/**
* 匹配描述块,一个用来表示相同内容块在新旧文档内位置的描述对象
* @param {Number} startInOld [相同部分在旧文档中的起始位置]
* @param {Number} startInNew [相同部分在新文档中的起始位置]
* @param {Number} size [相同部分的连续词元个数]
*/
var Match = function (startInOld, startInNew, size) {
this.size = size;
this.startInOld = startInOld;
this.startInNew = startInNew;
this.endInOld = this.startInOld + this.size;
this.endInNew = this.startInNew + this.size;
};
/**
* 操作单元,一个操作单元描述了在具体位置所要
* 执行的具体操作(如insert、delete、equal与replace)
* @param {Function} action
* @param {Number} startInOld [相同部分在旧文档中的起始位置]
* @param {Number} endInOld [相同部分在旧文档中的结束位置]
* @param {Number} startInNew [相同部分在新文档中的起始位置]
* @param {Number} endInNew [相同部分在新文档中的结束位置]
*/
var Operation = function (action, startInOld, endInOld, startInNew, endInNew) {
this.action = action;
this.endInOld = endInOld;
this.endInNew = endInNew;
this.startInOld = startInOld;
this.startInNew = startInNew;
};
/**
* 将HTML解析成单词列表
* 例:<a> Hello World </a> ---> ["<a>"," ", "Hello", " ", "World", " ", "</a>"]
* @param {String} html
* @return {Array<String>}
*/
var convertHtml2Words = function (html) {
return html.match(/<[^>]+>|[^<|>|\w]|\w+\b|\s+/mg);
};
/**
* 差异对比构建类
* @param {String} oldVersion 老文档
* @param {String} newVersion 新文档
*/
var DiffBuilder = function (oldVersion, newVersion) {
this.oldVersion = oldVersion;
this.newVersion = newVersion;
this.operation = null;
this.wordIndices = {};
this.oldWords = [];
this.newWords = [];
this.content = [];
};
DiffBuilder.prototype = {
build: function () {
var sd = new Date();
this.splitInputs2Words();
this.indexNewWords();
this.operations = this.getOperations();
this.performOperation();
return this.content.join('');
},
performOperation: function () {
var opt;
for (var i = 0, len = this.operations.length; i < len; i++) {
opt = this.operations[i];
this.operation = opt;
opt.action.call(this, opt);
}
},
/**
* 将传入的文本切割成词元
*/
splitInputs2Words: function () {
if (this.oldVersion == "")
this.oldWords = [];
else
this.oldWords = convertHtml2Words(this.oldVersion);
if (this.newVersion == "")
this.newWords = [];
else
this.newWords = convertHtml2Words(this.newVersion);
},
/**
* 构建一个newWords队列对应的索引表
* 例如:
* this.newWords = ["<a>", " ", "Hello", " ", "World", " ", "</a>"];
*
* 构建成--->
*
* this.wordIndices = {
* "<a>" : [0],
* " " : [1, 3, 5]
* "Hello" : [2],
* "World" : [4],
* "</a>" : [6]
* };
*/
indexNewWords: function () {
var newWords = this.newWords;
var wordIndices = this.wordIndices;
var key, len = newWords.length;
var toString = Object.prototype.toString;
for (var i = 0; i < len; i++) {
key = newWords[i];
if (toString.call(wordIndices[key]) !== '[object Array]') {
wordIndices[key] = [];
}
wordIndices[key].push(i);
}
},
/**
* 将文档抽象为操作描述队列
* 例如:[euqal, insert, equal],一篇文档可以被抽象为由多个
* “操作命令与位置信息”组成的操作队列;
* @return {Array<Operation>}
*/
getOperations: function () {
var optInOld = 0;
var optInNew = 0;
var operations = [];
var match, action;
var matchStartsInOld;
var matchStartsInNew;
var matchs = this.matchingBlocks();
matchs.push(new Match(this.oldWords.length, this.newWords.length, 0));
for (var i = 0, len = matchs.length; i < len; i++) {
action = null;
match = matchs[i];
matchStartsInOld = (optInOld === match.startInOld);
matchStartsInNew = (optInNew === match.startInNew);
if (!matchStartsInOld && !matchStartsInNew) {
action = this.replace;
} else if (matchStartsInOld && !matchStartsInNew) {
action = this.insert;
} else if (!matchStartsInOld && matchStartsInNew) {
action = this.del;
}
if (action) {
operations.push(new Operation(action, optInOld, match.startInOld, optInNew, match.startInNew));
}
if (match.size) {
operations.push(new Operation(this.equal, match.startInOld, match.endInOld, match.startInNew, match.endInNew));
}
optInOld = match.endInOld;
optInNew = match.endInNew;
}
return operations;
},
/**
* 获取一个用于描述新旧文档内全部相同内容的匹配描述列表
* @return {Array<Match>} 文档匹配描述列表
*/
matchingBlocks: function () {
var matchingBlocks = [];
this.findMatchingBlocks(0, this.oldWords.length, 0, this.newWords.length, matchingBlocks);
return matchingBlocks;
},
/**
* 递归查找匹配项
* @param {Number} startInOld
* @param {Number} endInOld
* @param {Number} startInNew
* @param {Number} endInNew
* @param {Array<Match>} matchingBlocks
*/
findMatchingBlocks: function (startInOld, endInOld, startInNew, endInNew, matchingBlocks) {
var match = this.findMatch(startInOld, endInOld, startInNew, endInNew);
if (!match) {
return;
}
if (startInOld < match.startInOld && startInNew < match.startInNew) {
this.findMatchingBlocks(startInOld, match.startInOld, startInNew, match.startInNew, matchingBlocks);
}
matchingBlocks.push(match);
if (match.endInOld < endInOld && match.endInNew < endInNew) {
this.findMatchingBlocks(match.endInOld, endInOld, match.endInNew, endInNew, matchingBlocks);
}
},
/**
* 从指定位置开始查询第一块匹配的文本块
* @param {Number} startInOld
* @param {Number} endInOld
* @param {Number} startInNew
* @param {Number} endInNew
*/
findMatch: function (startInOld, endInOld, startInNew, endInNew) {
var bestMatchInOld = startInOld;
var bestMatchInNew = startInNew;
var bestMatchSize = 0;
var matchLengthAt = {};
var newMatchLengthAt;
var newMatchLength, wordIndexList, matchIndex, len;
for (var idxOld = startInOld; idxOld < endInOld; idxOld++) {
newMatchLengthAt = {};
wordIndexList = this.wordIndices[this.oldWords[idxOld]];
len = wordIndexList ? wordIndexList.length : 0;
for (var i = 0; i < len; i++) {
matchIndex = wordIndexList[i];
if (matchIndex < startInNew) {
continue;
}
if (matchIndex >= endInNew) {
break;
}
newMatchLength = (matchLengthAt[matchIndex - 1] || 0) + 1;
newMatchLengthAt[matchIndex] = newMatchLength;
if (newMatchLength > bestMatchSize) {
bestMatchInOld = idxOld - newMatchLength + 1;
bestMatchInNew = matchIndex - newMatchLength + 1;
bestMatchSize = newMatchLength;
}
}
matchLengthAt = newMatchLengthAt;
}
return bestMatchSize ? new Match(bestMatchInOld, bestMatchInNew, bestMatchSize) : null;
},
insert: function (opt, tagCls) {
//this.insertTag('ins', tagCls || 'diffins', this.newWords.slice(opt.startInNew, opt.endInNew));
},
del: function (opt, tagCls) {
this.insertTag('span', tagCls || 'diff', this.oldWords.slice(opt.startInOld, opt.endInOld));
},
equal: function (opt) {
this.content = this.content.concat(this.newWords.slice(opt.startInNew, opt.endInNew));
},
replace: function (opt) {
this.del(opt, 'diffmod');
this.insert(opt, 'diffmod');
},
/**
* 添加标签
* @param {String} tagName
* @param {String} cssCls
* @param {String} words
*/
insertTag: function (tagName, cssCls, words) {
var nonTags;
var that = this;
while (words.length) {
//获取words内“连续”的非标签字符
nonTags = this.extractConsecutiveWords(words, true);
if (nonTags.length) {
this.content.push(
this.warpText(nonTags.join(''), tagName, cssCls));
}
if (words.length) {
//获取words内“连续”的标签字符
this.content = this.content.concat(
this.extractConsecutiveWords(words, false));
}
}
},
/**
* 获取words内连续的“文本”或“标签”
* @param {Array<String>} words
* @param {Boolean} isTag
* @return {Array<String>}
*/
extractConsecutiveWords: function (words, isTag) {
var idxOfFirstTag = null;
for (var i = 0, len = words.length; i < len; i++) {
//注:是取判定条件的前一位
if (this.isTag(words[i]) == isTag) {
idxOfFirstTag = i;
break;
}
}
return words.splice(0, idxOfFirstTag !== null ? idxOfFirstTag : words.length);
},
warpText: function (text, tagName, cssCls) {
return '<' + tagName + ' class="' + cssCls + '">' + text + '</' + tagName + '>';
},
isOpeningTag: function (item) {
return /^\s*<[^>]+>\s*$/ig.test(item);
},
isClosingTag: function (item) {
return /^\s*<\/[^>]+>\s*$/ig.test(item);
},
isTag: function (item) {
return this.isOpeningTag(item) ? 1 : this.isClosingTag(item) ? 2 : 0;
}
};
root.getHTMLDiff = function (oldVersion, newVersion) {
return new DiffBuilder(oldVersion, newVersion).build();
};
/**
* 针对高级浏览器开启webWorker支持
*/
if (typeof postMessage === "function") {
onmessage = function (evt) {
var data = evt.data;
var diff = getHTMLDiff(data.oldVersion, data.newVersion);
postMessage(diff);
};
}
})();