491 lines
11 KiB
PHP
491 lines
11 KiB
PHP
<?php
|
|
|
|
namespace Caxy\HtmlDiff;
|
|
|
|
/**
 * Class AbstractDiff
 *
 * Abstract base for diff builders. It stores the two HTML inputs, the
 * HtmlDiffConfig in use and the generated output, exposes deprecated
 * pass-through accessors for individual config options, and provides the
 * character-level tokenizer that turns HTML into a list of "words"
 * (complete tags, whitespace runs and text fragments).
 *
 * Concrete subclasses implement build().
 *
 * @package Caxy\HtmlDiff
 */
abstract class AbstractDiff
{
    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseTags = array('strong', 'b', 'i', 'big', 'small', 'u', 'sub', 'sup', 'strike', 's', 'p');

    /**
     * @var array
     *
     * @deprecated since 0.1.0
     */
    public static $defaultSpecialCaseChars = array('.', ',', '(', ')', '\'');

    /**
     * @var bool
     *
     * @deprecated since 0.1.0
     */
    public static $defaultGroupDiffs = true;

    /**
     * Configuration consulted by the tokenizer and by the deprecated accessors below.
     *
     * @var HtmlDiffConfig
     */
    protected $config;

    /**
     * Generated diff output; initialised to an empty string by the constructor.
     *
     * @var string
     */
    protected $content;

    /**
     * Old HTML after trim() and purifyHtml().
     *
     * @var string
     */
    protected $oldText;

    /**
     * New HTML after trim() and purifyHtml().
     *
     * @var string
     */
    protected $newText;

    /**
     * Tokens of the old HTML, filled by splitInputsToWords().
     *
     * @var array
     */
    protected $oldWords = array();

    /**
     * Tokens of the new HTML, filled by splitInputsToWords().
     *
     * @var array
     */
    protected $newWords = array();

    /**
     * DiffCache instances keyed by the spl_object_hash() of their cache provider.
     *
     * @var DiffCache[]
     */
    private $diffCaches = array();

    /**
     * AbstractDiff constructor.
     *
     * Creates a fresh HtmlDiffConfig, applies the optional overrides and stores
     * the trimmed, purified inputs.
     *
     * @param string     $oldText
     * @param string     $newText
     * @param string     $encoding
     * @param null|array $specialCaseTags Applied to the config only when not null.
     * @param null|bool  $groupDiffs      Applied to the config only when not null.
     */
    public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
    {
        // Global mbstring setting: use a space (0x20) as the substitution character.
        mb_substitute_character(0x20);

        $this->config = HtmlDiffConfig::create()->setEncoding($encoding);

        if ($specialCaseTags !== null) {
            $this->config->setSpecialCaseTags($specialCaseTags);
        }

        if ($groupDiffs !== null) {
            $this->config->setGroupDiffs($groupDiffs);
        }

        $this->oldText = $this->purifyHtml(trim($oldText));
        $this->newText = $this->purifyHtml(trim($newText));
        $this->content = '';
    }

    /**
     * Builds the diff; implemented by concrete subclasses.
     *
     * @return bool|string
     */
    abstract public function build();

    /**
     * Returns the DiffCache bound to the configured cache provider, creating
     * it on first use. One DiffCache is kept per provider object.
     *
     * @return DiffCache|null Null when no cache provider is configured.
     */
    protected function getDiffCache()
    {
        if (!$this->hasDiffCache()) {
            return null;
        }

        $provider = $this->getConfig()->getCacheProvider();
        $hash = spl_object_hash($provider);

        if (!array_key_exists($hash, $this->diffCaches)) {
            $this->diffCaches[$hash] = new DiffCache($provider);
        }

        return $this->diffCaches[$hash];
    }

    /**
     * Tells whether the config carries a cache provider.
     *
     * @return bool
     */
    protected function hasDiffCache()
    {
        return null !== $this->getConfig()->getCacheProvider();
    }

    /**
     * @return HtmlDiffConfig
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Replaces the whole configuration object.
     *
     * @param HtmlDiffConfig $config
     *
     * @return AbstractDiff
     */
    public function setConfig(HtmlDiffConfig $config)
    {
        $this->config = $config;

        return $this;
    }

    /**
     * Pass-through to the config object.
     *
     * @return int
     *
     * @deprecated since 0.1.0
     */
    public function getMatchThreshold()
    {
        return $this->config->getMatchThreshold();
    }

    /**
     * Pass-through to the config object.
     *
     * @param int $matchThreshold
     *
     * @return AbstractDiff
     *
     * @deprecated since 0.1.0
     */
    public function setMatchThreshold($matchThreshold)
    {
        $this->config->setMatchThreshold($matchThreshold);

        return $this;
    }

    /**
     * Pass-through to the config object.
     *
     * @param array $chars
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseChars(array $chars)
    {
        $this->config->setSpecialCaseChars($chars);
    }

    /**
     * Pass-through to the config object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseChars()
    {
        return $this->config->getSpecialCaseChars();
    }

    /**
     * Pass-through to the config object.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseChar($char)
    {
        $this->config->addSpecialCaseChar($char);
    }

    /**
     * Pass-through to the config object.
     *
     * @param string $char
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseChar($char)
    {
        $this->config->removeSpecialCaseChar($char);
    }

    /**
     * Pass-through to the config object.
     *
     * @param array $tags
     *
     * @deprecated since 0.1.0
     */
    public function setSpecialCaseTags(array $tags = array())
    {
        // Must target the *tags* setter; forwarding to setSpecialCaseChars()
        // would overwrite the special-case characters with tag names.
        $this->config->setSpecialCaseTags($tags);
    }

    /**
     * Pass-through to the config object.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function addSpecialCaseTag($tag)
    {
        $this->config->addSpecialCaseTag($tag);
    }

    /**
     * Pass-through to the config object.
     *
     * @param string $tag
     *
     * @deprecated since 0.1.0
     */
    public function removeSpecialCaseTag($tag)
    {
        $this->config->removeSpecialCaseTag($tag);
    }

    /**
     * Pass-through to the config object.
     *
     * @return array|null
     *
     * @deprecated since 0.1.0
     */
    public function getSpecialCaseTags()
    {
        return $this->config->getSpecialCaseTags();
    }

    /**
     * Returns the old HTML as stored by the constructor (trimmed and purified).
     *
     * @return string
     */
    public function getOldHtml()
    {
        return $this->oldText;
    }

    /**
     * Returns the new HTML as stored by the constructor (trimmed and purified).
     *
     * @return string
     */
    public function getNewHtml()
    {
        return $this->newText;
    }

    /**
     * Returns the current diff output buffer.
     *
     * @return string
     */
    public function getDifference()
    {
        return $this->content;
    }

    /**
     * Pass-through to the config object.
     *
     * @param bool $boolean
     *
     * @return $this
     *
     * @deprecated since 0.1.0
     */
    public function setGroupDiffs($boolean)
    {
        $this->config->setGroupDiffs($boolean);

        return $this;
    }

    /**
     * Pass-through to the config object.
     *
     * @return bool
     *
     * @deprecated since 0.1.0
     */
    public function isGroupDiffs()
    {
        return $this->config->isGroupDiffs();
    }

    /**
     * Builds a case-insensitive regular expression matching "<$tag" followed
     * by any run of characters other than ">".
     *
     * The tag name is inserted verbatim (it is not passed through preg_quote()).
     *
     * @param string $tag
     *
     * @return string A complete PCRE pattern including delimiters and the "i" flag.
     */
    protected function getOpeningTag($tag)
    {
        return "/<".$tag."[^>]*/i";
    }

    /**
     * Builds the literal closing tag for a tag name, e.g. "</p>" for "p".
     *
     * @param string $tag
     *
     * @return string
     */
    protected function getClosingTag($tag)
    {
        return "</".$tag.">";
    }

    /**
     * Returns the part of $str located after the first occurrence of $start
     * and before the last piece produced by splitting the remainder on $end.
     *
     * @param string $str
     * @param string $start
     * @param string $end
     *
     * @return string Empty string when either delimiter is empty or not found.
     */
    protected function getStringBetween($str, $start, $end)
    {
        // explode() rejects an empty delimiter (ValueError on PHP 8), so bail out early.
        if ('' === (string) $start || '' === (string) $end) {
            return '';
        }

        $expStr = explode($start, $str, 2);
        if (count($expStr) > 1) {
            $expStr = explode($end, $expStr[1]);
            if (count($expStr) > 1) {
                // Drop whatever follows the final $end and glue the rest back together.
                array_pop($expStr);

                return implode($end, $expStr);
            }
        }

        return '';
    }

    /**
     * Hook for cleaning up an HTML input before it is stored.
     *
     * Currently a no-op: the input is returned unchanged. The former Tidy
     * branch was hard-disabled and could not have worked as written (it
     * called getStringBetween() without its $end argument), so it has been
     * removed rather than kept as dead code.
     *
     * @param string $html
     *
     * @return string
     */
    protected function purifyHtml($html)
    {
        return $html;
    }

    /**
     * Tokenizes both stored inputs into $oldWords and $newWords.
     */
    protected function splitInputsToWords()
    {
        $this->oldWords = $this->convertHtmlToListOfWords($this->explode($this->oldText));
        $this->newWords = $this->convertHtmlToListOfWords($this->explode($this->newText));
    }

    /**
     * Tells whether $text is alphanumeric once the configured special-case
     * characters have been stripped from it.
     *
     * NOTE(review): ctype_alnum() works on bytes and the current locale, so
     * multi-byte UTF-8 letters are presumably not treated as word characters.
     *
     * @param string $text
     *
     * @return bool False for an empty string.
     */
    protected function isPartOfWord($text)
    {
        return ctype_alnum(str_replace($this->config->getSpecialCaseChars(), '', $text));
    }

    /**
     * Groups a list of single characters into tokens using a three-state
     * machine ("character", "tag", "whitespace"):
     *
     *  - a tag token runs from "<" up to and including the next ">";
     *  - consecutive whitespace is collapsed into a single " " token;
     *  - alphanumeric characters are glued into words, and a special-case
     *    character is glued onto the current word when the following
     *    character is part of a word (e.g. "3.5");
     *  - any other character becomes its own token.
     *
     * Empty strings contained in $characterString are processed like any
     * other character, so the result may contain empty-string tokens.
     *
     * @param array $characterString List of single characters, as produced by explode().
     *
     * @return array
     */
    protected function convertHtmlToListOfWords($characterString)
    {
        $mode = 'character';
        $current_word = '';
        $words = array();
        // Loop-invariant: fetch the special-case characters once.
        $specialCaseChars = $this->config->getSpecialCaseChars();

        foreach ($characterString as $i => $character) {
            switch ($mode) {
                case 'character':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = preg_replace('/\s+/S', ' ', $character);
                        $mode = 'whitespace';
                    } elseif (
                        (ctype_alnum($character) && (strlen($current_word) === 0 || $this->isPartOfWord($current_word))) ||
                        (in_array($character, $specialCaseChars, true) && isset($characterString[$i + 1]) && $this->isPartOfWord($characterString[$i + 1]))
                    ) {
                        $current_word .= $character;
                    } else {
                        // Flushed unconditionally (even when empty) to keep the
                        // token list identical to what consumers have always received.
                        $words[] = $current_word;
                        $current_word = $character;
                    }
                    break;

                case 'tag':
                    if ($this->isEndOfTag($character)) {
                        $current_word .= '>';
                        $words[] = $current_word;
                        $current_word = '';

                        // The historical check here, !preg_match('[^\s]', $character),
                        // is always true for the ">" terminator, so the tokenizer
                        // always resumes in whitespace mode after a tag. That mode
                        // skips empty tokens before the next character is read.
                        $mode = 'whitespace';
                    } else {
                        $current_word .= $character;
                    }
                    break;

                case 'whitespace':
                    if ($this->isStartOfTag($character)) {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = '<';
                        $mode = 'tag';
                    } elseif (preg_match('/\s/', $character)) {
                        // Keep collapsing the run into a single space.
                        $current_word .= $character;
                        $current_word = preg_replace('/\s+/S', ' ', $current_word);
                    } else {
                        if ($current_word !== '') {
                            $words[] = $current_word;
                        }
                        $current_word = $character;
                        $mode = 'character';
                    }
                    break;

                default:
                    break;
            }
        }

        if ($current_word !== '') {
            $words[] = $current_word;
        }

        return $words;
    }

    /**
     * @param string $val
     *
     * @return bool True when $val is exactly "<".
     */
    protected function isStartOfTag($val)
    {
        return $val === '<';
    }

    /**
     * @param string $val
     *
     * @return bool True when $val is exactly ">".
     */
    protected function isEndOfTag($val)
    {
        return $val === '>';
    }

    /**
     * Tells whether $value consists solely of whitespace.
     *
     * The pattern needs explicit delimiters: written as '[^\s]', PCRE treats
     * the brackets as delimiters and matches "^\s" instead, which inverted
     * the result for ordinary input.
     *
     * @param string $value
     *
     * @return bool True when $value contains no non-whitespace character (including when it is empty).
     */
    protected function isWhiteSpace($value)
    {
        return !preg_match('/[^\s]/', $value);
    }

    /**
     * Splits a UTF-8 string into a list of single characters.
     *
     * Because PREG_SPLIT_NO_EMPTY is not used, the list starts and ends with
     * an empty string.
     *
     * @param string $value
     *
     * @return array Empty array when $value cannot be split (e.g. invalid UTF-8).
     */
    protected function explode($value)
    {
        // as suggested by @onassar
        $characters = preg_split('//u', $value);

        // preg_split() yields false on failure; honour the documented array return type.
        return false === $characters ? array() : $characters;
    }
}
|