wp-visitor-contributions/vendor/caxy/php-htmldiff/lib/Caxy/HtmlDiff/HtmlDiff.php

796 lines
25 KiB
PHP

<?php
namespace Caxy\HtmlDiff;
use Caxy\HtmlDiff\Table\TableDiff;
/**
* Class HtmlDiff
* @package Caxy\HtmlDiff
*/
class HtmlDiff extends AbstractDiff
{
/**
* @var array
*/
protected $wordIndices;
/**
* @var array
*/
protected $oldTables;
/**
* @var array
*/
protected $newTables;
/**
* @var array
*/
protected $newIsolatedDiffTags;
/**
* @var array
*/
protected $oldIsolatedDiffTags;
/**
* @param string $oldText
* @param string $newText
* @param HtmlDiffConfig|null $config
*
* @return self
*/
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
$diff = new self($oldText, $newText);
if (null !== $config) {
$diff->setConfig($config);
}
return $diff;
}
/**
* @param $bool
*
* @return $this
*
* @deprecated since 0.1.0
*/
public function setUseTableDiffing($bool)
{
$this->config->setUseTableDiffing($bool);
return $this;
}
/**
* @param boolean $boolean
* @return HtmlDiff
*
* @deprecated since 0.1.0
*/
public function setInsertSpaceInReplace($boolean)
{
$this->config->setInsertSpaceInReplace($boolean);
return $this;
}
/**
* @return boolean
*
* @deprecated since 0.1.0
*/
public function getInsertSpaceInReplace()
{
return $this->config->isInsertSpaceInReplace();
}
/**
* @return string
*/
public function build()
{
if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
$this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);
return $this->content;
}
$this->splitInputsToWords();
$this->replaceIsolatedDiffTags();
$this->indexNewWords();
$operations = $this->operations();
foreach ($operations as $item) {
$this->performOperation( $item );
}
if ($this->hasDiffCache()) {
$this->getDiffCache()->save($this->oldText, $this->newText, $this->content);
}
return $this->content;
}
protected function indexNewWords()
{
$this->wordIndices = array();
foreach ($this->newWords as $i => $word) {
if ( $this->isTag( $word ) ) {
$word = $this->stripTagAttributes( $word );
}
if ( isset( $this->wordIndices[ $word ] ) ) {
$this->wordIndices[ $word ][] = $i;
} else {
$this->wordIndices[ $word ] = array( $i );
}
}
}
protected function replaceIsolatedDiffTags()
{
$this->oldIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->oldWords);
$this->newIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->newWords);
}
/**
* @param array $words
*
* @return array
*/
protected function createIsolatedDiffTagPlaceholders(&$words)
{
$openIsolatedDiffTags = 0;
$isolatedDiffTagIndicies = array();
$isolatedDiffTagStart = 0;
$currentIsolatedDiffTag = null;
foreach ($words as $index => $word) {
$openIsolatedDiffTag = $this->isOpeningIsolatedDiffTag($word, $currentIsolatedDiffTag);
if ($openIsolatedDiffTag) {
if ($openIsolatedDiffTags === 0) {
$isolatedDiffTagStart = $index;
}
$openIsolatedDiffTags++;
$currentIsolatedDiffTag = $openIsolatedDiffTag;
} elseif ($openIsolatedDiffTags > 0 && $this->isClosingIsolatedDiffTag($word, $currentIsolatedDiffTag)) {
$openIsolatedDiffTags--;
if ($openIsolatedDiffTags == 0) {
$isolatedDiffTagIndicies[] = array ('start' => $isolatedDiffTagStart, 'length' => $index - $isolatedDiffTagStart + 1, 'tagType' => $currentIsolatedDiffTag);
$currentIsolatedDiffTag = null;
}
}
}
$isolatedDiffTagScript = array();
$offset = 0;
foreach ($isolatedDiffTagIndicies as $isolatedDiffTagIndex) {
$start = $isolatedDiffTagIndex['start'] - $offset;
$placeholderString = $this->config->getIsolatedDiffTagPlaceholder($isolatedDiffTagIndex['tagType']);
$isolatedDiffTagScript[$start] = array_splice($words, $start, $isolatedDiffTagIndex['length'], $placeholderString);
$offset += $isolatedDiffTagIndex['length'] - 1;
}
return $isolatedDiffTagScript;
}
/**
* @param string $item
* @param null|string $currentIsolatedDiffTag
*
* @return false|string
*/
protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
$tagsToMatch = $currentIsolatedDiffTag !== null
? array($currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag))
: $this->config->getIsolatedDiffTags();
foreach ($tagsToMatch as $key => $value) {
if (preg_match("#<".$key."[^>]*>\\s*#iU", $item)) {
return $key;
}
}
return false;
}
/**
* @param string $item
* @param null|string $currentIsolatedDiffTag
*
* @return false|string
*/
protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
$tagsToMatch = $currentIsolatedDiffTag !== null
? array($currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag))
: $this->config->getIsolatedDiffTags();
foreach ($tagsToMatch as $key => $value) {
if (preg_match("#</".$key."[^>]*>\\s*#iU", $item)) {
return $key;
}
}
return false;
}
/**
* @param Operation $operation
*/
protected function performOperation($operation)
{
switch ($operation->action) {
case 'equal' :
$this->processEqualOperation( $operation );
break;
case 'delete' :
$this->processDeleteOperation( $operation, "diffdel" );
break;
case 'insert' :
$this->processInsertOperation( $operation, "diffins");
break;
case 'replace':
$this->processReplaceOperation( $operation );
break;
default:
break;
}
}
/**
* @param Operation $operation
*/
protected function processReplaceOperation($operation)
{
$this->processDeleteOperation( $operation, "diffmod" );
$this->processInsertOperation( $operation, "diffmod" );
}
/**
* @param Operation $operation
* @param string $cssClass
*/
protected function processInsertOperation($operation, $cssClass)
{
$text = array();
foreach ($this->newWords as $pos => $s) {
if ($pos >= $operation->startInNew && $pos < $operation->endInNew) {
if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->newIsolatedDiffTags[$pos])) {
foreach ($this->newIsolatedDiffTags[$pos] as $word) {
$text[] = $word;
}
} else {
$text[] = $s;
}
}
}
$this->insertTag( "ins", $cssClass, $text );
}
/**
* @param Operation $operation
* @param string $cssClass
*/
protected function processDeleteOperation($operation, $cssClass)
{
$text = array();
foreach ($this->oldWords as $pos => $s) {
if ($pos >= $operation->startInOld && $pos < $operation->endInOld) {
if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->oldIsolatedDiffTags[$pos])) {
foreach ($this->oldIsolatedDiffTags[$pos] as $word) {
$text[] = $word;
}
} else {
$text[] = $s;
}
}
}
$this->insertTag( "del", $cssClass, $text );
}
/**
* @param Operation $operation
* @param int $pos
* @param string $placeholder
* @param bool $stripWrappingTags
*
* @return string
*/
protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true)
{
$oldText = implode("", $this->findIsolatedDiffTagsInOld($operation, $pos));
$newText = implode("", $this->newIsolatedDiffTags[$pos]);
if ($this->isListPlaceholder($placeholder)) {
return $this->diffList($oldText, $newText);
} elseif ($this->config->isUseTableDiffing() && $this->isTablePlaceholder($placeholder)) {
return $this->diffTables($oldText, $newText);
} elseif ($this->isLinkPlaceholder($placeholder)) {
return $this->diffLinks($oldText, $newText);
}
return $this->diffElements($oldText, $newText, $stripWrappingTags);
}
/**
* @param string $oldText
* @param string $newText
* @param bool $stripWrappingTags
*
* @return string
*/
protected function diffElements($oldText, $newText, $stripWrappingTags = true)
{
$wrapStart = '';
$wrapEnd = '';
if ($stripWrappingTags) {
$pattern = '/(^<[^>]+>)|(<\/[^>]+>$)/i';
$matches = array();
if (preg_match_all($pattern, $newText, $matches)) {
$wrapStart = isset($matches[0][0]) ? $matches[0][0] : '';
$wrapEnd = isset($matches[0][1]) ? $matches[0][1] : '';
}
$oldText = preg_replace($pattern, '', $oldText);
$newText = preg_replace($pattern, '', $newText);
}
$diff = HtmlDiff::create($oldText, $newText, $this->config);
return $wrapStart . $diff->build() . $wrapEnd;
}
/**
* @param string $oldText
* @param string $newText
*
* @return string
*/
protected function diffList($oldText, $newText)
{
$diff = ListDiffNew::create($oldText, $newText, $this->config);
return $diff->build();
}
/**
* @param string $oldText
* @param string $newText
*
* @return string
*/
protected function diffTables($oldText, $newText)
{
$diff = TableDiff::create($oldText, $newText, $this->config);
return $diff->build();
}
/**
* @param string $oldText
* @param string $newText
*
* @return string
*/
protected function diffLinks($oldText, $newText)
{
$oldHref = $this->getAttributeFromTag($oldText, 'href');
$newHref = $this->getAttributeFromTag($newText, 'href');
if ($oldHref != $newHref) {
return sprintf(
'%s%s',
$this->wrapText($oldText, 'del', 'diffmod diff-href'),
$this->wrapText($newText, 'ins', 'diffmod diff-href')
);
}
return $this->diffElements($oldText, $newText);
}
/**
* @param Operation $operation
*/
protected function processEqualOperation($operation)
{
$result = array();
foreach ($this->newWords as $pos => $s) {
if ($pos >= $operation->startInNew && $pos < $operation->endInNew) {
if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->newIsolatedDiffTags[$pos])) {
$result[] = $this->diffIsolatedPlaceholder($operation, $pos, $s);
} else {
$result[] = $s;
}
}
}
$this->content .= implode( "", $result );
}
/**
* @param string $text
* @param string $attribute
*
* @return null|string
*/
protected function getAttributeFromTag($text, $attribute)
{
$matches = array();
if (preg_match(sprintf('/<a\s+[^>]*%s=([\'"])(.*)\1[^>]*>/i', $attribute), $text, $matches)) {
return $matches[2];
}
return null;
}
/**
* @param string $text
*
* @return bool
*/
protected function isListPlaceholder($text)
{
return $this->isPlaceholderType($text, array('ol', 'dl', 'ul'));
}
/**
* @param string $text
*
* @return bool
*/
public function isLinkPlaceholder($text)
{
return $this->isPlaceholderType($text, 'a');
}
/**
* @param string $text
* @param array|string $types
* @param bool $strict
*
* @return bool
*/
protected function isPlaceholderType($text, $types, $strict = true)
{
if (!is_array($types)) {
$types = array($types);
}
$criteria = array();
foreach ($types as $type) {
if ($this->config->isIsolatedDiffTag($type)) {
$criteria[] = $this->config->getIsolatedDiffTagPlaceholder($type);
} else {
$criteria[] = $type;
}
}
return in_array($text, $criteria, $strict);
}
/**
* @param string $text
*
* @return bool
*/
protected function isTablePlaceholder($text)
{
return $this->isPlaceholderType($text, 'table');
}
/**
* @param Operation $operation
* @param int $posInNew
*
* @return array
*/
protected function findIsolatedDiffTagsInOld($operation, $posInNew)
{
$offset = $posInNew - $operation->startInNew;
return $this->oldIsolatedDiffTags[$operation->startInOld + $offset];
}
/**
* @param string $tag
* @param string $cssClass
* @param array $words
*/
protected function insertTag($tag, $cssClass, &$words)
{
while (true) {
if ( count( $words ) == 0 ) {
break;
}
$nonTags = $this->extractConsecutiveWords( $words, 'noTag' );
$specialCaseTagInjection = '';
$specialCaseTagInjectionIsBefore = false;
if ( count( $nonTags ) != 0 ) {
$text = $this->wrapText( implode( "", $nonTags ), $tag, $cssClass );
$this->content .= $text;
} else {
$firstOrDefault = false;
foreach ($this->config->getSpecialCaseOpeningTags() as $x) {
if ( preg_match( $x, $words[ 0 ] ) ) {
$firstOrDefault = $x;
break;
}
}
if ($firstOrDefault) {
$specialCaseTagInjection = '<ins class="mod">';
if ($tag == "del") {
unset( $words[ 0 ] );
}
} elseif ( array_search( $words[ 0 ], $this->config->getSpecialCaseClosingTags()) !== false ) {
$specialCaseTagInjection = "</ins>";
$specialCaseTagInjectionIsBefore = true;
if ($tag == "del") {
unset( $words[ 0 ] );
}
}
}
if ( count( $words ) == 0 && count( $specialCaseTagInjection ) == 0 ) {
break;
}
if ($specialCaseTagInjectionIsBefore) {
$this->content .= $specialCaseTagInjection . implode( "", $this->extractConsecutiveWords( $words, 'tag' ) );
} else {
$workTag = $this->extractConsecutiveWords( $words, 'tag' );
if ( isset( $workTag[ 0 ] ) && $this->isOpeningTag( $workTag[ 0 ] ) && !$this->isClosingTag( $workTag[ 0 ] ) ) {
if ( strpos( $workTag[ 0 ], 'class=' ) ) {
$workTag[ 0 ] = str_replace( 'class="', 'class="diffmod ', $workTag[ 0 ] );
$workTag[ 0 ] = str_replace( "class='", 'class="diffmod ', $workTag[ 0 ] );
} else {
$workTag[ 0 ] = str_replace( ">", ' class="diffmod">', $workTag[ 0 ] );
}
}
$this->content .= implode( "", $workTag ) . $specialCaseTagInjection;
}
}
}
/**
* @param string $word
* @param string $condition
*
* @return bool
*/
protected function checkCondition($word, $condition)
{
return $condition == 'tag' ? $this->isTag( $word ) : !$this->isTag( $word );
}
/**
* @param string $text
* @param string $tagName
* @param string $cssClass
*
* @return string
*/
protected function wrapText($text, $tagName, $cssClass)
{
return sprintf( '<%1$s class="%2$s">%3$s</%1$s>', $tagName, $cssClass, $text );
}
/**
* @param array $words
* @param string $condition
*
* @return array
*/
protected function extractConsecutiveWords(&$words, $condition)
{
$indexOfFirstTag = null;
$words = array_values($words);
foreach ($words as $i => $word) {
if ( !$this->checkCondition( $word, $condition ) ) {
$indexOfFirstTag = $i;
break;
}
}
if ($indexOfFirstTag !== null) {
$items = array();
foreach ($words as $pos => $s) {
if ($pos >= 0 && $pos < $indexOfFirstTag) {
$items[] = $s;
}
}
if ($indexOfFirstTag > 0) {
array_splice( $words, 0, $indexOfFirstTag );
}
return $items;
} else {
$items = array();
foreach ($words as $pos => $s) {
if ( $pos >= 0 && $pos <= count( $words ) ) {
$items[] = $s;
}
}
array_splice( $words, 0, count( $words ) );
return $items;
}
}
/**
* @param string $item
*
* @return bool
*/
protected function isTag($item)
{
return $this->isOpeningTag( $item ) || $this->isClosingTag( $item );
}
/**
* @param string $item
*
* @return bool
*/
protected function isOpeningTag($item)
{
return preg_match( "#<[^>]+>\\s*#iU", $item );
}
/**
* @param string $item
*
* @return bool
*/
protected function isClosingTag($item)
{
return preg_match( "#</[^>]+>\\s*#iU", $item );
}
/**
* @return Operation[]
*/
protected function operations()
{
$positionInOld = 0;
$positionInNew = 0;
$operations = array();
$matches = $this->matchingBlocks();
$matches[] = new Match( count( $this->oldWords ), count( $this->newWords ), 0 );
foreach ($matches as $i => $match) {
$matchStartsAtCurrentPositionInOld = ( $positionInOld == $match->startInOld );
$matchStartsAtCurrentPositionInNew = ( $positionInNew == $match->startInNew );
$action = 'none';
if ($matchStartsAtCurrentPositionInOld == false && $matchStartsAtCurrentPositionInNew == false) {
$action = 'replace';
} elseif ($matchStartsAtCurrentPositionInOld == true && $matchStartsAtCurrentPositionInNew == false) {
$action = 'insert';
} elseif ($matchStartsAtCurrentPositionInOld == false && $matchStartsAtCurrentPositionInNew == true) {
$action = 'delete';
} else { // This occurs if the first few words are the same in both versions
$action = 'none';
}
if ($action != 'none') {
$operations[] = new Operation( $action, $positionInOld, $match->startInOld, $positionInNew, $match->startInNew );
}
if ( count( $match ) != 0 ) {
$operations[] = new Operation( 'equal', $match->startInOld, $match->endInOld(), $match->startInNew, $match->endInNew() );
}
$positionInOld = $match->endInOld();
$positionInNew = $match->endInNew();
}
return $operations;
}
/**
* @return Match[]
*/
protected function matchingBlocks()
{
$matchingBlocks = array();
$this->findMatchingBlocks( 0, count( $this->oldWords ), 0, count( $this->newWords ), $matchingBlocks );
return $matchingBlocks;
}
/**
* @param int $startInOld
* @param int $endInOld
* @param int $startInNew
* @param int $endInNew
* @param array $matchingBlocks
*/
protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks)
{
$match = $this->findMatch( $startInOld, $endInOld, $startInNew, $endInNew );
if ($match !== null) {
if ($startInOld < $match->startInOld && $startInNew < $match->startInNew) {
$this->findMatchingBlocks( $startInOld, $match->startInOld, $startInNew, $match->startInNew, $matchingBlocks );
}
$matchingBlocks[] = $match;
if ( $match->endInOld() < $endInOld && $match->endInNew() < $endInNew ) {
$this->findMatchingBlocks( $match->endInOld(), $endInOld, $match->endInNew(), $endInNew, $matchingBlocks );
}
}
}
/**
* @param string $word
*
* @return string
*/
protected function stripTagAttributes($word)
{
$word = explode( ' ', trim( $word, '<>' ) );
return '<' . $word[ 0 ] . '>';
}
/**
* @param int $startInOld
* @param int $endInOld
* @param int $startInNew
* @param int $endInNew
*
* @return Match|null
*/
protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew)
{
$bestMatchInOld = $startInOld;
$bestMatchInNew = $startInNew;
$bestMatchSize = 0;
$matchLengthAt = array();
for ($indexInOld = $startInOld; $indexInOld < $endInOld; $indexInOld++) {
$newMatchLengthAt = array();
$index = $this->oldWords[ $indexInOld ];
if ( $this->isTag( $index ) ) {
$index = $this->stripTagAttributes( $index );
}
if ( !isset( $this->wordIndices[ $index ] ) ) {
$matchLengthAt = $newMatchLengthAt;
continue;
}
foreach ($this->wordIndices[ $index ] as $indexInNew) {
if ($indexInNew < $startInNew) {
continue;
}
if ($indexInNew >= $endInNew) {
break;
}
$newMatchLength = ( isset( $matchLengthAt[ $indexInNew - 1 ] ) ? $matchLengthAt[ $indexInNew - 1 ] : 0 ) + 1;
$newMatchLengthAt[ $indexInNew ] = $newMatchLength;
if ($newMatchLength > $bestMatchSize ||
(
$this->isGroupDiffs() &&
$bestMatchSize > 0 &&
preg_match(
'/^\s+$/',
implode('', array_slice($this->oldWords, $bestMatchInOld, $bestMatchSize))
)
)
) {
$bestMatchInOld = $indexInOld - $newMatchLength + 1;
$bestMatchInNew = $indexInNew - $newMatchLength + 1;
$bestMatchSize = $newMatchLength;
}
}
$matchLengthAt = $newMatchLengthAt;
}
// Skip match if none found or match consists only of whitespace
if ($bestMatchSize != 0 &&
(
!$this->isGroupDiffs() ||
!preg_match('/^\s+$/', implode('', array_slice($this->oldWords, $bestMatchInOld, $bestMatchSize)))
)
) {
return new Match($bestMatchInOld, $bestMatchInNew, $bestMatchSize);
}
return null;
}
}