Plugin to allow visitor contributions to WordPress posts, wiki style.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

945 lines
30KB

  1. <?php
  2. namespace Caxy\HtmlDiff;
  3. class ListDiff extends HtmlDiff
  4. {
  5. /**
  6. * This is the minimum percentage a list item can match its counterpart in order to be considered a match.
  7. * @var integer
  8. */
  9. protected static $listMatchThreshold = 35;
  10. /** @var array */
  11. protected $listWords = array();
  12. /** @var array */
  13. protected $listTags = array();
  14. /** @var array */
  15. protected $listIsolatedDiffTags = array();
  16. /** @var array */
  17. protected $isolatedDiffTags = array (
  18. 'ol' => '[[REPLACE_ORDERED_LIST]]',
  19. 'ul' => '[[REPLACE_UNORDERED_LIST]]',
  20. 'dl' => '[[REPLACE_DEFINITION_LIST]]',
  21. );
  22. /**
  23. * List (li) placeholder.
  24. * @var string
  25. */
  26. protected static $listPlaceHolder = "[[REPLACE_LIST_ITEM]]";
  27. /**
  28. * Holds the type of list this is ol, ul, dl.
  29. * @var string
  30. */
  31. protected $listType;
  32. /**
  33. * Used to hold what type of list the old list is.
  34. * @var string
  35. */
  36. protected $oldListType;
  37. /**
  38. * Used to hold what type of list the new list is.
  39. * @var string
  40. */
  41. protected $newListType;
  42. /**
  43. * Hold the old/new content of the content of the list.
  44. * @var array
  45. */
  46. protected $list;
  47. /**
  48. * Contains the old/new child lists content within this list.
  49. * @var array
  50. */
  51. protected $childLists;
  52. /**
  53. * Contains the old/new text strings that match
  54. * @var array
  55. */
  56. protected $textMatches;
  57. /**
  58. * Contains the indexed start positions of each list within word string.
  59. * @var array
  60. */
  61. protected $listsIndex;
  62. /**
  63. * Array that holds the index of all content outside of the array. Format is array(index => content).
  64. * @var array
  65. */
  66. protected $contentIndex = array();
  67. /**
  68. * Holds the order and data on each list/content block within this list.
  69. * @var array
  70. */
  71. protected $diffOrderIndex = array();
  72. /**
  73. * This is the opening ol,ul,dl ist tag.
  74. * @var string
  75. */
  76. protected $oldParentTag;
  77. /**
  78. * This is the opening ol,ul,dl ist tag.
  79. * @var string
  80. */
  81. protected $newParentTag;
  82. /**
  83. * We're using the same functions as the parent in build() to get us to the point of
  84. * manipulating the data within this class.
  85. *
  86. * @return string
  87. */
  88. public function build()
  89. {
  90. // Use the parent functions to get the data we need organized.
  91. $this->splitInputsToWords();
  92. $this->replaceIsolatedDiffTags();
  93. $this->indexNewWords();
  94. // Now use the custom functions in this class to use the data and generate our diff.
  95. $this->diffListContent();
  96. return $this->content;
  97. }
  98. /**
  99. * Calls to the actual custom functions of this class, to diff list content.
  100. */
  101. protected function diffListContent()
  102. {
  103. /* Format the list we're focusing on.
  104. * There will always be one list, though passed as an array with one item.
  105. * Format this to only have the list contents, outside of the array.
  106. */
  107. $this->formatThisListContent();
  108. /* Build an index of content outside of list tags.
  109. */
  110. $this->indexContent();
  111. /* In cases where we're dealing with nested lists,
  112. * make sure we use placeholders to replace the nested lists
  113. */
  114. $this->replaceListIsolatedDiffTags();
  115. /* Build a list of matches we can reference when we diff the contents of the lists.
  116. * This is needed so that we each NEW list node is matched against the best possible OLD list node/
  117. * It helps us determine whether the list was added, removed, or changed.
  118. */
  119. $this->matchAndCompareLists();
  120. /* Go through the list of matches, content, and diff each.
  121. * Any nested lists would be sent to parent's diffList function, which creates a new listDiff class.
  122. */
  123. $this->diff();
  124. }
  125. /**
  126. * This function is used to populate both contentIndex and diffOrderIndex arrays for use in the diff function.
  127. */
  128. protected function indexContent()
  129. {
  130. $this->contentIndex = array();
  131. $this->diffOrderIndex = array('new' => array(), 'old' => array());
  132. foreach ($this->list as $type => $list) {
  133. $this->contentIndex[$type] = array();
  134. $depth = 0;
  135. $parentList = 0;
  136. $position = 0;
  137. $newBlock = true;
  138. $listCount = 0;
  139. $contentCount = 0;
  140. foreach ($list as $key => $word) {
  141. if (!$parentList && $this->isOpeningListTag($word)) {
  142. $depth++;
  143. $this->diffOrderIndex[$type][] = array('type' => 'list', 'position' => $listCount, 'index' => $key);
  144. $listCount++;
  145. continue;
  146. }
  147. if (!$parentList && $this->isClosingListTag($word)) {
  148. $depth--;
  149. if ($depth == 0) {
  150. $newBlock = true;
  151. }
  152. continue;
  153. }
  154. if ($this->isOpeningIsolatedDiffTag($word)) {
  155. $parentList++;
  156. }
  157. if ($this->isClosingIsolatedDiffTag($word)) {
  158. $parentList--;
  159. }
  160. if ($depth == 0) {
  161. if ($newBlock && !array_key_exists($contentCount, $this->contentIndex[$type])) {
  162. $this->diffOrderIndex[$type][] = array('type' => 'content', 'position' => $contentCount, 'index' => $key);
  163. $position = $contentCount;
  164. $this->contentIndex[$type][$position] = '';
  165. $contentCount++;
  166. }
  167. $this->contentIndex[$type][$position] .= $word;
  168. }
  169. $newBlock = false;
  170. }
  171. }
  172. }
  173. /*
  174. * This function is used to remove the wrapped ul, ol, or dl characters from this list
  175. * and sets the listType as ul, ol, or dl, so that we can use it later.
  176. * $list is being set here as well, as an array with the old and new version of this list content.
  177. */
  178. protected function formatThisListContent()
  179. {
  180. $formatArray = array(
  181. array('type' => 'old', 'array' => $this->oldIsolatedDiffTags),
  182. array('type' => 'new', 'array' => $this->newIsolatedDiffTags)
  183. );
  184. foreach ($formatArray as $item) {
  185. $values = array_values($item['array']);
  186. $this->list[$item['type']] = count($values)
  187. ? $this->formatList($values[0], $item['type'])
  188. : array();
  189. }
  190. $this->listType = $this->newListType ?: $this->oldListType;
  191. }
  192. /**
  193. *
  194. * @param array $arrayData
  195. * @param string $index
  196. * @return array
  197. */
  198. protected function formatList(array $arrayData, $index = 'old')
  199. {
  200. $openingTag = $this->getAndStripTag($arrayData[0]);
  201. $closingTag = $this->getAndStripTag($arrayData[count($arrayData) - 1]);
  202. if (array_key_exists($openingTag, $this->isolatedDiffTags) &&
  203. array_key_exists($closingTag, $this->isolatedDiffTags)
  204. ) {
  205. if ($index == 'new' && $this->isOpeningTag($arrayData[0])) {
  206. $this->newParentTag = $arrayData[0];
  207. $this->newListType = $this->getAndStripTag($arrayData[0]);
  208. }
  209. if ($index == 'old' && $this->isOpeningTag($arrayData[0])) {
  210. $this->oldParentTag = $arrayData[0];
  211. $this->oldListType = $this->getAndStripTag($arrayData[0]);
  212. }
  213. array_shift($arrayData);
  214. array_pop($arrayData);
  215. }
  216. return $arrayData;
  217. }
  218. /**
  219. * @param string $tag
  220. * @return string
  221. */
  222. protected function getAndStripTag($tag)
  223. {
  224. $content = explode(' ', preg_replace("/[^A-Za-z0-9 ]/", '', $tag));
  225. return $content[0];
  226. }
  227. protected function matchAndCompareLists()
  228. {
  229. /**
  230. * Build the an array (childLists) to hold the contents of the list nodes within this list.
  231. * This only holds the content of each list node.
  232. */
  233. $this->buildChildLists();
  234. /**
  235. * Index the list, starting positions, so that we can refer back to it later.
  236. * This is used to see where one list node starts and another ends.
  237. */
  238. $this->indexLists();
  239. /**
  240. * Compare the lists and build $textMatches array with the matches.
  241. * Each match is an array of "new" and "old" keys, with the id of the list it matches to.
  242. * Whenever there is no match (in cases where a new list item was added or removed), null is used instead of the id.
  243. */
  244. $this->compareChildLists();
  245. }
  246. /**
  247. * Creates matches for lists.
  248. */
  249. protected function compareChildLists()
  250. {
  251. $this->createNewOldMatches($this->childLists, $this->textMatches, 'content');
  252. }
  253. /**
  254. * Abstracted function used to match items in an array.
  255. * This is used primarily for populating lists matches.
  256. *
  257. * @param array $listArray
  258. * @param array $resultArray
  259. * @param string|null $column
  260. */
  261. protected function createNewOldMatches(&$listArray, &$resultArray, $column = null)
  262. {
  263. // Always compare the new against the old.
  264. // Compare each new string against each old string.
  265. $bestMatchPercentages = array();
  266. foreach ($listArray['new'] as $thisKey => $thisList) {
  267. $bestMatchPercentages[$thisKey] = array();
  268. foreach ($listArray['old'] as $thatKey => $thatList) {
  269. // Save the percent amount each new list content compares against the old list content.
  270. similar_text(
  271. $column ? $thisList[$column] : $thisList,
  272. $column ? $thatList[$column] : $thatList,
  273. $percentage
  274. );
  275. $bestMatchPercentages[$thisKey][] = $percentage;
  276. }
  277. }
  278. // Sort each array by value, highest percent to lowest percent.
  279. foreach ($bestMatchPercentages as &$thisMatch) {
  280. arsort($thisMatch);
  281. }
  282. // Build matches.
  283. $matches = array();
  284. $taken = array();
  285. $takenItems = array();
  286. $absoluteMatch = 100;
  287. foreach ($bestMatchPercentages as $item => $percentages) {
  288. $highestMatch = -1;
  289. $highestMatchKey = -1;
  290. $takeItemKey = -1;
  291. foreach ($percentages as $key => $percent) {
  292. // Check that the key for the percentage is not already taken and the new percentage is higher.
  293. if (!in_array($key, $taken) && $percent > $highestMatch) {
  294. // If an absolute match, choose this one.
  295. if ($percent == $absoluteMatch) {
  296. $highestMatch = $percent;
  297. $highestMatchKey = $key;
  298. $takenItemKey = $item;
  299. break;
  300. } else {
  301. // Get all the other matces for the same $key
  302. $columns = $this->getArrayColumn($bestMatchPercentages, $key);
  303. $thisBestMatches = array_filter(
  304. $columns,
  305. function ($v) use ($percent) {
  306. return $v > $percent;
  307. }
  308. );
  309. arsort($thisBestMatches);
  310. /**
  311. * If the list item does not meet the threshold, it will not be considered a match.
  312. */
  313. if ($percent >= self::$listMatchThreshold) {
  314. // If no greater amounts, use this one.
  315. if (!count($thisBestMatches)) {
  316. $highestMatch = $percent;
  317. $highestMatchKey = $key;
  318. $takenItemKey = $item;
  319. break;
  320. }
  321. // Loop through, comparing only the items that have not already been added.
  322. foreach ($thisBestMatches as $k => $v) {
  323. if (in_array($k, $takenItems)) {
  324. $highestMatch = $percent;
  325. $highestMatchKey = $key;
  326. $takenItemKey = $item;
  327. break(2);
  328. }
  329. }
  330. }
  331. }
  332. }
  333. }
  334. $matches[] = array('new' => $item, 'old' => $highestMatchKey > -1 ? $highestMatchKey : null);
  335. if ($highestMatchKey > -1) {
  336. $taken[] = $highestMatchKey;
  337. $takenItems[] = $takenItemKey;
  338. }
  339. }
  340. /* Checking for removed items. Basically, if a list item from the old lists is removed
  341. * it will not be accounted for, and will disappear in the results altogether.
  342. * Loop through all the old lists, any that has not been added, will be added as:
  343. * array( new => null, old => oldItemId )
  344. */
  345. $matchColumns = $this->getArrayColumn($matches, 'old');
  346. foreach ($listArray['old'] as $thisKey => $thisList) {
  347. if (!in_array($thisKey, $matchColumns)) {
  348. $matches[] = array('new' => null, 'old' => $thisKey);
  349. }
  350. }
  351. // Save the matches.
  352. $resultArray = $matches;
  353. }
  354. /**
  355. * This fuction is exactly like array_column. This is added for PHP versions that do not support array_column.
  356. * @param array $targetArray
  357. * @param mixed $key
  358. * @return array
  359. */
  360. protected function getArrayColumn(array $targetArray, $key)
  361. {
  362. $data = array();
  363. foreach ($targetArray as $item) {
  364. if (array_key_exists($key, $item)) {
  365. $data[] = $item[$key];
  366. }
  367. }
  368. return $data;
  369. }
  370. /**
  371. * Build multidimensional array holding the contents of each list node, old and new.
  372. */
  373. protected function buildChildLists()
  374. {
  375. $this->childLists['old'] = $this->getListsContent($this->list['old']);
  376. $this->childLists['new'] = $this->getListsContent($this->list['new']);
  377. }
  378. /**
  379. * Diff the actual contents of the lists against their matched counterpart.
  380. * Build the content of the class.
  381. */
  382. protected function diff()
  383. {
  384. // Add the opening parent node from listType. So if ol, <ol>, etc.
  385. $this->content = $this->addListTypeWrapper();
  386. $oldIndexCount = 0;
  387. $diffOrderNewKeys = array_keys($this->diffOrderIndex['new']);
  388. foreach ($this->diffOrderIndex['new'] as $key => $index) {
  389. if ($index['type'] == "list") {
  390. // Check to see if an old list was deleted.
  391. $oldMatch = $this->getArrayByColumnValue($this->textMatches, 'old', $index['position']);
  392. if ($oldMatch && $oldMatch['new'] === null) {
  393. $newList = '';
  394. $oldList = $this->getListByMatch($oldMatch, 'old');
  395. $this->content .= $this->addListElementToContent($newList, $oldList, $oldMatch, $index, 'old');
  396. }
  397. $match = $this->getArrayByColumnValue($this->textMatches, 'new', $index['position']);
  398. $newList = $this->childLists['new'][$match['new']];
  399. $oldList = $this->getListByMatch($match, 'old');
  400. $this->content .= $this->addListElementToContent($newList, $oldList, $match, $index, 'new');
  401. }
  402. if ($index['type'] == 'content') {
  403. $this->content .= $this->addContentElementsToContent($oldIndexCount, $index['position']);
  404. }
  405. $oldIndexCount++;
  406. if ($key == $diffOrderNewKeys[count($diffOrderNewKeys) - 1]) {
  407. foreach ($this->diffOrderIndex['old'] as $oldKey => $oldIndex) {
  408. if ($oldKey > $key) {
  409. if ($oldIndex['type'] == 'list') {
  410. $oldMatch = $this->getArrayByColumnValue($this->textMatches, 'old', $oldIndex['position']);
  411. if ($oldMatch && $oldMatch['new'] === null) {
  412. $newList = '';
  413. $oldList = $this->getListByMatch($oldMatch, 'old');
  414. $this->content .= $this->addListElementToContent($newList, $oldList, $oldMatch, $oldIndex, 'old');
  415. }
  416. } else {
  417. $this->content .= $this->addContentElementsToContent($oldKey);
  418. }
  419. }
  420. }
  421. }
  422. }
  423. // Add the closing parent node from listType. So if ol, </ol>, etc.
  424. $this->content .= $this->addListTypeWrapper(false);
  425. }
  426. /**
  427. *
  428. * @param string $newList
  429. * @param string $oldList
  430. * @param array $match
  431. * @param array $index
  432. * @return string
  433. */
  434. protected function addListElementToContent($newList, $oldList, array $match, array $index, $type)
  435. {
  436. $content = $this->list[$type][$index['index']];
  437. $content .= $this->processPlaceholders(
  438. $this->diffElements(
  439. $this->convertListContentArrayToString($oldList),
  440. $this->convertListContentArrayToString($newList),
  441. false
  442. ),
  443. $match
  444. );
  445. $content .= "</li>";
  446. return $content;
  447. }
  448. /**
  449. *
  450. * @param integer $oldIndexCount
  451. * @param null|integer $newPosition
  452. * @return string
  453. */
  454. protected function addContentElementsToContent($oldIndexCount, $newPosition = null)
  455. {
  456. $newContent = $newPosition && array_key_exists($newPosition, $this->contentIndex['new'])
  457. ? $this->contentIndex['new'][$newPosition]
  458. : '';
  459. $oldDiffOrderIndexMatch = array_key_exists($oldIndexCount, $this->diffOrderIndex['old'])
  460. ? $this->diffOrderIndex['old'][$oldIndexCount]
  461. : '';
  462. $oldContent = $oldDiffOrderIndexMatch && array_key_exists($oldDiffOrderIndexMatch['position'], $this->contentIndex['old'])
  463. ? $this->contentIndex['old'][$oldDiffOrderIndexMatch['position']]
  464. : '';
  465. $diffObject = new HtmlDiff($oldContent, $newContent);
  466. $content = $diffObject->build();
  467. return $content;
  468. }
  469. /**
  470. *
  471. * @param array $match
  472. * @param string $type
  473. * @return array|string
  474. */
  475. protected function getListByMatch(array $match, $type = 'new')
  476. {
  477. return array_key_exists($match[$type], $this->childLists[$type])
  478. ? $this->childLists[$type][$match[$type]]
  479. : '';
  480. }
  481. /**
  482. * This function replaces array_column function in PHP for older versions of php.
  483. *
  484. * @param array $parentArray
  485. * @param string $column
  486. * @param mixed $value
  487. * @param boolean $allMatches
  488. * @return array|boolean
  489. */
  490. protected function getArrayByColumnValue($parentArray, $column, $value, $allMatches = false)
  491. {
  492. $returnArray = array();
  493. foreach ($parentArray as $array) {
  494. if (array_key_exists($column, $array) && $array[$column] == $value) {
  495. if ($allMatches) {
  496. $returnArray[] = $array;
  497. } else {
  498. return $array;
  499. }
  500. }
  501. }
  502. return $allMatches ? $returnArray : false;
  503. }
  504. /**
  505. * Converts the list (li) content arrays to string.
  506. *
  507. * @param array $listContentArray
  508. * @return string
  509. */
  510. protected function convertListContentArrayToString($listContentArray)
  511. {
  512. if (!is_array($listContentArray)) {
  513. return $listContentArray;
  514. }
  515. $content = array();
  516. $words = explode(" ", $listContentArray['content']);
  517. $nestedListCount = 0;
  518. foreach ($words as $word) {
  519. $match = $word == self::$listPlaceHolder;
  520. $content[] = $match
  521. ? "<li>" . $this->convertListContentArrayToString($listContentArray['kids'][$nestedListCount]) . "</li>"
  522. : $word;
  523. if ($match) {
  524. $nestedListCount++;
  525. }
  526. }
  527. return implode(" ", $content);
  528. }
  529. /**
  530. * Return the contents of each list node.
  531. * Process any placeholders for nested lists.
  532. *
  533. * @param string $text
  534. * @param array $matches
  535. * @return string
  536. */
  537. protected function processPlaceholders($text, array $matches)
  538. {
  539. // Prepare return
  540. $returnText = array();
  541. // Save the contents of all list nodes, new and old.
  542. $contentVault = array(
  543. 'old' => $this->getListContent('old', $matches),
  544. 'new' => $this->getListContent('new', $matches)
  545. );
  546. $count = 0;
  547. // Loop through the text checking for placeholders. If a nested list is found, create a new ListDiff object for it.
  548. foreach (explode(' ', $text) as $word) {
  549. $preContent = $this->checkWordForDiffTag($this->stripNewLine($word));
  550. if (in_array(
  551. is_array($preContent) ? $preContent[1] : $preContent,
  552. $this->isolatedDiffTags
  553. )
  554. ) {
  555. $oldText = array_key_exists($count, $contentVault['old']) ? implode('', $contentVault['old'][$count]) : '';
  556. $newText = array_key_exists($count, $contentVault['new']) ? implode('', $contentVault['new'][$count]) : '';
  557. $content = $this->diffList($oldText, $newText);
  558. $count++;
  559. } else {
  560. $content = $preContent;
  561. }
  562. $returnText[] = is_array($preContent) ? $preContent[0] . $content . $preContent[2] : $content;
  563. }
  564. // Return the result.
  565. return implode(' ', $returnText);
  566. }
  567. /**
  568. * Checks to see if a diff tag is in string.
  569. *
  570. * @param string $word
  571. * @return string
  572. */
  573. protected function checkWordForDiffTag($word)
  574. {
  575. foreach ($this->isolatedDiffTags as $diffTag) {
  576. if (strpos($word, $diffTag) > -1) {
  577. $position = strpos($word, $diffTag);
  578. $length = strlen($diffTag);
  579. $result = array(
  580. substr($word, 0, $position),
  581. $diffTag,
  582. substr($word, ($position + $length))
  583. );
  584. return $result;
  585. }
  586. }
  587. return $word;
  588. }
  589. /**
  590. * Used to remove new lines.
  591. *
  592. * @param string $text
  593. * @return string
  594. */
  595. protected function stripNewLine($text)
  596. {
  597. return trim(preg_replace('/\s\s+/', ' ', $text));
  598. }
  599. /**
  600. * Grab the list content using the listsIndex array.
  601. *
  602. * @param string $indexKey
  603. * @param array $matches
  604. * @return array
  605. */
  606. protected function getListContent($indexKey = 'new', array $matches)
  607. {
  608. $bucket = array();
  609. if (isset($matches[$indexKey]) && $matches[$indexKey] !== null) {
  610. $start = $this->listsIndex[$indexKey][$matches[$indexKey]];
  611. $stop = $this->findEndForIndex($this->list[$indexKey], $start);
  612. for ($x = $start; $x <= $stop; $x++) {
  613. if (in_array($this->list[$indexKey][$x], $this->isolatedDiffTags)) {
  614. $bucket[] = $this->listIsolatedDiffTags[$indexKey][$x];
  615. }
  616. }
  617. }
  618. return $bucket;
  619. }
  620. /**
  621. * Finds the end of list within its index.
  622. *
  623. * @param array $index
  624. * @param integer $start
  625. * @return integer
  626. */
  627. protected function findEndForIndex(array $index, $start)
  628. {
  629. $array = array_splice($index, $start);
  630. $count = 0;
  631. foreach ($array as $key => $item) {
  632. if ($this->isOpeningListTag($item)) {
  633. $count++;
  634. }
  635. if ($this->isClosingListTag($item)) {
  636. $count--;
  637. if ($count === 0) {
  638. return $start + $key;
  639. }
  640. }
  641. }
  642. return $start + count($array);
  643. }
  644. /**
  645. * indexLists
  646. *
  647. * Index the list, starting positions, so that we can refer back to it later.
  648. * This is used to see where one list node starts and another ends.
  649. */
  650. protected function indexLists()
  651. {
  652. $this->listsIndex = array();
  653. $count = 0;
  654. foreach ($this->list as $type => $list) {
  655. $this->listsIndex[$type] = array();
  656. foreach ($list as $key => $listItem) {
  657. if ($this->isOpeningListTag($listItem)) {
  658. $count++;
  659. if ($count === 1) {
  660. $this->listsIndex[$type][] = $key;
  661. }
  662. }
  663. if ($this->isClosingListTag($listItem)) {
  664. $count--;
  665. }
  666. }
  667. }
  668. }
  669. /**
  670. * Adds the opening or closing list html element, based on listType.
  671. *
  672. * @param boolean $opening
  673. * @return string
  674. */
  675. protected function addListTypeWrapper($opening = true)
  676. {
  677. if ($opening) {
  678. return $this->newParentTag ?: $this->oldParentTag;
  679. } else {
  680. return "<" . (!$opening ? "/" : '') . $this->listType . ">";
  681. }
  682. }
  683. /**
  684. * Replace nested list with placeholders.
  685. */
  686. public function replaceListIsolatedDiffTags()
  687. {
  688. $this->listIsolatedDiffTags['old'] = $this->createIsolatedDiffTagPlaceholders($this->list['old']);
  689. $this->listIsolatedDiffTags['new'] = $this->createIsolatedDiffTagPlaceholders($this->list['new']);
  690. }
  691. /**
  692. * Grab the contents of a list node.
  693. *
  694. * @param array $contentArray
  695. * @param boolean $stripTags
  696. * @return array
  697. */
  698. protected function getListsContent(array $contentArray, $stripTags = true)
  699. {
  700. $lematches = array();
  701. $arrayDepth = 0;
  702. $nestedCount = array();
  703. foreach ($contentArray as $index => $word) {
  704. if ($this->isOpeningListTag($word)) {
  705. $arrayDepth++;
  706. if (!array_key_exists($arrayDepth, $nestedCount)) {
  707. $nestedCount[$arrayDepth] = 1;
  708. } else {
  709. $nestedCount[$arrayDepth]++;
  710. }
  711. continue;
  712. }
  713. if ($this->isClosingListTag($word)) {
  714. $arrayDepth--;
  715. continue;
  716. }
  717. if ($arrayDepth > 0) {
  718. $this->addStringToArrayByDepth($word, $lematches, $arrayDepth, 1, $nestedCount);
  719. }
  720. }
  721. return $lematches;
  722. }
  723. /**
  724. * This function helps build the list content array of a list.
  725. * If a list has another list within it, the inner list is replaced with the list placeholder and the inner list
  726. * content becomes a child of the parent list.
  727. * This goes recursively down.
  728. *
  729. * @param string $word
  730. * @param array $array
  731. * @param integer $targetDepth
  732. * @param integer $thisDepth
  733. * @param array $nestedCount
  734. */
  735. protected function addStringToArrayByDepth($word, array &$array, $targetDepth, $thisDepth, array $nestedCount)
  736. {
  737. // determine what depth we're at
  738. if ($targetDepth == $thisDepth) {
  739. // decide on what to do at this level
  740. if (array_key_exists('content', $array)) {
  741. $array['content'] .= $word;
  742. } else {
  743. // if we're on depth 1, add content
  744. if ($nestedCount[$targetDepth] > count($array)) {
  745. $array[] = array('content' => '', 'kids' => array());
  746. }
  747. $array[count($array) - 1]['content'] .= $word;
  748. }
  749. } else {
  750. // create first kid if not exist
  751. $newArray = array('content' => '', 'kids' => array());
  752. if (array_key_exists('kids', $array)) {
  753. if ($nestedCount[$targetDepth] > count($array['kids'])) {
  754. $array['kids'][] = $newArray;
  755. $array['content'] .= self::$listPlaceHolder;
  756. }
  757. // continue to the next depth
  758. $thisDepth++;
  759. // get last kid and send to next depth
  760. $this->addStringToArrayByDepth(
  761. $word,
  762. $array['kids'][count($array['kids']) - 1],
  763. $targetDepth,
  764. $thisDepth,
  765. $nestedCount
  766. );
  767. } else {
  768. if ($nestedCount[$targetDepth] > count($array[count($array) - 1]['kids'])) {
  769. $array[count($array) - 1]['kids'][] = $newArray;
  770. $array[count($array) - 1]['content'] .= self::$listPlaceHolder;
  771. }
  772. // continue to the next depth
  773. $thisDepth++;
  774. // get last kid and send to next depth
  775. $this->addStringToArrayByDepth(
  776. $word,
  777. $array[count($array) - 1]['kids'][count($array[count($array) - 1]['kids']) - 1],
  778. $targetDepth,
  779. $thisDepth,
  780. $nestedCount
  781. );
  782. }
  783. }
  784. }
  785. /**
  786. * Checks if text is opening list tag.
  787. *
  788. * @param string $item
  789. * @return boolean
  790. */
  791. protected function isOpeningListTag($item)
  792. {
  793. if (preg_match("#<li[^>]*>\\s*#iU", $item)) {
  794. return true;
  795. }
  796. return false;
  797. }
  798. /**
  799. * Check if text is closing list tag.
  800. *
  801. * @param string $item
  802. * @return boolean
  803. */
  804. protected function isClosingListTag($item)
  805. {
  806. if (preg_match("#</li[^>]*>\\s*#iU", $item)) {
  807. return true;
  808. }
  809. return false;
  810. }
  811. }