芝麻web文件管理V1.00
编辑当前文件:/home2/sdektunc/.trash/administrator.1/components/com_finder/src/Indexer/Parser/Html.php
* @license GNU General Public License version 2 or later; see LICENSE.txt */ namespace Joomla\Component\Finder\Administrator\Indexer\Parser; \defined('_JEXEC') or die; use Joomla\Component\Finder\Administrator\Indexer\Parser; /** * HTML Parser class for the Finder indexer package. * * @since 2.5 */ class Html extends Parser { /** * Method to parse input and extract the plain text. Because this method is * called from both inside and outside the indexer, it needs to be able to * batch out its parsing functionality to deal with the inefficiencies of * regular expressions. We will parse recursively in 2KB chunks. * * @param string $input The input to parse. * * @return string The plain text input. * * @since 2.5 */ public function parse($input) { // Strip invalid UTF-8 characters. $oldSetting = ini_get('mbstring.substitute_character'); ini_set('mbstring.substitute_character', 'none'); $input = mb_convert_encoding($input, 'UTF-8', 'UTF-8'); ini_set('mbstring.substitute_character', $oldSetting); // Remove anything between and tags. Do this first // because there might be removeBlocks($input, '