Source for file Tokenizer.php
Documentation is available at Tokenizer.php
* Format XML files containing unknown entities (like all of peardoc)
* phpDocumentor :: automatic documentation generator
* Copyright (c) 2004-2006 Gregory Beaver
* This library is free software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* @copyright 2004-2006 Gregory Beaver
* @license http://www.opensource.org/licenses/lgpl-license.php LGPL
* @version CVS: $Id: Tokenizer.php 289596 2009-10-12 21:08:13Z ashnazg $
* @link http://www.phpdoc.org
* @link http://pear.php.net/PhpDocumentor
* From the XML_Beautifier package
require_once 'XML/Beautifier/Tokenizer.php';
* Highlights source code using {@link parse()}
PHPDOC_XMLTOKEN_EVENT_NOEVENTS => 'normalHandler',
PHPDOC_XMLTOKEN_EVENT_XML => 'parseXMLHandler',
PHPDOC_XMLTOKEN_EVENT_PI => 'parsePiHandler',
PHPDOC_XMLTOKEN_EVENT_ATTRIBUTE => 'attrHandler',
PHPDOC_XMLTOKEN_EVENT_OPENTAG => 'tagHandler',
PHPDOC_XMLTOKEN_EVENT_IN_CDATA => 'realcdataHandler',
PHPDOC_XMLTOKEN_EVENT_DEF => 'defHandler',
PHPDOC_XMLTOKEN_EVENT_CLOSETAG => 'closetagHandler',
PHPDOC_XMLTOKEN_EVENT_ENTITY => 'entityHandler',
PHPDOC_XMLTOKEN_EVENT_COMMENT => 'commentHandler',
PHPDOC_XMLTOKEN_EVENT_SINGLEQUOTE => 'stringHandler',
PHPDOC_XMLTOKEN_EVENT_DOUBLEQUOTE => 'stringHandler',
PHPDOC_XMLTOKEN_EVENT_CDATA => 'parseCdataHandler',
* The parse() method is a do...while() loop that retrieves tokens one by
* one from the {@link $_event_stack}, and uses the token event array set up
* by the class constructor to call event handlers.
* The event handlers each process the tokens passed to them, and use the
* {@link _addoutput()} method to append the processed tokens to the
* {@link $_line} variable. The word parser calls {@link newLineNum()}
* every time a new line is reached.
* In addition, the event handlers use special linking functions
* {@link _link()} and its cousins (_classlink(), etc.) to create in-code
* hyperlinks to the documentation for source code elements that are in the
* @uses setupStates() initialize parser state variables
* @uses configWordParser() pass $parse_data to prepare retrieval of tokens
* @param false|string full path to file with @filesource tag, if this
* @param false|integer starting line number from {@}source linenum}
* @staticvar integer used for recursion limiting if a handler for
$parse_data = str_replace(array("\r\n", "\t"), array("\n", ' '), $parse_data);
// initialize variables so E_ALL error_reporting doesn't complain
$pevent = $this->_event_stack->getEvent();
$this->_last_pevent = $lpevent;
$this->_wp->setWhitespace(true);
$dbg_linenum = $this->_wp->linenum;
$dbg_pos = $this->_wp->getPos();
$this->_pv_last_word = $word;
$this->_pv_curline = $this->_wp->linenum;
$word = $this->_wp->getWord();
if (PHPDOCUMENTOR_DEBUG == true)
echo "|" . $this->_pv_last_word;
// echo "LINE: ".$this->_line."\n";
// echo "OUTPUT: ".$this->_output."\n";
echo $dbg_linenum. '-'. $dbg_pos . ": ";
echo "-------------------\n\n\n";
$this->$handle($word, $pevent);
echo ('WARNING: possible error, no handler for event number '. $pevent);
return $this->raiseError("FATAL ERROR, recursion limit reached");
} while (!($word === false));
* All Event Handlers use {@link checkEventPush()} and
* {@link checkEventPop()} to set up the event stack and parser state.
* @param string|array token value
* @param integer parser event from {@link Parser.inc}
* Most tokens only need highlighting, and this method handles them
function normalHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
$this->_curthing .= $word;
$this->_addoutput($pevent);
* handle <!-- comments -->
function commentHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .= $word;
$this->_addoutput($pevent);
* handle <?Processor instructions?>
function parsePiHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
if (!strlen($this->_curthing)) {
if (!isset ($this->_attrs) || !is_string($this->_attrs)) {
* handle <?xml Processor instructions?>
function parseXMLHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .= $word;
$this->_addoutput($pevent);
* handle <![CDATA[ unescaped text ]]>
function realcdataHandler($word, $pevent)
$this->_curthing .= $word;
$this->_addoutput($pevent);
function tagHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_tag = substr($word, 1);
$this->_addoutput($pevent);
function closetagHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
function defHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .= $word;
$this->_addoutput($pevent);
* handle attributes found inside an open <tag attr="attribute">
function attrHandler($word, $pevent)
if (!isset ($this->_attrs) || !is_array($this->_attrs)) {
$this->_wp->backupPos($word);
* handle attribute values
function stringHandler($word, $pevent)
$this->_attrs[$this->_attr] = $word;
function entityHandler($word, $pevent)
$this->_addoutput($pevent);
if (strlen($word) && $word{0} == '&') {
$this->_curthing .= $word;
function parseCdataHandler($word, $pevent)
$this->_wp->backupPos($word);
if (strlen($this->_curthing)) {
$this->_addoutput($pevent);
if (strlen($this->_curthing)) {
$this->_addoutput($pevent);
$this->_curthing .= $word;
* Handler for real character data
* @param object XML parser object
if ((string) $cdata === '') {
$this->_appendToParent($struct);
* This method adds output to {@link $_line}
* If a string with variables like "$test this" is present, then special
* handling is used to allow processing of the variable in context.
function _addoutput($event)
// echo "comment: $this->_curthing\n";
$this->$method($this->_curthing);
// echo "open tag: $this->_tag\n";
// var_dump($this->_attrs);
$this->$method(false, $this->_tag, $this->_attrs);
// echo "close tag: $this->_tag\n";
$this->$method(false, $this->_curthing);
if (!strlen($this->_curthing)) {
// echo "default: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "<!definition: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "<?pi: $this->_curthing\n";
// echo "<?pi attrs: $this->_attrs\n";
$this->$method(false, $this->_curthing, $this->_attrs);
// echo "<?xml: $this->_curthing\n";
$this->$method(false, $this->_curthing, $this->_attrs);
// echo "cdata: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "entity: $this->_curthing\n";
$this->$method(false, $this->_curthing, false, false, false);
* Tell the parser's WordParser {@link $wp} which tokens to use when parsing words.
* Tokens are word separators. In English, a space or a punctuation mark is an example of a token.
* In PHP, a token can be a ;, a parenthesis, or even the word "function".
* @param $value integer an event number
$this->_wp->setSeperator($this->tokens[($e + 100)]);
* this function checks whether parameter $word is a token for pushing a new event onto the Event Stack.
* @return mixed returns false, or the event number
if (isset ($this->pushEvent[$pevent]))
if (isset ($this->pushEvent[$pevent][strtolower($word)]))
$e = $this->pushEvent[$pevent][strtolower($word)];
$this->_event_stack->pushEvent($e);
* this function checks whether parameter $word is a token for popping the current event off of the Event Stack.
* @return mixed returns false, or the event number popped off of the stack
if (!isset ($this->popEvent[$pevent])) return false;
return $this->_event_stack->popEvent();
* Initialize all parser state variables
* @param boolean true if we are highlighting an inline {@}source} tag's
* @param false|string name of class we are going to start from
* @uses $_wp sets to a new {@link phpDocumentor_HighlightWordParser}
$this->_wp->setup($parsedata);
$this->_event_stack->popEvent();
$this->_pv_linenum = null;
$this->_pv_next_word = false;
* Initialize the {@link $tokenpushEvent, $wordpushEvent} arrays
/**************************************************************/
// '&' => PHPDOC_XMLTOKEN_EVENT_ENTITY,
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
// '&' => PHPDOC_XMLTOKEN_EVENT_ENTITY,
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
if (isset ($lookup[$value]))
/*
 * Tokenizer event and state constants.
 *
 * Every PHPDOC_XMLTOKEN_EVENT_* value has a companion STATE_XMLTOKEN_* value
 * that is exactly 100 higher than the event number.
 */

/** used when no other event is active (starting state) */
define('PHPDOC_XMLTOKEN_EVENT_NOEVENTS', 1);
/** currently in starting state */
define('STATE_XMLTOKEN_NOEVENTS', 101);

/** used when a processor instruction is found */
define('PHPDOC_XMLTOKEN_EVENT_PI', 2);
/** currently in processor instruction */
define('STATE_XMLTOKEN_PI', 102);

/** used when an open <tag> is found */
define('PHPDOC_XMLTOKEN_EVENT_OPENTAG', 3);
/** currently parsing an open <tag> */
define('STATE_XMLTOKEN_OPENTAG', 103);

/** used when a <tag attr="attribute"> is found */
define('PHPDOC_XMLTOKEN_EVENT_ATTRIBUTE', 4);
/** currently parsing an attribute of an open <tag> */
define('STATE_XMLTOKEN_ATTRIBUTE', 104);

/** used when a close </tag> is found */
define('PHPDOC_XMLTOKEN_EVENT_CLOSETAG', 5);
/** currently parsing a close </tag> */
define('STATE_XMLTOKEN_CLOSETAG', 105);

/** used when an &entity; is found */
define('PHPDOC_XMLTOKEN_EVENT_ENTITY', 6);
/** currently parsing an &entity; */
define('STATE_XMLTOKEN_ENTITY', 106);

/** used when a <!-- comment --> is found */
define('PHPDOC_XMLTOKEN_EVENT_COMMENT', 7);
/** currently parsing a <!-- comment --> */
define('STATE_XMLTOKEN_COMMENT', 107);

/** used when a single-quoted attribute value is found */
define('PHPDOC_XMLTOKEN_EVENT_SINGLEQUOTE', 8);
/** currently parsing a single-quoted attribute value */
define('STATE_XMLTOKEN_SINGLEQUOTE', 108);

/** used when a double-quoted attribute value is found */
define('PHPDOC_XMLTOKEN_EVENT_DOUBLEQUOTE', 9);
/** currently parsing a double-quoted attribute value */
define('STATE_XMLTOKEN_DOUBLEQUOTE', 109);

/** used when a <! is found */
define('PHPDOC_XMLTOKEN_EVENT_DEF', 10);
/** currently parsing a <! */
define('STATE_XMLTOKEN_DEF', 110);

/**
 * used for cdata (dispatched to parseCdataHandler)
 * NOTE(review): presumably plain character data between tags, as opposed to
 * a <![CDATA[ ]]> section (see PHPDOC_XMLTOKEN_EVENT_IN_CDATA) - confirm
 */
define('PHPDOC_XMLTOKEN_EVENT_CDATA', 11);
/** currently parsing cdata */
define('STATE_XMLTOKEN_CDATA', 111);

/** used when a <?xml is found */
define('PHPDOC_XMLTOKEN_EVENT_XML', 12);
/** currently parsing a <?xml */
define('STATE_XMLTOKEN_XML', 112);

/** used when a <![CDATA[ section is found */
define('PHPDOC_XMLTOKEN_EVENT_IN_CDATA', 13);
/** currently parsing a <![CDATA[ ]]> */
define('STATE_XMLTOKEN_IN_CDATA', 113);

/** do not remove, needed in plain renderer */
define('PHPDOC_BEAUTIFIER_CDATA', 100000);
|