Source for file Tokenizer.php
Documentation is available at Tokenizer.php
* Format XML files containing unknown entities (like all of peardoc)
* phpDocumentor :: automatic documentation generator
* Copyright (c) 2004-2006 Gregory Beaver
* This library is free software; you can redistribute it
* and/or modify it under the terms of the GNU Lesser General
* Public License as published by the Free Software Foundation;
* either version 2.1 of the License, or (at your option) any later version.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
* @copyright 2004-2006 Gregory Beaver
* @license http://www.opensource.org/licenses/lgpl-license.php LGPL
* @version CVS: $Id: Tokenizer.php 289596 2009-10-12 21:08:13Z ashnazg $
* @link http://www.phpdoc.org
* @link http://pear.php.net/PhpDocumentor
* From the XML_Beautifier package
require_once 'XML/Beautifier/Tokenizer.php';
* Highlights source code using {@link parse()}
PHPDOC_XMLTOKEN_EVENT_NOEVENTS =>
'normalHandler',
PHPDOC_XMLTOKEN_EVENT_XML =>
'parseXMLHandler',
PHPDOC_XMLTOKEN_EVENT_PI =>
'parsePiHandler',
PHPDOC_XMLTOKEN_EVENT_ATTRIBUTE =>
'attrHandler',
PHPDOC_XMLTOKEN_EVENT_OPENTAG =>
'tagHandler',
PHPDOC_XMLTOKEN_EVENT_IN_CDATA =>
'realcdataHandler',
PHPDOC_XMLTOKEN_EVENT_DEF =>
'defHandler',
PHPDOC_XMLTOKEN_EVENT_CLOSETAG =>
'closetagHandler',
PHPDOC_XMLTOKEN_EVENT_ENTITY =>
'entityHandler',
PHPDOC_XMLTOKEN_EVENT_COMMENT =>
'commentHandler',
PHPDOC_XMLTOKEN_EVENT_SINGLEQUOTE =>
'stringHandler',
PHPDOC_XMLTOKEN_EVENT_DOUBLEQUOTE =>
'stringHandler',
PHPDOC_XMLTOKEN_EVENT_CDATA =>
'parseCdataHandler',
* The parse() method is a do...while() loop that retrieves tokens one by
* one from the {@link $_event_stack}, and uses the token event array set up
* by the class constructor to call event handlers.
* The event handlers each process the tokens passed to them, and use the
* {@link _addoutput()} method to append the processed tokens to the
* {@link $_line} variable. The word parser calls {@link newLineNum()}
* every time a line is reached.
* In addition, the event handlers use special linking functions
* {@link _link()} and its cousins (_classlink(), etc.) to create in-code
* hyperlinks to the documentation for source code elements that are in the
* source code.
* @uses setupStates() initialize parser state variables
* @uses configWordParser() pass $parse_data to prepare retrieval of tokens
* @param false|string full path to file with @filesource tag, if this
* is a @filesource parse
* @param false|integer starting line number from {@}source linenum}
* @staticvar integer used for recursion limiting if a handler for
* an event is not found
$parse_data =
str_replace(array("\r\n", "\t"), array("\n", ' '), $parse_data);
// initialize variables so E_ALL error_reporting doesn't complain
$pevent =
$this->_event_stack->getEvent();
$this->_last_pevent =
$lpevent;
$this->_wp->setWhitespace(true);
$dbg_linenum =
$this->_wp->linenum;
$dbg_pos =
$this->_wp->getPos();
$this->_pv_last_word =
$word;
$this->_pv_curline =
$this->_wp->linenum;
$word =
$this->_wp->getWord();
if (PHPDOCUMENTOR_DEBUG ==
true)
echo
"|" .
$this->_pv_last_word;
// echo "LINE: ".$this->_line."\n";
// echo "OUTPUT: ".$this->_output."\n";
echo
$dbg_linenum.
'-'.
$dbg_pos .
": ";
echo
"-------------------\n\n\n";
$this->$handle($word, $pevent);
echo
('WARNING: possible error, no handler for event number '.
$pevent);
return $this->raiseError("FATAL ERROR, recursion limit reached");
} while (!($word ===
false));
* All Event Handlers use {@link checkEventPush()} and
* {@link checkEventPop()} to set up the event stack and parser state.
* @param string|array token value
* @param integer parser event from {@link Parser.inc}
* Most tokens only need highlighting, and this method handles them
function normalHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
$this->_curthing .=
$word;
$this->_addoutput($pevent);
* handle <!-- comments -->
function commentHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .=
$word;
$this->_addoutput($pevent);
* handle <?Processor instructions?>
function parsePiHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
if (!strlen($this->_curthing)) {
if (!isset
($this->_attrs) ||
!is_string($this->_attrs)) {
* handle <?xml Processor instructions?>
function parseXMLHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .=
$word;
$this->_addoutput($pevent);
* handle <![CDATA[ unescaped text ]]>
function realcdataHandler($word, $pevent)
$this->_curthing .=
$word;
$this->_addoutput($pevent);
function tagHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_tag =
substr($word, 1);
$this->_addoutput($pevent);
function closetagHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_addoutput($pevent);
function defHandler($word, $pevent)
$this->_wp->backupPos($word);
$this->_curthing .=
$word;
$this->_addoutput($pevent);
* Most tokens only need highlighting, and this method handles them
function attrHandler($word, $pevent)
if (!isset
($this->_attrs) ||
!is_array($this->_attrs)) {
$this->_wp->backupPos($word);
* handle attribute values
function stringHandler($word, $pevent)
$this->_attrs[$this->_attr] =
$word;
function entityHandler($word, $pevent)
$this->_addoutput($pevent);
if (strlen($word) &&
$word{0} ==
'&') {
$this->_curthing .=
$word;
function parseCdataHandler($word, $pevent)
$this->_wp->backupPos($word);
if (strlen($this->_curthing)) {
$this->_addoutput($pevent);
if (strlen($this->_curthing)) {
$this->_addoutput($pevent);
$this->_curthing .=
$word;
* Handler for real character data
* @param object XML parser object
if ((string)
$cdata ===
'') {
$this->_appendToParent($struct);
* This method adds output to {@link $_line}
* If a string with variables like "$test this" is present, then special
* handling is used to allow processing of the variable in context.
function _addoutput($event)
// echo "comment: $this->_curthing\n";
$this->$method($this->_curthing);
// echo "open tag: $this->_tag\n";
// var_dump($this->_attrs);
$this->$method(false, $this->_tag, $this->_attrs);
// echo "close tag: $this->_tag\n";
$this->$method(false, $this->_curthing);
if (!strlen($this->_curthing)) {
// echo "default: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "<!definition: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "<?pi: $this->_curthing\n";
// echo "<?pi attrs: $this->_attrs\n";
$this->$method(false, $this->_curthing, $this->_attrs);
// echo "<?xml: $this->_curthing\n";
$this->$method(false, $this->_curthing, $this->_attrs);
// echo "cdata: $this->_curthing\n";
$this->$method(false, $this->_curthing);
// echo "entity: $this->_curthing\n";
$this->$method(false, $this->_curthing, false, false, false);
* tell the parser's WordParser {@link $wp} to set up tokens to parse words by.
* tokens are word separators. In English, a space or punctuation are examples of tokens.
* In PHP, a token can be a ;, a parenthesis, or even the word "function"
* @param $value integer an event number
$this->_wp->setSeperator($this->tokens[($e +
100)]);
* this function checks whether parameter $word is a token for pushing a new event onto the Event Stack.
* @return mixed returns false, or the event number
if (isset
($this->pushEvent[$pevent]))
if (isset
($this->pushEvent[$pevent][strtolower($word)]))
$e =
$this->pushEvent[$pevent][strtolower($word)];
$this->_event_stack->pushEvent($e);
* this function checks whether parameter $word is a token for popping the current event off of the Event Stack.
* @return mixed returns false, or the event number popped off of the stack
if (!isset
($this->popEvent[$pevent])) return false;
return $this->_event_stack->popEvent();
* Initialize all parser state variables
* @param boolean true if we are highlighting an inline {@}source} tag's
* output
* @param false|string name of class we are going to start from
* @uses $_wp sets to a new {@link phpDocumentor_HighlightWordParser}
$this->_wp->setup($parsedata);
$this->_event_stack->popEvent();
$this->_pv_linenum =
null;
$this->_pv_next_word =
false;
* Initialize the {@link $tokenpushEvent, $wordpushEvent} arrays
/**************************************************************/
// '&' => PHPDOC_XMLTOKEN_EVENT_ENTITY,
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
// '&' => PHPDOC_XMLTOKEN_EVENT_ENTITY,
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
/**************************************************************/
if (isset
($lookup[$value]))
/*
 * XML tokenizer event / state constants.
 *
 * Every STATE_XMLTOKEN_* value equals the matching PHPDOC_XMLTOKEN_EVENT_*
 * value plus 100.
 */

/** starting event: no XML construct is currently open */
define('PHPDOC_XMLTOKEN_EVENT_NOEVENTS', 1);
/** state: parser is in its starting state */
define('STATE_XMLTOKEN_NOEVENTS', 101);

/** event: a <?processor instruction?> was found */
define('PHPDOC_XMLTOKEN_EVENT_PI', 2);
/** state: inside a processor instruction */
define('STATE_XMLTOKEN_PI', 102);

/** event: an open <tag> was found */
define('PHPDOC_XMLTOKEN_EVENT_OPENTAG', 3);
/** state: parsing an open <tag> */
define('STATE_XMLTOKEN_OPENTAG', 103);

/** event: an attribute, as in <tag attr="attribute">, was found */
define('PHPDOC_XMLTOKEN_EVENT_ATTRIBUTE', 4);
/** state: parsing an attribute inside an open <tag> */
define('STATE_XMLTOKEN_ATTRIBUTE', 104);

/** event: a close </tag> was found */
define('PHPDOC_XMLTOKEN_EVENT_CLOSETAG', 5);
/** state: parsing a close </tag> */
define('STATE_XMLTOKEN_CLOSETAG', 105);

/** event: an &entity; was found */
define('PHPDOC_XMLTOKEN_EVENT_ENTITY', 6);
/** state: parsing an &entity; */
define('STATE_XMLTOKEN_ENTITY', 106);

/** event: a <!-- comment --> was found */
define('PHPDOC_XMLTOKEN_EVENT_COMMENT', 7);
/** state: parsing a <!-- comment --> */
define('STATE_XMLTOKEN_COMMENT', 107);

/**
 * event: presumably the start of a 'single-quoted' string (the event is
 * dispatched to stringHandler) -- TODO confirm against the push-event tables
 */
define('PHPDOC_XMLTOKEN_EVENT_SINGLEQUOTE', 8);
/** state: presumably inside a 'single-quoted' string -- TODO confirm */
define('STATE_XMLTOKEN_SINGLEQUOTE', 108);

/**
 * event: presumably the start of a "double-quoted" string (the event is
 * dispatched to stringHandler) -- TODO confirm against the push-event tables
 */
define('PHPDOC_XMLTOKEN_EVENT_DOUBLEQUOTE', 9);
/** state: presumably inside a "double-quoted" string -- TODO confirm */
define('STATE_XMLTOKEN_DOUBLEQUOTE', 109);

/** event: a <! definition was found */
define('PHPDOC_XMLTOKEN_EVENT_DEF', 10);
/** state: parsing a <! definition */
define('STATE_XMLTOKEN_DEF', 110);

/**
 * event: dispatched to parseCdataHandler; presumably plain character data,
 * as opposed to a <![CDATA[ ]]> section (see IN_CDATA) -- TODO confirm
 */
define('PHPDOC_XMLTOKEN_EVENT_CDATA', 11);
/** state: counterpart of PHPDOC_XMLTOKEN_EVENT_CDATA */
define('STATE_XMLTOKEN_CDATA', 111);

/** event: a <?xml declaration was found */
define('PHPDOC_XMLTOKEN_EVENT_XML', 12);
/** state: parsing a <?xml declaration */
define('STATE_XMLTOKEN_XML', 112);

/** event: a <![CDATA[ section was found */
define('PHPDOC_XMLTOKEN_EVENT_IN_CDATA', 13);
/** state: parsing a <![CDATA[ ]]> section */
define('STATE_XMLTOKEN_IN_CDATA', 113);

/** do not remove, needed in plain renderer */
define('PHPDOC_BEAUTIFIER_CDATA', 100000);