<?php namespace Sieve;

include_once('SieveToken.php');

/**
 * Lexical scanner for Sieve scripts.
 *
 * Splits a script into a flat list of SieveToken objects and lets a consumer
 * walk that list while transparently skipping comments and whitespace.
 */
class SieveScanner
{
    /** Optional callback that receives every comment/whitespace token skipped by nextToken(). */
    protected $ptFn_ = null;

    /** Index into $tokens_ of the next token that nextToken() will hand out. */
    protected $tokenPos_ = 0;

    /** Tokens produced by tokenize(), in script order. */
    protected $tokens_ = array();

    /**
     * Token type => regular expression fragment (without delimiters or anchor).
     *
     * The order matters: tokenize() joins the fragments into one alternation,
     * so earlier entries win over later ones. Unknown is the catch-all and
     * must stay last.
     */
    protected $tokenMatch_ = array (
        SieveToken::LeftBracket       =>  '\[',
        SieveToken::RightBracket      =>  '\]',
        SieveToken::BlockStart        =>  '\{',
        SieveToken::BlockEnd          =>  '\}',
        SieveToken::LeftParenthesis   =>  '\(',
        SieveToken::RightParenthesis  =>  '\)',
        SieveToken::Comma             =>  ',',
        SieveToken::Semicolon         =>  ';',
        SieveToken::Whitespace        =>  '[ \r\n\t]+',
        SieveToken::Tag               =>  ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        // Quoted string, piece by piece (as written in the PHP literal below):
        //   "                  opening quotation mark
        //   (                  start of a part that ends with an escaped quotation mark
        //   ([^"]*[^"\\\\])?   optional text without quotation marks that does not end
        //                      with a backslash (this also covers the empty string)
        //   (\\\\\\\\)*        any number of backslash pairs, so the character after
        //                      them is not escaped
        //   \\\\"              an escaped quotation mark
        //   )*                 any number of such parts
        //   [^"]*              trailing text that contains no quotation marks
        //   "                  closing quotation mark
        SieveToken::QuotedString      =>  '"(([^"]*[^"\\\\])?(\\\\\\\\)*\\\\")*[^"]*"',
        SieveToken::Number            =>  '[[:digit:]]+(?:[KMG])?(?=\b)',
        SieveToken::Comment           =>  '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?(\n|$))',
        SieveToken::MultilineString   =>  'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.][^\r\n]*\r?\n)*\.\r?(\n|$)',
        SieveToken::Identifier        =>  '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        SieveToken::Unknown           =>  '[^ \r\n\t]+'
    );

    /**
     * Creates a scanner and, unless $script is null, tokenizes it immediately.
     *
     * @param string|null $script Sieve script text (taken by reference, not modified).
     */
    public function __construct(&$script)
    {
        if ($script === null)
            return;

        $this->tokenize($script);
    }

    /**
     * Registers the callback that nextToken() invokes for every skipped
     * comment or whitespace token.
     *
     * Values that are neither null-like nor callable are silently ignored and
     * leave the current callback in place.
     *
     * @param callable|null $callback Receives the skipped SieveToken; null disables the hook.
     */
    public function setPassthroughFunc($callback)
    {
        if ($callback == null || is_callable($callback))
            $this->ptFn_ = $callback;
    }

    /**
     * Splits $script into tokens and appends them to the token list.
     *
     * Scanning stops at the first Unknown token (the list then ends with that
     * token); otherwise a ScriptEnd token is appended after the last match.
     *
     * @param string $script Sieve script text (taken by reference, not modified).
     */
    public function tokenize(&$script)
    {
        $pos = 0;
        $line = 1;
        $scriptLength = mb_strlen($script);
        $unprocessedScript = $script;

        // Build ONE anchored alternation out of all token patterns instead of
        // trying every pattern in turn. Each alternative is a named group
        // ('A', 'B', ...; chr(65) == 'A') so the group name identifies the
        // token type of whatever matched.
        $nameToType = [];
        $alternatives = [];
        $i = 65;
        foreach ($this->tokenMatch_ as $type => $subregex) {
            $groupName = chr($i);
            $nameToType[$groupName] = $type;
            $alternatives[] = '(?P<' . $groupName . '>^' . $subregex . ')';
            $i++;
        }
        $regex = '/' . join('|', $alternatives) . '/';

        while ($pos < $scriptLength) {
            if (!preg_match($regex, $unprocessedScript, $match)) {
                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                return;
            }

            // Keep only the named group that actually matched. Groups that did
            // not take part in the match are reported as ''. The comparison
            // must be against '' explicitly: a plain truthiness filter would
            // also throw away the perfectly valid token "0".
            $filterMatch = array_filter(
                $match,
                static function ($value, $key) {
                    return is_string($key) && $value !== '';
                },
                ARRAY_FILTER_USE_BOTH
            );

            if (!$filterMatch) {
                // Should be unreachable (every pattern consumes at least one
                // character); bail out instead of looping without progress.
                $this->tokens_[] = new SieveToken(SieveToken::Unknown, '', $line);
                return;
            }

            // The first (and only) remaining entry is the matched group: its
            // key is the group name, its value the matched text.
            $type = $nameToType[key($filterMatch)];
            $currentMatch = current($filterMatch);

            $this->tokens_[] = new SieveToken($type, $currentMatch, $line);

            if ($type == SieveToken::Unknown)
                return;

            // Drop just the part that was consumed; the remaining length is
            // deliberately not passed to mb_substr because computing it with
            // mb_strlen is linear in the string length.
            $matchLength = mb_strlen($currentMatch);
            $unprocessedScript = mb_substr($unprocessedScript, $matchLength);
            $pos += $matchLength;
            $line += mb_substr_count($currentMatch, "\n");
        }

        $this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
    }

    /**
     * Tells whether the next significant token is of the given type.
     *
     * @param int $type SieveToken type (or combination of types) passed to SieveToken::is().
     * @return bool Result of SieveToken::is() on the peeked token.
     */
    public function nextTokenIs($type)
    {
        return $this->peekNextToken()->is($type);
    }

    /**
     * Returns the next token that is neither a comment nor whitespace,
     * without advancing the read position.
     *
     * @return SieveToken
     */
    public function peekNextToken()
    {
        $offset = 0;
        do {
            $next = $this->tokens_[$this->tokenPos_ + $offset++];
        } while ($next->is(SieveToken::Comment|SieveToken::Whitespace));

        return $next;
    }

    /**
     * Consumes and returns the next token that is neither a comment nor
     * whitespace.
     *
     * Every comment/whitespace token skipped on the way is handed to the
     * pass-through callback, if one is set.
     *
     * @return SieveToken
     */
    public function nextToken()
    {
        $token = $this->tokens_[$this->tokenPos_++];

        while ($token->is(SieveToken::Comment|SieveToken::Whitespace)) {
            if ($this->ptFn_ != null)
                call_user_func($this->ptFn_, $token);

            $token = $this->tokens_[$this->tokenPos_++];
        }

        return $token;
    }
}