| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145 | 
							- <?php namespace Sieve;
 
- include_once('SieveToken.php');
 
/**
 * Lexer for Sieve scripts.
 *
 * Splits a script into a flat list of SieveToken objects and lets a consumer
 * walk over that list while transparently skipping comments and whitespace.
 */
class SieveScanner
{
    /**
     * Creates the scanner and, unless $script is null, tokenizes it right away.
     *
     * @param string|null $script script text (taken by reference); pass a variable
     *                            holding null to defer the work to tokenize()
     */
    public function __construct(&$script)
    {
        if ($script === null) {
            return;
        }

        $this->tokenize($script);
    }

    /**
     * Registers the callback that nextToken() invokes for every comment or
     * whitespace token it skips.
     *
     * The value is stored only when it loosely equals null (which disables the
     * pass-through) or is callable; anything else is ignored and the previously
     * registered callback stays in place.
     *
     * @param callable|null $callback receives the skipped SieveToken as its only argument
     */
    public function setPassthroughFunc($callback)
    {
        if ($callback == null || is_callable($callback)) {
            $this->ptFn_ = $callback;
        }
    }

    /**
     * Tokenizes $script and appends the resulting tokens to the token list.
     *
     * Scanning stops at the first piece of input that can only be classified as
     * SieveToken::Unknown; that token is the last one appended. Otherwise a
     * final SieveToken::ScriptEnd token is appended once the input is consumed.
     *
     * Tokens are appended to whatever the list already holds; neither the list
     * nor the read position is reset by this method.
     *
     * @param string $script script text (taken by reference, not modified)
     */
    public function tokenize(&$script)
    {
        $pos = 0;
        $line = 1;
        $scriptLength = mb_strlen($script);
        $unprocessedScript = $script;

        // Build one big alternation with a named group per token type instead of
        // trying every token pattern separately. Groups are named 'A', 'B', ...
        // (chr(65) == 'A') and $nameToType maps the group name back to the type.
        $nameToType = [];
        $alternatives = [];
        $groupCode = 65;
        foreach ($this->tokenMatch_ as $type => $subregex) {
            $groupName = chr($groupCode++);
            $nameToType[$groupName] = $type;
            // every alternative is anchored to the start of the remaining input
            $alternatives[] = '(?P<' . $groupName . '>^' . $subregex . ')';
        }
        $regex = '/' . implode('|', $alternatives) . '/';

        while ($pos < $scriptLength) {
            // Defaults describe "nothing recognised": an empty Unknown token.
            $type = SieveToken::Unknown;
            $currentMatch = '';

            if (preg_match($regex, $unprocessedScript, $match)) {
                // Exactly one named group took part in the match. Groups that did
                // not participate are reported as '' (or are missing), so compare
                // against '' explicitly: a plain truthiness test would also throw
                // away the valid token text "0".
                foreach ($nameToType as $groupName => $groupType) {
                    if (isset($match[$groupName]) && $match[$groupName] !== '') {
                        $type = $groupType;
                        $currentMatch = $match[$groupName];
                        break;
                    }
                }
            }

            $this->tokens_[] = new SieveToken($type, $currentMatch, $line);

            if ($type == SieveToken::Unknown) {
                return;
            }

            // Drop only the part that was just consumed rather than slicing the
            // original script at $pos: multibyte offsets get slower to resolve
            // the further into the string they point.
            $matchLength = mb_strlen($currentMatch);
            $unprocessedScript = mb_substr($unprocessedScript, $matchLength);
            $pos += $matchLength;
            $line += mb_substr_count($currentMatch, "\n");
        }

        $this->tokens_[] = new SieveToken(SieveToken::ScriptEnd, '', $line);
    }

    /**
     * Tells whether the next significant token is of the given type.
     *
     * @param int $type token type (or bitmask of types) handed to SieveToken::is()
     * @return mixed result of SieveToken::is() on the token from peekNextToken()
     */
    public function nextTokenIs($type)
    {
        return $this->peekNextToken()->is($type);
    }

    /**
     * Returns the next token that is neither a comment nor whitespace without
     * advancing the read position.
     *
     * No bounds check is performed on the token list.
     *
     * @return SieveToken
     */
    public function peekNextToken()
    {
        $offset = 0;
        do {
            $next = $this->tokens_[$this->tokenPos_ + $offset++];
        } while ($next->is(SieveToken::Comment|SieveToken::Whitespace));

        return $next;
    }

    /**
     * Consumes and returns the next token that is neither a comment nor
     * whitespace.
     *
     * Every skipped comment/whitespace token is handed to the pass-through
     * callback when one is registered. No bounds check is performed on the
     * token list.
     *
     * @return SieveToken
     */
    public function nextToken()
    {
        $token = $this->tokens_[$this->tokenPos_++];

        while ($token->is(SieveToken::Comment|SieveToken::Whitespace)) {
            if ($this->ptFn_ != null) {
                call_user_func($this->ptFn_, $token);
            }

            $token = $this->tokens_[$this->tokenPos_++];
        }

        return $token;
    }

    /** Callback invoked by nextToken() for skipped tokens, or null when unset. */
    protected $ptFn_ = null;

    /** Index into $tokens_ of the next token nextToken() will read. */
    protected $tokenPos_ = 0;

    /** Tokens produced by tokenize(), in script order. */
    protected $tokens_ = array();

    /**
     * Token type => regular expression fragment (without delimiters or anchor).
     *
     * Order matters: tokenize() joins the fragments into one alternation, so an
     * earlier entry wins over a later one. Unknown must stay last because it
     * accepts any run of non-whitespace characters.
     */
    protected $tokenMatch_ = array (
        SieveToken::LeftBracket       =>  '\[',
        SieveToken::RightBracket      =>  '\]',
        SieveToken::BlockStart        =>  '\{',
        SieveToken::BlockEnd          =>  '\}',
        SieveToken::LeftParenthesis   =>  '\(',
        SieveToken::RightParenthesis  =>  '\)',
        SieveToken::Comma             =>  ',',
        SieveToken::Semicolon         =>  ';',
        SieveToken::Whitespace        =>  '[ \r\n\t]+',
        SieveToken::Tag               =>  ':[[:alpha:]_][[:alnum:]_]*(?=\b)',
        /*
        "                           # match a quotation mark
        (                           # start matching parts that include an escaped quotation mark
        ([^"]*[^"\\\\])             # match a string without quotation marks and not ending with a backslash
        ?                           # this also includes the empty string
        (\\\\\\\\)*                 # match any groups of even number of backslashes
                                    # (thus the character after these groups are not escaped)
        \\\\"                       # match an escaped quotation mark
        )*                          # accept any number of strings that end with an escaped quotation mark
        [^"]*                       # accept any trailing part that does not contain any quotation marks
        "                           # end of the quoted string
        */
        SieveToken::QuotedString      =>  '"(([^"]*[^"\\\\])?(\\\\\\\\)*\\\\")*[^"]*"',
        SieveToken::Number            =>  '[[:digit:]]+(?:[KMG])?(?=\b)',
        SieveToken::Comment           =>  '(?:\/\*(?:[^\*]|\*(?=[^\/]))*\*\/|#[^\r\n]*\r?(\n|$))',
        SieveToken::MultilineString   =>  'text:[ \t]*(?:#[^\r\n]*)?\r?\n(\.[^\r\n]+\r?\n|[^\.][^\r\n]*\r?\n)*\.\r?(\n|$)',
        SieveToken::Identifier        =>  '[[:alpha:]_][[:alnum:]_]*(?=\b)',
        SieveToken::Unknown           =>  '[^ \r\n\t]+'
    );
}
 
 
  |