# pytoken.py
"""
Provides Python token classes for the GiPSy scanner.
Library Version 1.1
Copyright 2013 Paul Griffiths
Email: mail@paulgriffiths.net
Distributed under the terms of the GNU General Public License.
http://www.gnu.org/licenses/
"""
from gipsy.token import Token
from gipsy.programtoken import IdentifierToken
class MLStringToken(Token):
    """
    Token class for a triple-quoted (multi-line) string literal.
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the matched multi-line string text, delimiters included.
        """
        Token.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "Multi-line String"
class BacktickToken(Token):
    """
    Token class for a backtick repr-expression (Python 2 `x` syntax).

    Note: the original docstring said "Generic string token class",
    a copy-paste slip from the string token classes; a backtick
    expression is not a string literal.
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the matched backtick expression text, backticks included.
        """
        Token.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "Backtick"
class FirstIdentifierToken(IdentifierToken):
    """
    Token class for the leading identifier of a dotted name.

    In a qualified name such as first.notfirst.stillnotfirst.end,
    only 'first' receives this class; the parser may later reclassify
    it as a keyword or builtin.
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the identifier text.
        """
        IdentifierToken.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "FirstIdentifier"
class DefinitionToken(IdentifierToken):
    """
    Token class for a name introduced by a 'def' or 'class' statement.
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the defined name's text.
        """
        IdentifierToken.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "Definition"
class DecoratorToken(IdentifierToken):
    """
    Token class for a decorator ('@' plus a dotted identifier).
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the decorator text, leading '@' included.
        """
        IdentifierToken.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "Decorator"
class BuiltinToken(IdentifierToken):
    """
    Token class for an identifier naming a Python built-in.
    """
    def __init__(self, value):
        """
        Class initializer.

        Arguments:
        value -- the built-in's name.
        """
        IdentifierToken.__init__(self, value)
        # Override the base class's type label for reporting.
        self._token_type_string = "Builtin"
# pyparser.py
"""
Implements a Python source scanner based on the GiPSy library.
Library Version 1.1
Copyright 2013 Paul Griffiths
Email: mail@paulgriffiths.net
Distributed under the terms of the GNU General Public License.
http://www.gnu.org/licenses/
"""
import keyword
import re
import __builtin__
from gipsy.token import WhitespaceToken
from gipsy.parser import Parser, TokenMatch
from gipsy.programtoken import CommentToken, KeywordToken, StringToken
from gipsy.programtoken import IdentifierToken, FloatToken, SeparatorToken
from gipsy.programtoken import IntegerToken, DelimiterToken, OperatorToken
from gipsy.pytoken import BacktickToken, DecoratorToken, MLStringToken
from gipsy.pytoken import DefinitionToken, BuiltinToken, FirstIdentifierToken
class PyParser(Parser):
    """
    A Python source code scanner class.

    Registers a prioritized list of TokenMatch patterns with the base
    Parser. Order matters: earlier entries are tried first, so
    multi-line strings precede single-line strings, and multi-character
    operators precede their single-character prefixes.
    """
    def __init__(self):
        """
        Class initializer.

        Builds the container-pair list and the ordered token match
        table used by the base Parser.
        """
        Parser.__init__(self)
        self._containers = [('(', ')'), ('{', '}'), ('[', ']')]
        # Hoisted: dir(__builtin__) was previously recomputed for every
        # identifier in _get_token(); compute the name set once.
        self._builtin_names = frozenset(dir(__builtin__))
        tmat = self._token_matches

        # Comments run to the end of the line.
        tmat.append(TokenMatch(re.compile(r"#.*"),
                               [CommentToken]))
        # Triple-quoted (multi-line) strings, optionally raw.
        # FIX: the old pattern r?([\"|\']{3})[^\1]*?... had two bugs:
        # '|' inside a character class is a literal (so '|||' was a
        # valid delimiter, as were mixed triples like '"'), and \1
        # inside a class is the octal escape \x01, not a backreference.
        # Capture the whole triple quote and require the same one at
        # the end; [\s\S]*? lazily spans newlines as the original
        # (accidentally) did.
        tmat.append(TokenMatch(re.compile(r"r?(\"\"\"|''')[\s\S]*?" +
                                          r"(?<!\\)(\\\\)*\1"),
                               [MLStringToken]))
        # Single-line strings, optionally raw.
        # FIX: ["|'] -> ["'] -- the old class also accepted '|' as a
        # string delimiter.
        tmat.append(TokenMatch(re.compile(r'''r?(["']).*?(?<!\\)(\\\\)*\1'''),
                               [StringToken]))
        # Backtick repr-expressions (Python 2 only).
        tmat.append(TokenMatch(re.compile(r"r?([`]).*?(?<!\\)(\\\\)*\1"),
                               [BacktickToken]))
        # Decorators: '@' followed by a dotted identifier.
        tmat.append(TokenMatch(re.compile(r"@[a-zA-Z_][\w\.]*"),
                               [DecoratorToken]))
        # 'def'/'class' keyword, whitespace, and the name being defined.
        tmat.append(TokenMatch(re.compile(r"(def)(\s+)([a-zA-Z_][\w]*)"),
                               [KeywordToken,
                                WhitespaceToken,
                                DefinitionToken]))
        tmat.append(TokenMatch(re.compile(r"(class)(\s+)([a-zA-Z_][\w]*)"),
                               [KeywordToken,
                                WhitespaceToken,
                                DefinitionToken]))
        # Attribute access: '.' plus a trailing identifier; only the
        # first identifier in a dotted name gets FirstIdentifierToken.
        tmat.append(TokenMatch(re.compile(r"(\.)([a-zA-Z_][\w]*)"),
                               [SeparatorToken, IdentifierToken]))
        # Bare identifier; _get_token() reclassifies keywords/builtins.
        tmat.append(TokenMatch(re.compile(r"[a-zA-Z_][\w]*"),
                               [FirstIdentifierToken]))
        # Floats: digits '.' [digits] or '.' digits, with optional
        # exponent and imaginary suffix.
        tmat.append(TokenMatch(re.compile(r"[0-9]+[\.][0-9]*((e|E)[\+\-]" +
                                          r"?[0-9]+)?(J|j)?"),
                               [FloatToken]))
        tmat.append(TokenMatch(re.compile(r"[\.][0-9]+((e|E)[\+\-]?" +
                                          r"[0-9]+)?(j|J)?"),
                               [FloatToken]))
        # Integers: hex or decimal, optional long/imaginary suffix.
        # FIX: the old (0x)?[0-9]+ could not match hex digits a-f, so
        # '0xff' scanned as the integer '0' followed by identifier 'xff'.
        tmat.append(TokenMatch(re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)" +
                                          r"(L|l)?(J|j)?"),
                               [IntegerToken]))
        # Augmented-assignment delimiters, before bare '=' and the
        # single-character operators.
        tmat.append(TokenMatch(re.compile(r"(\+=|\-=|\*=|/=|%=|//=|\*\*=)"),
                               [DelimiterToken]))
        # Two-character operators before their one-character prefixes.
        tmat.append(TokenMatch(re.compile(r"(\*\*|<<|>>|<=|>=|<>|==|!=|//)"),
                               [OperatorToken]))
        tmat.append(TokenMatch(re.compile(r"="),
                               [DelimiterToken]))
        tmat.append(TokenMatch(re.compile(r"[\+\*\-\/%~&\^\|<>]"),
                               [OperatorToken]))
        tmat.append(TokenMatch(re.compile(r"[,:\.]"),
                               [SeparatorToken]))

    def _get_token(self, tclass, value):
        """
        Return a token instance for 'value', reclassifying identifiers.

        A FirstIdentifierToken whose text is a Python keyword becomes a
        KeywordToken; one naming a builtin becomes a BuiltinToken; any
        other text falls back to a plain IdentifierToken. All other
        token classes are delegated to the base Parser.
        """
        if tclass == FirstIdentifierToken:
            if value in keyword.kwlist:
                return KeywordToken(value)
            elif value in self._builtin_names:
                return BuiltinToken(value)
            else:
                return IdentifierToken(value)
        return Parser._get_token(self, tclass, value)