# Lexer for CHP 
# 
#  Depends on ply
#
# 0.1 - Stephen Longfield 10 Dec 2012 (for CS6118 project)
#       - Initial creation
# 0.2 - Stephen Longfield 10 June 2014 (for POPL)
#        - 15 June -- Added comments, integers
# 0.3 - Stephen Longfield 18 June 2014 
#        - Removing GETS

import ply.lex as lex
import sys

# Keyword table: maps each reserved word of the language to its token type,
# which is simply the upper-cased spelling of the word.
reserved = {
  word: word.upper()
  for word in ('new', 'true', 'false', 'skip', 'else')
}

# Full list of token type names: operator/punctuation and literal tokens
# first, followed by the token types of the reserved words.
tokens = [
  'PROBE', 'OPEN_BRACKET', 'CLOSE_BRACKET', 'OPEN_PAREN', 'CLOSE_PAREN',
  'DOT', 'STAR', 'BANG', 'NOT', 'AND', 'OR', 'QUESTION', 'SEMICOLON',
  'BAR', 'EQUALS', 'PLUS', 'ARROW', 'BOX', 'ID', 'INT',
]
tokens.extend(reserved.values())

# Simple token regexes: each t_<NAME> string is the pattern for the token
# type <NAME> listed in `tokens`.
#
# NOTE(review): some patterns share a prefix (`[` vs `[]`, `|` vs `||`).
# Per the PLY documentation, string-defined rules are tried in order of
# decreasing regex length, so the longer operators should win -- confirm
# before shortening or "simplifying" any of these patterns.
# NOTE(review): PLY is documented to compile patterns with re.VERBOSE, which
# is presumably why `#` is escaped in t_PROBE -- do not drop the backslash.
t_PROBE         = r'\#'
t_OPEN_BRACKET  = r'\['
t_CLOSE_BRACKET = r'\]'
t_OPEN_PAREN    =  r'\('
t_CLOSE_PAREN   = r'\)'
t_DOT           = r'\.'
t_STAR          = r'\*'
t_BANG          = r'\!'
t_BAR           = r'\|'
t_NOT           = r'\~'
t_AND           = r'&&'
t_OR            = r'\|\|'
t_QUESTION      = r'\?'
t_EQUALS        = r'='
t_PLUS          = r'\+'
t_SEMICOLON     = r';'
t_ARROW         = r'->'
t_BOX           = r'\[\]'

# `//` line comments, running to the end of the line.  The `t_ignore_`
# name prefix is PLY's convention for a pattern that is matched and then
# discarded (no token is produced).
t_ignore_COMMENT = r'//.*'

# Identifiers and reserved words
def t_ID(t):
  r'[a-zA-Z_][a-zA-Z_0-9]*'
  # The raw string above is the rule's regex and must remain the first
  # statement.  A matched word is a keyword token if it appears in the
  # `reserved` table, otherwise it is a plain identifier.
  word = t.value
  if word in reserved:
    t.type = reserved[word]
  else:
    t.type = 'ID'
  return t

# Integer literals: an optional leading minus followed by either a single 0
# or a digit string with no leading zero.  The token's value is converted
# from the matched text to an int; the token itself is returned.
def t_INT(t):
  r'-?(([1-9][0-9]*)|0)'
  # The raw string above is the rule's regex and must remain the first
  # statement in the function.
  try:
    t.value = int(t.value)
  except ValueError:
    # t.value is still the matched text (a string) here, so it is
    # interpolated with %s; the literal is then replaced by 0 so lexing
    # can continue.
    # NOTE(review): for text matching the regex, int() presumably only
    # fails when an interpreter-imposed digit limit is exceeded (e.g. the
    # Python 3.11+ integer string conversion limit) -- confirm.
    print("Integer value too large %s" % t.value)
    t.value = 0
  return t

# Bookkeeping rules
# Characters skipped between tokens: space and tab.  Newlines are not listed
# here; they are matched by t_newline so that line numbers can be counted.
t_ignore = ' \t'

# Count lines
def t_newline(t):
  r'\n+'
  # The raw string above is the rule's regex and must remain the first
  # statement.  The match is a run of newline characters, so its length is
  # the number of lines to advance.  Nothing is returned, so no token is
  # produced for the run.
  newline_run = t.value
  lexer_state = t.lexer
  lexer_state.lineno = lexer_state.lineno + len(newline_run)

# Junky error handler
# Error rule: report the first character of the unmatched input and abort.
# Raises SystemExit with status 1 so that a lexing failure is visible to the
# calling shell.
def t_error(t):
  # Parenthesised single-argument print is valid on Python 2 and Python 3.
  print("Illegal character at '%s'" % t.value[0])
  # sys.exit is used instead of quit(): quit() is a convenience installed by
  # the `site` module and is not guaranteed to exist in every interpreter.
  sys.exit(1)

# Build the module-level lexer instance at import time.  lex.lex() is called
# with no arguments; per the PLY documentation it collects `tokens` and the
# t_* rules from the calling module's namespace.
lexer = lex.lex()

# Test
def test(data):
  """Lex the string `data` with the module-level lexer and print each token.

  Tokens are printed one per line, in the order they are produced, until
  the lexer reports that no more input is available.
  """
  lexer.input(data)
  while True:
    tok = lexer.token()
    if not tok:
      break
    # Parenthesised single-argument print is valid on Python 2 and Python 3.
    print(tok)

if __name__ == '__main__':
  # When run as a script, lex everything on standard input and dump the
  # resulting tokens.
  test(sys.stdin.read())
