Commit 51a73f93 authored by ngu0158

Delete Lexical Analyzer

parent 2cfb628c
class Token:
    """A single lexical token with a type and an optional value."""

    def __init__(self, token_type, value=None):
        self.token_type = token_type
        self.value = value

    def __repr__(self):
        if self.value is not None:
            return f"{self.token_type}: {self.value}"
        return self.token_type


class Tokenizer:
    """Hand-written scanner over a source string, one token per call."""

    def __init__(self, source):
        self.source = source
        self.position = 0

    def _skip_comment(self):
        # Consume the rest of the current line after a '//' comment marker.
        while self.position < len(self.source) and self.source[self.position] != '\n':
            self.position += 1

    def get_next_token(self):
        self._skip_whitespace()
        if self.position >= len(self.source):
            return None
        current_char = self.source[self.position]
        # Bounds check added so a lone '/' at the end of input cannot raise IndexError.
        if (current_char == '/' and self.position + 1 < len(self.source)
                and self.source[self.position + 1] == '/'):
            self._skip_comment()
            return self.get_next_token()
        if current_char.isdigit():
            return self._read_integer()
        elif current_char.isalpha():
            return self._read_identifier_or_keyword()
        elif current_char in '+-*/%':
            self.position += 1
            return Token('OP', current_char)
        elif current_char == '(':  # was `in '('`; a plain equality test is what is meant
            self.position += 1
            return Token('LPAR')
        elif current_char == ')':
            self.position += 1
            return Token('RPAR')
        elif current_char == ';':
            self.position += 1
            return Token('SEMICOLON')
        else:
            # Fail loudly on anything unrecognized instead of silently returning
            # None, which the caller would mistake for end of input.
            raise ValueError(f"Unexpected character {current_char!r} at position {self.position}")

    def _skip_whitespace(self):
        while self.position < len(self.source) and self.source[self.position].isspace():
            self.position += 1

    def _read_integer(self):
        value_str = ''
        while self.position < len(self.source) and self.source[self.position].isdigit():
            value_str += self.source[self.position]
            self.position += 1
        return Token('NUM', int(value_str))

    def _read_identifier_or_keyword(self):
        value_str = ''
        while self.position < len(self.source) and self.source[self.position].isalnum():
            value_str += self.source[self.position]
            self.position += 1
        if value_str in ['div', 'mod']:
            # Was `return value_str.upper()`, which returned a bare string rather
            # than a Token; keywords now get their own token type.
            return Token(value_str.upper())
        return Token('ID', value_str)


with open("test.txt", "r") as f1:
    source = f1.read()
tokenizer = Tokenizer(source)
token = tokenizer.get_next_token()
while token is not None:
    print(token)
    token = tokenizer.get_next_token()
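For a quick sanity check, here is a minimal sketch of a run on an inline string rather than test.txt; the input below is illustrative and not part of the original commit.

# Hypothetical demo input (assumes the Token and Tokenizer classes above are in scope).
demo = Tokenizer("(12 + x1) div 3; // trailing comment")
tok = demo.get_next_token()
while tok is not None:
    print(tok)
    tok = demo.get_next_token()
# Expected output:
# LPAR
# NUM: 12
# OP: +
# ID: x1
# RPAR
# DIV
# NUM: 3
# SEMICOLON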