import re  # NOTE(review): unused by the current code; kept from the original file


class Token:
    """A single lexical token: a type tag plus an optional literal value."""

    def __init__(self, token_type, value=None):
        self.token_type = token_type  # e.g. 'NUM', 'ID', 'OP', 'LPAR', 'RPAR', 'SEMICOLON', 'DIV', 'MOD'
        self.value = value            # payload: int for NUM, str for ID/OP; None for value-less tokens

    def __repr__(self):
        if self.value is not None:
            return f"{self.token_type}: {self.value}"
        return self.token_type


class Tokenizer:
    """Hand-written scanner that turns a source string into Token objects.

    Recognized lexemes: unsigned integers (NUM), identifiers (ID), the
    keywords 'div'/'mod' (DIV/MOD), the operators + - * / %, parentheses
    (LPAR/RPAR), ';' (SEMICOLON), and '//' line comments (skipped).
    """

    def __init__(self, source):
        self.source = source
        self.position = 0  # index of the next unread character

    def _skip_comment(self):
        """Advance past a '//' comment, stopping at (not past) the newline."""
        while self.position < len(self.source) and self.source[self.position] != '\n':
            self.position += 1

    def get_next_token(self):
        """Return the next Token, or None at end of input.

        Raises:
            ValueError: on a character that starts no known lexeme.
                (The original implicitly returned None here, which the
                driver loop treats as EOF — silently truncating input.)
        """
        self._skip_whitespace()

        if self.position >= len(self.source):
            return None

        current_char = self.source[self.position]

        # '//' line comment. Bounds-check the lookahead: the original
        # indexed position+1 unconditionally and raised IndexError when
        # the source ended in a single '/'.
        if (current_char == '/'
                and self.position + 1 < len(self.source)
                and self.source[self.position + 1] == '/'):
            self._skip_comment()
            return self.get_next_token()

        if current_char.isdigit():
            return self._read_integer()
        if current_char.isalpha():
            return self._read_identifier_or_keyword()
        if current_char in '+-*/%':
            self.position += 1
            return Token('OP', current_char)
        if current_char == '(':          # was `in '('` — equality is what is meant
            self.position += 1
            return Token('LPAR')
        if current_char == ')':
            self.position += 1
            return Token('RPAR')
        if current_char == ';':
            self.position += 1
            return Token('SEMICOLON')

        # Fail loudly instead of silently ending the token stream.
        raise ValueError(
            f"Unexpected character {current_char!r} at position {self.position}"
        )

    def _skip_whitespace(self):
        """Advance past any run of whitespace characters."""
        while self.position < len(self.source) and self.source[self.position].isspace():
            self.position += 1

    def _read_integer(self):
        """Consume a maximal digit run and return it as a NUM token."""
        value_str = ''
        while self.position < len(self.source) and self.source[self.position].isdigit():
            value_str += self.source[self.position]
            self.position += 1
        return Token('NUM', int(value_str))

    def _read_identifier_or_keyword(self):
        """Consume a maximal alphanumeric run; keywords become DIV/MOD tokens.

        Bug fix: the original returned the bare *string* 'DIV'/'MOD' for
        keywords instead of a Token, breaking every caller that expects
        Token objects.
        """
        value_str = ''
        while self.position < len(self.source) and self.source[self.position].isalnum():
            value_str += self.source[self.position]
            self.position += 1

        if value_str in ('div', 'mod'):
            return Token(value_str.upper())
        return Token('ID', value_str)


def main():
    """Tokenize test.txt and print one token per line."""
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("test.txt", "r") as f1:
        source = f1.read()

    tokenizer = Tokenizer(source)
    token = tokenizer.get_next_token()
    while token is not None:
        print(token)
        token = tokenizer.get_next_token()


# Guarding the driver makes the module importable (e.g. for tests)
# without trying to read test.txt at import time.
if __name__ == "__main__":
    main()