def _ends_operand(tokens):
    """Return True when the last token can end an operand (NUM, ID, ')' or ']')."""
    if not tokens:
        return False
    kind, text = tokens[-1]
    return kind in ('NUM', 'ID', 'RPAR') or (kind == 'OP' and text == ']')


def tokenize(input_string):
    """Split *input_string* into a list of ``(kind, text)`` token tuples.

    Token kinds:
        NUM  -- a run of digits, optionally preceded by a unary '-'.
        ID   -- a letter or '_' followed by letters, digits or '_'
                (word operators such as ``div``/``mod`` come out as ID).
        LPAR -- '('.
        RPAR -- ')'.
        OP   -- one of ``+ - * / % { } [ ] ;``.

    Spaces, tabs and newlines are skipped. Any other character is reported
    on stdout as "Unexpected character: <c>" and skipped; no exception is
    raised.

    Args:
        input_string: The text to tokenize.

    Returns:
        A list of ``(kind, text)`` tuples in source order.
    """
    tokens = []
    length = len(input_string)
    i = 0
    while i < length:
        ch = input_string[i]
        # A '-' is a sign only when a digit follows AND nothing that could be
        # a left operand precedes it; otherwise "3-2" or "x-1" would lose the
        # binary minus operator.
        starts_negative = (
            ch == '-'
            and i + 1 < length
            and input_string[i + 1].isdigit()
            and not _ends_operand(tokens)
        )
        if ch.isdigit() or starts_negative:
            # Skip the first character (digit or sign), then take digits only,
            # so a later '-' is never swallowed into the number.
            j = i + 1
            while j < length and input_string[j].isdigit():
                j += 1
            tokens.append(('NUM', input_string[i:j]))
            i = j
        elif ch.isalpha() or ch == '_':
            j = i
            while j < length and (input_string[j].isalnum() or input_string[j] == '_'):
                j += 1
            tokens.append(('ID', input_string[i:j]))
            i = j
        elif ch == '(':
            tokens.append(('LPAR', ch))
            i += 1
        elif ch == ')':
            tokens.append(('RPAR', ch))
            i += 1
        elif ch in '+-*/%{}[];':
            tokens.append(('OP', ch))
            i += 1
        elif ch in (' ', '\t', '\n'):
            i += 1
        else:
            # Best-effort: report the stray character and keep going.
            print("Unexpected character:", ch)
            i += 1
    return tokens


# Demo: tokenize a sample expression and print one "KIND : text" line per token.
input_string = "-2 + (245 div 3); note\n2 mod 3 * hello"
tokens = tokenize(input_string)
for token in tokens:
    print(token[0].upper(), ':', token[1])