diff --git a/BachelorThesis.pdf b/BachelorThesis.pdf
index 226441b206df9b1f09a10d98b69f97d195d1ae95..4560b8cf9a623b28eee6d5b8643048c3594e623a 100644
Binary files a/BachelorThesis.pdf and b/BachelorThesis.pdf differ
diff --git a/main.py b/main.py
index fe545ad04652afde751c1b06b8369f2d4e72106e..bad145d31479c8cc56bee4246e9e7bb520068105 100644
--- a/main.py
+++ b/main.py
@@ -1,3 +1,5 @@
+import sqlglot.dialects
+import sqlglot.dialects.dialect
 import sqlglot.optimizer
 import sqlglot.optimizer.qualify
 from handling_dataset.main import filter_postgresql_questions, load_code_sections, get_questions, link_questions_with_answers, save_code_sections
@@ -26,8 +28,54 @@ def strip_tags(html):
     s.feed(html)
     return s.get_data()
 
-def erase_html(code_section: str) -> str:
-    result = strip_tags(' '.join(code_section))
+def replace_unrecognized_letters(result: str) -> str:
+    """Normalize characters that break SQL parsing and drop known junk sections.
+
+    Returns '' when `result` contains one of three hard-coded non-SQL snippets
+    (a psql error line, two PHP fragments); otherwise returns the cleaned text.
+    The junk checks run between replacement steps, so they see partly cleaned text.
+    """
+    # Turn PostgreSQL escape strings E'...' into plain '...'. The look-behind keeps
+    # an E that ends a word (e.g. the closing quote of 'VALUE') from being eaten.
+    result = re.sub(r"(?<![A-Za-z0-9_])E'", "'", result)
+    result = result.replace('`', '').replace('Ĺ‚', 'l').replace('ø', 'o')
+    if 'psql:latest.dump:1601: invalid command' in result:
+        return ''
+    result = result.replace('ÂŁ', '$')
+    # Upper-case É keeps its case, like the other upper-case mappings below.
+    result = result.replace('è', 'e').replace('é', 'e').replace('É', 'E')
+    result = result.replace('Ă­', 'i').replace('ç', 'c').replace('õ', 'o')
+    if '<?php if($funcao==1 )' in result:
+        return ''
+    result = result.replace('á', 'a').replace('Ăł', 'o')
+    result = result.replace('Ĺ„', 'n').replace('ñ', 'n')
+    if 'else {       $GLOBALS[komunikat_edycja_agenta' in result:
+        return ''
+    result = result.replace('ĂŁ', 'a')
+    result = result.replace('©', 'c')
+    result = result.replace('ĂĽ', 'u')
+    result = result.replace('Ă—', 'x')
+    result = result.replace('Ăş', 'u')
+    result = result.replace('ĂŞ', 'e')
+    result = result.replace('«', '').replace('»', '')
+    result = result.replace('Ă ', 'a')
+    result = result.replace('Ń„', '')
+    result = result.replace('Ă´', 'o')
+    result = result.replace('ä', 'a')
+    result = result.replace('§', '$')
+    result = result.replace('Ăš', 'U')
+    result = result.replace('ö', 'o')
+    result = result.replace('â', 'a')
+    result = result.replace('Âş', '').replace('¡', '')
+    result = result.replace('ČĽ', '')
+    result = result.replace('Ăź', 's')
+    result = result.replace('с', 'c').replace('С', 'C')
+    result = result.replace('Ă‚', 'A')
+    result = result.replace('÷', '').replace('®', '').replace('¤', '').replace('¦', '')
+    return result
+
+def erase_html(code_section: list) -> str:
+    result = strip_tags(';'.join(code_section))
     
     pattern = r"\\+['a-zA-Z]"
     matches = re.findall(pattern, result)
@@ -43,34 +91,40 @@ def erase_html(code_section: str) -> str:
             result = result.replace(match, '\n')
         else:
             result = result.replace(match, match[-2:])
-    
-    return result
-
 
+    return replace_unrecognized_letters(result)
 
 def analyze(id: str, codes: list) -> tuple:
+    """Count how many ';'-separated statements sqlglot parses as PostgreSQL.
+
+    `codes` is a list of code-section lists; each inner list is cleaned by
+    erase_html() into one string, which is then split on ';'. `id` is unused.
+
+    Returns (parsed, not_parsed, is_none); is_none counts the fragments for
+    which sqlglot.parse() returned [None], and those are included in parsed.
+    """
     parsed = 0
     not_parsed = 0
-    for code in codes:
-        code = erase_html(code)
-
-        correct = False
-        try:
-            expression_tree = sqlglot.parse(code, dialect='postgres')
-            correct = True
-        except sqlglot.errors.ParseError:
-            correct = False
-        except sqlglot.errors.TokenError:
-            correct = False
-        except:
-            correct = False
-
-        if correct:
-            parsed += 1
-        else:
-            not_parsed += 1     
-    return (parsed, not_parsed)
+    is_none = 0
+    for code_list in codes:
+        # Every fragment is parsed and counted on its own, so one broken
+        # statement does not make the whole code section count as unparsable.
+        for code in erase_html(code_list).split(';'):
+            try:
+                expression_tree = sqlglot.parse(code, dialect='postgres')
+            except (sqlglot.errors.ParseError, sqlglot.errors.TokenError):
+                not_parsed += 1
+            except Exception:
+                # Any other failure inside sqlglot also counts as unparsable, but
+                # KeyboardInterrupt/SystemExit must still be able to stop a long run.
+                not_parsed += 1
+            else:
+                parsed += 1
+                # Presumably an empty fragment (e.g. after a trailing ';') gives [None].
+                if expression_tree == [None]:
+                    is_none += 1
 
+    return (parsed, not_parsed, is_none)
 
 def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: str, input_filepath_linked: str, input_filepath_codes: str) -> None:
     """Main function.
@@ -88,17 +142,30 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st
 
     if os.path.isfile(input_filepath_codes):
         codes = load_code_sections(input_filepath_codes)
-        count = 0
         parsed = 0
         not_parsed = 0
+        is_none = 0
+
         for key, values in codes.items():
-            if count < 1000:
-                tmp = analyze(key, values)
-                parsed += tmp[0]
-                not_parsed += tmp[1]
-                count += 1
-        print(f'Parsed: {parsed}, not parsed: {not_parsed}')
-        # Parsed: 931, not parsed: 1802
+            if not values:
+                continue
+
+            tmp = analyze(key, values)
+            parsed += tmp[0]
+            not_parsed += tmp[1]
+            is_none += tmp[2]
+            
+        print(f'Parsed: {parsed}, not parsed: {not_parsed}, None: {is_none} (included in Parsed)')
+        print('DONE!')
+        # Parsed: 931, not parsed: 1802 (original)
+        # Parsed: 1122, not parsed: 2251 (empty entries skipped)
+        # Parsed: 5189, not parsed: 3887 (split into sections, without the postgres dialect)
+        # Parsed: 9723, not parsed: 6415 (split ;)
+        # Parsed: 9769, not parsed: 6369 (E' -> ', no `)
+
+        # Parsed: 9571, not parsed: 6567 (added dialect='postgres')
+        # Parsed: 1192213, not parsed: 886185 (all)
+        # Parsed: 1192213, not parsed: 886185, None: 198696 (included in Parsed)
         return
 
     if os.path.isfile(input_filepath_linked):
@@ -116,7 +183,6 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st
     link_questions_with_answers(input_filepath_all_answers, get_questions(input_filepath_postgresql_questions), input_filepath_linked)
     #creates input_filepath_codes
     save_code_sections(input_filepath_linked, input_filepath_codes)
-
     run(input_filepath_all_answers, input_filepath_postgresql_questions, input_filepath_linked, input_filepath_codes)
 
 if __name__ == "__main__":
@@ -125,9 +191,8 @@ if __name__ == "__main__":
 
 """
 DONE:
-    1. config file, soubory do files/
-    2. struktura kapitoly, nová kapitola ohledně schémat
-    3. test na větším počtu
-"""
+    1. Fix analyze
+    2. Fix kapitoly
 
-#TODO Fix analyze()
\ No newline at end of file
+    - PL/pgSQL zahazuju (DECLARE @OuterPageSize int)
+"""
\ No newline at end of file
diff --git a/tests.py b/tests.py
index deefc4b4af61a92f18578823209d6b2679c4dc79..404317d652ff5e79ba8027eb163d6334cdc81bb9 100644
--- a/tests.py
+++ b/tests.py
@@ -168,8 +168,18 @@ def tmp():
     except sqlglot.errors.OptimizeError:
         expression_tree = undo
 
-if __name__ == '__main__':
-    #print('SQLMETADATA: ')
-    #sqlmetadata()
+def whatever():
+    """Parse every ';'-separated piece of a sample query and print the result."""
+    sample = "SELECT hello FROM y;"
+    for piece in sample.split(';'):
+        try:
+            trees = sqlglot.parse(piece)
+            print(repr(trees))
+            # The empty piece after the trailing ';' is the case being probed here.
+            if trees == [None]:
+                print('is None')
+        except sqlglot.errors.ParseError as parse_error:
+            print(f'ParseError: {parse_error}')
 
-    tmp()
\ No newline at end of file
+if __name__ == '__main__':
+    whatever()
\ No newline at end of file