Skip to content
Snippets Groups Projects
Commit a589ae26 authored by Mesharo's avatar Mesharo
Browse files

kapitoly, config file

parent 8d979505
No related merge requests found
No preview for this file type
# Central configuration: locations of the raw input dumps and of the
# intermediate files the processing steps read and write.
import os

# Working directory captured once at import time; every generated file is
# placed in a "files" sub-directory below it.
current_directory = os.getcwd()
_files_directory = os.path.join(current_directory, 'files')

# Maps a logical name to the path of the corresponding file.
#   posts, tags  - source XML dumps (absolute, machine-specific paths)
#   other keys   - intermediate text files kept under <cwd>/files
# os.path.join is used instead of hand-written '\\' separators so a working
# directory that already ends in a separator does not produce a doubled one
# and the derived paths use the platform's native separator.
filepaths = dict(
    posts=r'D:\stackoverflow.com\Posts.xml',
    tags=r'D:\stackoverflow.com\Tags.xml',
    all_answers=os.path.join(_files_directory, 'all_answers.txt'),
    postgresql_questions=os.path.join(_files_directory, 'postgresql_questions.txt'),
    linked=os.path.join(_files_directory, 'linked.txt'),
    codes=os.path.join(_files_directory, 'codes.txt'),
)
\ No newline at end of file
...@@ -5,6 +5,7 @@ import re ...@@ -5,6 +5,7 @@ import re
import os.path import os.path
import sqlglot import sqlglot
import copy import copy
import config
from io import StringIO from io import StringIO
from html.parser import HTMLParser from html.parser import HTMLParser
...@@ -47,43 +48,28 @@ def erase_html(code_section: str) -> str: ...@@ -47,43 +48,28 @@ def erase_html(code_section: str) -> str:
def analyze(id: str, codes: list) -> None: def analyze(id: str, codes: list) -> tuple:
print('CODES: ') parsed = 0
not_parsed = 0
for code in codes: for code in codes:
code = erase_html(code) code = erase_html(code)
#print(f'-- code: {code}') correct = False
expression_tree = False
try: try:
expression_trees = sqlglot.parse(code, dialect='postgres') expression_tree = sqlglot.parse(code, dialect='postgres')
correct = True
except sqlglot.errors.ParseError: except sqlglot.errors.ParseError:
#print(f'Sqlglot failed to parse given statement') correct = False
continue
except sqlglot.errors.TokenError: except sqlglot.errors.TokenError:
#print(f'Sqlglot failed to tokenize given statement') correct = False
continue except:
correct = False
for expression_tree in expression_trees:
#print(f'-- tree: {repr(expression_tree)}') if correct:
parsed += 1
solved = False else:
try: not_parsed += 1
undo = copy.deepcopy(expression_tree) return (parsed, not_parsed)
sqlglot.optimizer.qualify.qualify(expression_tree)
root = sqlglot.optimizer.build_scope(expression_tree)
if root:
for column in sqlglot.optimizer.find_all_in_scope(root.expression, sqlglot.exp.Column):
print(f"{column} => {root.sources[column.table]}")
solved = True
except sqlglot.errors.OptimizeError:
#print(f'Sqlglot failed to optimize given statement.')
expression_tree = undo
continue
print('--- --- ---')
def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: str, input_filepath_linked: str, input_filepath_codes: str) -> None: def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: str, input_filepath_linked: str, input_filepath_codes: str) -> None:
...@@ -103,10 +89,16 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st ...@@ -103,10 +89,16 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st
if os.path.isfile(input_filepath_codes): if os.path.isfile(input_filepath_codes):
codes = load_code_sections(input_filepath_codes) codes = load_code_sections(input_filepath_codes)
count = 0 count = 0
parsed = 0
not_parsed = 0
for key, values in codes.items(): for key, values in codes.items():
if count < 6: if count < 1000:
analyze(key, values) tmp = analyze(key, values)
parsed += tmp[0]
not_parsed += tmp[1]
count += 1 count += 1
print(f'Parsed: {parsed}, not parsed: {not_parsed}')
# Parsed: 931, not parsed: 1802
return return
if os.path.isfile(input_filepath_linked): if os.path.isfile(input_filepath_linked):
...@@ -119,7 +111,7 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st ...@@ -119,7 +111,7 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st
run(input_filepath_all_answers, input_filepath_postgresql_questions, input_filepath_linked, input_filepath_codes) run(input_filepath_all_answers, input_filepath_postgresql_questions, input_filepath_linked, input_filepath_codes)
# creates input_filepath_all_answers, input_filepath_postgresql_questions # creates input_filepath_all_answers, input_filepath_postgresql_questions
filter_postgresql_questions('D:\\stackoverflow\\Posts.xml', 'D:\\stackoverflow\\Tags.xml', input_filepath_postgresql_questions, input_filepath_all_answers) filter_postgresql_questions(config.filepaths['posts'], config.filepaths['tags'], input_filepath_postgresql_questions, input_filepath_all_answers)
# creates input_filepath_linked # creates input_filepath_linked
link_questions_with_answers(input_filepath_all_answers, get_questions(input_filepath_postgresql_questions), input_filepath_linked) link_questions_with_answers(input_filepath_all_answers, get_questions(input_filepath_postgresql_questions), input_filepath_linked)
#creates input_filepath_codes #creates input_filepath_codes
...@@ -128,16 +120,14 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st ...@@ -128,16 +120,14 @@ def run(input_filepath_all_answers: str, input_filepath_postgresql_questions: st
run(input_filepath_all_answers, input_filepath_postgresql_questions, input_filepath_linked, input_filepath_codes) run(input_filepath_all_answers, input_filepath_postgresql_questions, input_filepath_linked, input_filepath_codes)
if __name__ == "__main__": if __name__ == "__main__":
run('D:\\all_answers', 'D:\\postgresql_questions.txt', 'D:\\linked.txt', 'D:\\codes.txt') run(config.filepaths['all_answers'], config.filepaths['postgresql_questions'], config.filepaths['linked'], config.filepaths['codes'])
""" """
DONE: DONE:
1. Kapitola ohledně výběru parseru (stránka 10) 1. config file, soubory do files/
2. Automatizace/serializace na základě aktuálního progresu 2. struktura kapitoly, nová kapitola ohledně schémat
partially 3. Parser - tabulky s jejich aliasy 3. test na větším počtu
"""
OTÁZKY:
1. zahodit pokud sqlglot nedokáže qualify? #TODO Fix analyze()
2. cíl bakalářky - knihovna \ No newline at end of file
"""
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment