From 473a8d9420d98a7deaa51af154536b2cdd1fb8de Mon Sep 17 00:00:00 2001 From: samuelvasecka <samuel.vasecka@gmail.com> Date: Fri, 24 May 2024 13:45:50 +0200 Subject: [PATCH] final commit --- README.md | 141 ++++++++++++------ app/sample.db | Bin 4530176 -> 4530176 bytes app/src/main/java/githubsqlfinder/App.java | 91 ++++++++--- .../java/githubsqlfinder/GithubFinder.java | 53 +++++-- app/src/main/java/githubsqlfinder/JDBC.java | 5 +- 5 files changed, 216 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 1e6747a..9e187c8 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ This will be run in default configuration To change configuration, you should use command parameters: -1. **files** - number of files to fetch from Github -1. **batch** - number of files in one batch +1. **files** - number of files to fetch from Github (Max limit 5000/hour) +1. **batch** - number of files in one batch (please set to the number of files) 1. **mode** mode=1 - it will use example files (for testing purposes) mode=2 - it will fetch files from github @@ -42,49 +42,102 @@ Parameters can be used like this: * `./gradlew run -Dfiles=100 -Dbatch=50 -Dmode=2 -Doffset=200 -Dsample=2` * This command will fetch **100 files** in two **50 files batches** from **github** (mode=2) from **offset 200** (so there will be products from 200 to 300) from **original github collection** (sample=2) -In the testing process, i found out that most efficient **batch** size is **100**. In this configurrations, it takes around **5 minutes** to fetch this batch (100 files) from github. From this we can assume that **10000 files** will take around **8 hours**. +Here is the test run: -Here are some test runs which proves the lines above: - -Batch size: 10 -Github total request time: 17,4 min -``` -Github requests time in ms: 1049982(98,22%) -ANTLR parsing time in ms: 7533(0,70%) -Parsing tree string finding: 5841(0,55%) -Whole time: 1068991 -Number of all found queries: 18(from 100 files) -Number of TSql queries: 0 -Number of Postgre SQL queries: 15 -Number of PlSql queries: 0 -Number of MySql queries: 3 -New offset to use for query: 100 -``` -Batch size: 50 -Github total request time: 6,3 min +Number of files: 5000 +Total time: 29,4 min +SQL queries found: 78 ``` -Github requests time in ms: 376292(98,18%) -ANTLR parsing time in ms: 1688(0,44%) -Parsing tree string finding: 448(0,12%) -Whole time: 383269 -Number of all found queries: 2(from 100 files) +Github requests time in ms: 1727552 (98,01%) +ANTLR parsing time in ms: 17692 (1,00%) +Parsing tree string finding in ms: 8517 (0,48%) +Whole time: 1762689 +Number of valid files: 339 +Number of all found queries: 78 (from 5000 files) Number of TSql queries: 0 -Number of Postgre SQL queries: 2 -Number of PlSql queries: 0 -Number of MySql queries: 0 -New offset to use for query: 200 +Number of Postgre SQL queries: 32 +Number of PlSql queries: 1 +Number of MySql queries: 45 +New offset to use for query: 5000 ``` -Batch size: 100 -Github total request time: 5,2 min -``` -Github requests time in ms: 312213(95,48%) -ANTLR parsing time in ms: 5346(1,63%) -Parsing tree string finding: 5180(1,58%) -Whole time: 326986 -Number of all found queries: 36(from 100 files) -Number of TSql queries: 0 -Number of Postgre SQL queries: 0 -Number of PlSql queries: 0 -Number of MySql queries: 36 -New offset to use for query: 300 -``` \ No newline at end of file + +Database after the test run: + +| Id | Repozitory | File | DB dialect | SQL query | +| -- | -- | -- | -- | -- | +| 1 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/login.php | POSTGRE SQL | SELECT PASS FROM CLIENTS WHERE NICK = :P0; | +| 2 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/login.php | POSTGRE SQL | SELECT NAME FROM CLIENTS WHERE NICK = :P0; | +| 3 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/RegValid.php | POSTGRE SQL | SELECT * FROM CLIENTS WHERE NICK = :P0; | +| 4 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/phptest1.php | MYSQL | SELECT * FROM PET; | +| 5 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/RegRunnerSuccess.php | POSTGRE SQL | SELECT * FROM RUNNERS WHERE NICK = :P0;6 | 0--key/lib | portfolio/2008_eda/files/wap/a/wap_index_new.php | MYSQL | SELECT * FROM RUNNERS; | +| 7 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/Runner_Valid.php | POSTGRE SQL | SELECT * FROM RUNNERS WHERE NICK = :P0; | +| 8 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT * FROM RUNNERS WHERE NICK = :P0 AND PASSWORD = :P1; | +| 9 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT OR_NO , STATUS , CR_TIME2 , CU_NICK FROM ORDERS WHERE RU_NICK = :P0 AND QUALITY = 'WELL' | ORDER BY CR_TIME2 DESC; | +| 10 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT ID , PCS FROM BASKETS WHERE ORDER_NO = :P0; | +| 11 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT MEAL_NAME , MANUFACTURER , CAPACITY FROM MEAL_GLOBAL WHERE ID = :P0; | +| 12 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT PRICE_UH , ALLOCATION FROM MEAL_LOCAL WHERE ID = :P0; | +| 13 | 0--key/lib | portfolio/2008_eda/files/wap/a/login.php | POSTGRE SQL | SELECT * FROM CUSTOMERS WHERE NICK = :P0; | +| 14 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/Input_id.php | POSTGRE SQL | SELECT * FROM MEAL_GLOBAL WHERE ID = :P0; | +| 15 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/Input_id.php | POSTGRE SQL | SELECT * FROM MEAL_LOCAL WHERE ID = :P0 AND STORE_NAME = :P1; | +| 16 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/RegSuccess.php | POSTGRE SQL | SELECT * FROM CLIENTS WHERE NICK = :P0; | +| 17 | 0--key/lib | portfolio/2008_eda/files/wap/a/wap_index_new3.php | MYSQL | SELECT COUNT ( * ) FROM RUNNERS; | +| 18 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/EncodingProbe.php | MYSQL | SELECT NICK FROM CLIENTS WHERE PASS = 'SOFT'; | +| 19 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT CU_NICK FROM ORDERS WHERE OR_NO = :P0; | +| 20 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT PASSWORD , STATUS , TYPE FROM RUNNERS WHERE NICK = :P0; | +| 21 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT STATUS , TYPE , TOWN , IV FROM RUNNERS WHERE NICK = :P0; | +| 22 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT OR_NO , STATUS , CR_TIME2 FROM ORDERS WHERE RU_NICK = :P0 AND QUALITY = 'WELL' ORDER BY CR_TIME2 DESC; | +| 23 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT ID , PCS FROM BASKETS WHERE ORDER_NO = :P0; | +| 24 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT MEAL_NAME , MANUFACTURER , CAPACITY FROM MEAL_GLOBAL WHERE ID = :P0; | +| 25 | 0--key/lib | portfolio/2008_eda/files/wap/wap_login.php | POSTGRE SQL | SELECT PRICE_UH , ALLOCATION FROM MEAL_LOCAL WHERE ID = :P0; | +| 26 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/index.php | MYSQL | SELECT * FROM RUNNERS; | +| 27 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/index.php | PLSQL | SELECT * FROM STORE; | +| 28 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/index.php | MYSQL | SELECT * FROM CUSTOMERS; | +| 29 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/index.php | MYSQL | SELECT * FROM MEAL_GLOBAL; | +| 30 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/index.php | MYSQL | SELECT * FROM MEAL_LOCAL; | +| 31 | 0--key/lib | portfolio/2008_eda/snapshots/edacomua/TypicalOrder.php | MYSQL | SELECT * FROM PET; | +| 32 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/Manage_section.php | POSTGRE SQL | SELECT ID , MEAL_NAME FROM MEAL_GLOBAL WHERE MEAL_TYPE = :P0; | +| 33 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/login.php | POSTGRE SQL | SELECT PASS FROM CUSTOMERS WHERE NICK = :P0; | +| 34 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/login.php | POSTGRE SQL | SELECT PASSWORD FROM RUNNERS WHERE NICK = :P0; | +| 35 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/login.php | POSTGRE SQL | SELECT * FROM RUNNERS WHERE NICK = :P0; | +| 36 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/login.php | POSTGRE SQL | SELECT * FROM CUSTOMERS WHERE NICK = :P0; | +| 37 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/Arrive_in_store.php | POSTGRE SQL | SELECT * FROM RUNNERS WHERE NICK = :P0;38 | 0--key/lib | portfolio/2008_eda/snapshots/20042008/Arrive_in_store.php | POSTGRE SQL | SELECT * FROM STORES WHERE TOWN = :P0 AND STORE_NAME = :P1; | +| 39 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT 1 FROM MYSQL .USER LIMIT 1; | +| 40 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT 1 FROM `INFORMATION_SCHEMA` . `USER_PRIVILEGES` WHERE `PRIVILEGE_TYPE` = 'CREATE USER' AND '''PMA_TEST''@''LOCALHOST''' LIKE `GRANTEE` LIMIT 1; | +| 41 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT 1 FROM ( SELECT `GRANTEE` , `IS_GRANTABLE` FROM `INFORMATION_SCHEMA` . `COLUMN_PRIVILEGES` UNION SELECT `GRANTEE` , `IS_GRANTABLE` FROM `INFORMATION_SCHEMA` . `TABLE_PRIVILEGES` UNION SELECT `GRANTEE` , `IS_GRANTABLE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` UNION SELECT `GRANTEE` , `IS_GRANTABLE` FROM `INFORMATION_SCHEMA` . `USER_PRIVILEGES` ) T WHERE `IS_GRANTABLE` = 'YES' AND '''PMA_TEST''@''LOCALHOST''' LIKE `GRANTEE` LIMIT 1; | +| 42 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM INFORMATION_SCHEMA .CHARACTER_SETS; | +| 43 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM INFORMATION_SCHEMA .COLLATIONS; | +| 44 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `TABLE_NAME` FROM `INFORMATION_SCHEMA` . `TABLES` WHERE `TABLE_SCHEMA` = 'PMA_TEST' AND `TABLE_TYPE` = 'BASE TABLE'; | +| 45 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `COLUMN_NAME` , `MIMETYPE` , `TRANSFORMATION` , `TRANSFORMATION_OPTIONS` , `INPUT_TRANSFORMATION` , `INPUT_TRANSFORMATION_OPTIONS` FROM `PMADB` . `COLUMN_INFO` WHERE `DB_NAME` = 'PMA_TEST' AND `TABLE_NAME` = 'TABLE1' AND ( `MIMETYPE` ! = '' OR `TRANSFORMATION` ! = '' OR `TRANSFORMATION_OPTIONS` ! = '' OR `INPUT_TRANSFORMATION` ! = '' OR `INPUT_TRANSFORMATION_OPTIONS` ! = '' ); | +| 46 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT TABLE_NAME FROM INFORMATION_SCHEMA .VIEWS WHERE TABLE_SCHEMA = 'PMA_TEST' AND TABLE_NAME = 'TABLE1'; | +| 47 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * , `TABLE_SCHEMA` AS `DB` , `TABLE_NAME` AS `NAME` , `TABLE_TYPE` AS `TABLE_TYPE` , `ENGINE` AS `ENGINE` , `ENGINE` AS `TYPE` , `VERSION` AS `VERSION` , `ROW_FORMAT` AS `ROW_FORMAT` , `TABLE_ROWS` AS `ROWS` , `AVG_ROW_LENGTH` AS `AVG_ROW_LENGTH` , `DATA_LENGTH` AS `DATA_LENGTH` , `MAX_DATA_LENGTH` AS `MAX_DATA_LENGTH` , `INDEX_LENGTH` AS `INDEX_LENGTH` , `DATA_FREE` AS `DATA_FREE` , `AUTO_INCREMENT` AS `AUTO_INCREMENT` , `CREATE_TIME` AS `CREATE_TIME` , `UPDATE_TIME` AS `UPDATE_TIME` , `CHECK_TIME` AS `CHECK_TIME` , `TABLE_COLLATION` AS `COLLATION` , `CHECKSUM` AS `CHECKSUM` , `CREATE_OPTIONS` AS `CREATE_OPTIONS` , `TABLE_COMMENT` AS `COMMENT` FROM `INFORMATION_SCHEMA` . `TABLES` T WHERE `TABLE_SCHEMA` IN ( 'PMA_TEST' ) AND T . `TABLE_NAME` = 'TABLE1' ORDER BY NAME ASC; | +| 48 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT COUNT ( * ) FROM `PMA_TEST` . `TABLE1`; | +| 49 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `USER_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'TRIGGER'; | +| 50 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'TRIGGER' AND 'PMA_TEST' LIKE `TABLE_SCHEMA`; | +| 51 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `TABLE_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'TRIGGER' AND 'PMA_TEST' LIKE `TABLE_SCHEMA` AND TABLE_NAME = 'TABLE1'; | +| 52 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `USER_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'EVENT'; | +| 53 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'EVENT' AND 'PMA_TEST' LIKE `TABLE_SCHEMA`; | +| 54 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `TABLE_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'EVENT' AND TABLE_SCHEMA = 'PMA_TEST' AND TABLE_NAME = 'TABLE1'; | +| 55 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT TRIGGER_SCHEMA , TRIGGER_NAME , EVENT_MANIPULATION , EVENT_OBJECT_TABLE , ACTION_TIMING , ACTION_STATEMENT , EVENT_OBJECT_SCHEMA , EVENT_OBJECT_TABLE , DEFINER FROM INFORMATION_SCHEMA .TRIGGERS WHERE EVENT_OBJECT_SCHEMA = 'PMA_TEST' AND EVENT_OBJECT_TABLE = 'TABLE1' ;; | +| 56 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'EVENT' AND TABLE_SCHEMA = 'PMA'; | +| 57 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'TRIGGER' AND TABLE_SCHEMA = 'PMA'; | +| 58 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT DEFAULT_COLLATION_NAME FROM INFORMATION_SCHEMA .SCHEMATA WHERE SCHEMA_NAME = 'PMA_TEST' LIMIT 1; | +| 59 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT TRACKING_ACTIVE FROM `PMADB` . `TRACKING` WHERE DB_NAME = 'PMA_TEST_DB' AND TABLE_NAME = 'PMA_TEST_TABLE' ORDER BY VERSION DESC LIMIT 1; | +| 60 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT TRACKING_ACTIVE FROM `PMADB` . `TRACKING` WHERE DB_NAME = 'PMA_TEST_DB' AND TABLE_NAME = 'PMA_TEST_TABLE2' ORDER BY VERSION DESC LIMIT 1; | +| 61 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `SCHEMA_NAME` FROM `INFORMATION_SCHEMA` . `SCHEMATA` , ( SELECT DB_FIRST_LEVEL FROM ( SELECT DISTINCT SUBSTRING_INDEX ( SCHEMA_NAME , '_' , 1 ) DB_FIRST_LEVEL FROM INFORMATION_SCHEMA .SCHEMATA WHERE TRUE ) T ORDER BY DB_FIRST_LEVEL ASC LIMIT 0 , 100 ) T2 WHERE TRUE AND 1 = LOCATE ( CONCAT ( DB_FIRST_LEVEL , '_' ) , CONCAT ( SCHEMA_NAME , '_' ) ) ORDER BY SCHEMA_NAME ASC; | +| 62 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT COUNT ( * ) FROM ( SELECT DISTINCT SUBSTRING_INDEX ( SCHEMA_NAME , '_' , 1 ) DB_FIRST_LEVEL FROM INFORMATION_SCHEMA .SCHEMATA WHERE TRUE ) T; | +| 63 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PARTITION_METHOD` FROM `INFORMATION_SCHEMA` . `PARTITIONS` WHERE `TABLE_SCHEMA` = 'DB' AND `TABLE_NAME` = 'TABLE'; | +| 64 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM `MYSQL` . `DB` LIMIT 1; | +| 65 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM `MYSQL` . `COLUMNS_PRIV` LIMIT 1; | +| 66 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM `MYSQL` . `TABLES_PRIV` LIMIT 1; | +| 67 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * FROM `MYSQL` . `PROCS_PRIV` LIMIT 1; | +| 68 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PLUGIN` FROM `MYSQL` . `USER` WHERE `USER` = "PMA_USERNAME" AND `HOST` = "PMA_HOSTNAME" LIMIT 1; | +| 69 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT TABLE_NAME FROM INFORMATION_SCHEMA .VIEWS WHERE TABLE_SCHEMA = 'DB' AND TABLE_NAME = 'TABLE'; | +| 70 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT * , `TABLE_SCHEMA` AS `DB` , `TABLE_NAME` AS `NAME` , `TABLE_TYPE` AS `TABLE_TYPE` , `ENGINE` AS `ENGINE` , `ENGINE` AS `TYPE` , `VERSION` AS `VERSION` , `ROW_FORMAT` AS `ROW_FORMAT` , `TABLE_ROWS` AS `ROWS` , `AVG_ROW_LENGTH` AS `AVG_ROW_LENGTH` , `DATA_LENGTH` AS `DATA_LENGTH` , `MAX_DATA_LENGTH` AS `MAX_DATA_LENGTH` , `INDEX_LENGTH` AS `INDEX_LENGTH` , `DATA_FREE` AS `DATA_FREE` , `AUTO_INCREMENT` AS `AUTO_INCREMENT` , `CREATE_TIME` AS `CREATE_TIME` , `UPDATE_TIME` AS `UPDATE_TIME` , `CHECK_TIME` AS `CHECK_TIME` , `TABLE_COLLATION` AS `COLLATION` , `CHECKSUM` AS `CHECKSUM` , `CREATE_OPTIONS` AS `CREATE_OPTIONS` , `TABLE_COMMENT` AS `COMMENT` FROM `INFORMATION_SCHEMA` . `TABLES` T WHERE `TABLE_SCHEMA` IN ( 'DB' ) AND T . `TABLE_NAME` = 'TABLE' ORDER BY NAME ASC; | +| 71 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PLUGIN_NAME` , `PLUGIN_DESCRIPTION` FROM `INFORMATION_SCHEMA` . `PLUGINS` WHERE `PLUGIN_TYPE` = 'AUTHENTICATION' ;; | +| 72 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'EVENT' AND 'DB' LIKE `TABLE_SCHEMA`; | +| 73 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT `PRIVILEGE_TYPE` FROM `INFORMATION_SCHEMA` . `SCHEMA_PRIVILEGES` WHERE GRANTEE = '''PMA_TEST''@''LOCALHOST''' AND PRIVILEGE_TYPE = 'TRIGGER' AND 'DB' LIKE `TABLE_SCHEMA`; | +| 74 | 007durgesh219/phpmyadmin | libraries/dbi/DBIDummy.php | MYSQL | SELECT ( COUNT ( DB_FIRST_LEVEL ) DIV 100 ) * 100 FROM ( SELECT DISTINCT SUBSTRING_INDEX ( SCHEMA_NAME , '_' , 1 ) DB_FIRST_LEVEL FROM INFORMATION_SCHEMA .SCHEMATA WHERE `SCHEMA_NAME` < 'DB' ) T; | +| 75 | 00foobar/openDGS | classes/Pagination.php | POSTGRE SQL | SELECT COUNT ( * ) FROM :P0 WHERE CATEGORY = :CATEGORY; | +| 76 | 00foobar/openDGS | classes/Pagination.php | POSTGRE SQL | SELECT * FROM :P0 ORDER BY ID DESC LIMIT :LIMITVAR OFFSET :OFFSETVAR; | +| 77 | 00foobar/openDGS | classes/Pagination.php | POSTGRE SQL | SELECT * FROM :P0 WHERE CATEGORY = :CATEGORY ORDER BY ID DESC LIMIT :LIMITVAR OFFSET :OFFSETVAR; | +| 78 | 0100Dev/WHMCS-Acumulus | src/includes/inject.php | POSTGRE SQL | SELECT GATEWAY FROM TBLPAYMENTGATEWAYS WHERE GATEWAY = :P0 AND SETTING = "ACUMULUSACCOUNT" LIMIT 1; | \ No newline at end of file diff --git a/app/sample.db b/app/sample.db index 74b87f052289e1921c5ccfe6353c957d1fd09cf0..dd0667d148f21022890f5e57dbb483af5df29214 100644 GIT binary patch delta 3882 zcmeH~TW}NC8OQf%rPak3S(b$E^2!8kjD;l(#+R7&S~>`WZjshFGEk$+f~qpIt&4-3 zNgR>XOgkMya!U81owjKb(x;}OC7z^dCQSR%hbGX_8A#H`bS4jdXj5hqI=v+AxAF~S zr=7m!rOfEJl8(;*obP=5?e~3p?2T{A9s|zVbk9(fJVwC<UO{+IJ=NZco_XombJ|f_ zdkY5WPqqKh-qzl#J5l?m+LJXe)^uwwXf~@asrywgs`e<~QVPr>lTch$*yKNu*VA94 z-Lki3C7BVuj7F$mPzR|Q7el38+0Ahr#6E$I^6@zF+xL<yO?skp7-c$EoP3LW%5t=c zp}ekH)AGDFc3&(KVxw3fUW*fEYe>|l-Yv{dWm3tBNPaw>-!b*2sZb*RK=7!Op*UBz zN*cm^a4Z<{@xkR4^c|Vpc>2_;><bQt;y0EBf|0>BUEAHdBX)-BU#Ig8$Aw6U{Ke*y zj<)S?$%8hA+PzK!f*4*g&bX?G2H{MjR)*?vWyy?fm$aU4_1DUY%Q6dTH$8#O<R#P7 zjESM9T}kq~W0bsSHPhwJP2_d+C1fN;%Nb-K|FnD+>B(tp0X34_)>+wc9eLAs+&<I5 zP={Tml^e6{a9?l;hXu03>RIvm_E`Evyp&3%3kAt_(MVhf#4rmsmTR<1s;qQPudYEN zrY0lHhy85jY-3?QgxP+!H|kwEX3J1+s#F}yj90BSQ_oOicdnFhRAR*P8S^LVNS8f_ zYRU8V)2N2DI?f>tdByP%QWLN9HKZbcc0Q^+rIe01OFnY8kOAvz6ggt0TrQm-JCvMQ zXUAcNOxq5t9#JSkSw_{;Zg9qudSjO)<Wdw*SA}9#6uZHV#dM+QT@A$|Eg8Dz$EDz3 ze=oXV`;DwF$`~rS_K;QFUU`1jFF~Gl7@CUcuKChK`B_^UCMHj0a#EVWaCu-Sq=Zjg z7g=S40`i7D`jcAE0mRUsN)sMG;yrW3ivSrw19CtCFn|(J0ct=4)Bv?W9Z(Nwfd)Va zGy;0S02qNyfC(@I7QhPF06X9SoIn%60xqB#*bLkUYyq|cEkG-<4QK<l1MPqt*a36^ z9zZ(7PM{Om1?&d8fIUDr&;#@WeZc)dKd^V<h?o1}*-;lHJnh%DhWhW;`|5sqFK4a> z_`RHYFK4Fn|1UXH%CX<N!pS$OnVVGY#hTkSjK-jDP<>zJQqD6s#pHnIkT^A<34Goc z7B$S3o5N7G<2`*VRXe~7c!W<Z6#!u{808Zo=mD08<Lt<O4E26xmQ>9>OC=r)!caH> z^}U(*4Tpw<!*Sl{iwuW_W;Qq!8WLD<pU3O*_|r2TBl|<XxSLOvPR^AkN`*o`1<hD$ zqBNc^z=toVQ^l%&pll9-l_7V}ij~X#iM08WK|U}$5Q!zE{$llWB<9C4b}+#X`B^`X z`<4yqt=MyjkFk-tef#2AV0$B#cNpsPc#cm_eleNvn4C)I{DF9CB3USaUC|^IzGN{o znX9txksz36+R(JmW&hkkhI)8y`-;oiR<@1Zwsa5wqmQ~0|NDVYqXP@=S2-G?A<^?2 z?v`S|k~*CgYs~np_|vz!qbMLYUgLg-_AR`5jXOxsFLLvX1M`dg{NmvJqVMHJzntBw zqb^h0d95BwqpR+0oxApsW?21!dRFyw=I=~c_BYu#&@0sC%A;|9O2O0R(oRun#(za# z@n2?q6?w#~7JL$QEO1sV(25;!3sNF`ojV}@!GVt=_rib^zk%p-PdmBU@G9C)zN5>^ z+Zbw+d{4KPcpC#~+q1KcGSo`GV)!j;AyMO)zud4DmZ4p9RY6v1URMHNh>V0QJ?wJS zOHKO*jqMgbpU=#s$1=IGLUA&mu8Nt5w@4k{+#UU-{=X;+_emXbmh{;J(mw$Bvt&?C zZ<bcc-noi2ZaUS>P?@#<PwMAJAQZ_bPUmvtThU87sZCEEqv}7bch<GmIyJvi|4#Lx zDxiFbxgh_e+(0K}AIr|jtmsEbK^;4vRP3b7%~-Tw=bl4;@$z-<1>_SmH@K{P5H=Ji zZg6(-qZ?dE&NEbY>1*;GE{q1mAHTzmBTh8F%e{*BidWv{uA%;g=ilRALyG$$c--Qi z_qn`y{rB7`>JvNP=YCC>xAcnP2K<7e2Yx>x1=qL5^Ew<>bi?=5Qn1Z5;sn~WaHSCo zNYSOF9&eJyyGe__QGC>h!;(NRe&E0l(&fT#vBQWzR_v;5CruOkELadPZNdYJPSCpV Y()z?FEm$R<(_w{p!Hk{aMH7zw8xGijDF6Tf delta 2418 zcmeIzTTGKz902g|e79aGy>KhEKwIQ4Z9y&~%Bs>M1G%(Zypk}$!m3Cq=-4y?suPpN zDfQbt$=+sMaBlIoXcJ`{eb^p0jk?K$nrK<Fm(7<&XX2*Y{{0Tjm%Z#^*^3Q7{yFFT z|L2mP@9X5;d+a2bHXG~=!?!W;5B{1Vc7{vJk^RDJr*ze<?g?~of9QVKJxR>doYYh# zTuTV3AFC%-PgOzXRpolcSw)ffsaPVM6I%E&_6=4<A29bxk4L?U413h|VUNItR+-40 z5??_X<h%i=kYg&mnNJs)02x=|e4<z5dSsm0qQ)#rBU?3i9;FgzBJQ5ppCU4&<(<pg zaTBiVur^p7)?(|1w$^5AL-V?N+zvgPU&U=$9+A5m+}2uaji+>VDSYg`{((IsKA&@6 zPj9dP&Hj<V&>r8=j#20E>%&g}P~YI*9X_Xjch7)t)B#NgbbDIcJL+M=c5l;o@`T9j zE#JDdy645}p0?Hv4Nb7VXL0@bSZ6zK`}fNaj|BStga5dJ{S!(5<MBhRK{}HBHquLm zl;fgKWP-LH$z|Rqtudx^p>nO1oq7o+O1D!FBaPIRb_gX%KctN!wY16j7Q0;~`LnjO z{YvSUv7a4ONRf;oBue?F14xiQHcca5sxgltPP%BGMXVIa9Ops?Bz>(vtd3vCLb!9! zO&6^Sz0eMA?`N9QMkMmy_*FdERd%?m3;`B!fCmB)fdVLj3aCK>(11js1v;Py29N}j zK?+C(X}}25K?X1ZGspy4ARAbK71%%ySO#)I9>@m;pb!*+Vz3;P06TC1CnyE+GpqpR zpaN8aDzFk%gH@mgtOjd9EqEo;RpvT<wA#enXLN(whgwHs<mE&8_bcJ$L-}7ll;IOM zU3{({6*0O}ZMWuR!u5nC^;z{=)%U6a<rm5f{##zhSy($Ndhy<uU;bozn#<x2pz28S z9T$g$l?vuSfXpFWeFEVfBy!i~6{?_b*HYgm@_824kePcfFRCO@@42oD6$<7^!DSBL zVaf3at|2azQ%?Hy_!L?}lm<M5%HqE%yU4pN){>$m+|4^hW*4y};Y@Nm3AgYLk=aa} zX~`HPJLyWsSA-H+wUUHcY$jDH_!3$kd6<G{kx&f%b~0|nBgA6F+u%g|jQILg%rzBT zI~7|u6<a?QtGgU?^H##g++!qoW_=^NO``L4@nF-r<H1lSw}Lz6@XH8I`AQD9KjjcC zb~m!OhseTXcO!`&qSar@!LUFxOmZuV<tc3wG@PSjck7_qw^&_Cs{?ZQaExYrBZug( zbj(Tj<&|5(cjOSgBzH!ClrwD>I@TE9*OtYZKcdwS<X}KFqec!<AFY0$R^Onj{ubX? z>tb~$tv*JpFVKvi<Pc4z$7!Ye3euU+(3$7y%mOWU(XpNGh7;#k@m4Lfk<r#BhSh6S zH<Ztmy!c4W5lZ+R_BvW%9x)q9XqqeFLV1OxF&)nc1@L_iEZwd?63)OaD4)bK@XshO za?ON05Xz0%GO-E?%OG`ZDOJ9ReJF?6vhhv9reF@*<sss*;O`Wny;exxzm$9xS;#EH fw@@}Y%HrK9EAouR9URIeUJ-wS%#r6Jo<n~D^KSWX diff --git a/app/src/main/java/githubsqlfinder/App.java b/app/src/main/java/githubsqlfinder/App.java index 6203585..d2706f7 100644 --- a/app/src/main/java/githubsqlfinder/App.java +++ b/app/src/main/java/githubsqlfinder/App.java @@ -36,11 +36,11 @@ public class App { static int numberOfAddedQueriesPostgreSQL = 0; static int numberOfAddedQueriesTSql = 0; static long timeMs = 0; + static int numberOfValidFiles = 0; public static void main(String[] args) throws Exception { long sTime = System.nanoTime(); - int numberOfGithubFilesToSearch = 100; int batchSize = 100; int mode = 2; @@ -55,7 +55,6 @@ public class App { System.err.println("The first parameter (number of files to process) was incorrect"); return; } - } if (args.length > 1) { @@ -101,7 +100,7 @@ public class App { // System.out.println("Number of files to process: " + numberOfGithubFilesToSearch); System.out.println("Batch size: " + batchSize); - System.out.println("Mode (1 - Example files, 2 - Github, 3 - Clear DB): " + mode); + System.out.println("Mode (1 - Example files, 2 - Github, 3 - Clear DB, 4 - Show DB): " + mode); System.out.println("Data (1 - Sample data, 2 - Real data): " + sample); System.out.println("offset: " + offset); @@ -109,7 +108,6 @@ public class App { boolean repeatCycle = true; String input = ""; - if (mode == 1) { //example files int maxFile = 3; @@ -120,14 +118,18 @@ public class App { input = Files.readString(path); - runPhpAntlr(input, "example", filePath); + if (isValidFile(input)) { + runPhpAntlr(input, "example", filePath); - for (String query : stringStatements) { - if (isValidQuery(query)) { - runAntlr(query.replaceAll("\\\\", ""), "example", filePath); + for (String query : stringStatements) { + if (isValidQuery(query)) { + runAntlr(query.replaceAll("\\\\", ""), "example", filePath); + } } } } + } else if (mode == 4) { + db.showPage(0, true); } else if (mode == 2) { //github @@ -149,17 +151,27 @@ public class App { githubTimeMs += (endTime - startTime) / 1_000_000; for (FieldValueList row : result.iterateAll()) { - - input = row.get("content").getStringValue(); String repoName = row.get("repo_name").getStringValue(); String filePath = row.get("path").getStringValue(); + if (sample == 1) { + input = row.get("content").getStringValue(); + } else { + startTime = System.nanoTime(); + input = githubFinder.getFileContent(repoName, filePath); + endTime = System.nanoTime(); + + githubTimeMs += (endTime - startTime) / 1_000_000; + } - runPhpAntlr(input, repoName, filePath); + if (isValidFile(input)) { + numberOfValidFiles++; + runPhpAntlr(input, repoName, filePath); - for (String query : stringStatements) { - if (isValidQuery(query)) { - runAntlr(query.replaceAll("\\\\", ""), repoName, filePath); - } + for (String query : stringStatements) { + if (isValidQuery(query)) { + runAntlr(query.replaceAll("\\\\", ""), repoName, filePath); + } + } } index++; System.out.println("" + index + "/" + numberOfGithubFilesToSearch); @@ -182,11 +194,12 @@ public class App { timeMs += (eTime - sTime) / 1_000_000; if (mode == 2) { - System.out.println("Github requests time in ms: " + githubTimeMs + "(" + String.format("%.2f", ((double) githubTimeMs / timeMs) * 100) + "%)"); - System.out.println("ANTLR parsing time in ms: " + antlrTimeMs + "(" + String.format("%.2f", ((double) antlrTimeMs / timeMs) * 100) + "%)"); - System.out.println("Parsing tree string finding: " + parsingTreeTimeMs + "(" + String.format("%.2f", ((double) parsingTreeTimeMs / timeMs) * 100) + "%)"); + System.out.println("Github requests time in ms: " + githubTimeMs + " (" + String.format("%.2f", ((double) githubTimeMs / timeMs) * 100) + "%)"); + System.out.println("ANTLR parsing time in ms: " + antlrTimeMs + " (" + String.format("%.2f", ((double) antlrTimeMs / timeMs) * 100) + "%)"); + System.out.println("Parsing tree string finding in ms: " + parsingTreeTimeMs + " (" + String.format("%.2f", ((double) parsingTreeTimeMs / timeMs) * 100) + "%)"); System.out.println("Whole time: " + timeMs); - System.out.println("Number of all found queries: " + numberOfAddedQueries + "(from " + numberOfGithubFilesToSearch + " files)"); + System.out.println("Number of valid files: " + numberOfValidFiles); + System.out.println("Number of all found queries: " + numberOfAddedQueries + " (from " + numberOfGithubFilesToSearch + " files)"); System.out.println("Number of TSql queries: " + numberOfAddedQueriesTSql); System.out.println("Number of Postgre SQL queries: " + numberOfAddedQueriesPostgreSQL); System.out.println("Number of PlSql queries: " + numberOfAddedQueriesPlSql); @@ -237,6 +250,7 @@ public class App { query = ""; paramIndex = 0; getParametrizedQuery(arithmeticExpressionContext); + query = removeQuotesAroundParams(query); if (!query.equals("") && !stringStatements.contains(query)) { stringStatements.add(query); } @@ -276,6 +290,25 @@ public class App { } matcher.appendTail(result); + return removeQuotesAroundParams(result.toString()); + } + + public static String removeQuotesAroundParams(String input) { + if (input == null) { + return null; + } + + Pattern pattern = Pattern.compile("(['\"]?):p\\d+\\1"); + Matcher matcher = pattern.matcher(input); + StringBuffer result = new StringBuffer(); + + while (matcher.find()) { + String match = matcher.group(); + String replacement = match.replaceAll("['\"]", ""); + matcher.appendReplacement(result, replacement); + } + matcher.appendTail(result); + return result.toString(); } @@ -283,6 +316,7 @@ public class App { if (tree instanceof StringContext) { String text = tree.getText(); text = text.replaceAll("\\\\", ""); + if (((text.startsWith("\'") && text.endsWith("\'")) || (text.startsWith("\"") && text.endsWith("\""))) && text.length() > 1) { text = text.substring(1, text.length() - 1); query += text; @@ -318,6 +352,25 @@ public class App { return false; } + + public static boolean isValidFile(String file) { + if (file == null) { + return false; + } + + String lowerCaseFile = file.toLowerCase(); + + if (lowerCaseFile.contains("select") && lowerCaseFile.contains("from") && lowerCaseFile.indexOf("select") < lowerCaseFile.indexOf("from")) { + return true; + } + + if (lowerCaseFile.contains("create table")) { + return true; + } + + return false; + } + public static void runAntlr(String input, String repoName, String filePath) { JDBC db = new JDBC(); input = input.toUpperCase(); diff --git a/app/src/main/java/githubsqlfinder/GithubFinder.java b/app/src/main/java/githubsqlfinder/GithubFinder.java index 9fa8d7b..be1c640 100644 --- a/app/src/main/java/githubsqlfinder/GithubFinder.java +++ b/app/src/main/java/githubsqlfinder/GithubFinder.java @@ -2,7 +2,6 @@ package githubsqlfinder; import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryOptions; -import com.google.cloud.bigquery.FieldValueList; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobException; import com.google.cloud.bigquery.JobId; @@ -10,23 +9,23 @@ import com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.TableResult; +import java.net.HttpURLConnection; +import java.net.URL; import java.util.UUID; +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.util.Base64; + public class GithubFinder { public TableResult getFilesFromGithub(int batchSize, int step, int originalBatchSize, int offset, int sample) { BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService(); - - String sqlFilesQuery = "select sample_files.id, sample_files.path, sample_contents.content, sample_files.repo_name " - + "from bigquery-public-data.github_repos.sample_files as sample_files " - + "join bigquery-public-data.github_repos.sample_contents as sample_contents on sample_files.id = sample_contents.id " - + "where sample_files.path like '%.sql' limit 10;"; - String phpFilesQuery = "select path, repo_name, content " + + String phpFilesQuery = "select path, repo_name " + "from bigquery-public-data.github_repos.files as files " + - "join bigquery-public-data.github_repos.contents as contents on files.id = contents.id " + "where files.path like '%.php' " + - "and (contents.content like '%select%from%' or contents.content like '%create table%') order by repo_name limit " + batchSize + " offset " + (offset + (step * originalBatchSize)) +";"; + "order by repo_name limit " + batchSize + " offset " + (offset + (step * originalBatchSize)) +";"; if (sample == 1) { phpFilesQuery = "select path, repo_name, content " + @@ -34,6 +33,7 @@ public class GithubFinder { "join bigquery-public-data.github_repos.sample_contents as sample_contents on sample_files.id = sample_contents.id " + "where sample_files.path like '%.php' " + "and (sample_contents.content like '%select%from%' or sample_contents.content like '%create table%') order by repo_name limit " + batchSize + " offset " + (offset + (step * originalBatchSize)) +";"; + } QueryJobConfiguration queryConfig = @@ -66,4 +66,39 @@ public class GithubFinder { return null; } + + public String getFileContent(String repoName, String filePath) throws Exception { + String apiUrl = "https://api.github.com/repos/" + repoName + "/contents/" + filePath; + URL url = new URL(apiUrl); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.setRequestProperty("Authorization", "Bearer " + "ADD_HERE_YOUR_TOKEN"); + connection.setRequestProperty("Accept", "application/vnd.github.v3+json"); + + if (connection.getResponseCode() != 200) { + //Some of the files no longer exists (big query is not updated) + //Or the limit of the GitHub API is crossed + return null; + } + + BufferedReader br = new BufferedReader(new InputStreamReader((connection.getInputStream()))); + StringBuilder jsonResponse = new StringBuilder(); + String output; + while ((output = br.readLine()) != null) { + jsonResponse.append(output); + } + + connection.disconnect(); + + String jsonString = jsonResponse.toString(); + if (jsonString.split("\"content\":\"").length > 1) { + String contentEncoded = jsonString.split("\"content\":\"")[1].split("\",")[0]; + contentEncoded = contentEncoded.replace("\\n", ""); + byte[] decodedBytes = Base64.getDecoder().decode(contentEncoded); + String fileContent = new String(decodedBytes); + + return fileContent; + } + return null; + } } diff --git a/app/src/main/java/githubsqlfinder/JDBC.java b/app/src/main/java/githubsqlfinder/JDBC.java index 0011243..198862b 100644 --- a/app/src/main/java/githubsqlfinder/JDBC.java +++ b/app/src/main/java/githubsqlfinder/JDBC.java @@ -80,10 +80,11 @@ public class JDBC { } ResultSet rs = statement.executeQuery(sql); - System.out.println("Id | Repozitory | File | DB dialect | SQL query"); + System.out.println("| Id | Repozitory | File | DB dialect | SQL query |"); + System.out.println( "| -- | -- | -- | -- | -- |"); while(rs.next()) { - System.out.printf("%d | %s | %s | %s | %s\n", rs.getInt("id"), rs.getString("repo_name"), rs.getString("file_path"), rs.getString("db_dialect"), rs.getString("sql")); + System.out.printf(" | %d | %s | %s | %s | %s |\n", rs.getInt("id"), rs.getString("repo_name"), rs.getString("file_path"), rs.getString("db_dialect"), rs.getString("sql")); } } catch (SQLException e) { e.printStackTrace(System.err); -- GitLab