Skip to content
Snippets Groups Projects
Commit ebe1559a authored by dre0059's avatar dre0059
Browse files

The very first commit

parent c0eb02ef
Branches
No related merge requests found
Pipeline #2469 failed with stages
package com.dre0059.articleprocessor.repository;
import com.dre0059.articleprocessor.model.Reference;
import org.springframework.data.jpa.repository.JpaRepository;
import java.util.List;
/**
 * Spring Data JPA repository for {@link Reference} entities identified by a {@code Long} key.
 *
 * <p>Declares no query methods of its own; everything available here is inherited from
 * {@link JpaRepository}.
 */
public interface ReferenceRepository extends JpaRepository<Reference, Long> {
}
package com.dre0059.articleprocessor.service;
/**
 * Placeholder for document-related service logic; it currently declares no members.
 */
public class DocumentService {
}
package com.dre0059.articleprocessor.service;
import com.dre0059.articleprocessor.model.Author;
import com.dre0059.articleprocessor.model.Document;
import com.dre0059.articleprocessor.repository.AuthorRepository;
import com.dre0059.articleprocessor.repository.DocumentRepository;
import com.dre0059.articleprocessor.repository.ReferenceRepository;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parses a BibTeX entry into a {@link Document} with its {@link Author}s and persists both
 * through the injected repositories.
 */
@Service
public class MetadataParser {

    /**
     * Matches a BibTeX entry whose fields appear in exactly this order: author, title, doi,
     * abstract. Each captured value must sit on a single line because {@code .} does not match
     * line terminators here. Compiled once instead of on every call.
     */
    private static final Pattern BIBTEX_ENTRY = Pattern.compile(
            "@.*?\\{.*?,"
                    + "\\s*author\\s*=\\s*\\{(.*?)\\},"
                    + "\\s*title\\s*=\\s*\\{(.*?)\\},"
                    + "\\s*doi\\s*=\\s*\\{(.*?)\\},"
                    + "\\s*abstract\\s*=\\s*\\{(.*?)\\}");

    // Dependencies are supplied only through the constructor, so they can be final.
    private final DocumentRepository documentRepository;
    private final AuthorRepository authorRepository;

    /**
     * Creates the parser with the repositories used to persist parsed data.
     *
     * @param documentRepository repository used to save the parsed document
     * @param authorRepository repository used to save the parsed authors
     */
    @Autowired
    public MetadataParser(DocumentRepository documentRepository, AuthorRepository authorRepository) {
        this.documentRepository = documentRepository;
        this.authorRepository = authorRepository;
    }

    /**
     * Extracts author, title, DOI and abstract from a BibTeX entry, builds a {@link Document}
     * from them and saves the document and each of its authors.
     *
     * @param bibtexString raw BibTeX text; may be {@code null}
     * @return the saved document, or {@code null} when the input is {@code null} or does not
     *     match the expected field layout
     */
    public Document parseBibTeX(String bibtexString) {
        if (bibtexString == null) {
            // Same "nothing parsed" signal as a non-matching input, instead of an NPE.
            return null;
        }

        Matcher matcher = BIBTEX_ENTRY.matcher(bibtexString);
        if (!matcher.find()) {
            return null;
        }

        String authorsRaw = matcher.group(1);
        String title = matcher.group(2);
        String doi = matcher.group(3);
        String abstractText = matcher.group(4);

        List<Author> authors = parseAuthors(authorsRaw);

        Document document = new Document(title, null, doi, abstractText, null, null);
        document.setAuthors(authors);

        // NOTE(review): the document is saved before its authors, as in the original code.
        // Whether this order is correct depends on the cascade settings of the
        // Document-Author mapping, which is not visible here - confirm.
        documentRepository.save(document);
        for (Author author : authors) {
            authorRepository.save(author);
        }
        return document;
    }

    /**
     * Splits a BibTeX author field on {@code " and "} and turns every non-blank name into an
     * {@link Author}.
     *
     * @param authorsRaw content of the BibTeX {@code author} field
     * @return the parsed authors in input order; empty when the field holds no names
     */
    private List<Author> parseAuthors(String authorsRaw) {
        List<Author> authors = new ArrayList<>();
        for (String fullName : authorsRaw.split(" and ")) {
            String trimmedName = fullName.trim();
            if (trimmedName.isEmpty()) {
                // An empty author field would otherwise yield an Author with empty names.
                continue;
            }
            String[] nameParts = trimmedName.split("\\s+", 2);
            if (nameParts.length == 2) {
                // The first token is passed second and the remainder first; the original
                // comment describes the argument order as (surname, first name) - confirm
                // against the Author constructor.
                authors.add(new Author(nameParts[1], nameParts[0]));
            } else {
                // Single-token name: there is nothing to split into two parts.
                authors.add(new Author(nameParts[0], ""));
            }
        }
        return authors;
    }
}
package com.dre0059.articleprocessor.service;
/**
 * Placeholder for reference-related service logic; it currently declares no members.
 */
public class ReferenceService {
}
package com.dre0059.articleprocessor.service;
import com.dre0059.articleprocessor.model.DocumentMetadata;
import com.dre0059.articleprocessor.model.Reference;
import com.dre0059.articleprocessor.repository.DocumentRepository;
import com.dre0059.articleprocessor.repository.ReferenceRepository;
import org.springframework.stereotype.Service;
import org.w3c.dom.*;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
/**
 * Reads {@code biblStruct} elements from a TEI XML string and stores the resulting
 * {@link Reference} objects through {@link ReferenceRepository}.
 */
@Service
public class TEIparser {
    private final ReferenceRepository referenceRepository;
    // NOTE(review): injected but not used anywhere in this class.
    private final DocumentRepository documentRepository;

    /**
     * Creates the parser with the repositories it works with.
     *
     * @param referenceRepository repository used to save the parsed references
     * @param documentRepository repository for documents (currently unused in this class)
     */
    public TEIparser(ReferenceRepository referenceRepository, DocumentRepository documentRepository) {
        this.referenceRepository = referenceRepository;
        this.documentRepository = documentRepository;
    }

    /**
     * Parses the references contained in the XML and saves them in one batch.
     *
     * <p>Any exception is caught and reported on standard error; nothing is rethrown.
     *
     * @param xmlContent TEI XML text to parse
     * @param document metadata of the document the references belong to
     */
    public void parseAndSaveToDB(String xmlContent, DocumentMetadata document) {
        try {
            List<Reference> references = parseReferencesFromXML(xmlContent, document);

            if (!references.isEmpty()) {
                referenceRepository.saveAll(references);
                System.out.println("References successfully saved to DB");
            } else {
                System.out.println("No valid references found in XML.");
            }
        } catch (Exception e) {
            System.err.println("Error parsing references: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Walks every {@code biblStruct} element of the XML and collects one entry per element.
     *
     * @param xmlContent TEI XML text to parse
     * @param document metadata of the owning document (not used by the current body)
     * @return the collected references; empty when parsing fails or nothing is found
     */
    private List<Reference> parseReferencesFromXML(String xmlContent, DocumentMetadata document) {
        List<Reference> references = new ArrayList<>();

        try {
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            dbFactory.setNamespaceAware(true);

            // The XML is derived from uploaded files, so external entities and external DTDs
            // must not be resolved (XXE).
            disableFeature(dbFactory, "http://xml.org/sax/features/external-general-entities");
            disableFeature(dbFactory, "http://xml.org/sax/features/external-parameter-entities");
            disableFeature(dbFactory, "http://apache.org/xml/features/nonvalidating/load-external-dtd");
            dbFactory.setXIncludeAware(false);
            dbFactory.setExpandEntityReferences(false);

            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(new InputSource(new StringReader(xmlContent)));

            // <biblStruct> - bibliographical information (consists of all needed information)
            NodeList biblStructs = doc.getElementsByTagNameNS("*", "biblStruct");

            for (int i = 0; i < biblStructs.getLength(); i++) {
                Element bibl = (Element) biblStructs.item(i);

                String title = getTagValueNS("title", bibl, "Unknown Title");
                String publisher = getTagValueNS("publisher", bibl, "Unknown Publisher");
                String year = getTagValueNS("year", bibl, "Unknown Year");

                // NOTE(review): the author names are collected but never attached to the
                // object created below.
                List<String> authors = new ArrayList<>();
                NodeList authorNodes = bibl.getElementsByTagNameNS("*", "author");

                for (int j = 0; j < authorNodes.getLength(); j++) {
                    Element authorElement = (Element) authorNodes.item(j);

                    // getElementsByTagNameNS returns a NodeList; casting the list itself to
                    // Element fails at runtime, so take its first entry instead.
                    NodeList persNames = authorElement.getElementsByTagNameNS("*", "persName");
                    if (persNames.getLength() == 0) {
                        continue;
                    }
                    Element persName = (Element) persNames.item(0);

                    String forename = getTagValueNS("forename", persName, "");
                    String surname = getTagValueNS("surname", persName, "");

                    if (!forename.isEmpty() || !surname.isEmpty()) {
                        authors.add(forename + " " + surname);
                    }
                }

                // NOTE(review): this statement is kept exactly as committed and does not
                // compile: `doi`, `abstractText` and `pages` are not declared in this method,
                // `Document` is the org.w3c.dom type used for `doc` above, and the list holds
                // Reference objects. It has to be replaced by the construction of a Reference;
                // the Reference API is not visible from this file.
                references.add(new Document(title, year, doi, abstractText, pages, publisher));
            }
        } catch (Exception e) {
            System.err.println("Failed to parse references XML: " + e.getMessage());
            e.printStackTrace();
        }
        return references;
    }

    /**
     * Switches a parser feature off, reporting (not failing) when the parser implementation
     * does not know the feature.
     *
     * @param factory factory to configure
     * @param feature URI of the feature to disable
     */
    private static void disableFeature(DocumentBuilderFactory factory, String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            System.err.println("XML parser does not support feature " + feature + ": " + e.getMessage());
        }
    }

    /**
     * Returns the trimmed text of the first descendant element with the given local name,
     * in any namespace.
     *
     * @param tagName local name of the element to look for
     * @param element element whose descendants are searched
     * @param defaultValue value returned when no such descendant exists
     * @return the trimmed text content, or {@code defaultValue}
     */
    private static String getTagValueNS(String tagName, Element element, String defaultValue) {
        NodeList nodeList = element.getElementsByTagNameNS("*", tagName);
        return (nodeList.getLength() > 0) ? nodeList.item(0).getTextContent().trim() : defaultValue;
    }
}
server.port=8080
spring.application.name=articleProcessor
#spring.datasource.url=jdbc:sqlite:pdf_database.db
#spring.datasource.driver-class-name=org.sqlite.JDBC
#spring.jpa.database-platform=org.hibernate.dialect.SQLiteDialect
#spring.jpa.hibernate.ddl-auto=update
#spring.datasource.url=jdbc:sqlite:"D:\\Bakalarka\\my_db\\first_database.db"
#driverClassName=org.sqlite.JDBC
#url=jdbc:sqlite:memory:myDb?cache=shared
#username=sa
#password=sa
#spring.jpa.database-platform=org.hibernate.community.dialect.SQLiteDialect
#spring.jpa.hibernate.ddl-auto=create-drop
#spring.jpa.show-sql=true
# SQLite database configuration
#spring.datasource.url=jdbc:sqlite:pdf_database.db
#spring.jpa.database-platform=org.hibernate.dialect.SQLiteDialect
#spring.jpa.hibernate.ddl-auto=create-drop
#spring.jpa.show-sql=true
#spring.datasource.url=jdbc:h2:mem:testdb
spring.datasource.url=jdbc:h2:file:/data/demo
spring.datasource.driverClassName=org.h2.Driver
spring.datasource.username=sa
spring.datasource.password=password
spring.jpa.database-platform=org.hibernate.dialect.H2Dialect
spring.jpa.defer-datasource-initialization=true
spring.h2.console.enabled=true
spring.jpa.hibernate.ddl-auto=update
spring.jpa.show-sql=true
spring.jpa.properties.hibernate.format_sql=true
spring.thymeleaf.prefix=classpath:/templates/
spring.thymeleaf.suffix=.html
# Spring MVC for uploading PDF files
spring.servlet.multipart.max-file-size=5MB
spring.servlet.multipart.max-request-size=5MB
<!DOCTYPE html>
<html xmlns:th="http://www.thymeleaf.org">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Upload Success</title>
</head>
<body>
<h1>Upload Successful</h1>
<p th:text="${message}"></p>
<p>Converted text file: <a th:href="@{${txtFilePath}}" th:text="${txtFilePath}"></a></p>
<a href="/upload">Go back to upload another file</a>
<p>Are the following references correct?</p>
<button id="yesButton">Yes</button>
<button id="noButton">No</button>
<div id="references" style="display: block;">
<!-- Zobrazenie textu referencií -->
<div th:utext="${referencesText}"></div>
</div>
<div id="fullText" style="display: none;">
<h3>Full text:</h3>
<pre th:utext="${fullText}" id="fullTextContent" style="border: 1px solid #ccc; padding: 10px; cursor: text;"></pre>
<button id="saveSelectedText">Save Selected References</button>
</div>
<button id="showReferences">Show saved references</button>
<div id = "savedReferences" ></div>
<script>
// Show the full text (and hide the extracted references) when "No" is clicked.
document.getElementById("noButton").addEventListener("click", function () {
    document.getElementById("fullText").style.display = "block";
    document.getElementById("references").style.display = "none";
});

// Save the text the user has selected on the page.
document.getElementById("saveSelectedText").addEventListener("click", function () {
    // Read the current selection.
    const selection = window.getSelection().toString().trim();
    if (!selection) {
        alert("Please select some text to save.");
        return;
    }

    // Send the selected text to the server.
    fetch('/saveReferences', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify({ selectedText: selection }),
    })
        .then(response => {
            if (response.ok) {
                alert("Selected references saved successfully.");
            } else {
                return response.text().then(err => { throw new Error(err); });
            }
        })
        .catch(error => console.error("Error saving references:", error));
});

// Fetch the saved references and show them, one per line.
document.getElementById("showReferences").addEventListener("click", function () {
    fetch('/getReferences')
        .then(response => {
            if (!response.ok) {
                return response.text().then(err => {
                    throw new Error(err);
                });
            }
            return response.text(); // references as one string
        })
        .then(data => {
            const savedReferencesDiv = document.getElementById("savedReferences");
            console.log(data);

            // The references are plain text taken from uploaded documents, so they are
            // inserted as text nodes (never parsed as HTML); newlines become <br> elements.
            savedReferencesDiv.textContent = '';
            data.split('\n').forEach(function (line, index) {
                if (index > 0) {
                    savedReferencesDiv.appendChild(document.createElement('br'));
                }
                savedReferencesDiv.appendChild(document.createTextNode(line));
            });
        })
        .catch(error => console.error("Error fetching references: ", error));
});
</script>
</body>
</html>
<!DOCTYPE html>
<html xmlns:th="http://www.thymeleaf.org">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Upload PDF</title>
<script src = "https://code.jquery.com/jquery-3.6.0.min.js"></script>
<style>
#pdf-preview {
width: 500px;
height: 600px;
border: 1px solid #ddd;
margin-top: 10px;
}
</style>
</head>
<body>
<h1>Upload your PDF</h1>
<!-- formular na nahravanie PDF -->
<form action="/api/grobid/upload" method="post" id = "uploadForm" enctype = "multipart/form-data">
<label for="fileInput">Choose PDF file:</label>
<input type = "file" id = "fileInput" name = "file" accept="application/pdf">
<button type = "submit">Upload & Process the PDF</button>
</form>
<!-- zobrazenie PDF -->
<div id="pdf-container">
<iframe id="pdf-preview" src="" style="display: none;"></iframe>
</div>
<!-- JSON výstup -->
<div id = "response-containter" style="display: none;">
<h3>Response : </h3>
<pre id = "json-output"></pre>
</div>
<!-- .js na zobrazenie PDF -->
<script>
// Object URL of the PDF currently shown in the preview; kept so it can be released
// when another file is chosen.
let currentPreviewUrl = null;

// Preview the chosen PDF in the iframe.
document.getElementById('fileInput').addEventListener('change', function (event) {
    const file = event.target.files[0];
    const preview = document.getElementById('pdf-preview');

    // Release the previous object URL; otherwise every selection keeps its blob alive.
    if (currentPreviewUrl) {
        URL.revokeObjectURL(currentPreviewUrl);
        currentPreviewUrl = null;
    }

    if (file && file.type === 'application/pdf') {
        currentPreviewUrl = URL.createObjectURL(file);
        preview.src = currentPreviewUrl;
        preview.style.display = 'block';
    } else {
        // Do not keep showing a previously selected PDF that is no longer the chosen file.
        preview.removeAttribute('src');
        preview.style.display = 'none';
        alert ('Please, choose valid PDF file.');
    }
});

// AJAX for sending a file
$('#uploadForm').submit(function (event) {
    event.preventDefault();

    const fileInput = $('#fileInput')[0].files[0];
    if (!fileInput) {
        alert("Please select a PDF file first.");
        return;
    }

    const formData = new FormData();
    formData.append("file", fileInput);

    $.ajax({
        url: "/api/grobid/upload",
        type: "POST",
        data: formData,
        // Let the browser build the multipart body and its boundary header.
        processData: false,
        contentType: false,
        success: function (response) {
            $('#json-output').text(JSON.stringify(response, null, 4));
            $('#response-containter').show();
        },
        error: function () {
            alert("Error processing PDF.");
        }
    });
});
</script>
</body>
</html>
package com.dre0059.articleprocessor;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Smoke test for the application, run under {@code @SpringBootTest}.
 */
@SpringBootTest
class ArticleProcessorApplicationTests {
// Intentionally empty: the test has no assertions of its own.
@Test
void contextLoads() {
}
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment