Skip to content
Snippets Groups Projects
Commit 8598a757 authored by Lukas Maruniak's avatar Lukas Maruniak
Browse files

Code refactoring. Added PDF files save + display feature.

parent a6ee4a5b
No related merge requests found
Showing
with 232 additions and 108 deletions
"hahahahahahaha"
\ No newline at end of file
"heeeeeeeeeeeeeeeeeeej"
\ No newline at end of file
......@@ -44,30 +44,12 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!--<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.43.2.1</version>
</dependency>
<dependency>
<groupId>org.hibernate.orm</groupId>
<artifactId>hibernate-community-dialects</artifactId>
<version>6.2.12.Final</version>
</dependency>
-->
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
......@@ -79,8 +61,6 @@
<artifactId>spring-boot-starter-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
......@@ -93,12 +73,14 @@
<artifactId>tika-core</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>2.9.2</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parser-pdf-module</artifactId>
......@@ -147,14 +129,6 @@
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<!-- GROBID Dependencies - stará verzia
<dependency>
<groupId>org.grobid</groupId>
<artifactId>grobid-core</artifactId>
<version>0.8.1</version>
</dependency>
-->
<!-- Apache Commons IO (for working with files) -->
<dependency>
<groupId>commons-io</groupId>
......
# Defines two containers: the GROBID server and a MySQL database.
services:
grobid:
image: grobid/grobid:0.8.1
ports:
# GROBID is published on host port 8070.
- "8070:8070"
db:
# NOTE(review): no image tag is pinned here, unlike the grobid service above.
image: mysql
restart: always
environment:
# NOTE(review): MYSQL_USER is set without a matching MYSQL_PASSWORD; verify
# against the MySQL image documentation that this user is actually created.
MYSQL_USER: admin
MYSQL_ROOT_PASSWORD: admin
MYSQL_DATABASE: article-processor
volumes:
# Database files are kept on the host under the user's home directory.
- ~/volumes/tmp/mysql-data:/var/lib/mysql
ports:
- "3306:3306"
docker run --rm --gpus all --init --ulimit core=0 -p 8070:8070 grobid/grobid:0.8.1
\ No newline at end of file
package com.dre0059.articleprocessor;
import com.dre0059.articleprocessor.config.GrobidProperties;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.BodyInserters;
......@@ -14,9 +16,10 @@ import java.io.File;
public class GrobidClient {
private final WebClient webClient;
public GrobidClient() {
this.webClient = WebClient.builder()
.baseUrl("http://158.196.98.65:8080") // URL kde beží GROBID server
public GrobidClient(GrobidProperties grobidProperties) {
this.webClient = WebClient.builder()
.baseUrl(grobidProperties.getHost()) // URL kde beží GROBID server
.build();
}
......
package com.dre0059.articleprocessor.apacheTika;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Component;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import static com.lowagie.text.html.HtmlTagMap.isTitle;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.springframework.stereotype.Component;
@Component
public class PDFbox {
......
package com.dre0059.articleprocessor.apacheTika;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import javax.sound.midi.Soundbank;
import java.io.*;
import java.util.ArrayList;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
public class ReferencesScanner {
......
package com.dre0059.articleprocessor.config;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Holder for settings bound from configuration properties that start with the
 * {@code grobid} prefix.
 */
@Configuration
@ConfigurationProperties(prefix = "grobid")
public class GrobidProperties {

  /** Value bound from the {@code grobid.host} property. */
  private String host;

  /**
   * @return the configured host value, or {@code null} if it was never set
   */
  public String getHost() {
    return this.host;
  }

  /**
   * @param host the host value to store
   */
  public void setHost(String host) {
    this.host = host;
  }
}
......@@ -4,48 +4,58 @@ import com.dre0059.articleprocessor.dto.DocumentDto;
import com.dre0059.articleprocessor.dto.SimpleDocumentDto;
import com.dre0059.articleprocessor.service.DocumentService;
import java.util.List;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
@Controller()
@RequestMapping("/api/document")
@Controller
@RequestMapping
public class DocumentController {
private final DocumentService documentService;
public DocumentController(DocumentService documentService) {
this.documentService = documentService;
}
@GetMapping("/{id}")
@GetMapping("/api/documents/{id}")
public ResponseEntity<DocumentDto> getDocumentById(@PathVariable Long id) {
return ResponseEntity.ok(documentService.getDocumentById(id));
}
@GetMapping("/references/{id}")
@GetMapping(
value = "/api/documents/{id}/content",
produces = MediaType.APPLICATION_PDF_VALUE
)
public @ResponseBody byte[] getDocumentContentById(@PathVariable Long id) {
return documentService.getDocumentContentById(id).getContent();
}
@GetMapping("/api/documents/{id}/references")
public ResponseEntity<List<SimpleDocumentDto>> getReferencesFromDocument(@PathVariable Long id) {
return ResponseEntity.ok(documentService.getDocumentReferences(id));
return ResponseEntity.ok(documentService.getReferencedDocumentsById(id));
}
@GetMapping("/view/{id}")
public String viewPdf(Model model, @PathVariable("id") Long id) {
var references = documentService.getDocumentReferences(id);
public String viewDocument(Model model, @PathVariable("id") Long id) {
var references = documentService.getReferencedDocumentsById(id);
model.addAttribute("documentId", id);
model.addAttribute("references", references);
return "view-pdf";
}
@GetMapping("/view")
public String viewPdf(Model model) {
var references = documentService.getAllReferences();
public String viewAllDocuments(Model model) {
var documents = documentService.getAllDocuments();
model.addAttribute("references", references);
model.addAttribute("documents", documents);
return "view-all";
}
......
package com.dre0059.articleprocessor.controller;
import com.dre0059.articleprocessor.GrobidClient;
import com.dre0059.articleprocessor.model.Category;
import com.dre0059.articleprocessor.repository.CategoryRepository;
import com.dre0059.articleprocessor.service.CategoryService;
import com.dre0059.articleprocessor.service.HeaderService;
import com.dre0059.articleprocessor.service.ReferenceService;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.multipart.MultipartFile;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
@Controller
@RequestMapping("/api/grobid")
@RequestMapping
public class FileUploadController {
private final GrobidClient grobidClient;
private final HeaderService headerService;
private final ReferenceService referenceService;
private final CategoryRepository categoryRepository;
private final CategoryService categoryService;
public FileUploadController(GrobidClient grobidClient, HeaderService headerService, ReferenceService referenceService, CategoryRepository categoryRepository) {
public FileUploadController(GrobidClient grobidClient, HeaderService headerService, ReferenceService referenceService, CategoryService categoryService) {
this.grobidClient = grobidClient;
this.headerService = headerService;
this.referenceService = referenceService;
this.categoryRepository = categoryRepository;
this.categoryService = categoryService;
}
@GetMapping("/upload")
public String showUploadForm(Model model) {
List<Category> categories = categoryRepository.findAll();
var categories = categoryService.getAll();
model.addAttribute("categories", categories);
return "upload"; // vracia upload.html
}
@PostMapping("/upload")
@PostMapping("/api/upload")
@ResponseBody
public ResponseEntity<String> handleFileUpload(@RequestParam("file") MultipartFile file, @RequestParam("category") String category) {
public ResponseEntity<String> handleFileUpload(@RequestParam("file") MultipartFile file, @RequestParam("categoryId") String categoryId) {
if (file.isEmpty()) {
return ResponseEntity.badRequest().body("No file uploaded!");
}
System.out.println("Received file: " + file.getOriginalFilename());
System.out.println("Received category: " + category);
try {
// Vytvorenie dočasného súboru
File tmpFile = File.createTempFile("article-", ".pdf");
// save data from file to tmpFile
......@@ -64,23 +59,9 @@ public class FileUploadController {
String header = grobidClient.processHeader(tmpFile);
String references = grobidClient.processReferences(tmpFile);
String categoryId = category.substring(0, 3);
Optional<Category> categoryOptional = categoryRepository.findById(categoryId);
System.out.println("ID category is : " + categoryId);
System.out.println("Optional category is : " + categoryOptional);
if (category.isEmpty()) {
return ResponseEntity.badRequest().body("Invalid category ID!");
}
headerService.processHeader(header, categoryOptional);
headerService.processHeader(header, categoryId, tmpFile);
referenceService.extractReferences(references);
//System.out.println(header);
//System.out.println(references);
tmpFile.delete();
return ResponseEntity.ok(header);
......
package com.dre0059.articleprocessor.dto;
/**
 * Data-transfer object carrying a category's identifier and name.
 *
 * <p>Both properties start out as {@code null} and are plain read/write values;
 * no validation is performed.
 */
public class CategoryDto {

  private String id;
  private String name;

  /**
   * @return the category identifier, or {@code null} if none was set
   */
  public String getId() {
    return this.id;
  }

  /**
   * @return the category name, or {@code null} if none was set
   */
  public String getName() {
    return this.name;
  }

  /**
   * @param id the category identifier to store
   */
  public void setId(String id) {
    this.id = id;
  }

  /**
   * @param name the category name to store
   */
  public void setName(String name) {
    this.name = name;
  }
}
package com.dre0059.articleprocessor.dto;
/**
 * Data-transfer object pairing a document identifier with the document's raw bytes.
 *
 * <p>The byte array is stored and handed back by reference; no defensive copy is
 * made in either direction.
 */
public class DocumentContentDto {

  private Long id;
  private byte[] content;

  /**
   * @return the document identifier, or {@code null} if none was set
   */
  public Long getId() {
    return this.id;
  }

  /**
   * @param id the document identifier to store
   */
  public void setId(Long id) {
    this.id = id;
  }

  /**
   * @return the same array instance that was passed to {@link #setContent(byte[])},
   *     or {@code null} if none was set
   */
  public byte[] getContent() {
    return this.content;
  }

  /**
   * @param content the raw document bytes to store (kept by reference)
   */
  public void setContent(byte[] content) {
    this.content = content;
  }
}
......@@ -73,4 +73,5 @@ public class DocumentDto {
public void setTarget(String target) {
this.target = target;
}
}
package com.dre0059.articleprocessor.mapper;
import com.dre0059.articleprocessor.dto.CategoryDto;
import com.dre0059.articleprocessor.model.Category;
import java.util.List;
import org.mapstruct.Mapper;
/**
 * MapStruct mapper between {@link Category} entities and {@link CategoryDto} objects.
 *
 * <p>Declared with {@code componentModel = "spring"}, so it can be constructor-injected
 * like other Spring components.
 */
@Mapper(componentModel = "spring")
public interface CategoryMapper {

  /**
   * Maps a single entity to its DTO form.
   *
   * @param entity the category entity to convert
   * @return the corresponding DTO
   */
  CategoryDto toCategoryDto(Category entity);

  /**
   * Maps a DTO back to an entity.
   *
   * @param categoryDto the DTO to convert
   * @return the corresponding entity
   */
  Category toCategory(CategoryDto categoryDto);

  /**
   * Maps a list of entities to a list of DTOs.
   *
   * @param entities the category entities to convert
   * @return the corresponding DTOs
   */
  List<CategoryDto> toCategoryDtoList(List<Category> entities);
}
package com.dre0059.articleprocessor.mapper;
import com.dre0059.articleprocessor.dto.DocumentContentDto;
import com.dre0059.articleprocessor.dto.DocumentDto;
import com.dre0059.articleprocessor.dto.SimpleDocumentDto;
import com.dre0059.articleprocessor.model.Dokument;
import java.util.List;
import org.mapstruct.Mapper;
import org.mapstruct.Mapping;
/**
 * MapStruct mapper that converts {@link Dokument} entities into the document DTOs.
 *
 * <p>Declared with {@code componentModel = "spring"}, so it can be constructor-injected
 * like other Spring components.
 */
@Mapper(componentModel = "spring")
public interface DocumentMapper {

  /**
   * Maps an entity to the full document DTO; the entity's {@code year} is written to
   * the DTO's {@code publicationYear}.
   */
  @Mapping(target = "publicationYear", source = "year")
  DocumentDto toDocumentDto(Dokument entity);

  /** Maps an entity to the simple document DTO. */
  SimpleDocumentDto toSimpleDocument(Dokument entity);

  /** Maps an entity to the DTO that carries the document's content. */
  DocumentContentDto toDocumentContentDto(Dokument entity);

  /** Maps an entity to the simple document DTO. */
  SimpleDocumentDto toSimpleDocumentDto(Dokument entity);

  /** Maps a list of entities to a list of simple document DTOs. */
  List<SimpleDocumentDto> toSimpleDocumentList(List<Dokument> entities);

  /**
   * Decodes a byte array into a {@code String}.
   *
   * <p>NOTE(review): decoding uses the platform default charset. Confirm that the
   * code producing these bytes encodes them the same way; otherwise non-ASCII text
   * may be corrupted on platforms whose default is not the one used for encoding.
   *
   * @param bytes the bytes to decode; may be {@code null}
   * @return the decoded text, or {@code null} when {@code bytes} is {@code null}
   */
  default String toString(byte[] bytes) {
    return bytes == null ? null : new String(bytes);
  }
}
package com.dre0059.articleprocessor.model;
import jakarta.persistence.*;
import org.hibernate.annotations.Cascade;
import jakarta.persistence.CascadeType;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.JoinTable;
import jakarta.persistence.Lob;
import jakarta.persistence.ManyToMany;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.OneToMany;
import jakarta.persistence.Table;
import java.util.ArrayList;
import java.util.List;
import org.hibernate.annotations.Cascade;
@Entity
......@@ -26,14 +37,19 @@ public class Dokument {
@JoinColumn(name = "category_id")
private Category category;
// @Lob for huge text
//@Column(name = "abstractText", columnDefinition = "TEXT")
//private String abstractText;
@Lob
@Column
private byte[] abstractText;
@Lob
@Column
private byte[] content;
private String status; // if the value is PDF - the whole document was uploaded, otherwise the document was just mentioned in references
private String publisher;
private String target; // http link
@OneToMany(mappedBy = "fromDocument", cascade = CascadeType.ALL)
private List<Reference> references = new ArrayList<>();
......@@ -66,7 +82,8 @@ public class Dokument {
public String getPublisher() { return publisher; }
public List<Reference> getReferences() { return references; }
public List<Author> getAuthors() { return authors; }
public byte[] getAbstractText() { return abstractText; }
public byte[] getContent() { return content; }
public void setAuthors(List<Author> authors) { this.authors = authors; }
public void setTitle(String title) { this.title = title; }
......@@ -87,4 +104,10 @@ public class Dokument {
this.references = references;
}
public void setCategory(Category category) { this.category = category;}
public void setAbstractText(byte[] abstractText) { this.abstractText = abstractText; }
public void setContent(byte[] content) { this.content = content; }
public String getTarget() {
return target;
}
}
......@@ -2,12 +2,8 @@ package com.dre0059.articleprocessor.repository;
import com.dre0059.articleprocessor.model.Author;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.util.Optional;
@Repository
public interface AuthorRepository extends JpaRepository<Author, Long> {
/*@Query("SELECT CASE WHEN COUNT(a) > 0 THEN true ELSE false END FROM Author a WHERE a.lastName = :lastName AND a.firstName = :firstName")
......
......@@ -40,7 +40,7 @@ public interface DocumentRepository extends JpaRepository<Dokument, Long> {
WHERE d.id = :id
"""
)
List<Dokument> getReferencedDocumentsById(@Param("id") Long id);
List<Dokument> getReferencedDocumentsById(@Param("id") Long fromDocumentId);
}
......
package com.dre0059.articleprocessor.service;
import com.dre0059.articleprocessor.dto.CategoryDto;
import com.dre0059.articleprocessor.mapper.CategoryMapper;
import com.dre0059.articleprocessor.repository.CategoryRepository;
import java.util.List;
import org.springframework.stereotype.Service;
/**
 * Service that reads categories from the repository and exposes them as
 * {@link CategoryDto} objects.
 */
@Service
public class CategoryService {

  private final CategoryMapper categoryMapper;
  private final CategoryRepository categoryRepository;

  /**
   * @param categoryMapper converts category entities to DTOs
   * @param categoryRepository source of category entities
   */
  public CategoryService(CategoryMapper categoryMapper, CategoryRepository categoryRepository) {
    this.categoryMapper = categoryMapper;
    this.categoryRepository = categoryRepository;
  }

  /**
   * Looks up a single category by its identifier.
   *
   * @param id the category identifier
   * @return the mapper's result for the entity that was found, or for {@code null}
   *     when the repository has no category with this identifier
   */
  public CategoryDto getCategory(String id) {
    // A missing category is passed to the mapper as null rather than raising an error.
    var entity = categoryRepository.findById(id).orElse(null);
    return categoryMapper.toCategoryDto(entity);
  }

  /**
   * Loads every category known to the repository.
   *
   * @return all categories converted to DTOs
   */
  public List<CategoryDto> getAll() {
    var entities = categoryRepository.findAll();
    return categoryMapper.toCategoryDtoList(entities);
  }
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment