|
|
|
@@ -2,25 +2,57 @@ class ContentGenerationJob < ApplicationJob |
|
|
|
# Docsplit supplies the length, text and image extraction used by this job.
require 'docsplit'

# Per-run state (@document_id, @document, @content) is assigned inside
# #perform. It is deliberately not pre-set to nil here: an assignment at this
# level creates an instance variable on the class object itself, not on the
# job instances that #perform and the private helpers run on.

queue_as :default

# After #perform finishes, run #generate_tags for the same document.
after_perform :generate_tags
# NOTE(review): a disabled duplicate of the callback above was left in place;
# confirm whether tagging should stay enabled before deleting either line.
# after_perform :generate_tags
|
|
|
|
|
|
|
# Builds the content for a document and fills it in step by step.
#
# The document is looked up once and a single content object is built from
# it; page count, text and page previews are then produced by the private
# helpers, which all operate on that same @content. Extracting the text inline
# here as well would repeat the work of #generate_text and build a second
# content object for the same document.
#
# @param document_id [Object] identifier handed to Document.find; also kept in
#   @document_id so #generate_tags can use it later
# @return [void]
# Errors raised by Document.find or by any helper propagate to the caller.
def perform(document_id)
  @document_id = document_id
  @document = Document.find(document_id)
  @content = @document.build_content

  generate_metadata
  generate_text
  generate_page_previews
end
|
|
|
|
|
|
|
private |
|
|
|
# Stores the document's page count on the content object.
#
# The value returned by Docsplit.extract_length for the document's file is
# written to @content.pagecount. The content is persisted with save!, the same
# call #generate_text uses, so a save that fails is reported rather than
# ignored; #generate_page_previews relies on this page count.
#
# @return [void]
def generate_metadata
  @content.pagecount = Docsplit.extract_length(@document.doc.path, output: 'tmp/raw_content')
  @content.save!
end
|
|
|
|
|
|
|
# Extracts the document's plain text and stores it on the content object.
#
# Docsplit.extract_text is asked to write into tmp/raw_content; the resulting
# "<name>.txt" file is read into @content.text, the content is saved with
# save!, and the temporary file is removed.
#
# The text file name is derived by stripping whatever extension the source
# file has ('.*'), so it no longer depends on a lower-case "pdf" suffix. The
# cleanup runs in an ensure clause so the temporary file is also removed when
# reading or saving raises.
#
# @return [void]
def generate_text
  source_path = @document.doc.path
  text_path = File.join('tmp/raw_content', "#{File.basename(source_path, '.*')}.txt")

  Docsplit.extract_text(source_path, output: 'tmp/raw_content')
  # File.read rather than IO.read: IO.read treats a leading "|" as a command.
  @content.text = File.read(text_path)
  @content.save!
ensure
  File.delete(text_path) if text_path && File.exist?(text_path)
end
|
|
|
|
|
|
|
# Renders one PNG preview per page and attaches each to the content object.
#
# Docsplit.extract_images writes 200px-wide PNGs into
# tmp/raw_content/<name>/, where <name> is the source file name without its
# extension. For every page from 1 to @content.pagecount a page preview is
# built on @content.page_previews, given the image file and its page number,
# and saved.
#
# Each image is opened in block form so the handle stays open while the
# preview is saved and is closed afterwards, including when an error is
# raised. Previews are saved with save! so a failed save is reported instead
# of being ignored.
#
# @return [void]
def generate_page_previews
  source_path = @document.doc.path
  base_name = File.basename(source_path, '.*')
  folder_path = File.join('tmp/raw_content', base_name)

  Docsplit.extract_images(source_path, output: folder_path, size: '200x', format: :png)

  @content.pagecount.times do |index|
    page_number = index + 1 # pages are numbered from 1 in the image file names
    image_path = File.join(folder_path, "#{base_name}_#{page_number}.png")

    File.open(image_path, 'rb') do |image|
      page_preview = @content.page_previews.build
      page_preview.preview = image
      page_preview.pagenumber = page_number
      page_preview.save!
    end
  end
end
|
|
|
|
|
|
|
# Runs TagGenerationJob inline (perform_now) for the document id remembered
# in @document_id.
def generate_tags
  TagGenerationJob.perform_now(@document_id)
end
|
|
|
|