require 'docsplit'

# Background job that derives searchable content from a document's attached
# file: it records the page count, the extracted plain text, and one PNG
# preview per page on a freshly built content record.
class ContentGenerationJob < ApplicationJob
  # Scratch directory handed to Docsplit for its intermediate output.
  # NOTE(review): this is a relative path, so it resolves against the worker
  # process's current working directory — confirm that is the app root.
  RAW_CONTENT_DIR = 'tmp/raw_content'.freeze

  queue_as :default

  # after_perform :generate_tags

  # Entry point invoked by the job framework.
  #
  # @param document_id [Object] identifier passed to +Document.find+
  # @return [void]
  def perform(document_id)
    @document_id = document_id
    @document = Document.find(document_id)
    @content = @document.build_content

    generate_metadata
    generate_text
    generate_page_previews
  end

  private

  # Filesystem path of the document's attached file.
  def source_path
    @document.doc.path
  end

  # File name of the attachment with its extension removed, whatever the
  # extension's spelling or case (e.g. "report.PDF" -> "report"). Docsplit
  # names its output files after this stem, so stripping only a literal
  # lowercase "pdf" suffix would make us look for files that do not exist.
  def source_basename
    File.basename(source_path, '.*')
  end

  # Stores the page count reported by Docsplit on the content record.
  def generate_metadata
    @content.pagecount = Docsplit.extract_length(source_path, output: RAW_CONTENT_DIR)
    @content.save
  end

  # Extracts the document's text via Docsplit, stores it on the content
  # record, and removes the intermediate .txt file even if reading or
  # saving raises.
  def generate_text
    text_path = File.join(RAW_CONTENT_DIR, "#{source_basename}.txt")
    Docsplit.extract_text(source_path, output: RAW_CONTENT_DIR)

    @content.text = File.read(text_path)
    @content.save!
  ensure
    File.delete(text_path) if text_path && File.exist?(text_path)
  end

  # Renders every page to a 200px-wide PNG, attaches each image to a new
  # page preview record, and removes the scratch folder afterwards (also
  # when an error interrupts the loop).
  def generate_page_previews
    folder_path = File.join(RAW_CONTENT_DIR, source_basename)
    Docsplit.extract_images(source_path, output: folder_path, size: '200x', format: :png)

    1.upto(@content.pagecount) do |page|
      image_path = File.join(folder_path, "#{source_basename}_#{page}.png")

      # Block form guarantees the handle is closed on every path. The save
      # happens while the file is still open so that an attachment backend
      # that reads the IO lazily still sees readable data.
      File.open(image_path, 'rb') do |image|
        page_preview = @content.page_previews.build
        page_preview.preview = image
        page_preview.pagenumber = page
        page_preview.save
      end
    end
  ensure
    FileUtils.rm_rf(folder_path) if folder_path
  end

  # Runs tag generation synchronously for the current document. Only
  # referenced by the commented-out +after_perform+ callback above.
  def generate_tags
    TagGenerationJob.perform_now(@document_id)
  end
end