|
require 'docsplit'
require 'fileutils'

# Background job that derives content from a document's uploaded file using
# Docsplit: the page count, the extracted plain text, and one PNG preview per
# page. Results are stored on the content record built from the document and
# on that record's page previews.
class ContentGenerationJob < ApplicationJob
  # Scratch directory handed to Docsplit as its output location. The path is
  # relative, so it resolves against the working directory of the process
  # that runs the job.
  RAW_CONTENT_DIR = 'tmp/raw_content'

  queue_as :default

  # after_perform :generate_tags

  # Runs the three extraction steps in order for a single document.
  #
  # @param document_id [Object] identifier passed to +Document.find+
  # @return [void]
  # @raise whatever +Document.find+, Docsplit, file access or +save!+ raise;
  #   nothing is rescued here so a failure surfaces to the job runner
  def perform(document_id)
    @document_id = document_id
    @document = Document.find(document_id)
    @content = @document.build_content

    generate_metadata
    generate_text
    generate_page_previews
  end

  private

  # Filesystem path of the uploaded source file.
  def source_path
    @document.doc.path
  end

  # Source file name without its extension. The Docsplit output files read
  # back below are located by this name.
  # NOTE(review): assumes Docsplit names its output after the source basename
  # with any extension stripped, not only ".pdf" - confirm against the
  # installed Docsplit version.
  def source_basename
    File.basename(source_path, '.*')
  end

  # Stores the page count reported by Docsplit on the content record.
  # Uses +save!+ so an invalid record stops the job here instead of in a
  # later step.
  def generate_metadata
    @content.pagecount = Docsplit.extract_length(source_path, output: RAW_CONTENT_DIR)
    @content.save!
  end

  # Extracts the document text into the scratch directory, copies it onto the
  # content record, and always removes the intermediate text file.
  def generate_text
    text_path = File.join(RAW_CONTENT_DIR, "#{source_basename}.txt")
    Docsplit.extract_text(source_path, output: RAW_CONTENT_DIR)
    @content.text = File.read(text_path)
    @content.save!
  ensure
    # Runs on failure too, so a raised error does not leave the file behind.
    File.delete(text_path) if text_path && File.exist?(text_path)
  end

  # Renders every page as a 200px-wide PNG and attaches each image to a new
  # page preview (page numbers start at 1). The scratch folder holding the
  # images is removed afterwards, whether or not the step succeeded.
  def generate_page_previews
    basename = source_basename
    folder_path = File.join(RAW_CONTENT_DIR, basename)
    Docsplit.extract_images(source_path, output: folder_path, size: '200x', format: :png)

    1.upto(@content.pagecount) do |page_number|
      image_path = File.join(folder_path, "#{basename}_#{page_number}.png")
      # Block form closes the handle even if assignment or saving raises.
      # Saving inside the block keeps the file open for the attachment code.
      File.open(image_path, 'rb') do |image|
        page_preview = @content.page_previews.build
        page_preview.preview = image
        page_preview.pagenumber = page_number
        page_preview.save!
      end
    end
  ensure
    FileUtils.rm_rf(folder_path) if folder_path
  end

  # Runs tag generation synchronously for the current document. Only reached
  # if the +after_perform+ callback above is re-enabled.
  def generate_tags
    TagGenerationJob.perform_now(@document_id)
  end
end
|