diff --git a/Gemfile b/Gemfile index b7d08a2..b36239f 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,7 @@ source 'https://rubygems.org' gem 'paperclip', '~> 5.0.0' gem 'docsplit', '~> 0.7.6' +gem 'filewatcher', '~> 0.5.3' # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' gem 'rails', '~> 5.0.0', '>= 5.0.0.1' diff --git a/Gemfile.lock b/Gemfile.lock index 90e74a4..e905b65 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -59,6 +59,8 @@ GEM erubis (2.7.0) execjs (2.7.0) ffi (1.9.14) + filewatcher (0.5.3) + trollop (~> 2.0) globalid (0.3.7) activesupport (>= 4.1.0) i18n (0.7.0) @@ -147,6 +149,7 @@ GEM thor (0.19.1) thread_safe (0.3.5) tilt (2.0.5) + trollop (2.1.2) turbolinks (5.0.1) turbolinks-source (~> 5) turbolinks-source (5.0.0) @@ -171,6 +174,7 @@ DEPENDENCIES byebug coffee-rails (~> 4.2) docsplit (~> 0.7.6) + filewatcher (~> 0.5.3) jbuilder (~> 2.5) jquery-rails listen (~> 3.0.5) diff --git a/README.md b/README.md index 2561c4f..c7bd3fc 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ Things you may want to cover: * Ruby version Ruby 2.3.1 -``` +```ruby gem 'paperclip', '~> 5.0.0' gem 'docsplit', '~> 0.7.6' +gem 'filewatcher', '~> 0.5.3' ``` * System dependencies
diff --git a/app/controllers/documents_controller.rb b/app/controllers/documents_controller.rb
index 74f489f..bedcbdf 100644
--- a/app/controllers/documents_controller.rb
+++ b/app/controllers/documents_controller.rb
@@ -4,7 +4,10 @@ class DocumentsController < ApplicationController
   # GET /documents
   # GET /documents.json
   def index
     @documents = User.find(session[:user_id]).documents
+    # A blank search box submits search="", which is truthy in Ruby; present?
+    # makes that case list every document instead of filtering by content.
+    @documents = @documents.contains_word(params[:search]) if params[:search].present?
   end
 
   # GET /documents/1
@@ -67,6 +70,11 @@ class DocumentsController < ApplicationController
     end
   end
 
+  # GET /documents/search
+  # Intentionally empty: the action only renders its view template.
+  def search
+  end
+
   private
     # Use callbacks to share common setup or constraints between actions.
def set_document
diff --git a/app/jobs/content_generation_job.rb b/app/jobs/content_generation_job.rb
new file mode 100644
index 0000000..5409768
--- /dev/null
+++ b/app/jobs/content_generation_job.rb
@@ -0,0 +1,37 @@
+require 'docsplit'
+
+# Extracts the text of a document's attachment with Docsplit, stores it as the
+# document's Content record, and then runs tag generation for that document.
+class ContentGenerationJob < ApplicationJob
+  # Directory (relative to the working directory) Docsplit writes text files to.
+  OUTPUT_DIR = 'tmp/raw_content'.freeze
+
+  queue_as :default
+
+  after_perform :generate_tags
+
+  # @param document_id [Integer] id of the Document to extract text from
+  # @raise [ActiveRecord::RecordNotFound] when no such Document exists
+  def perform(document_id)
+    @document_id = document_id
+    document = Document.find(document_id)
+    source_path = document.doc.path
+    # Strip whatever extension the upload carries (".pdf", ".PDF", ...) rather
+    # than a literal lowercase "pdf" suffix. Assumes Docsplit names its output
+    # "<basename>.txt", which is what the previous path computation relied on.
+    text_path = File.join(OUTPUT_DIR, "#{File.basename(source_path, '.*')}.txt")
+
+    Docsplit.extract_text(source_path, output: OUTPUT_DIR)
+    document.build_content(text: File.read(text_path)).save!
+  ensure
+    # Remove the temporary text file even when reading or saving fails.
+    File.delete(text_path) if text_path && File.exist?(text_path)
+  end
+
+  private
+
+  # after_perform callback; uses the document id remembered by #perform.
+  def generate_tags
+    TagGenerationJob.perform_now @document_id
+  end
+end
diff --git a/app/jobs/tag_generation_job.rb b/app/jobs/tag_generation_job.rb
new file mode 100644
index 0000000..1a8e57c
--- /dev/null
+++ b/app/jobs/tag_generation_job.rb
@@ -0,0 +1,20 @@
+# Counts how often each word longer than three characters occurs in a
+# document's extracted text.
+class TagGenerationJob < ApplicationJob
+  queue_as :default
+
+  # @param document_id [Integer] id of the Document whose content is analysed
+  # @return [Array] [word, count] pairs sorted by ascending count; empty when
+  #   the document has no content record
+  def perform(document_id)
+    content = Document.find(document_id).content
+    return [] unless content
+
+    # TODO: stop list...
+    frequencies = Hash.new(0)
+    content.text.to_s.squish.split(/\s+/).each do |word|
+      frequencies[word] += 1 if word.length > 3
+    end
+    frequencies.sort_by { |_word, count| count }
+  end
+end
diff --git a/app/models/content.rb b/app/models/content.rb
new file mode 100644
index 0000000..6b3c2da
--- /dev/null
+++ b/app/models/content.rb
@@ -0,0 +1,4 @@
+# Text extracted from a Document's attachment (see ContentGenerationJob).
+class Content < ApplicationRecord
+  belongs_to :document
+end
diff --git a/app/models/document.rb b/app/models/document.rb
index 6408c51..f20888e 100644
--- a/app/models/document.rb
+++ b/app/models/document.rb
@@ -1,6 +1,7 @@
 class Document < ApplicationRecord
   belongs_to :user
   belongs_to :category
+  has_one :content, dependent: :destroy, required: false
   has_and_belongs_to_many :tags
 
   has_attached_file :doc,
@@ -10,4 +11,20 @@ class Document < ApplicationRecord
     hash_secret: Rails.application.secrets.secret_key_base
   }
   validates_attachment_content_type :doc, content_type: 'application/pdf'
+
+  after_save :generate_content
+
+  # Documents whose extracted content contains +word+ as a substring.
+  # The column is table-qualified because of the join; "%" and "_" in the
+  # user-supplied word are escaped (with "!") so they match literally.
+  scope :contains_word, lambda { |word|
+    joins(:content).where("contents.text LIKE ? ESCAPE '!'", "%#{sanitize_sql_like(word.to_s, '!')}%")
+  }
+
+  private
+
+  # after_save callback: extracts the attachment's text for PDF uploads.
+  def generate_content
+    ContentGenerationJob.perform_now(id) if doc_content_type == 'application/pdf'
+  end
 end
diff --git a/app/views/documents/_form.html.erb b/app/views/documents/_form.html.erb
index ead9cdb..01c9b82 100644
--- a/app/views/documents/_form.html.erb
+++ b/app/views/documents/_form.html.erb
@@ -1,4 +1,4 @@
-<%= form_for document, url: documents_path, html: { multipart: true } do |f| %>
+<%= form_for document, html: { multipart: true } do |f| %>
   <% if document.errors.any? %>
<%= notice %>
+ ++ <%= label_tag 'Search string' %> + <%= text_field_tag :search, params[:search] %> + <%= submit_tag 'Search' %> +
+<% end %> diff --git a/app/views/documents/show.html.erb b/app/views/documents/show.html.erb index c34b53d..0a37380 100644 --- a/app/views/documents/show.html.erb +++ b/app/views/documents/show.html.erb @@ -14,5 +14,17 @@ <%= link_to @document.doc_file_name, @document.doc.url %> +<% if @document.content %> ++ Content: + <%= @document.content.text.truncate_words(30) %> +
++ Words (length > 3): + <%= @document.content.text.squish.split(/\s+/).uniq.delete_if { |w| w.length <= 3 } %> +
+<% end %> + + <%= link_to 'Edit', edit_document_path(@document) %> | <%= link_to 'Back', documents_path %> diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index 77cc36f..039979f 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -27,7 +27,10 @@