From 772f12243feee6ebb2aebbf0f1417d6d0ae1e4f1 Mon Sep 17 00:00:00 2001 From: Nils Dittberner Date: Fri, 25 Nov 2016 23:45:28 +0100 Subject: [PATCH] Lots of changes. Working basic search. --- Gemfile | 1 + Gemfile.lock | 4 ++++ README.md | 3 ++- app/controllers/documents_controller.rb | 12 +++++++++- app/jobs/content_generation_job.rb | 27 ++++++++++++++++++++++ app/jobs/tag_generation_job.rb | 16 +++++++++++++ app/models/content.rb | 3 +++ app/models/document.rb | 13 +++++++++++ app/views/documents/_form.html.erb | 2 +- app/views/documents/search.html.erb | 11 +++++++++ app/views/documents/show.html.erb | 12 ++++++++++ app/views/layouts/application.html.erb | 5 +++- config/routes.rb | 9 +++++++- db/migrate/20161125164350_create_contents.rb | 10 ++++++++ ...20161125201757_rename_content_column_to_text.rb | 5 ++++ db/schema.rb | 10 +++++++- test/fixtures/contents.yml | 9 ++++++++ test/jobs/content_generation_job_test.rb | 7 ++++++ test/jobs/tag_generation_job_test.rb | 7 ++++++ test/models/content_test.rb | 7 ++++++ 20 files changed, 167 insertions(+), 6 deletions(-) create mode 100644 app/jobs/content_generation_job.rb create mode 100644 app/jobs/tag_generation_job.rb create mode 100644 app/models/content.rb create mode 100644 app/views/documents/search.html.erb create mode 100644 db/migrate/20161125164350_create_contents.rb create mode 100644 db/migrate/20161125201757_rename_content_column_to_text.rb create mode 100644 test/fixtures/contents.yml create mode 100644 test/jobs/content_generation_job_test.rb create mode 100644 test/jobs/tag_generation_job_test.rb create mode 100644 test/models/content_test.rb diff --git a/Gemfile b/Gemfile index b7d08a2..b36239f 100644 --- a/Gemfile +++ b/Gemfile @@ -2,6 +2,7 @@ source 'https://rubygems.org' gem 'paperclip', '~> 5.0.0' gem 'docsplit', '~> 0.7.6' +gem 'filewatcher', '~> 0.5.3' # Bundle edge Rails instead: gem 'rails', github: 'rails/rails' gem 'rails', '~> 5.0.0', '>= 5.0.0.1' diff --git a/Gemfile.lock b/Gemfile.lock index 90e74a4..e905b65 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -59,6 +59,8 @@ GEM erubis (2.7.0) execjs (2.7.0) ffi (1.9.14) + filewatcher (0.5.3) + trollop (~> 2.0) globalid (0.3.7) activesupport (>= 4.1.0) i18n (0.7.0) @@ -147,6 +149,7 @@ GEM thor (0.19.1) thread_safe (0.3.5) tilt (2.0.5) + trollop (2.1.2) turbolinks (5.0.1) turbolinks-source (~> 5) turbolinks-source (5.0.0) @@ -171,6 +174,7 @@ DEPENDENCIES byebug coffee-rails (~> 4.2) docsplit (~> 0.7.6) + filewatcher (~> 0.5.3) jbuilder (~> 2.5) jquery-rails listen (~> 3.0.5) diff --git a/README.md b/README.md index 2561c4f..c7bd3fc 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,10 @@ Things you may want to cover: * Ruby version Ruby 2.3.1 -``` +```ruby gem 'paperclip', '~> 5.0.0' gem 'docsplit', '~> 0.7.6' +gem 'filewatcher', '~> 0.5.3' ``` * System dependencies diff --git a/app/controllers/documents_controller.rb b/app/controllers/documents_controller.rb index 74f489f..bedcbdf 100644 --- a/app/controllers/documents_controller.rb +++ b/app/controllers/documents_controller.rb @@ -4,7 +4,11 @@ class DocumentsController < ApplicationController # GET /documents # GET /documents.json def index - @documents = User.find(session[:user_id]).documents + if params[:search] + @documents = User.find(session[:user_id]).documents.contains_word(params[:search]) + else + @documents = User.find(session[:user_id]).documents + end end # GET /documents/1 @@ -67,6 +71,12 @@ class DocumentsController < ApplicationController end end + # GET /documents/search + def search + end + + # GET + private # Use callbacks to share common setup or constraints between actions. def set_document diff --git a/app/jobs/content_generation_job.rb b/app/jobs/content_generation_job.rb new file mode 100644 index 0000000..5409768 --- /dev/null +++ b/app/jobs/content_generation_job.rb @@ -0,0 +1,27 @@ +class ContentGenerationJob < ApplicationJob + require 'docsplit' + + @document_id = nil + + queue_as :default + + after_perform :generate_tags + + def perform(document_id) + @document_id = document_id + document = Document.find(document_id) + Docsplit.extract_text(document.doc.path, output: 'tmp/raw_content') + file_path = 'tmp/raw_content/' + File.basename(document.doc.path, 'pdf') + 'txt' + text = IO.read(file_path) + + content = document.build_content(text: text) + content.save! + + File.delete(file_path) if File.exist?(file_path) + end + + private + def generate_tags + TagGenerationJob.perform_now @document_id + end +end diff --git a/app/jobs/tag_generation_job.rb b/app/jobs/tag_generation_job.rb new file mode 100644 index 0000000..1a8e57c --- /dev/null +++ b/app/jobs/tag_generation_job.rb @@ -0,0 +1,16 @@ +class TagGenerationJob < ApplicationJob + queue_as :default + + def perform(document_id) + document = Document.find(document_id) + content = document.content + + words = content.text.squish.split(/\s+/) + + h = Hash.new(0) + content.text.squish.split(/\s+/).delete_if{ |w| w.length <= 3 }.each { |w| h[w] += 1 } + h.sort_by { |key, value| value } + + # stop list... + end +end diff --git a/app/models/content.rb b/app/models/content.rb new file mode 100644 index 0000000..6b3c2da --- /dev/null +++ b/app/models/content.rb @@ -0,0 +1,3 @@ +class Content < ApplicationRecord + belongs_to :document +end diff --git a/app/models/document.rb b/app/models/document.rb index 6408c51..f20888e 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -1,6 +1,7 @@ class Document < ApplicationRecord belongs_to :user belongs_to :category + has_one :content, dependent: :destroy, required: false has_and_belongs_to_many :tags has_attached_file :doc, @@ -10,4 +11,16 @@ class Document < ApplicationRecord hash_secret: Rails.application.secrets.secret_key_base } validates_attachment_content_type :doc, content_type: 'application/pdf' + + after_save :generate_content + + scope :contains_word, -> (word) { joins(:content).where("text like ?", "%#{word}%") } + + + private + def generate_content + if self.doc_content_type == 'application/pdf' + ContentGenerationJob.perform_now self.id + end + end end diff --git a/app/views/documents/_form.html.erb b/app/views/documents/_form.html.erb index ead9cdb..01c9b82 100644 --- a/app/views/documents/_form.html.erb +++ b/app/views/documents/_form.html.erb @@ -1,4 +1,4 @@ -<%= form_for document, url: documents_path, html: { multipart: true } do |f| %> +<%= form_for document, html: { multipart: true } do |f| %> <% if document.errors.any? %>

<%= pluralize(document.errors.count, "error") %> prohibited this document from being saved:

diff --git a/app/views/documents/search.html.erb b/app/views/documents/search.html.erb new file mode 100644 index 0000000..cb52764 --- /dev/null +++ b/app/views/documents/search.html.erb @@ -0,0 +1,11 @@ +

<%= notice %>

+ +

Search documents

+ +<%= form_tag documents_path, :method => 'get' do %> +

+ <%= label_tag 'Search string' %> + <%= text_field_tag :search, params[:search] %> + <%= submit_tag 'Search' %> +

+<% end %> diff --git a/app/views/documents/show.html.erb b/app/views/documents/show.html.erb index c34b53d..0a37380 100644 --- a/app/views/documents/show.html.erb +++ b/app/views/documents/show.html.erb @@ -14,5 +14,17 @@ <%= link_to @document.doc_file_name, @document.doc.url %>

+<% if @document.content %> +

+ Content: + <%= @document.content.text.truncate_words(30) %> +

+

+ Words (length > 3): + <%= @document.content.text.squish.split(/\s+/).uniq.delete_if { |w| w.length <= 3 } %> +

+<% end %> + + <%= link_to 'Edit', edit_document_path(@document) %> | <%= link_to 'Back', documents_path %> diff --git a/app/views/layouts/application.html.erb b/app/views/layouts/application.html.erb index 77cc36f..039979f 100644 --- a/app/views/layouts/application.html.erb +++ b/app/views/layouts/application.html.erb @@ -27,7 +27,10 @@ <%= button_to 'Logout', logout_path, method: :delete %> diff --git a/config/routes.rb b/config/routes.rb index 810f3c1..0e6e392 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -2,7 +2,14 @@ Rails.application.routes.draw do root 'documents#index', as: 'documents_index' resources :categories - resources :documents + + resources :documents do + get :search, on: :collection + # collection do + # get 'search' + # end + end + resources :users controller :sessions do diff --git a/db/migrate/20161125164350_create_contents.rb b/db/migrate/20161125164350_create_contents.rb new file mode 100644 index 0000000..eccd735 --- /dev/null +++ b/db/migrate/20161125164350_create_contents.rb @@ -0,0 +1,10 @@ +class CreateContents < ActiveRecord::Migration[5.0] + def change + create_table :contents do |t| + t.text :content + t.references :document + + t.timestamps + end + end +end diff --git a/db/migrate/20161125201757_rename_content_column_to_text.rb b/db/migrate/20161125201757_rename_content_column_to_text.rb new file mode 100644 index 0000000..5895b1f --- /dev/null +++ b/db/migrate/20161125201757_rename_content_column_to_text.rb @@ -0,0 +1,5 @@ +class RenameContentColumnToText < ActiveRecord::Migration[5.0] + def change + rename_column :contents, :content, :text + end +end diff --git a/db/schema.rb b/db/schema.rb index 54f52a8..f905372 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20161124223446) do +ActiveRecord::Schema.define(version: 20161125201757) do create_table "categories", force: :cascade do |t| t.string "name" @@ -22,6 +22,14 @@ ActiveRecord::Schema.define(version: 20161124223446) do t.index ["user_id"], name: "index_categories_on_user_id" end + create_table "contents", force: :cascade do |t| + t.text "text" + t.integer "document_id" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["document_id"], name: "index_contents_on_document_id" + end + create_table "documents", force: :cascade do |t| t.string "name" t.datetime "created_at", null: false diff --git a/test/fixtures/contents.yml b/test/fixtures/contents.yml new file mode 100644 index 0000000..77a2317 --- /dev/null +++ b/test/fixtures/contents.yml @@ -0,0 +1,9 @@ +# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html + +one: + content: MyText + document: + +two: + content: MyText + document: diff --git a/test/jobs/content_generation_job_test.rb b/test/jobs/content_generation_job_test.rb new file mode 100644 index 0000000..dbf06f0 --- /dev/null +++ b/test/jobs/content_generation_job_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class ContentGenerationJobTest < ActiveJob::TestCase + # test "the truth" do + # assert true + # end +end diff --git a/test/jobs/tag_generation_job_test.rb b/test/jobs/tag_generation_job_test.rb new file mode 100644 index 0000000..699471d --- /dev/null +++ b/test/jobs/tag_generation_job_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class TagGenerationJobTest < ActiveJob::TestCase + # test "the truth" do + # assert true + # end +end diff --git a/test/models/content_test.rb b/test/models/content_test.rb new file mode 100644 index 0000000..530663b --- /dev/null +++ b/test/models/content_test.rb @@ -0,0 +1,7 @@ +require 'test_helper' + +class ContentTest < ActiveSupport::TestCase + # test "the truth" do + # assert true + # end +end