forked from id774/automaticruby
-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathfull_feed.rb
More file actions
77 lines (68 loc) · 2.23 KB
/
full_feed.rb
File metadata and controls
77 lines (68 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
# Name:: Automatic::Plugin::Filter::FullFeed
# Author:: progd <http://d.hatena.ne.jp/progd/20120429/automatic_ruby_filter_full_feed>
# 774 <http://id774.net>
# Created:: Apr 29, 2012
# Updated:: Jun 23, 2013
# Copyright:: Copyright (c) 2012-2013 Automatic Ruby Developers.
# License:: Licensed under the GNU GENERAL PUBLIC LICENSE, Version 3.0.
module Automatic::Plugin
  # Filter that swaps each feed item's description for markup extracted from
  # the page the item links to.
  #
  # Extraction rules ("siteinfo") are read from a JSON file. Every entry used
  # here is a hash whose 'data' hash carries:
  #   'type'  - one of SITEINFO_TYPES,
  #   'url'   - regular expression source matched against the item link,
  #   'xpath' - XPath selecting the wanted content in the fetched page.
  class FilterFullFeed
    require 'nokogiri'
    require 'open-uri'

    # Accepted siteinfo types. get_siteinfo orders entries by their type's
    # position in this list, so earlier types are tried first.
    SITEINFO_TYPES = %w[SBM INDIVIDUAL IND SUBGENERAL SUB GENERAL GEN].freeze

    # config   - hash-like object; config['siteinfo'] is the siteinfo file name.
    # pipeline - enumerable of feeds objects (each responding to #items) or nil.
    #
    # The siteinfo file is loaded eagerly, so a missing or unparsable file
    # raises from here.
    def initialize(config, pipeline = [])
      @config = config
      @pipeline = pipeline
      @siteinfo = get_siteinfo
    end

    # Runs fulltext on every item of every non-nil feeds object.
    #
    # Returns an array with the same entries (nil included) in the same order
    # as the pipeline; items are updated in place.
    def run
      @return_feeds = []
      @pipeline.each do |feeds|
        # fulltext mutates the item itself, so its return value is not needed.
        feeds.items.each { |feed| fulltext(feed) } unless feeds.nil?
        @return_feeds << feeds
      end
      @return_feeds
    end

    private

    # Loads the siteinfo file named by @config['siteinfo'] from assets_dir.
    #
    # Returns the entries whose type is listed in SITEINFO_TYPES, ordered by
    # that list; entries of the same type keep their order from the file.
    def get_siteinfo
      Automatic::Log.puts(:info, "Loading siteinfo from #{@config['siteinfo']}")
      path = File.join(assets_dir, @config['siteinfo'])
      # File.read closes the handle; the former open(path).read left it open.
      # NOTE(review): JSON.load is kept as-is; if this file can come from an
      # untrusted source, consider JSON.parse instead.
      siteinfo = JSON.load(File.read(path).force_encoding("UTF-8"))
      siteinfo.select! { |info| SITEINFO_TYPES.include?(info['data']['type']) }
      # Array#sort! is not stable, so the original position is used as a
      # tie-breaker to keep the order deterministic within one type.
      ranked = siteinfo.each_with_index.sort_by do |info, position|
        [SITEINFO_TYPES.index(info['data']['type']), position]
      end
      ranked.map(&:first)
    end

    # Returns the per-user siteinfo directory when it exists, otherwise the
    # assets/siteinfo directory located two levels above this file.
    def assets_dir
      user_dir = File.expand_path('~/.automatic/assets/siteinfo')
      return user_dir if File.directory?(user_dir)

      File.join(File.dirname(__FILE__), '..', '..', 'assets', 'siteinfo')
    end

    # Replaces feed.description with the markup selected by the first siteinfo
    # entry whose 'url' pattern matches feed.link.
    #
    # This is best-effort: when the item has no link, nothing matches, or the
    # fetch/extraction fails, the item is left unchanged. Failures are logged
    # instead of being swallowed silently.
    #
    # Returns the (possibly updated) feed item.
    def fulltext(feed)
      return feed unless feed.link

      info = @siteinfo.find { |candidate| siteinfo_match?(candidate, feed.link) }
      unless info
        Automatic::Log.puts(:info, "Fulltext SITEINFO not found: #{feed.link}")
        return feed
      end

      Automatic::Log.puts(:info, "Siteinfo matched: #{info['data']['url']}")
      begin
        body = fetch_document(feed.link).xpath(info['data']['xpath'])
        feed.description = body.to_html.encode('UTF-8', :undef => :replace)
      rescue => e
        Automatic::Log.puts(:info, "Fulltext failed: #{feed.link} (#{e.class}: #{e.message})")
      end
      feed
    end

    # True when the entry's 'url' pattern matches link. A malformed entry
    # (e.g. an invalid regular expression) is logged and treated as a
    # non-match so it cannot stop the remaining entries from being tried.
    def siteinfo_match?(info, link)
      link.match(info['data']['url']) ? true : false
    rescue => e
      Automatic::Log.puts(:info, "Siteinfo skipped: #{e.class}: #{e.message}")
      false
    end

    # Fetches url via open-uri and returns the parsed Nokogiri document.
    #
    # Kernel#open is deliberately avoided: it no longer fetches URLs on
    # Ruby 3.0+, and it would read local files or run a command for a link
    # starting with "|". Only URI classes that open-uri made openable are
    # accepted, and the block form closes the response IO after parsing.
    def fetch_document(url)
      uri = URI.parse(url)
      raise ArgumentError, "unsupported link: #{url}" unless uri.respond_to?(:open)

      uri.open { |io| Nokogiri::HTML.parse(io) }
    end
  end
end