# News behavior
# =============
#
# Defines a tag to produce a "latest news" summary based on an XML feed.
#
#
# History
# -------
#
# 2006-08-06 (ADH): Created.
# 2006-08-07 (ADH): It seems the core RSS features are easily sufficient
#                   so fun though Simple-RSS was, it makes sense to use
#                   something that doesn't add extra dependencies. Moved
#                   over to the Ruby RSS parser. Since tags get expanded
#                   before filters run, but there is no way to escape
#                   text filtered by (say) Textile, instead get rid of
#                   characters known to be a problem.
# 2006-08-08 (ADH): Now knows about the prevalent part filter during tag
#                   processing and takes steps to escape the generated
#                   content. Markdown doesn't seem to need it but Textile
#                   is escaped; it turns out there is a '<notextile>' tag
#                   which does the job. Generating HTML from a tag is
#                   still conceptually wrong because of filter operations
#                   but for now I still want to keep this behavior very
#                   simple for its users. There is an RSS behavior which
#                   can be used if a more flexible scheme is required at
#                   the expense of more effort and less clean handling of
#                   empty RSS item fields.

require 'rss'
require 'uri'

# Raised when the 'news' tag is used incorrectly or its feed is unusable.
class TagError < StandardError; end

# Behavior that renders a "latest news" HTML list from an RSS feed via a
# 'news' tag. Pages using it are never cached (see #cache_page?).
class NewsBehavior < Behavior::Base

  register 'News'

  # NOTE(review): the usage example at the end of this text (and the matching
  # one in the comment inside define_tags) was missing from the reviewed copy
  # of this file. It has been reconstructed from the documented attributes
  # and assumes the conventional "r:" tag prefix - confirm before release.
  description %{
    This behavior provides a 'news' tag which is supplied with a
    fully qualified URL pointing to an XML RSS feed. The feed is
    parsed and the 'latest news' summary generated from it. The URL
    is given in the mandatory 'feed' attribute within the tag.

    The 'headlines' attribute is optional; it defines how many entries
    will be included in the news summary and defaults to '4'.

    The 'dates' attribute is also optional; it says whether or not
    article published or modified dates (if found) will be added in
    small text after each headline. If '0' there are no dates, else
    dates are shown. The default value is '1', to show dates. Dates are
    extracted from the feed's "pubDate", "modified" or "dc_date" fields,
    in that order.

    Finally, an optional 'escape' attribute, defaulting to '1', ensures
    that RSS titles or links cannot be accidentally interpreted as
    Textile data for Textile filtered parts. Setting the attribute to
    '0' disables escaping to allow headlines marked up in Textile to be
    passed through to the Textile parser.

    Note that '<' and '>' characters in RSS item titles will always be
    escaped to HTML entities for security. Links to articles are run
    through URI::Escape() processing and have '~' characters changed to
    '%7E' for similar reasons.

    Example of use:

      <r:news feed="http://www.example.com/rss.xml" headlines="3" dates="0" />
  }

  # Feed fields consulted for an item's date, in order of preference.
  DATE_FIELDS = [:pubDate, :modified, :dc_date].freeze

  # Helpers are class methods, invoked through the class name, so that they
  # work regardless of what 'self' is while the tag block is being evaluated.

  # Returns a copy of +title+ with '<' and '>' replaced by HTML entities.
  # RSS feeds can be used maliciously, so this is always applied.
  def self.sanitize_title(title)
    title.gsub('<', '&lt;').gsub('>', '&gt;')
  end

  # Returns +link+ URI-escaped, with "~" converted to "%7E" by hand since
  # URI escaping leaves it alone.
  def self.escape_link(link)
    # URI.escape was removed in Ruby 3.0; the default parser provides the
    # same escaping on newer interpreters.
    escaped = if URI.respond_to?(:escape)
                URI.escape(link)
              else
                URI::DEFAULT_PARSER.escape(link)
              end

    escaped.gsub('~', '%7E')
  end

  # Returns the first Time found in the item's "pubDate", "modified" or
  # "dc_date" fields (in that order), or nil if none holds a Time. A field
  # that exists but is empty no longer hides a later, populated field.
  def self.item_time(item)
    DATE_FIELDS.each do |field|
      next unless item.respond_to?(field)

      value = item.send(field)
      return value if value.is_a?(Time)
    end

    nil
  end

  # We can't cache pages that might change on every fetch.
  def cache_page?
    false
  end

  # The tag generates code that might fall foul of text filters
  # on the page part. Overriding parse_object lets the behavior extract the
  # filter ID so that the tag processing code which then gets run can be
  # evaluated in the context of a particular prevalent filter.
  def parse_object(object)
    @filter = object.respond_to?(:filter_id) ? object.filter_id : nil
    super
  end

  # Tag definitions
  define_tags do

    # <r:news feed="..." [headlines="4"] [dates="1"] [escape="1"] />
    #
    # This behavior provides a 'news' tag which is supplied with a
    # fully qualified URL pointing to an XML RSS feed. The feed is
    # parsed and the 'latest news' summary generated from it. The URL
    # is given in the mandatory 'feed' attribute within the tag.
    #
    # The 'headlines' attribute is optional; it defines how many entries
    # will be included in the news summary and defaults to '4'.
    #
    # The 'dates' attribute is also optional; it says whether or not
    # article published or modified dates (if found) will be added in
    # small text after each headline. If '0' there are no dates, else
    # dates are shown. The default value is '1', to show dates. Dates are
    # extracted from the feed's "pubDate", "modified" or "dc_date" fields,
    # in that order.
    #
    # Finally, an optional 'escape' attribute, defaulting to '1', ensures
    # that RSS titles or links cannot be accidentally interpreted as
    # Textile data for Textile filtered parts. Setting the attribute to
    # '0' disables escaping to allow headlines marked up in Textile to be
    # passed through to the Textile parser.
    #
    # Note that '<' and '>' characters in RSS item titles will always be
    # escaped to HTML entities for security. Links to articles are run
    # through URI::Escape() processing and have '~' characters changed to
    # '%7E' for similar reasons.
    #
    # Example of use:
    #
    #   <r:news feed="http://www.example.com/rss.xml" headlines="3" dates="0" />
    #
    # Raises TagError if the 'feed' attribute is missing or empty, or if the
    # parser returns no feed object.
    #
    # NOTE(review): the HTML element names in the string literals below
    # (ul / li / a / small / notextile) were missing from the reviewed copy
    # of this file and have been reconstructed from the comments describing
    # the output (a list, list items, link HTML, small text, Textile
    # escaping). Confirm against the original markup.
    tag 'news' do |tag|
      feed      = tag.attr['feed']
      dates     = (tag.attr['dates']     || '1').to_i
      escape    = (tag.attr['escape']    || '1').to_i
      headlines = (tag.attr['headlines'] || '4').to_i

      raise TagError, "No feed URL given in `news' tag" if feed.nil? || feed.empty?

      # Fetch the feed and parse it. Guard against a nil result so that a
      # bad feed is reported as a tag error rather than a NoMethodError.
      rss = RSS::Parser.parse(feed)
      raise TagError, "Unable to parse feed given in `news' tag" if rss.nil?

      # Textile would otherwise try to interpret the generated HTML.
      textile = (escape != 0 && @filter == 'Textile')

      done = 0
      out  = "<ul>\n"
      out  = '<notextile>' + out if textile

      # Loop through all items in the feed.
      rss.items.each do |item|

        # The item must have at least a title.
        next if item.title.nil? || item.title.empty?

        # If we've got a title, increase the headline count and bail if
        # the limit has been exceeded.
        done += 1
        break if done > headlines

        # Add HTML for this item to the output string.
        out << '<li>'

        # Ensure the title string doesn't contain unsafe characters -
        # RSS feeds can be used maliciously.
        title = NewsBehavior.sanitize_title(item.title)

        # Markdown doesn't process text here anyway, possibly because
        # the HTML list markup seems to stop it from doing so. Don't
        # escape Markdown for now - the code below has been tested and
        # does work though, so it can be introduced later if need be.
        #
        #if (escape != 0 and @filter == 'Markdown')
        #  title.gsub!(/([`*_{}\[\]()#.!])/) { '\\' + $& }
        #end

        # Insert link HTML if a link is present, escaping it and
        # manually converting "~" characters to the "%7E" equivalent.
        if item.link.nil? || item.link.empty?
          out << title
        else
          link = NewsBehavior.escape_link(item.link)
          out << "<a href=\"#{link}\">#{title}</a>"
        end

        # Attempt to extract an item publication/modification date
        # ("pubDate": Typo blogs, The Register; "modified": RForum
        # installations, generic; "dc_date": SlashDot).
        time = NewsBehavior.item_time(item)

        # Add the date if found and if attributes say to do so, then
        # close the list item.
        out << time.strftime(' <small>(%d-%b-%Y)</small>') if time && dates != 0
        out << "</li>\n"
      end

      # Close the list, handle Textile escaping if necessary and
      # return the final chunk of data.
      out << "</ul>\n"
      out << '</notextile>' if textile

      out
    end

  end # From 'define_tags do'
end