#!/usr/bin/env python3
# Filter items not containing keywords (given as parameters) from a rss feed.
# Reads stdin, writes stdout.
#
# Provenance: added as local/bin/titlefilter.py by commit e58ef018b2ab
# ("newsbeuter - add filter script", Felix Van der Jeugt, 2015-01-06).

import re
import sys

# The hard way, pretty clean but doesn't work (probably) because HTMLParser puts
# all the tags in lowercase.

#from html.parser import HTMLParser
#from itertools import chain

#def start_to_tag(tag, attrs, end=False):
#    if end:
#        items = chain([tag], ('{}="{}"'.format(n,v) for (n,v) in attrs), ['/'])
#    else:
#        items = chain([tag], ('{}="{}"'.format(n,v) for (n,v) in attrs))
#    return "<{}>".format(" ".join(items))
#
#class FeedParser(HTMLParser):
#
#    def __init__(self, keep):
#        super().__init__(self)
#        self.keep = keep
#        self.parts = []
#        self.item = None
#
#    def handle_starttag(self, tag, attrs, end=False):
#        string = start_to_tag(tag, attrs, end=end)
#        if tag == "item":
#            self.item = string
#        elif self.item is None:
#            self.parts.append(string)
#        else:
#            self.item += string
#
#    def handle_endtag(self, tag):
#        string = "</{}>".format(tag)
#        if self.item is None:
#            self.parts.append(string)
#        elif tag == "item":
#            if any(title in self.item for title in self.keep):
#                string = self.item + string
#            else:
#                string = ""
#            self.item = None
#            self.parts.append(string)
#        else:
#            self.item += string
#
#    def handle_startendtag(self, tag, attrs):
#        self.handle_starttag(tag, attrs, end=True)
#
#    def handle_data(self, data):
#        if self.item is None:
#            self.parts.append(data)
#        else:
#            self.item += data
#
#    def handle_entityref(self, data):
#        self.handle_data("&{};".format(data))
#
#    def handle_charref(self, data):
#        self.handle_data("$#{};".format(data))

#parser = FeedParser(sys.argv)
#parser.feed(sys.stdin.read())
#parser.close()

#print(''.join(parser.parts), end='')

# The easy way. Works.

# NOTE(review): in the reviewed copy of this script the tag literals used for
# splitting and matching were empty strings (apparently stripped as markup),
# which makes str.split() raise "empty separator". The <item> ... </item>
# delimiters below are a reconstruction -- confirm against the intended feed
# format.
_ITEM_OPEN = re.compile(r'<item[\s>]')  # "<item>" or "<item attr=...>", not "<items>"
ITEM_CLOSE = '</item>'


def check(block, keywords=None):
    """Return True when *block* must be kept in the filtered output.

    A block is kept when it is not a complete feed item (it does not contain
    the closing item tag), or when any keyword occurs anywhere in the block.
    Matching is a plain, case-sensitive substring test on the raw text.

    block    -- a piece of raw feed text.
    keywords -- iterable of strings to look for; defaults to the command-line
                arguments, excluding the script path in sys.argv[0].
    """
    if keywords is None:
        # sys.argv[0] is the script's own path, not a keyword.
        keywords = sys.argv[1:]
    if ITEM_CLOSE not in block:
        return True
    return any(title in block for title in keywords)


def filter_feed(text, keywords=None):
    """Return *text* with every item that fails check() removed.

    Everything outside the items (feed header, text between items, footer) is
    copied through unchanged, so the closing tags after the last item survive
    even when that item is dropped. An item without a closing tag is left
    untouched.
    """
    starts = [match.start() for match in _ITEM_OPEN.finditer(text)]
    if not starts:
        return text

    parts = [text[:starts[0]]]
    for begin, end in zip(starts, starts[1:] + [len(text)]):
        # Separate the item itself from whatever follows its closing tag, so
        # only the item is subject to filtering.
        body, close, tail = text[begin:end].partition(ITEM_CLOSE)
        item = body + close
        if check(item, keywords):
            parts.append(item)
        parts.append(tail)
    return ''.join(parts)


def main():
    """Filter the feed on stdin with the command-line keywords, to stdout."""
    print(filter_feed(sys.stdin.read(), sys.argv[1:]), end='')


if __name__ == '__main__':
    main()