import pnalib
import html.parser
import re
# Address of the page that the menu HTML is fetched from.
url = "http://www.campusravita.fi/ruokalista"

# One row per restaurant: display name, menu URL, and two further fields
# ("" and "middle") whose meaning is defined by the consumer of this module
# -- TODO confirm against the caller.
restaurant_info = [
    ["(TAMK) Campus Food", url, "", "middle"],
]
class Tracker(object):
    """Track whether parsing is currently inside a particular HTML element.

    A tracker watches for a start tag named ``tag`` (optionally requiring one
    attribute whose value matches a pattern).  Once such an element has been
    entered, further start tags with the same name increase a nesting counter
    and matching end tags decrease it, so the tracker only "leaves" when the
    element that opened it is closed.

    The tracker is truthy while inside the element.

    Args:
        tag: Tag name to watch for.
        attr_match: ``None`` to accept any element named ``tag``, or a
            ``(attribute_name, compiled_pattern)`` pair; the element is then
            only entered if it carries that attribute with a value accepted
            by ``compiled_pattern.match``.
        on_started: Optional zero-argument callable invoked when the element
            is entered.
        on_ended: Optional zero-argument callable invoked when the element
            that was entered is closed.
        on_data: Optional callable invoked with each chunk of text seen while
            inside the element.
    """

    def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
        self.tag = tag
        self.attr_match = attr_match
        self.on_started = on_started
        self.on_ended = on_ended
        self.on_data = on_data
        # 0 while outside the element, otherwise the depth of same-named tags.
        self.nesting = 0

    def _attrs_match(self, attrs):
        """Return True if ``attrs`` satisfies this tracker's attribute filter."""
        if self.attr_match is None:
            return True
        wanted_name, pattern = self.attr_match
        # html.parser reports valueless attributes (``<div class>``) with a
        # value of None; those can never match and must not reach re.match().
        return any(
            name == wanted_name and value is not None and pattern.match(value)
            for name, value in attrs
        )

    def handle_starttag(self, tag, attrs):
        """Process a start tag given as a name and a list of (name, value) pairs."""
        if tag != self.tag:
            return
        if self.nesting:
            # Already inside: a same-named child only deepens the nesting.
            self.nesting += 1
            return
        if self._attrs_match(attrs):
            self.nesting = 1
            if self.on_started:
                self.on_started()

    def handle_endtag(self, tag):
        """Process an end tag; fires ``on_ended`` when the tracked element closes."""
        if self.nesting and self.tag == tag:
            self.nesting -= 1
            if self.nesting == 0 and self.on_ended:
                self.on_ended()

    def handle_data(self, data):
        """Forward text to ``on_data`` while inside the tracked element."""
        if self.nesting and self.on_data:
            self.on_data(data)

    def __bool__(self):
        return self.nesting > 0
class CampusravitaHTMLParser(html.parser.HTMLParser):
    """Extract the weekly lunch menu from the Campusravita menu page.

    Every start tag, end tag and text chunk is fanned out to a list of
    ``Tracker`` objects (registered in ``__init__``); their callbacks collect
    the meals.

    Attributes:
        week: Week number parsed from a "Ruokalista - Viikko N" text chunk,
            or ``None`` if no such text has been seen.
        week_foods: Dict mapping a weekday index (0 = Maanantai ...
            6 = Sunnuntai) to a list of ``"name (allergies)"`` strings.
        current_day: The list in ``week_foods`` that meals are currently
            appended to, or ``None`` before the first date has been seen.
        lunch: Dict with ``"food"`` and ``"allergies"`` lists for the meal
            being collected, or ``None`` when no meal is being collected.
        lunch_type_match: Whether the most recent ``<h3>`` text inside the
            menu matched ``lunch_re``.
    """

    # Raw string so that ``\d`` is a regex digit class rather than an invalid
    # string escape sequence.
    week_re = re.compile(r"Ruokalista - Viikko (\d+)")
    lunch_re = re.compile("Lounas|Deli-lounas")

    # Finnish weekday names; the position of a name is its week_foods key.
    _WEEKDAYS = (
        "Maanantai",
        "Tiistai",
        "Keskiviikko",
        "Torstai",
        "Perjantai",
        "Lauantai",
        "Sunnuntai",
    )

    def __init__(self):
        super().__init__()
        self._trackers = []
        # Results are initialised up front so that reading them never raises
        # AttributeError when the page lacks a week heading or a date.
        self.week = None
        self.current_day = None
        self.lunch_type_match = False
        self.lunch = None
        self.week_foods = {}

        # NOTE: trackers are notified in registration order.
        self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
        # Everything of interest is inside the menu section.
        self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
                on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
        # Element holding the date (weekday name) of the following meals.
        self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
                on_data=self.handle_date_display)
        # Lunch element contains one meal.
        self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
                on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
        # Element containing the food name.
        self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
                on_data=self.handle_lunch_food)
        # Element containing the food allergies.
        self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
                on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
        # Element containing an allergy short name.
        self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
                on_data=self.handle_allergy)
        # Element containing the lunch price (tracked, but nothing is collected).
        self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))

    def _register_tracker(self, tag, attr_match=None, **kwargs):
        """Create a ``Tracker``, add it to the dispatch list and return it.

        ``attr_match`` is ``None`` or an ``(attribute_name, regex_string)``
        pair; the regex string is compiled before being handed to the tracker.
        Remaining keyword arguments are passed through to ``Tracker``.
        """
        compiled = (attr_match[0], re.compile(attr_match[1])) if attr_match else None
        tracker = Tracker(tag, compiled, **kwargs)
        self._trackers.append(tracker)
        return tracker

    def handle_date_display(self, data):
        """Start a new day's meal list if ``data`` names a weekday."""
        for index, name in enumerate(self._WEEKDAYS):
            if name in data:
                self.current_day = []
                self.week_foods[index] = self.current_day
                break

    def handle_h3(self, data):
        """Record whether an ``<h3>`` heading inside the menu is a lunch heading."""
        if self.in_menu:
            self.lunch_type_match = bool(self.lunch_re.match(data))

    def handle_menu_start(self):
        """Debug trace: the menu section was entered."""
        print("*********** menu start")

    def handle_menu_end(self):
        """Debug trace: the menu section was left."""
        print("*********** menu end")

    def handle_allergy(self, data):
        """Add a non-blank allergy short name to the meal being collected."""
        data = data.strip()
        if self.in_allergy and self.in_allergy_short and self.lunch and data:
            self.lunch["allergies"].append(data)

    def handle_allergy_start(self):
        """Hook called when the allergy element is entered; does nothing."""
        pass

    def handle_allergy_end(self):
        """Hook called when the allergy element is left; does nothing."""
        pass

    def handle_lunch_food(self, data):
        """Add a non-blank food name chunk to the meal being collected."""
        data = data.strip()
        if self.lunch and data:
            self.lunch["food"].append(data)

    def handle_lunch_start(self):
        """Begin collecting a meal if the last menu heading was a lunch heading."""
        if self.lunch_type_match:
            print("lunch start")
            self.lunch = {"food": [], "allergies": []}

    def handle_lunch_end(self):
        """Finish the meal being collected and append it to the current day.

        The entry is formatted as ``"<first food chunk> (<allergies>)"``.
        A meal without any food name, or one that ends before any date has
        been seen, is discarded instead of raising.
        """
        lunch = self.lunch
        if not lunch:
            return
        print(repr(lunch).encode("cp1252", "ignore"))
        self.lunch = None
        if not lunch["food"] or self.current_day is None:
            # Nothing to show, or no day to file the meal under.
            return
        menu = "{menu} ({allergies})".format(
            menu=lunch["food"][0], allergies=", ".join(lunch["allergies"]))
        self.current_day.append(menu)

    def handle_starttag(self, tag, attrs):
        """Forward a start tag to every tracker."""
        for tracker in self._trackers:
            tracker.handle_starttag(tag, attrs)

    def handle_endtag(self, tag):
        """Forward an end tag to every tracker."""
        for tracker in self._trackers:
            tracker.handle_endtag(tag)

    def handle_data(self, data):
        """Forward text to every tracker and pick up the week number."""
        for tracker in self._trackers:
            tracker.handle_data(data)
        week_match = self.week_re.match(data)
        if week_match:
            self.week = int(week_match.group(1))
def get_restaurants(use_old, week):
    """Fetch the menu page and return the parsed menu as a one-element list.

    Args:
        use_old: Passed through to ``pnalib.get_file`` (presumably selects a
            previously saved copy of the page -- TODO confirm in pnalib).
        week: Not used by this function; the week number in the result is the
            one found on the page itself.

    Returns:
        A list holding one entry of the form
        ``[name, "", parser.week, parser.week_foods, restaurant_info_row]``.
    """
    data = pnalib.get_file(url, "campusravita.html", use_old)
    parser = CampusravitaHTMLParser()
    parser.feed(data)
    # feed() may keep trailing text buffered; close() forces it through the
    # handlers so nothing at the end of the document is lost.
    parser.close()
    info = restaurant_info[0]
    return [[info[0], "", parser.week, parser.week_foods, info]]