123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- import pnalib
- import html.parser
- import re
-
# Address of the page that the menu is downloaded from.
url = "http://www.campusravita.fi/ruokalista"

# One row per restaurant. The first item is the display name and the second
# the menu URL; the remaining two items are passed through unchanged to the
# consumer of get_restaurants() (their meaning is not visible in this file).
restaurant_info = [
    ["(TAMK) Campus Food", url, "", "middle"],
]
-
class Tracker(object):
    """Track whether parsing is currently inside one particular HTML element.

    A tracker is armed by a start tag named ``tag`` whose attributes satisfy
    ``attr_match``. While armed it counts nested start/end tags of the same
    name so that it is released by the end tag that balances the opening one.
    The tracker is truthy while armed.

    Args:
        tag: Element name to watch for.
        attr_match: ``None`` to accept any attributes, or a pair
            ``(attribute_name, compiled_pattern)``; the element matches when
            any attribute with that name has a value the pattern ``match``es.
        on_started: Optional zero-argument callable invoked when armed.
        on_ended: Optional zero-argument callable invoked when released.
        on_data: Optional one-argument callable that receives every text
            chunk seen while armed.
    """

    def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
        self.tag = tag
        self.attr_match = attr_match
        self.on_started = on_started
        self.on_ended = on_ended
        self.on_data = on_data
        # Depth of open ``tag`` elements since the tracker was armed; 0 = idle.
        self.nesting = 0

    def _attrs_match(self, attrs):
        """Return True when ``attrs`` satisfies this tracker's attribute filter."""
        if self.attr_match is None:
            return True
        wanted_name, pattern = self.attr_match
        # HTMLParser reports valueless attributes (``<div hidden>``) with a
        # value of None, which a compiled pattern cannot match against.
        return any(
            name == wanted_name and value is not None and pattern.match(value)
            for name, value in attrs
        )

    def handle_starttag(self, tag, attrs):
        """Arm the tracker, or deepen the nesting count when already armed."""
        if tag != self.tag:
            return
        if self.nesting:
            # Same-named element nested inside the tracked one.
            self.nesting += 1
            return
        if self._attrs_match(attrs):
            self.nesting = 1
            if self.on_started:
                self.on_started()

    def handle_endtag(self, tag):
        """Unwind one nesting level; fire ``on_ended`` when the element closes."""
        if not self.nesting or tag != self.tag:
            return
        self.nesting -= 1
        if self.nesting == 0 and self.on_ended:
            self.on_ended()

    def handle_data(self, data):
        """Forward text to ``on_data`` while inside the tracked element."""
        if self.nesting and self.on_data:
            self.on_data(data)

    def __bool__(self):
        return self.nesting > 0
-
class CampusravitaHTMLParser(html.parser.HTMLParser):
    """Extract the weekly lunch menu from the Campusravita menu page.

    After the page has been fed to the parser:

    * ``week`` holds the week number taken from a "Ruokalista - Viikko N"
      text node, or ``None`` when no such text was seen.
    * ``week_foods`` maps a weekday index (0 = Maanantai ... 6 = Sunnuntai)
      to a list of strings formatted as ``"<first food name> (<allergies>)"``.

    Element recognition is delegated to ``Tracker`` objects; every start tag,
    end tag and text chunk is forwarded to all registered trackers.
    """

    week_re = re.compile(r"Ruokalista - Viikko (\d+)")
    lunch_re = re.compile("Lounas|Deli-lounas")

    # Weekday names as they appear in the date text; position = week_foods key.
    _WEEKDAYS = (
        "Maanantai",
        "Tiistai",
        "Keskiviikko",
        "Torstai",
        "Perjantai",
        "Lauantai",
        "Sunnuntai",
    )

    def __init__(self):
        super().__init__()
        self._trackers = []
        self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
        # Everything is inside the menu section
        self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
                                              on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
        # Date comes after menu
        self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
                                                      on_data=self.handle_date_display)
        # Lunch element contains one meal
        self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
                                               on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
        # Next element contains food name
        self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
                                                    on_data=self.handle_lunch_food)
        # Next element contains food allergies
        self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
                                                 on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
        # Next element contains allergy short name
        self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
                                                       on_data=self.handle_allergy)
        # Next element contains lunch price
        self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))
        # True while the most recent <h3> inside the menu named a lunch category.
        self.lunch_type_match = False
        # Meal being collected ({"food": [...], "allergies": [...]}) or None.
        self.lunch = None

        # Initialised up front so that a page lacking a week heading or a
        # date does not surface later as an AttributeError.
        self.week = None
        self.current_day = None
        self.week_foods = {}

    def _register_tracker(self, tag, attr_match=None, **kwargs):
        """Create a Tracker, compiling the attribute pattern, and register it."""
        compiled = (attr_match[0], re.compile(attr_match[1])) if attr_match else None
        tracker = Tracker(tag, compiled, **kwargs)
        self._trackers.append(tracker)
        return tracker

    def handle_date_display(self, data):
        """Start a new day's food list when ``data`` contains a weekday name."""
        for index, day_name in enumerate(self._WEEKDAYS):
            if day_name in data:
                self.current_day = []
                self.week_foods[index] = self.current_day
                return

    def handle_h3(self, data):
        """Remember whether the current menu heading names a lunch category."""
        if self.in_menu:
            self.lunch_type_match = bool(self.lunch_re.match(data))

    def handle_menu_start(self):
        print("*********** menu start")

    def handle_menu_end(self):
        print("*********** menu end")

    def handle_allergy(self, data):
        """Record an allergy short name for the meal being collected."""
        data = data.strip()
        if self.in_allergy and self.in_allergy_short and self.lunch is not None and data:
            self.lunch["allergies"].append(data)

    def handle_allergy_start(self):
        pass

    def handle_allergy_end(self):
        pass

    def handle_lunch_food(self, data):
        """Record a food name for the meal being collected."""
        data = data.strip()
        if self.lunch is not None and data:
            self.lunch["food"].append(data)

    def handle_lunch_start(self):
        """Begin collecting a meal, but only under a lunch heading."""
        if self.lunch_type_match:
            print("lunch start")
            self.lunch = {"food": [], "allergies": []}

    def handle_lunch_end(self):
        """Format the collected meal and append it to the current day."""
        lunch = self.lunch
        if lunch is None:
            return
        self.lunch = None
        print(repr(lunch).encode("cp1252", "ignore"))
        # A meal without a name, or one seen before any date, cannot be
        # listed; skip it instead of failing on food[0] / a missing day list.
        if not lunch["food"] or self.current_day is None:
            return
        menu = "{menu} ({allergies})".format(
            menu=lunch["food"][0],
            allergies=", ".join(lunch["allergies"]),
        )
        self.current_day.append(menu)

    def handle_starttag(self, tag, attrs):
        for tracker in self._trackers:
            tracker.handle_starttag(tag, attrs)

    def handle_endtag(self, tag):
        for tracker in self._trackers:
            tracker.handle_endtag(tag)

    def handle_data(self, data):
        """Forward text to every tracker and pick up the week number."""
        for tracker in self._trackers:
            tracker.handle_data(data)
        week_match = self.week_re.match(data)
        if week_match:
            self.week = int(week_match.group(1))
-
def get_restaurants(use_old, week):
    """Fetch and parse the Campusravita menu page.

    Args:
        use_old: Passed straight through to ``pnalib.get_file``
            (presumably selects a previously saved copy -- confirm in pnalib).
        week: Not used by this function; the reported week number comes
            from the parsed page.

    Returns:
        A one-element list containing
        ``[name, "", week_number, week_foods, restaurant_info_row]``.
    """
    data = pnalib.get_file(url, "campusravita.html", use_old)
    parser = CampusravitaHTMLParser()
    parser.feed(data)
    # feed() may hold back trailing input; close() forces it to be processed.
    parser.close()

    info = restaurant_info[0]
    return [[info[0], "", parser.week, parser.week_foods, info]]
-
|