import pnalib
import html.parser
import re
# Page that publishes the weekly lunch menu; it is both downloaded for
# parsing and stored in the restaurant description below.
url = "http://www.campusravita.fi/ruokalista"

# One row per restaurant served by this module.  The row starts with the
# display name followed by the menu URL; the remaining two fields are passed
# through untouched.
# NOTE(review): the meaning of the last two fields ("" and "middle") is
# defined by whatever consumes this table -- confirm against the caller.
restaurant_info = [
    [
        "(TAMK) Campus Food",
        url,
        "",
        "middle",
    ],
]
class Tracker(object):
    """Track whether the parser is currently inside one kind of HTML element.

    A tracker watches for a start tag named ``tag`` (optionally restricted to
    tags carrying an attribute that matches ``attr_match``).  Once such a tag
    has been seen, same-named tags nested inside it are counted so that the
    tracker only ends at the end tag that balances the opening one.

    Parameters:
        tag: Element name to watch for.
        attr_match: ``None`` to accept every ``tag`` element, or a pair
            ``(attribute_name, compiled_pattern)``; the element is accepted
            when any attribute with that name has a value the pattern
            ``match``-es.
        on_started: Optional callable invoked with no arguments when the
            tracked element opens.
        on_ended: Optional callable invoked with no arguments when the
            tracked element closes.
        on_data: Optional callable invoked with each text chunk seen while
            inside the tracked element.

    The tracker is truthy while inside the tracked element.
    """

    def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
        self.tag = tag
        self.attr_match = attr_match
        self.on_started = on_started
        self.on_ended = on_ended
        self.on_data = on_data
        # Depth of open ``tag`` elements since the tracked one started;
        # 0 means "not inside the tracked element".
        self.nesting = 0

    def _attrs_match(self, attrs):
        """Return True when ``attrs`` satisfies this tracker's attribute filter."""
        if self.attr_match is None:
            return True
        wanted_name, pattern = self.attr_match
        for attr_name, attr_value in attrs:
            # HTMLParser reports valueless attributes (``<div hidden>``) with a
            # value of None, which a regex cannot be matched against.
            if attr_name == wanted_name and attr_value is not None and pattern.match(attr_value):
                return True
        return False

    def handle_starttag(self, tag, attrs):
        """Process a start tag; ``attrs`` is a list of ``(name, value)`` pairs."""
        if self.tag != tag:
            return
        if self.nesting:
            # Already inside: just remember one more level to unwind.
            self.nesting += 1
            return
        if self._attrs_match(attrs):
            self.nesting = 1
            if self.on_started:
                self.on_started()

    def handle_endtag(self, tag):
        """Process an end tag, firing ``on_ended`` when the tracked element closes."""
        if self.nesting and self.tag == tag:
            self.nesting -= 1
            if self.nesting == 0 and self.on_ended:
                self.on_ended()

    def handle_data(self, data):
        """Forward a text chunk to ``on_data`` while inside the tracked element."""
        if self.nesting and self.on_data:
            self.on_data(data)

    def __bool__(self):
        return self.nesting > 0
class CampusravitaHTMLParser(html.parser.HTMLParser):
    """Extract the weekly lunch menu from the Campusravita menu page.

    The parser feeds every start tag, end tag and text chunk to a set of
    ``Tracker`` objects, each of which recognises one kind of element on the
    page and calls back into this class.

    Results after feeding a document:
        week: Week number parsed from the "Ruokalista - Viikko N" text, or
            ``None`` when no such text was seen.
        week_foods: Dict mapping a weekday index (0 = "Maanantai" ...
            6 = "Sunnuntai") to a list of strings formatted as
            ``"<first food name> (<allergy codes joined by ', '>)"``.
    """

    # Text announcing the week number of the menu.
    week_re = re.compile(r"Ruokalista - Viikko (\d+)")
    # <h3> headings that introduce a lunch section whose dishes are collected.
    lunch_re = re.compile("Lounas|Deli-lounas")
    # Finnish weekday names in index order (position == key in week_foods).
    day_names = (
        "Maanantai",
        "Tiistai",
        "Keskiviikko",
        "Torstai",
        "Perjantai",
        "Lauantai",
        "Sunnuntai",
    )

    def __init__(self):
        super().__init__()
        self._trackers = []

        # Parsing state.  Everything is created up front so that handlers
        # never hit a missing attribute on unexpected page layouts.
        self.lunch_type_match = False  # last <h3> inside the menu was a lunch heading
        self.lunch = None              # dish being collected, or None
        self.current_day = None        # list receiving dishes of the current weekday
        self.week = None               # week number, once found
        self.week_foods = {}

        self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
        # The menu itself lives inside this section.
        self.in_menu = self._register_tracker(
            "section", ("id", "block-system-main"),
            on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
        # Span holding the weekday name for the dishes that follow.
        self.in_date_display = self._register_tracker(
            "span", ("class", "date-display-single"),
            on_data=self.handle_date_display)
        # One such div wraps a single dish.
        self.in_lunch = self._register_tracker(
            "div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
            on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
        # Name of the dish.
        self.in_lunch_food = self._register_tracker(
            "div", ("class", ".*field-name-field-nimi.*"),
            on_data=self.handle_lunch_food)
        # Container of the dish's allergy/diet markers.
        self.in_allergy = self._register_tracker(
            "div", ("class", ".*field-name-field-ruokavaliot.*"),
            on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
        # Short code of a single allergy/diet marker.
        self.in_allergy_short = self._register_tracker(
            "div", ("class", ".*field-name-title field-type-ds.*"),
            on_data=self.handle_allergy)
        # Price of the dish; tracked but currently not used for output.
        self.in_lunch_price = self._register_tracker(
            "div", ("class", ".*field-name-field-annoksen-hinta.*"))

    def _register_tracker(self, tag, attr_match=None, **kwargs):
        """Create a Tracker, compiling the attribute pattern, and register it.

        ``attr_match`` is ``None`` or ``(attribute_name, regex_source)``;
        remaining keyword arguments are passed to ``Tracker`` unchanged.
        Returns the new tracker.
        """
        compiled = None
        if attr_match:
            compiled = (attr_match[0], re.compile(attr_match[1]))
        tracker = Tracker(tag, compiled, **kwargs)
        self._trackers.append(tracker)
        return tracker

    def handle_date_display(self, data):
        """Start a fresh dish list when ``data`` names a weekday."""
        for index, day_name in enumerate(self.day_names):
            if day_name in data:
                self.current_day = []
                self.week_foods[index] = self.current_day
                break

    def handle_h3(self, data):
        """Record whether the heading inside the menu is a lunch heading."""
        if self.in_menu:
            self.lunch_type_match = bool(self.lunch_re.match(data))

    def handle_menu_start(self):
        pass

    def handle_menu_end(self):
        pass

    def handle_allergy(self, data):
        """Add a non-empty allergy code to the dish being collected."""
        data = data.strip()
        if self.in_allergy and self.in_allergy_short and self.lunch and data:
            self.lunch["allergies"].append(data)

    def handle_allergy_start(self):
        pass

    def handle_allergy_end(self):
        pass

    def handle_lunch_food(self, data):
        """Add a non-empty food name to the dish being collected."""
        data = data.strip()
        if self.lunch and data:
            self.lunch["food"].append(data)

    def handle_lunch_start(self):
        """Begin collecting a dish, but only under a lunch heading."""
        if self.lunch_type_match:
            self.lunch = {"food": [], "allergies": []}

    def handle_lunch_end(self):
        """Store the finished dish under the current weekday.

        A dish without any food name, or one seen before any weekday heading,
        is dropped instead of raising.
        """
        lunch = self.lunch
        if not lunch:
            return
        self.lunch = None
        if not lunch["food"] or self.current_day is None:
            return
        menu = "{menu} ({allergies})".format(
            menu=lunch["food"][0], allergies=", ".join(lunch["allergies"]))
        self.current_day.append(menu)

    def handle_starttag(self, tag, attrs):
        for tracker in self._trackers:
            tracker.handle_starttag(tag, attrs)

    def handle_endtag(self, tag):
        for tracker in self._trackers:
            tracker.handle_endtag(tag)

    def handle_data(self, data):
        for tracker in self._trackers:
            tracker.handle_data(data)
        # The week number may appear in any text chunk, not only in a
        # tracked element.
        week_match = self.week_re.match(data)
        if week_match:
            self.week = int(week_match.group(1))
def get_restaurants(use_old, week):
    """Fetch and parse the menu page and return the restaurant listing.

    Parameters:
        use_old: Forwarded unchanged to ``pnalib.get_file``.
            NOTE(review): presumably selects a previously downloaded copy
            instead of fetching again -- confirm against pnalib.
        week: Accepted for interface compatibility; not used here.  The week
            number in the result comes from the parsed page.

    Returns:
        A one-element list whose entry is
        ``[name, "", parsed_week, week_foods, restaurant_info_row]``.
    """
    data = pnalib.get_file(url, "campusravita.html", use_old)
    parser = CampusravitaHTMLParser()
    parser.feed(data)
    # Flush text that HTMLParser is still buffering after the last tag so
    # the handlers see the complete document.
    parser.close()
    info = restaurant_info[0]
    return [[info[0], "", parser.week, parser.week_foods, info]]