PNA.fi koodi

campusravita.py 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. import pnalib
  2. import html.parser
  3. import re
  4. url = "http://www.campusravita.fi/ruokalista";
  5. restaurant_info = [
  6. [ "(TAMK) Campus Food", url, "", "middle" ],
  7. ]
  8. class Tracker(object):
  9. def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
  10. self.tag = tag
  11. self.attr_match = attr_match
  12. self.on_started = on_started
  13. self.on_ended = on_ended
  14. self.on_data = on_data
  15. self.nesting = 0
  16. def handle_starttag(self, tag, attrs):
  17. if self.tag == tag:
  18. if self.nesting:
  19. self.nesting += 1
  20. else:
  21. attrs_matched = False
  22. if self.attr_match is None:
  23. attrs_matched = True
  24. else:
  25. for attr in attrs:
  26. if attr[0] == self.attr_match[0] and self.attr_match[1].match(attr[1]):
  27. attrs_matched = True
  28. if attrs_matched:
  29. self.nesting = 1
  30. if self.on_started:
  31. self.on_started()
  32. def handle_endtag(self, tag):
  33. if self.nesting and self.tag == tag:
  34. self.nesting -= 1
  35. if self.nesting == 0 and self.on_ended:
  36. self.on_ended()
  37. def handle_data(self, data):
  38. if self.nesting and self.on_data:
  39. self.on_data(data)
  40. def __bool__(self):
  41. return self.nesting > 0
  42. class CampusravitaHTMLParser(html.parser.HTMLParser):
  43. week_re = re.compile("Ruokalista - Viikko (\d+)")
  44. lunch_re = re.compile("Lounas|Deli-lounas")
  45. week = None
  46. def __init__(self):
  47. html.parser.HTMLParser.__init__(self)
  48. self._trackers = []
  49. self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
  50. # Everything in inside menu
  51. self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
  52. on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
  53. # Date comes after menu
  54. self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
  55. on_data=self.handle_date_display)
  56. # Lunch element contains one meal
  57. self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
  58. on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
  59. # Next element contains food name
  60. self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
  61. on_data=self.handle_lunch_food)
  62. # Next element contains food allergies
  63. self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
  64. on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
  65. # Next element contains allergy short name
  66. self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
  67. on_data=self.handle_allergy)
  68. # Next element contains lunch price
  69. self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))
  70. self.lunch_type_match = False
  71. self.lunch = None
  72. self.week_foods = {}
  73. def _register_tracker(self, tag, attr_match=None, **kwargs):
  74. tracker = Tracker(tag, (attr_match[0], re.compile(attr_match[1])) if attr_match else None, **kwargs)
  75. self._trackers.append(tracker)
  76. return tracker
  77. def handle_date_display(self, data):
  78. index = -1
  79. if "Maanantai" in data:
  80. index = 0
  81. elif "Tiistai" in data:
  82. index = 1
  83. elif "Keskiviikko" in data:
  84. index = 2
  85. elif "Torstai" in data:
  86. index = 3
  87. elif "Perjantai" in data:
  88. index = 4
  89. elif "Lauantai" in data:
  90. index = 5
  91. elif "Sunnuntai" in data:
  92. index = 6
  93. if index >= 0:
  94. self.current_day = []
  95. self.week_foods[index] = self.current_day
  96. def handle_h3(self, data):
  97. if self.in_menu:
  98. lunch_match = self.lunch_re.match(data)
  99. self.lunch_type_match = bool(lunch_match)
  100. def handle_menu_start(self):
  101. pass
  102. def handle_menu_end(self):
  103. pass
  104. def handle_allergy(self, data):
  105. data = data.strip()
  106. if self.in_allergy and self.in_allergy_short and self.lunch and data:
  107. self.lunch["allergies"].append(data)
  108. def handle_allergy_start(self):
  109. pass
  110. def handle_allergy_end(self):
  111. pass
  112. def handle_lunch_food(self, data):
  113. data = data.strip()
  114. if self.lunch and data:
  115. self.lunch["food"].append(data)
  116. def handle_lunch_start(self):
  117. if self.lunch_type_match:
  118. self.lunch = {"food": [], "allergies": []}
  119. def handle_lunch_end(self):
  120. if self.lunch:
  121. #print(repr(self.lunch).encode("cp1252", "ignore"))
  122. menu = "{menu} ({allergies})".format(menu=self.lunch["food"][0], allergies=", ".join(self.lunch["allergies"]))
  123. self.current_day.append(menu)
  124. self.lunch = None
  125. def handle_starttag(self, tag, attrs):
  126. for tracker in self._trackers:
  127. tracker.handle_starttag(tag, attrs)
  128. def handle_endtag(self, tag):
  129. for tracker in self._trackers:
  130. tracker.handle_endtag(tag)
  131. def handle_data(self, data):
  132. for tracker in self._trackers:
  133. tracker.handle_data(data)
  134. week_match = self.week_re.match(data)
  135. if week_match:
  136. self.week = int(week_match.group(1))
  137. def get_restaurants(use_old, week):
  138. data = pnalib.get_file(url, "campusravita.html", use_old)
  139. parser = CampusravitaHTMLParser()
  140. parser.feed(data)
  141. restaurants = []
  142. if parser.week is not None:
  143. restaurants.append([restaurant_info[0][0], "", parser.week, parser.week_foods, restaurant_info[0]])
  144. return restaurants