PNA.fi koodi

campusravita.py 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. import pnalib
  2. import html.parser
  3. import re
  4. url = "http://www.campusravita.fi/ruokalista";
  5. restaurant_info = [
  6. [ "(TAMK) Campus Food", url, "", "middle" ],
  7. ]
  8. class Tracker(object):
  9. def __init__(self, tag, attr_match=None, on_started=None, on_ended=None, on_data=None):
  10. self.tag = tag
  11. self.attr_match = attr_match
  12. self.on_started = on_started
  13. self.on_ended = on_ended
  14. self.on_data = on_data
  15. self.nesting = 0
  16. def handle_starttag(self, tag, attrs):
  17. if self.tag == tag:
  18. if self.nesting:
  19. self.nesting += 1
  20. else:
  21. attrs_matched = False
  22. if self.attr_match is None:
  23. attrs_matched = True
  24. else:
  25. for attr in attrs:
  26. if attr[0] == self.attr_match[0] and self.attr_match[1].match(attr[1]):
  27. attrs_matched = True
  28. if attrs_matched:
  29. self.nesting = 1
  30. if self.on_started:
  31. self.on_started()
  32. def handle_endtag(self, tag):
  33. if self.nesting and self.tag == tag:
  34. self.nesting -= 1
  35. if self.nesting == 0 and self.on_ended:
  36. self.on_ended()
  37. def handle_data(self, data):
  38. if self.nesting and self.on_data:
  39. self.on_data(data)
  40. def __bool__(self):
  41. return self.nesting > 0
  42. class CampusravitaHTMLParser(html.parser.HTMLParser):
  43. week_re = re.compile("Ruokalista - Viikko (\d+)")
  44. lunch_re = re.compile("Lounas|Deli-lounas")
  45. def __init__(self):
  46. html.parser.HTMLParser.__init__(self)
  47. self._trackers = []
  48. self.in_h3 = self._register_tracker("h3", on_data=self.handle_h3)
  49. # Everything in inside menu
  50. self.in_menu = self._register_tracker("section", ("id", "block-system-main"),
  51. on_started=self.handle_menu_start, on_ended=self.handle_menu_end)
  52. # Date comes after menu
  53. self.in_date_display = self._register_tracker("span", ("class", "date-display-single"),
  54. on_data=self.handle_date_display)
  55. # Lunch element contains one meal
  56. self.in_lunch = self._register_tracker("div", ("about", r"/fi/field-collection/field-ruoka-annos/\d+"),
  57. on_started=self.handle_lunch_start, on_ended=self.handle_lunch_end)
  58. # Next element contains food name
  59. self.in_lunch_food = self._register_tracker("div", ("class", ".*field-name-field-nimi.*"),
  60. on_data=self.handle_lunch_food)
  61. # Next element contains food allergies
  62. self.in_allergy = self._register_tracker("div", ("class", ".*field-name-field-ruokavaliot.*"),
  63. on_started=self.handle_allergy_start, on_ended=self.handle_allergy_end)
  64. # Next element contains allergy short name
  65. self.in_allergy_short = self._register_tracker("div", ("class", ".*field-name-title field-type-ds.*"),
  66. on_data=self.handle_allergy)
  67. # Next element contains lunch price
  68. self.in_lunch_price = self._register_tracker("div", ("class", ".*field-name-field-annoksen-hinta.*"))
  69. self.lunch_type_match = False
  70. self.lunch = None
  71. self.week_foods = {}
  72. def _register_tracker(self, tag, attr_match=None, **kwargs):
  73. tracker = Tracker(tag, (attr_match[0], re.compile(attr_match[1])) if attr_match else None, **kwargs)
  74. self._trackers.append(tracker)
  75. return tracker
  76. def handle_date_display(self, data):
  77. index = -1
  78. if "Maanantai" in data:
  79. index = 0
  80. elif "Tiistai" in data:
  81. index = 1
  82. elif "Keskiviikko" in data:
  83. index = 2
  84. elif "Torstai" in data:
  85. index = 3
  86. elif "Perjantai" in data:
  87. index = 4
  88. elif "Lauantai" in data:
  89. index = 5
  90. elif "Sunnuntai" in data:
  91. index = 6
  92. if index >= 0:
  93. self.current_day = []
  94. self.week_foods[index] = self.current_day
  95. def handle_h3(self, data):
  96. if self.in_menu:
  97. lunch_match = self.lunch_re.match(data)
  98. self.lunch_type_match = bool(lunch_match)
  99. def handle_menu_start(self):
  100. print("*********** menu start")
  101. def handle_menu_end(self):
  102. print("*********** menu end")
  103. def handle_allergy(self, data):
  104. data = data.strip()
  105. if self.in_allergy and self.in_allergy_short and self.lunch and data:
  106. self.lunch["allergies"].append(data)
  107. def handle_allergy_start(self):
  108. pass
  109. def handle_allergy_end(self):
  110. pass
  111. def handle_lunch_food(self, data):
  112. data = data.strip()
  113. if self.lunch and data:
  114. self.lunch["food"].append(data)
  115. def handle_lunch_start(self):
  116. if self.lunch_type_match:
  117. print("lunch start")
  118. self.lunch = {"food": [], "allergies": []}
  119. def handle_lunch_end(self):
  120. if self.lunch:
  121. print(repr(self.lunch).encode("cp1252", "ignore"))
  122. menu = "{menu} ({allergies})".format(menu=self.lunch["food"][0], allergies=", ".join(self.lunch["allergies"]))
  123. self.current_day.append(menu)
  124. self.lunch = None
  125. def handle_starttag(self, tag, attrs):
  126. for tracker in self._trackers:
  127. tracker.handle_starttag(tag, attrs)
  128. def handle_endtag(self, tag):
  129. for tracker in self._trackers:
  130. tracker.handle_endtag(tag)
  131. def handle_data(self, data):
  132. for tracker in self._trackers:
  133. tracker.handle_data(data)
  134. week_match = self.week_re.match(data)
  135. if week_match:
  136. self.week = int(week_match.group(1))
  137. def get_restaurants(use_old, week):
  138. data = pnalib.get_file(url, "campusravita.html", use_old)
  139. parser = CampusravitaHTMLParser()
  140. parser.feed(data)
  141. restaurants = [[restaurant_info[0][0], "", parser.week, parser.week_foods, restaurant_info[0]]]
  142. return restaurants