PNA.fi koodi

amica.pl 5.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. use vars qw(@day_names);
  2. # erkkeri is gone - could remove a lot of this ugliness
  3. my $erkkeri_title = "(TTY) Amica Erkkeri";
  4. my @restaurant_info = (
  5. [ "(TaY) Amica Minerva", "http://www.amica.fi/minerva", "", "middle" ]
  6. #[ $erkkeri_title, "http://www.amica.fi/erkkeri", "", "left" ]
  7. );
  8. my ($parse_func, $day_id, $week, $erkkeri);
  9. my (@cur_day_foods, @week_foods);
  10. sub amica_parse_to_eof {
  11. }
  12. sub amica_finish_day {
  13. push @week_foods, [@cur_day_foods];
  14. @cur_day_foods = ();
  15. $day_id = $day_id + 1;
  16. }
  17. sub utf8_to_8859 {
  18. $_ = shift;
  19. s/ä/ä/g;
  20. s/ö/ö/g;
  21. s/Ä/Ä/g;
  22. s/Ö/Ö/g;
  23. return $_;
  24. }
  25. sub amica_parse_line {
  26. my $text = shift;
  27. $text =~ tr/\r\n\t/ /;
  28. $text =~ s/( | )*$//;
  29. $text =~ s/ *(\d+,\d+ *\/ *)?\d+,\d+ *$//;
  30. $text =~ s/^( | )*//;
  31. $text =~ s/( | )*$//;
  32. $text =~ s/valinnan mukaan$//;
  33. if ($text =~ /^(.*) \(([^\)]+)\)$/) {
  34. my ($name, $allergy) = ($1, $2);
  35. $name =~ s/( )*$//;
  36. $allergy =~ s/\*veg\./ eriks: Ve/g;
  37. $allergy =~ s/veg/Ve/gi;
  38. $allergy =~ s/\*([A-Z]+)/ eriks: $1/;
  39. return "$name ($allergy)\n";
  40. } elsif ($text ne "") {
  41. return "$text\n";
  42. }
  43. return "";
  44. }
  45. sub amica_parse_split {
  46. my $text = shift;
  47. my $food = "";
  48. for (;;) {
  49. $i = index($text, ")");
  50. last if ($i == -1);
  51. my $line = substr($text, 0, $i + 1);
  52. $food .= amica_parse_line($line);
  53. $text = substr($text, $i + 1);
  54. }
  55. $food .= amica_parse_line($text);
  56. push @cur_day_foods, $food if ($food ne "");
  57. }
  58. sub amica_parse_more_food {
  59. my $token = shift;
  60. if ($token->[0] eq 'T') {
  61. my $text = utf8_to_8859($token->[1]);
  62. my $next_day_name = $day_names[$day_id+1];
  63. if ($day_id < 6 && $text =~ /^$next_day_name\b/i) {
  64. # day changed
  65. amica_finish_day();
  66. } else {
  67. amica_parse_split($text);
  68. }
  69. } elsif ($token->[0] eq 'S' && $token->[1] eq 'h2') {
  70. amica_finish_day();
  71. $parse_func = \&amica_parse_to_eof;
  72. }
  73. }
  74. sub amica_parse_first_day {
  75. my $token = shift;
  76. if ($token->[0] eq 'T') {
  77. for ($day_id = 0; $day_id < 7; $day_id++) {
  78. my $dayname = $day_names[$i];
  79. last if ($token->[1] =~ /^$dayname\b/i);
  80. }
  81. $day_id = 0 if ($day_id == 7);
  82. $parse_func = \&amica_parse_more_food;
  83. }
  84. }
  85. sub amica_parse_to_start {
  86. my $token = shift;
  87. if ($token->[0] eq 'S' && $token->[1] eq 'p') {
  88. $parse_func = \&amica_parse_first_day;
  89. }
  90. }
  91. sub get_week {
  92. my ($mday, $mon) = @_;
  93. my @l = localtime;
  94. my @l2 = (0, 0, 0, $mday, $mon-1, $l[5], 0, 0, -1);
  95. @l = localtime(mktime(@l2));
  96. return strftime("%V", @l);
  97. }
  98. sub amica_parse_date {
  99. my $token = shift;
  100. if ($token->[0] eq 'T') {
  101. my $text = $token->[1];
  102. if ($text =~ /^(\d\d\d\d)-(\d?\d)-(\d?\d) /) {
  103. my ($mday, $mon) = ($3, $2);
  104. $week = get_week($mday, $mon);
  105. }
  106. } elsif ($token->[0] eq 'E' && $token->[1] eq 'h2') {
  107. $parse_func = \&amica_parse_to_start;
  108. }
  109. }
  110. sub amica_parse_to_date {
  111. my $token = shift;
  112. if ($token->[0] eq 'S' && $token->[1] eq 'h2') {
  113. my %attrs = %{$token->[2]};
  114. if ($attrs{'id'} =~ /HeadingMenu/) {
  115. $parse_func = \&amica_parse_date;
  116. }
  117. }
  118. }
  119. sub parse_amica {
  120. my ($fname, $info_ref) = @_;
  121. my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");
  122. my $title = @{$info_ref}[0];
  123. $week = "";
  124. $day_id = 0;
  125. @cur_day_foods = ();
  126. @week_foods = ();
  127. $parse_func = \&amica_parse_to_date;
  128. while (my $token = $p->get_token) {
  129. &$parse_func($token);
  130. }
  131. return [ $title, "", $week, [ @week_foods ], $info_ref ];
  132. }
  133. sub parse_amica_get_finnish_url {
  134. my ($fname) = @_;
  135. my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");
  136. my @l = localtime;
  137. my $this_week = strftime("%V", @l);
  138. my $state = 0;
  139. my $last_url = "";
  140. my $week = "";
  141. while (my $token = $p->get_token) {
  142. if ($token->[0] eq 'S' && $token->[1] eq 'meta') {
  143. my %attrs = %{$token->[2]};
  144. if ($attrs{'name'} eq "TITLE") {
  145. if ($attrs{'content'} =~ /(\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/) {
  146. my ($mday, $mon) = ($1, $2);
  147. $week = get_week($mday, $mon);
  148. } elsif ($attrs{'content'} =~ /(\d+)\.? *- *(\d+)\.(\d+)/) {
  149. my ($mday, $mon) = ($1, $3);
  150. $week = get_week($mday, $mon);
  151. }
  152. return "" if ($attrs{'content'} !~ /English/i && $week == $this_week);
  153. }
  154. } elsif ($token->[0] eq 'S' && $token->[1] eq 'a') {
  155. my %attrs = %{$token->[2]};
  156. $last_url = $attrs{'href'};
  157. } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (\d+)\.(\d+)\.? *- *(\d+)\.(\d+)/i) {
  158. my ($mday, $mon) = ($1, $2);
  159. $week = get_week($mday, $mon);
  160. return $last_url if ($week == $this_week);
  161. } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (\d+)\.? *- *(\d+)\.(\d+)/i) {
  162. my ($mday, $mon) = ($1, $3);
  163. $week = get_week($mday, $mon);
  164. return $last_url if ($week == $this_week);
  165. } elsif ($token->[0] eq 'T' && $token->[1] =~ /^ruokalista (viikko|vko) (\d+)/i) {
  166. $week = $2;
  167. return $last_url if ($week == $this_week);
  168. }
  169. }
  170. return "";
  171. }
  172. sub parse_amica_url {
  173. my ($fname) = @_;
  174. my $p = HTML::TokeParser->new($fname) or die("Can't open file $fname");
  175. my $state = 0;
  176. while (my $token = $p->get_token) {
  177. if ($token->[0] eq 'S') {
  178. my %attrs = %{$token->[2]};
  179. if ($token->[1] eq 'td' && $attrs{'title'} eq 'Ruokalistat' && $state == 0) {
  180. $state = 1;
  181. } elsif ($token->[1] eq 'a' && $state == 1) {
  182. return $attrs{'href'};
  183. }
  184. }
  185. }
  186. return "";
  187. }
  188. sub get_amica_restaurant {
  189. my $use_old = shift;
  190. my $count = 0;
  191. my @restaurants = ();
  192. foreach my $i (@restaurant_info) {
  193. my @info = @{$i};
  194. my $temp_fname = "amica$count.temp.html";
  195. my $url = $info[1];
  196. if (!-f $temp_fname || !$use_old) {
  197. system("wget -q --timeout=10 -O $temp_fname.tmp '$url' && mv $temp_fname.tmp $temp_fname") if ($url ne "");
  198. }
  199. if (-f $temp_fname) {
  200. $info[1] = $url;
  201. push @restaurants, parse_amica($temp_fname, \@info);
  202. }
  203. $count++;
  204. }
  205. return @restaurants;
  206. }
  207. 1;