aesan_alerts.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

#!/usr/bin/env ruby

require 'net/http'
require 'uri'
require 'nokogiri'

# Scheme and host of the AESAN website; prepended to the links found on the page.
AESAN_HOST = 'https://www.aesan.gob.es'.freeze
# Page that is downloaded and scanned for food-alert links.
AESAN_NEWS_URI = "#{AESAN_HOST}/AECOSAN/web/seguridad_alimentaria/subseccion/otras_alertas_alimentarias.htm".freeze
# Plain-text file (one title per line) remembering which alerts were already
# reported. The path is relative to the current working directory.
READ_TITLES_FILE = './aesan_read.txt'.freeze

# Marks +line+ as seen in the line-oriented store at +file_path+ and reports
# whether it was new.
#
# The store holds one entry per physical line. An entry that itself contains a
# line break would be written as several lines and could never be matched
# again, so such entries are collapsed to a single line (whitespace runs become
# one space) before being compared and stored. Entries without a line break are
# stored verbatim.
#
# @param file_path [String] path of the store; created on first write if missing
# @param line [String] entry to look up and, if absent, append
# @return [Boolean] true if the entry was not yet stored (it is appended now),
#   false if it was already present (the file is left untouched)
def read_title(file_path, line)
  entry = line.include?("\n") ? line.split.join(' ') : line

  already_seen = File.exist?(file_path) &&
                 File.foreach(file_path).any? { |stored| stored.chomp == entry }
  return false if already_seen

  # Append mode creates the file when it does not exist yet.
  File.open(file_path, 'a') { |file| file.puts(entry) }
  true
end

# Performs a GET request for +url+, retrying when the request raises.
#
# @param url [String] address to download
# @param attempts [Integer] maximum number of tries
# @param wait [Numeric] seconds to pause between two tries
# @return [Net::HTTPResponse] response of the first try that did not raise
#   (its status may still be an error status)
# @note Terminates the process via +abort+ once all tries have raised.
def download_with_retries(url, attempts: 3, wait: 5)
  tries = 0
  begin
    tries += 1
    Net::HTTP.get_response(URI.parse(url))
  rescue StandardError => e
    # Give up right after the last try instead of sleeping once more first.
    abort "Error al descargar URL tras #{attempts} intentos: #{e}" if tries >= attempts
    sleep wait
    retry
  end
end

# Trims link text and, when it spans several lines, collapses it to one line so
# that it forms a single entry in the seen-titles file and a single bullet.
def single_line_title(text)
  title = text.strip
  title.include?("\n") ? title.split.join(' ') : title
end

# Resolves +href+ against the page it was found on, so absolute and relative
# links both yield a usable URL. Hrefs that URI cannot parse fall back to the
# plain host + href concatenation.
def absolute_link(page_url, href)
  URI.join(page_url, href.to_s).to_s
rescue URI::Error
  "#{AESAN_HOST}#{href}"
end

# Builds one bullet line per link in the page's content section whose title
# had not been recorded yet; recording happens as a side effect of read_title.
def unseen_alert_lines(document, page_url)
  document.xpath('//section[@class="theContent"]/p/a').filter_map do |link|
    title = single_line_title(link.text)
    "* #{title} - #{absolute_link(page_url, link['href'])}" if read_title(READ_TITLES_FILE, title)
  end
end

# Downloads the alerts page at +url+, records every link title that was not
# yet present in READ_TITLES_FILE and prints the new ones to standard output
# as a bulleted list preceded by a heading. Prints nothing when there is
# nothing new.
#
# @param url [String] address of the page to scan
# @return [nil]
# @note Terminates the process via +abort+ when the download keeps raising or
#   the server answers with a non-success status. Redirects are not followed.
def fetch_first_paragraph(url)
  response = download_with_retries(url)
  unless response.is_a?(Net::HTTPSuccess)
    abort "Error al descargar página: #{response.code} #{response.message}"
  end

  alerts = unseen_alert_lines(Nokogiri::HTML(response.body), url)
  return if alerts.empty?

  if alerts.length == 1
    puts "Se ha publicado una nueva alerta alimentaria en AESAN:\n"
  else
    puts "Se han publicado una o más alertas alimentarias en AESAN:\n"
  end
  puts alerts
end

# Script entry point: scan the AESAN alerts page and print the alerts whose
# titles are not yet recorded in READ_TITLES_FILE.
fetch_first_paragraph(AESAN_NEWS_URI)