#!/usr/bin/env python3
import json
import pprint

import requests
from bs4 import BeautifulSoup
from diskcache import Index
from unicodedata import normalize
import paho.mqtt.publish as publish

DEBUG = False

site = 'https://www.nhsbsa.nhs.uk'
base_url = f"{site}/pharmacies-gp-practices-and-appliance-contractors/serious-shortage-protocols-ssps"

# Persistent key/value stores, so an SSP is only reported the first time it appears
active_ssps = Index('ssps/active')
expired_ssps = Index('ssps/expired')


def extract_table(table, ssp_list):
    """Parse one SSP table; cache unseen rows in ssp_list and return them."""
    new_ssps = []
    for row in table.tbody.find_all('tr'):
        columns = row.find_all('td')
        if not columns:  # header rows hold th cells, not td, so skip them
            continue

        # SSP Name/Ref
        ssp_name = normalize('NFKD', columns[0].text.strip())
        ssp_link = normalize('NFKD', f"{site}{columns[0].find('a').get('href')}")

        # Start and end date, e.g. "1 October 2024 to 31 January 2025".
        # Split on ' to ' (with surrounding spaces) so month names such as
        # "October" are not split on their embedded 'to'.
        dates = normalize('NFKD', columns[1].text.strip())
        ds = dates.split('\n')[0].split(' to ')
        if DEBUG:
            print(f"Splitting dates: [{dates}] | ds len: {len(ds)}")
        start_date = ds[0].strip()
        end_date = ds[1].strip()

        # Guidance document name and link
        guidance_name = normalize('NFKD', columns[2].text.strip())
        guidance_link = normalize('NFKD', f"{site}{columns[2].find('a').get('href')}")

        item = {
            'name': ssp_name,
            'url': ssp_link,
            'start_date': start_date,
            'end_date': end_date,
            'guidance': guidance_name,
            'guidance_url': guidance_link,
        }
        # The SSP URL is the unique key; anything not already cached is new
        if ssp_link not in ssp_list:
            ssp_list[ssp_link] = item
            new_ssps.append(item)
            if DEBUG:
                print(item)
    return new_ssps


# Only ever one request per run, so no rate-limiting hassles here
response = requests.get(base_url, headers={'User-agent': 'friendly_python'})
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# The page carries two tables: active SSPs first, then expired ones
tables = soup.find_all('table')

actives = extract_table(tables[0], active_ssps)
if actives:
    print("New Active SSPs")
    for i in actives:
        publish.single("sspmon/ssps/active", json.dumps(i), hostname="192.168.1.3")
        pprint.pp(i)

expireds = extract_table(tables[1], expired_ssps)
if expireds:
    print("Newly Expired SSPs")
    for i in expireds:
        publish.single("sspmon/ssps/expired", json.dumps(i), hostname="192.168.1.3")
        pprint.pp(i)
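

# --- Example consumer (sketch) --------------------------------------------
# A minimal paho-mqtt subscriber showing one way to consume the messages
# published above. The broker address and the sspmon/ssps/# topic filter
# mirror the hard-coded values in this script; the --listen flag is purely
# illustrative and not part of the original monitor. Written against the
# paho-mqtt 1.x Client API (2.x additionally takes a CallbackAPIVersion
# argument to Client()).
import sys

if '--listen' in sys.argv:
    import paho.mqtt.client as mqtt

    def on_message(client, userdata, msg):
        # Each payload is one JSON item as built in extract_table()
        ssp = json.loads(msg.payload)
        print(f"{msg.topic}: {ssp['name']} ({ssp['start_date']} to {ssp['end_date']})")

    sub = mqtt.Client()
    sub.on_message = on_message
    sub.connect("192.168.1.3")
    sub.subscribe("sspmon/ssps/#")
    sub.loop_forever()  # blocks; run this mode in its own terminal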