#!/usr/bin/env python3
import sys, getopt

import requests
from diskcache import Index
from selectolax.parser import HTMLParser

DEBUG = False
USAGE = f"Usage: python {sys.argv[0]} [--help] | [--version] | [--reddit] | [--lobsters]"
VERSION = f"{sys.argv[0]} version 1.0.0"

subs = {}
tags = {}
limit = 100
timeframe = 'week'  # hour, day, week, month, year, all
listing = 'top'     # controversial, best, hot, new, random, rising, top
badurls = []
posts = Index('data/results')   # persistent on-disk store of already-seen posts
newposts = {}
reddit = False
lobsters = False


def parse():
    """Parse command-line options and return (separator, operands, config)."""
    config = {}
    options, arguments = getopt.getopt(
        sys.argv[1:],                                # Arguments
        'vrlh',                                      # Short option definitions
        ["version", "reddit", "lobsters", "help"])   # Long option definitions
    separator = "\n"
    for o, a in options:
        if o in ("-v", "--version"):
            print(VERSION)
            sys.exit()
        if o in ("-r", "--reddit"):
            config['reddit'] = True
        if o in ("-l", "--lobsters"):
            config['lobsters'] = True
        if o in ("-h", "--help"):
            print(USAGE)
            sys.exit()
    if not options:
        raise SystemExit(USAGE)
    try:
        operands = [int(arg) for arg in arguments]
    except ValueError:
        raise SystemExit(USAGE)
    return separator, operands, config


def get_reddit(subreddit, listing, limit, timeframe):
    """Fetch a subreddit listing as JSON; Reddit requires a custom User-agent."""
    base_url = f'https://www.reddit.com/r/{subreddit}/{listing}.json?limit={limit}&t={timeframe}'
    try:
        request = requests.get(base_url, headers={'User-agent': 'yourbot'})
    except requests.RequestException as e:
        raise SystemExit(f'An error occurred fetching {base_url}: {e}')
    return request.json()


def process_reddit(sub, data, min_score):
    """Filter posts by score and badurls, then merge new posts into the cache."""
    for i in data:
        d = i['data']
        if d['score'] >= min_score:
            title, url, score = d['title'], d['url'], d['score']
            butest = [s for s in badurls if s.lower() in url.lower()]
            if butest:
                if DEBUG:
                    print(f"{url} is in badurls {butest}")
            else:
                post = {'title': title, 'source': f"reddit:{sub}",
                        'url': url, 'score': score}
                if url not in posts:
                    newposts[url] = post
    if newposts:
        for k, p in newposts.items():
            posts[p['url']] = p


def extract_reddits(subs):
    for sub, min_score in subs.items():
        r = get_reddit(sub, listing, limit, timeframe)
        data = r['data']['children']
        process_reddit(sub, data, min_score)


def extract_lobsters(tags):
    """Scrape the lobste.rs listing for the configured tags and collect new posts."""
    taglist = ",".join(tags.keys())
    base_url = f"https://lobste.rs/t/{taglist}"
    try:
        r = requests.get(base_url, headers={'User-agent': 'yourbot'})
    except requests.RequestException as e:
        print(f'An error occurred extracting from lobste.rs: {e}')
        sys.exit(-1)
    h = HTMLParser(r.text)
    items = h.css('ol.stories.list li')
    for i in items:
        score = i.css("div.score")[0].text()
        title = i.css("span.link a")[0].text()
        url = i.css("span.link a")[0].attrs['href']
        taglabels = ", ".join([x.text() for x in i.css("span.tags a")])
        post = {'title': title, 'source': f"lobsters:{taglabels}",
                'url': url, 'score': score}
        if url not in posts:
            newposts[url] = post
    if newposts:
        for k, p in newposts.items():
            posts[p['url']] = p


def dump_data():
    if DEBUG:
        print("Subs identified:")
        for k, v in subs.items():
            print(f"Sub: {k} - min_score: {v}")
        print("\n")
    for k, p in newposts.items():
        print(f"Src: \"{p['source']}\" Title: \"{p['title']}\" "
              f"Url: {p['url']} Score: {p['score']}")


# main
separator, operands, config = parse()

with open("badurls.txt") as f:
    badurls = f.read().splitlines()

if config.get("reddit"):
    # Each line of redditsubs.txt is "subreddit:min_score".
    with open("redditsubs.txt") as f:
        for l in f:
            sub, min_score = l.partition(":")[::2]
            subs[sub.strip()] = int(min_score)
    extract_reddits(subs)

if config.get("lobsters"):
    # Each line of lobsterstags.txt is "tag:min_score".
    with open("lobsterstags.txt") as f:
        for l in f:
            tag, min_score = l.partition(":")[::2]
            tags[tag.strip()] = int(min_score)
    extract_lobsters(tags)

dump_data()