#!/usr/bin/python3
#A simple crawler for RRP Canada I made for a coding assessment
import os
import random
import time
from io import StringIO

import pymongo
from pymongo import MongoClient
from selenium import webdriver #The selenium python3 module is required
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Scraping can become inefficient if frequent page hits cause the IP to be
# blocked. A good strategy is to mimic the browsing behaviour of a real user
# with random delays of 1-10 seconds between page actions, and a rest (hit
# speed) of a few hours between crawls. Rotation of IP, account, and user
# agent could also be added later to reduce suspicion.

# Crawl pacing: hit_speed is the rest (in seconds) between two full crawls;
# min_wait / max_wait bound the random pause (in seconds) between page actions.
hit_speed = 60*60*3
max_wait = 10
min_wait = 1

# Login credentials. The RRP_USERNAME / RRP_PASSWORD environment variables take
# precedence so the secret does not have to live in source control; the
# literals are kept only as a backward-compatible fallback.
# NOTE(review): the fallback password is committed in plain text -- rotate it
# and drop the default once the environment variables are in place.
login_username = os.environ.get("RRP_USERNAME", "etceterax+bot@protonmail.com")
login_password = os.environ.get("RRP_PASSWORD", "7}I8)dZ-GVv|]&jK]<]^$B,y$")

## Initialize the MongoDB client and a headless firefox browser

# MongoClient() is created with pymongo's default connection settings; scraped
# documents are stored in the "products" collection of the "optima" database.
client = MongoClient()
db = client.optima
products = db.products

# Headless mode is requested with the "-headless" command-line argument rather
# than the `headless` property: the property is deprecated in Selenium 4.8 and
# removed from later 4.x releases, while add_argument() is available in both
# Selenium 3 and Selenium 4.
opts = webdriver.FirefoxOptions()
opts.add_argument("-headless")

# Optional unique index over the fields that identify a product. Left disabled
# as in the original script.
# products.create_index([('name', pymongo.ASCENDING), ('category',
#     pymongo.ASCENDING), ('by', pymongo.ASCENDING), ('supplied_by',
#         pymongo.ASCENDING)], unique=True)

def wait():
    """Sleep for a random whole number of seconds from min_wait to max_wait, inclusive."""
    pause_seconds = random.randint(min_wait, max_wait)
    time.sleep(pause_seconds)

def start():
    """Open the RRP Canada login page and check that it loaded.

    Raises:
        RuntimeError: if, after the random pause, the page title does not
            contain "Sign In".
    """
    driver.get("https://www.rrpcanada.org/#/login")
    wait()
    # Explicit check instead of `assert`: assert statements are removed when
    # Python runs with -O, which would let the crawl continue on a wrong page.
    page_title = driver.title
    if "Sign In" not in page_title:
        raise RuntimeError(f'Login page did not load; page title was {page_title!r}')

def login():
    """Fill in the login form with the configured credentials and submit it.

    The form is submitted by sending RETURN to the password field, followed by
    a random pause.
    """
    # find_element(By.ID, ...) replaces find_element_by_id, which was removed
    # in Selenium 4.3; the By-based form works on Selenium 3 and 4 alike.
    email = driver.find_element(By.ID, "email")
    password = driver.find_element(By.ID, "password")
    email.send_keys(login_username)
    password.send_keys(login_password)
    password.send_keys(Keys.RETURN)
    wait()

def getProducts():
    """Navigate to the available-products listing and scrape every product.

    Clicks through the "#/user-type" and "#/available-products-services"
    links, waits up to 10 seconds for the first line-item button to be present,
    prints the number of buttons found, then for each one opens its modal,
    calls getProduct() and closes the modal again, pausing randomly between
    actions.
    """
    line_item_buttons = "div.table div.line-item button"

    wait()
    driver.find_element(By.CSS_SELECTOR, "a[href*='#/user-type']").click()
    wait()
    driver.find_element(By.CSS_SELECTOR, "a[href*='#/available-products-services']").click()
    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, line_item_buttons)))
    wait()
    total = len(driver.find_elements(By.CSS_SELECTOR, line_item_buttons))
    print(f'{total} results')

    #During account rotation, this could start at the first index not already stored
    for index in range(total):
        # Re-locate the buttons on every pass and pick by position, so the
        # element clicked is never a reference captured before earlier modals
        # were opened and closed (which may have gone stale).
        buttons = driver.find_elements(By.CSS_SELECTOR, line_item_buttons)
        if index >= len(buttons):
            # The listing shrank since it was counted; nothing left to open.
            break
        button = buttons[index]
        driver.execute_script("arguments[0].scrollIntoView();", button)
        button.click()
        wait()
        getProduct()
        driver.find_element(By.CSS_SELECTOR, 'div.close-modal-button').click()
        wait()

# Parses the modal's text lines into a product document; kept separate from
# the browser/database calls in getProduct().
def _parse_product_lines(info):
    """Build a product dict from the modal text split into lines.

    The header fields are read from fixed line positions (1-4, 7 and 8), with
    the leading label sliced off where the key and value share a line. From
    the first line equal to 'Contact:' up to (but excluding) the last line, the
    lines are read as alternating "Key:" / value pairs.

    NOTE(review): if a header line such as the description is absent, the
    fixed positions shift and cannot be detected here -- verify against the
    live page layout.

    Returns:
        The product dict, or None when `info` is too short to contain the
        fixed header positions.
    """
    if len(info) < 9:
        return None

    #Common information has keys and values on the same line
    product = {
            "category": info[1].strip(),
            "name": info[2].strip(),
            "by": info[3][4:].strip(),
            "supplied_by": info[4][14:].strip(),
            "made_in": info[7][8:].strip(),
            "description": info[8].strip(),
            }

    try:
        contact_at = info.index('Contact:')
    except ValueError:
        # No detail section in this modal; keep only the header fields.
        details = []
    else:
        #Slice out the product details while removing the comment button's text
        details = info[contact_at:-1]

    #Product details have keys and values on alternating lines; zip() drops a
    #trailing key that has no value line instead of raising IndexError
    for key_line, value_line in zip(details[::2], details[1::2]):
        key = key_line.strip()
        if key.endswith(':'):
            key = key[:-1]
        if key:
            product[key] = value_line.strip()

    return product


def getProduct():
    """Scrape the currently open product modal and store it if it is new.

    Reads all text inside the 'modal-content-container' div, parses it into a
    product document and upserts that document into the `products`
    collection. A modal whose text is too short to parse is reported with
    print() and skipped.
    """
    #Grab all the text in the modal, which is a line separated list of keys and values
    modal = driver.find_element(By.XPATH, "//div[@class='modal-content-container']")
    product = _parse_product_lines(modal.text.split("\n"))
    if product is None:
        print('Skipping product: modal text has too few lines to parse')
        return

    #The whole document is the filter, so an identical stored product is left
    #untouched and anything else is inserted; no separate find_one is needed
    products.update_one(product, {"$set": product}, upsert=True)

# An infinite loop that initializes the selenium driver, crawls, quits, then rests.
while True:
    driver = webdriver.Firefox(options=opts)
    try:
        # A random implicit wait per session, within the same bounds as wait()
        driver.implicitly_wait(random.randint(min_wait, max_wait))
        start()
        login()
        getProducts()
    finally:
        # quit() ends the whole WebDriver session (browser and driver process);
        # close() only closes the current window, which would leak a driver
        # process on every cycle. Running it in `finally` also cleans up when
        # a crawl step raises.
        driver.quit()
    time.sleep(hit_speed)