import os, tqdm, sys
import requests
import urllib.request
import re
from bs4 import BeautifulSoup
import multiprocessing
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
try:
	from StringIO import StringIO
except ImportError:
	# Python 3 has no StringIO module; the class lives in io.
	from io import StringIO
#from webdriver_manager.firefox import GeckoDriverManager
from selenium import webdriver
from utillc import *
from selenium.webdriver.firefox.service import Service as ServiceF
from selenium.webdriver.chrome.service import Service as ServiceC
from selenium.webdriver.common.action_chains import ActionChains
import PIL
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import numpy as np
import lxml.etree
#wd = browser = webdriver.Firefox(executable_path=	  GeckoDriverManager().install())
EKO()
import sys
from selenium.webdriver.firefox.options import Options as FirefoxOptions
from selenium.webdriver.chrome.options import Options as ChromeOptions
import time
import cherrypy
import argparse
import PIL
from PIL import Image
import orange

def tqdmtqdm(x):
	"""Return *x* unchanged.

	Identity wrapper placed around iterables in this file; presumably a
	stand-in that can be swapped for ``tqdm.tqdm`` to get progress bars.
	"""
	return x

class P(orange.RobotBase):
	"""Selenium robot that downloads the episodes listed on a podcast page.

	The browser is expected in ``self.driver`` and screenshots come from
	``self.screen()``; both are provided outside this class (presumably by
	``orange.RobotBase.build`` -- confirm against that module).
	"""

	# XPath of one episode entry in the collection list.
	_ITEM_XPATH = '//li[contains(@class,"Collection-section-items-item svelte")]'
	# XPath of the close control of a modal pop-up.
	_MODAL_CLOSE_XPATH = '//li[contains(@class,"ModalHeader-close svelte-xwavtz")]'
	# XPath of any element whose text mentions the newsletter pop-up.
	_NEWSLETTER_XPATH = "//*[contains(text(), 'Newsletter')]"
	# Characters replaced by "_" when an episode title becomes a file name.
	_UNSAFE_CHARS = "/'? :,éà!\"`"
	_UNSAFE_TABLE = str.maketrans(_UNSAFE_CHARS, "_" * len(_UNSAFE_CHARS))
	# Socket timeout (seconds) for the audio download request.
	_DOWNLOAD_TIMEOUT = 60

	def __init__(self):
		"""Initialise the base class, then call ``build(headless=True)``."""
		super().__init__()
		self.build(headless=True)

	def xx(self):
		"""Create a Chrome driver by hand and store it in ``self.driver``.

		Not called anywhere in this file; kept as an alternative to the
		driver set up through ``build``. Uses the chromedriver binary at
		``/usr/bin/chromedriver``.
		"""
		options = ChromeOptions()
		options.add_argument('--no-sandbox')
		options.add_argument('--disable-dev-shm-usage')
		# Override the user agent so the site does not see the default
		# automated-browser signature.
		options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) "
							 "Chrome/86.0.4240.183 Safari/537.36")
		EKO()
		s = ServiceC('/usr/bin/chromedriver')
		self.driver = webdriver.Chrome(service=s, options=options)
		EKO()

	@staticmethod
	def _title_matches(title, pattern):
		"""Return True when *pattern* is None or a case-insensitive substring of *title*."""
		return pattern is None or pattern.lower() in title.lower()

	@classmethod
	def _safe_filename(cls, title):
		"""Return the ``.mp3`` file name used on disk for an episode *title*.

		Newlines are dropped, surrounding whitespace is stripped and every
		character of ``_UNSAFE_CHARS`` becomes ``_``. The mapping is kept
		exactly as it always was so files downloaded earlier are still
		recognised as "already there".
		"""
		return title.replace("\n", "").strip().translate(cls._UNSAFE_TABLE) + ".mp3"

	def _click_first_visible(self, elements):
		"""Click the first displayed and enabled element of *elements*.

		An element that fails (stale, covered, not interactable...) is logged
		and the next candidate is tried.

		Returns:
			True as soon as one click succeeds, False if none did.
		"""
		for elem in elements:
			EKOX(elem)
			try:
				if elem.is_displayed() and elem.is_enabled():
					elem.click()
					EKOT('clické')
					return True
			except Exception as ex:
				EKOX(ex)
		return False

	def _accept_cookies(self):
		"""Click the cookie-consent button if one is shown (best effort).

		Several capitalisations of the label are tried because the XPath
		``contains`` test is case-sensitive. Any error is logged and ignored.

		Returns:
			True if a button was clicked, False otherwise.
		"""
		ta = "Tout accepter"
		labels = [ta, ta.lower(), ta.upper(), "Tout Accepter"]
		try:
			for label in labels:
				EKOX(label)
				spans = self.driver.find_elements(By.XPATH, '//button/span[contains(text(), "%s")]' % label)
				if self._click_first_visible(spans):
					return True
		except Exception as ex:
			EKOX(ex)
		return False

	def _episode_items(self):
		"""Return the episode entries currently present in the page."""
		items = self.driver.find_elements(By.XPATH, self._ITEM_XPATH)
		EKOX(len(items))
		return items

	def _matching_titles(self, pattern):
		"""Return ``(index, title)`` for every listed episode whose title matches *pattern*.

		Raises whatever Selenium raises when an entry has no title link.
		"""
		titled = [(idx, item.find_element(By.XPATH, './/a[@data-testid="link"]').text)
				  for idx, item in enumerate(self._episode_items())]
		EKOX(len(titled))
		found = [pair for pair in titled if self._title_matches(pair[1], pattern)]
		EKOX(len(found))
		return found

	def _unfold_list(self, unfold, pattern, min_match_num):
		"""Click the "load next" button up to *unfold* times.

		Stops early when the button cannot be found or clicked (the list is
		then assumed to be fully unfolded), or when *pattern* is given and at
		least *min_match_num* titles already match it.
		"""
		bot = self.driver
		for i in range(unfold):
			EKOX(i)
			try:
				button = bot.find_element(By.XPATH, '//button[@data-testid="LoadNext"]')
				EKOX(button)
				ActionChains(bot).move_to_element(button).click(button).perform()
				EKOT("clicked")
				# give the page time to append the next batch of episodes
				time.sleep(2)
				if pattern is not None:
					found = self._matching_titles(pattern)
					if len(found) >= min_match_num:
						EKOX(found)
						break
				EKO()
			except Exception as ex:
				EKOX(ex)
				EKOT("tout déplié")
				break

	def _download_episode(self, item, outdir, pattern):
		"""Download the audio of one episode entry if its title matches *pattern*.

		Clicks the entry's play button, reads the ``src`` of the last
		``<audio>`` element under ``<body>`` and saves it in *outdir* unless a
		file of the same name already exists.

		Raises:
			RuntimeError: no audio element or no ``src`` after clicking play.
			requests.RequestException: the download failed or returned an
				HTTP error status (nothing is written in that case).
			Selenium exceptions from locating or clicking elements.
		"""
		link = item.find_element(By.XPATH, './/a[@data-testid="link"]')
		title = link.text
		EKOX(title)
		if not self._title_matches(title, pattern):
			return

		play = item.find_element(By.XPATH, './/button[@type="button"]')
		EKOX(play)
		play.click()
		# let the player create/update its <audio> element
		time.sleep(1)

		audios = self.driver.find_elements(By.XPATH, "/html/body/audio")
		EKOX(len(audios))
		if not audios:
			raise RuntimeError("no audio element found after clicking play")
		for audio in audios:
			EKOX(audio.get_attribute('src'))
		href = audios[-1].get_attribute('src')
		EKOX(href)
		if not href:
			raise RuntimeError("audio element has no src")

		filename = self._safe_filename(title)
		EKOX(filename)
		target = os.path.join(outdir, filename)
		if os.path.exists(target):
			EKOT("%s already there" % filename)
			return

		EKOT("downloading")
		doc = requests.get(href, timeout=self._DOWNLOAD_TIMEOUT)
		# never store an HTTP error page under an .mp3 name
		doc.raise_for_status()
		EKOT("writing")
		with open(target, 'wb') as f:
			f.write(doc.content)
		EKOT("%s written" % filename)

	def _log_newsletter_paths(self):
		"""Log the absolute XPath of every node mentioning 'Newsletter' (diagnostic only)."""
		html = self.driver.execute_script("return document.documentElement.outerHTML")
		root = lxml.etree.HTML(html)
		if root is None:
			return
		tree = root.getroottree()
		for node in root.xpath(self._NEWSLETTER_XPATH):
			EKOX(tree.getpath(node))

	def _dismiss_popup(self, ex):
		"""Try to close a modal pop-up after an episode failed with *ex* (best effort).

		Every error raised here is logged and swallowed so that one failing
		episode never aborts the whole run.
		"""
		try:
			pops = self.driver.find_elements(By.XPATH, self._NEWSLETTER_XPATH)
			EKOX(len(pops))
			if "element not interactable" in str(ex):
				self._log_newsletter_paths()
			closers = self.driver.find_elements(By.XPATH, self._MODAL_CLOSE_XPATH)
			self._click_first_visible(closers)
		except Exception as ex2:
			EKOX(ex2)

	def load(self, outdir, unfold, url, pattern, min_match_num):
		"""Open *url*, unfold the episode list and download the matching episodes.

		Args:
			outdir: existing directory receiving the ``.mp3`` files.
			unfold: maximum number of clicks on the "load next" button.
			url: address of the podcast page; also stored in ``self.url``.
			pattern: case-insensitive substring an episode title must
				contain; ``None`` selects every episode.
			min_match_num: when *pattern* is given, stop unfolding once this
				many titles match.

		A failure on one episode is logged, a pop-up dismissal is attempted
		and the loop moves on to the next episode. The browser window is
		closed when the method returns or raises.
		"""
		self.url = url
		bot = self.driver
		try:
			bot.get(self.url)
			bot.implicitly_wait(5)
			EKOX(bot.title)
			EKOI(self.screen(), sz=800)

			self._accept_cookies()
			EKOI(self.screen(), sz=800)

			EKOX(hash(bot.page_source))
			self._episode_items()
			EKOI(self.screen(), sz=800)

			self._unfold_list(unfold, pattern, min_match_num)

			for ie, item in tqdmtqdm(enumerate(self._episode_items())):
				EKON(ie)
				try:
					self._download_episode(item, outdir, pattern)
				except Exception as ex:
					EKOX(ex)
					EKOI(self.screen(), sz=800)
					EKOT("sans doute pas d'émission ce jour")
					self._dismiss_popup(ex)
		finally:
			bot.close()	 # shuts down the bot


if __name__ == '__main__':
	# Command-line entry point: parse the options, build the robot, make sure
	# the output directory exists, then run the download.
	cli = argparse.ArgumentParser(prog='ferrand aspirator', description='download ferrand podcast')
	option_specs = (
		('--outdir', dict(default="cours-de-lhistoire")),
		('--pattern', dict(default=None)),
		('--min_match', dict(type=int, default=1)),
		('--nunfold', dict(type=int, default=2)),
		('--url', dict(default="https://www.radiofrance.fr/franceculture/podcasts/le-cours-de-l-histoire")),
	)
	for flag, spec in option_specs:
		cli.add_argument(flag, **spec)
	opts = cli.parse_args()

	robot = P()
	os.makedirs(opts.outdir, exist_ok=True)
	robot.load(opts.outdir, opts.nunfold, opts.url, opts.pattern, opts.min_match)