#!c:/Python26/python.exe  
# -*- coding: utf-8 -*-

import sys
import os
import subprocess  # need Python 2.4 or later
import urllib
import codecs
import re

sys.path.append('regzamod')
from Common.RzCmnSubs import dbg,dbg2,dbg0,error,setdbg

#------------------------------------------------------------------------
# Common funcs
#------------------------------------------------------------------------
# Debug verbosity for this script:
#   >0 : log the url_read() result and the command line arguments via dbg()
#   >1 : additionally log the fetched page contents
debug=0

#---- read url contents
def url_read (url):
	"""Fetch the contents of *url* with urllib.urlopen().

	url -- address to open.

	Returns a tuple (rc, out):
	  rc  --  0 on success, -1 when an IOError was raised,
	         -2 when any other Exception was raised
	  out -- whatever read() returned, or None when the fetch failed

	IOError and other Exception subclasses are reported through error()
	instead of being raised to the caller.
	"""
	dbg("url_read: Start, url=%s" % (url))
	rc=0
	out=None
	try:
		fi=urllib.urlopen(url)
		try:
			out=fi.read()
		finally:
			# always release the handle, even when read() fails
			fi.close()
	except IOError as e:
		error("url_read: IOError, e=%r" % (e))
		rc=-1
	except Exception as e:
		error("url_read: Unknown Exception, e=%r" % (e))
		rc=-2
	
	dbg("url_read: End")
	return(rc,out)

#---- convert (string)"hh:mm:ss" to (int)secs
def hms_to_sec (str):
	"""Convert a "hh:mm:ss", "mm:ss" or "ss" string to a number of seconds.

	str -- colon separated duration text, e.g. "23:40" or "1:02:03".
	       (The parameter keeps its historical name, which shadows the
	       builtin inside this function only, so existing callers that
	       pass it by keyword keep working.)

	Returns the duration as an int.  An empty string yields 0.  When a
	field is not a valid integer the problem is reported through error()
	and 0 is returned.  If more than three fields are given only the
	first three are used (as hours, minutes, seconds).
	"""
	fields=str.split(":")
	try:
		# int() parses base-10 text, so zero padded fields such as "08"
		# need no special treatment before conversion.
		if(len(fields)>2):
			return int(fields[0])*3600+int(fields[1])*60+int(fields[2])
		if(len(fields)>1):
			return int(fields[0])*60+int(fields[1])
		# split() always yields at least one field; it is "" for empty input
		if(len(fields[0])>0):
			return int(fields[0])
	except ValueError as e:
		error("ValueError=%r, string=%r, split=%r" % (e,str,fields))
	return 0

#------------------------------------------------------------------------
# Individual parsers (for specific site/page)
#------------------------------------------------------------------------
#---- for youtube playlist
def ps_ytb_playlist (url):
	"""Parse a YouTube playlist page into a feed title and its video entries.

	url -- address of the playlist page; it is fetched with url_read().

	Returns a tuple (title, entries):
	  title   -- stripped text of the first <title>...</title> found before
	             the first video row, or "Unknown" when none is found or
	             the fetch failed
	  entries -- list of 4-item lists
	             [video_url, video_title, duration_secs, thumbnail_url]
	             (all strings; duration_secs defaults to "0", the others
	             to ""); empty when the fetch failed

	Rows in which no video link can be found are skipped.
	"""
	(rc,out)=url_read(url)
	if(rc<0):
		return ("Unknown",[])
	if(debug>0): dbg("url_read rc=%d, url=%s" % (rc,url))
	if(debug>1): dbg("contents=%s" % out)
	
	#-----------------------------------------------------------------------------------------
	# Abridged sample of one playlist row; lines marked (*) carry the extracted fields.
	#
	#<tr class="pl-video yt-uix-tile " data-set-video-id="" data-title="..." data-video-id="h6lgAwk1XeM">
	#	<td class="pl-video-thumbnail">
	#		...
	#		<img src="https://s.ytimg.com/yts/img/pixel-vfl3z5WfW.gif"
	#			(*) data-thumb="//i.ytimg.com/vi/h6lgAwk1XeM/default.jpg" alt="" width="72" aria-hidden="true" >
	#		...
	#	</td>
	#	<td class="pl-video-title">
	#		<a class="pl-video-title-link yt-uix-tile-link yt-uix-sessionlink  spf-link " dir="ltr"
	#			(*) href="/watch?v=vGk-N5jWQrM&amp;list=PLVnSVpF4FIlQb067UtWgBkrM1F9KrV0mH&amp;index=2"
	#			data-sessionlink="ei=...&amp;feature=plpp_video&amp;ved=...">
	#			(*) VIDEO TITLE TEXT ON ITS OWN LINE
	#		</a>
	#	</td>
	#	<td class="pl-video-time">
	#		<div class="timestamp">
	#			(*) <span aria-label="23 min">23:40</span>
	#		</div>
	#	</td>
	#</tr>
	#-----------------------------------------------------------------------------------------
	
	#--- regular expressions used to extract the fields
	rpat_feed=re.compile(r'<title>(.*)</title>')  # list title
	rpat_url=re.compile(r'<a class="pl-video-title-link.+ href="(.*)&amp;.+".+>\n.*\n.*</a>')  # item url
	rpat_name=re.compile(r'<a class="pl-video-title-link.+ href=".*&amp;.+".+>\n(.*)\n.*</a>')  # item title
	rpat_dur=re.compile(r'<span aria-label=.+>(.*)</span>')  # item duration
	rpat_thumb=re.compile(r'data-thumb="([^\s]*)" ')  # item thumbnail
	
	# One chunk per video row; chunk 0 is everything before the first row
	# and is the only place the list title is looked for.
	chunks=out.split('<tr class="pl-video yt-uix-tile')
	
	title="Unknown"
	m=rpat_feed.findall(chunks[0])
	if(len(m)>=1): title=m[0].strip()
	
	entries=[]
	for row in chunks[1:]:
		ent=["","","0",""]
		
		#---- url
		m=rpat_url.findall(row)
		# The original test here was "m<=0", a list/int comparison that is
		# never true on Python 2, so rows without a link were not skipped.
		if(len(m)<=0): continue
		ent[0]=m[0].strip()
		if(ent[0].startswith("/watch?v=")):
			ent[0]="https://www.youtube.com"+ent[0]
		
		#---- title
		m=rpat_name.findall(row)
		if(len(m)>0):
			ent[1]=m[0].strip()
		
		#---- duration (kept as a string holding the number of seconds)
		m=rpat_dur.findall(row)
		if(len(m)>0):
			ent[2]=str(hms_to_sec(m[0].strip()))
		
		#---- thumbnail (protocol-relative addresses get an explicit scheme)
		m=rpat_thumb.findall(row)
		if(len(m)>0):
			thumb=m[0].strip()
			if(thumb.startswith("//")):
				thumb="http:"+thumb
			ent[3]=thumb
		
		entries.append(ent)
	return (title,entries)

if __name__ == '__main__':
	# Usage: <script> <url> <output file, or "-" for stdout>
	# Fetches the page at <url>, parses it with the matching site parser
	# and writes the entries as an RSS 2.0 feed (with media: elements).
	
	#------------------------------------------------------------------
	#	get args
	#------------------------------------------------------------------
	argv=sys.argv  # list holding the command line arguments
	argc=len(argv) # number of arguments (script name included)
	
	if(debug>0):
		dbg("---- Script main : Start")
		dbg("python.exe=%s" % sys.executable)
		dbg("arg_cnt=%d, argv=%r" % (argc,argv))
		for (i,arg) in enumerate(argv):
			dbg("arg[%d]=%s" % (i,arg))
	
	# Both positional arguments are required; fail with a message instead
	# of an IndexError traceback.
	if(argc<3):
		error("usage: %s <url> <output file | ->" % argv[0])
		sys.exit(1)
	
	url=argv[1]
	out=argv[2]
	
	#------------------------------------------------------------------
	# Dispatch parser
	#------------------------------------------------------------------
	feed_title="Unknown"
	entries=[]
	# find()>0 : the marker must be preceded by something (the scheme)
	if(url.find("//www.youtube.com/playlist?list=")>0):
		(feed_title,entries)=ps_ytb_playlist(url)
	else:
		# NOTE: a branch for "https://www.youtube.com/user/" pages used to
		# sit here, but it could never be taken (its find()>0 test fails
		# for a prefix at index 0) and the ps_ytb_user parser it called is
		# not defined in this file.  Such URLs are reported as unsupported,
		# exactly as before.  TODO: add ps_ytb_user and dispatch to it.
		error("not supported url=%s" % url)
		
	#------------------------------------------------------------------
	# Output results (RSS feed structure)
	#------------------------------------------------------------------
	if(out=="-"):
		fo=sys.stdout
	else:
		fo=open(out,"w+b")
	
	try:
		#---- common start parts
		fo.write('<?xml version="1.0" encoding="utf-8"?>\n')
		fo.write('<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss" >\n')
		fo.write('<channel>\n')
		fo.write('<title>%s</title>\n' % feed_title)
		fo.write('<description>Created by Script</description>\n')
		
		#---- per entry (items are numbered from 1)
		for (no,ms) in enumerate(entries,1):
			link="Unknown"
			title="Unknown"
			dur="0"
			thumb=""  # defined for every item, even if an entry is short
			cnt=len(ms)
			if(cnt>0):
				link=ms[0]
				#---- cut playlist part in video url (youtube)
				pos=link.find("&amp;list=")
				if(pos>0):
					link=link[0:pos]
				# the link is not unquoted here; that is left to the caller
			if(cnt>1):
				title=ms[1]
			if(cnt>2):
				dur=ms[2]
			if(cnt>3):
				thumb=ms[3]
			
			fo.write('<item no="%d">\n' % (no))
			fo.write('	<title>%s</title>\n' % (title))
			fo.write('	<link>"%s"</link>\n' % (link))
			fo.write('	<media:group>\n')
			fo.write('		<media:content url="%s" type="text/html" duration="%s" />\n' % (link,dur))
			fo.write('		<media:thumbnail url="%s" />\n' % (thumb))
			fo.write('	</media:group>\n')
			fo.write("</item>\n")
		
		#---- common end parts
		fo.write("</channel>\n")
		fo.write("</rss>\n")
	finally:
		# Close a file we opened even when writing fails; stdout is only
		# flushed because it does not belong to this script.
		if(fo is sys.stdout):
			fo.flush()
		else:
			fo.close()

#-------------------------------------------------------------------------------	
# Sample RSS
#-------------------------------------------------------------------------------	
#<?xml version="1.0" encoding="UTF-8" ?>
#<rss version="2.0">
#<channel>
# <title>RSS Title</title>
# <description>This is an example of an RSS feed</description>
# <link>http://www.example.com/main.html</link>
# <lastBuildDate>Mon, 06 Sep 2010 00:01:00 +0000 </lastBuildDate>
# <pubDate>Sun, 06 Sep 2009 16:20:00 +0000</pubDate>
# <ttl>1800</ttl>
#
# <item>
#  <title>Example entry</title>
#  <description>Here is some text containing an interesting description.</description>
#  <link>http://www.example.com/blog/post/1</link>
#  <guid isPermaLink="true">7bd204c6-1655-4c27-aeee-53f933c5395f</guid>
#  <pubDate>Sun, 06 Sep 2009 16:20:00 +0000</pubDate>
# </item>
#
#</channel>
#</rss>
#
#-------------------------------------------------------------------------------	

