#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os, sys, time
import urllib,urllib2
import shutil,filecmp,datetime
from HTMLParser import HTMLParser

# Debug switch: dbg() prints to stderr only while this is > 0.
g_debug=1
# Interactive-mode flag: the main script sets it to 1 when the third
# command-line argument contains "taiwa=1"; it enables the y/n prompts
# and makes url_read() keep a raw copy of downloaded html pages.
g_taiwa=0
# Output file object: opened by url_read(), written to by MyHTMLParser.
g_fo=None
# Output gate: set to 1 when MyHTMLParser meets <div id="body">;
# the parser writes text to g_fo only while it is 1.
outf=0 

# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
	"""Copy the text found inside ``<div id="body">`` to the global ``g_fo``.

	The parser shares state with the rest of the module through two
	globals: ``outf`` (1 while text must be copied, 0 otherwise) and
	``g_fo`` (a file-like object with a ``write`` method, opened by the
	caller before ``feed`` is invoked).
	"""

	# Entity name -> text written to the output. Entities that are not
	# listed here are dropped.
	_ENTITY_TEXT = {
		"amp": "&",
		"gt": ">",
		"lt": "<",
		"nbsp": " ",
		"quot": '"',
	}

	def handle_starttag(self, tag, attrs):
		"""Switch output on for ``<div id="body">`` and off for any other div."""
		global outf
		if tag != "div":
			return
		found = False
		for attr in attrs:
			if attr[0] == "id" and attr[1] == "body":
				# Same text the former print statement produced.
				sys.stdout.write(" Found target attr: %s\n" % (attr,))
				found = True
		outf = 1 if found else 0

	def handle_endtag(self, tag):
		"""Switch output off when a div is closed."""
		# The flag lives in the module global, not on the instance: writing
		# to self.outf left the gate open after the target div was closed.
		global outf
		if tag == "div":
			outf = 0

	def handle_entityref(self, name):
		"""Write the plain-text form of a known entity while output is on."""
		if outf == 1 and name in self._ENTITY_TEXT:
			g_fo.write(self._ENTITY_TEXT[name])

	def handle_data(self, data):
		"""Write text content while output is on."""
		if outf == 1:
			g_fo.write(data)
			
# instantiate the parser and feed it some HTML
#parser = MyHTMLParser()
#parser.feed('<html><head><title>Test</title></head>'
#            '<body><h1>Parse me!</h1></body></html>')
            
def url_read (url,outfile):
	"""Download ``url`` and store the result in ``outfile``.

	When ``url`` ends with "html" the page is run through MyHTMLParser,
	which writes the extracted text to the global ``g_fo``; any other
	content is written unchanged. In interactive mode (``g_taiwa`` set)
	the unparsed page is also saved as ``outfile + ".html"``.

	Returns 0 on success, 1 when an IOError occurred (the error is
	reported through dbg()).
	"""
	global g_fo,g_taiwa
	dbg("url_read: url=%s outfile=%s" % (url,outfile))
	is_html=url.endswith("html")
	try:
		#---- the request carries a browser-like User-Agent
		#     (the original author noted that agent info is needed)
		req = urllib2.Request(url, headers={ 'User-Agent': 'Mozilla/5.0' })
		resp = urllib2.urlopen(req)
		try:
			data = resp.read()
		finally:
			# release the connection even when read() fails
			resp.close()

		#---- write raw html before parsing
		if(g_taiwa and is_html):
			with open(outfile+".html","w") as raw:
				raw.write(data)

		#---- extract target entry data
		g_fo=open(outfile,"w")
		try:
			if(is_html):
				parser = MyHTMLParser()
				parser.feed(data)
			else:
				g_fo.write(data)
		finally:
			# close the output even when parsing or writing raises
			g_fo.close()

	except IOError as e:
		dbg("url_read: IOError, e=%r" % (e,))
		return 1
	return 0
	
def dbg(*arg):
	"""Write the concatenation of all arguments to stderr as one line.

	Nothing is printed while the global ``g_debug`` is <= 0. Arguments
	that are not strings are converted to text first, so a call such as
	``dbg("rc=", 1)`` no longer raises TypeError.
	"""
	if(g_debug<=0):
		return
	# "%s" formatting leaves strings unchanged and stringifies the rest.
	msg=''.join("%s" % (a,) for a in arg)
	sys.stderr.write(msg+"\n")

#----------------------------------------------------------------------------------
# main
#----------------------------------------------------------------------------------
if __name__ == '__main__':
	# Command line: <prog> <url> <outfile> [opt]
	# An opt containing "taiwa=1" turns on the interactive mode, in which
	# every step is confirmed with a y/n prompt.
	myprog=sys.argv[0]
	if(len(sys.argv)<3):
		# Fail with a usage hint instead of an IndexError traceback.
		sys.stderr.write("usage: %s <url> <outfile> [taiwa=1]\n" % myprog)
		sys.exit(2)
	url=sys.argv[1]
	outfile=sys.argv[2]
	print("argv count=%r" % len(sys.argv))
	opt=None
	if(len(sys.argv)>3):
		opt=sys.argv[3]
		if("taiwa=1" in opt):
			g_taiwa=1

	print("url=%s, out=%s, opt=%s" % (url,outfile,opt))
	if(g_taiwa):
		sys.stdout.write("download WEB.conf, OK?(y/n)")
		yn = raw_input()
		if(yn != "y"):
			sys.exit(1)

	#---- download contents of file(WEB.conf)
	# rc is the process exit status: 0 = success, 1 = failure. It is no
	# longer reused for filecmp/shutil results; doing so made the
	# "already up to date" case exit with status 1 (sys.exit(True)).
	rc=url_read(url,outfile)

	if(rc==0):
		#---- download completed
		if(g_taiwa):
			print("download completed.")
			print("Now Copy(Overwrite) to PMS's WEB.conf, OK?(y/n)")
			yn = raw_input()
			if(yn != "y"):
				sys.exit(1)

		#---- judge src/dest of file copy
		# url_read() wrote to outfile exactly as given, so an absolute
		# path must not get the "./" prefix.
		if(os.path.isabs(outfile)):
			src=outfile
		else:
			src="./"+outfile
		if("utility" in os.getcwd()):
			dest="../conf/WEB.conf"
		else:
			dest="./conf/WEB.conf"

		try:
			#---- backup dest, if exists
			docopy=True
			if os.path.exists(dest):
				if(filecmp.cmp(src, dest)):  # same
					docopy=False
					print("Downloaded is same as Current: no need to update")
				else:
					# the backup name carries dest's last modification time
					dt = datetime.datetime.fromtimestamp(os.stat(dest).st_mtime)
					dtsfx=dt.strftime('-%Y-%m-%d-%H-%M-%S.bk')
					dbg("dest file(%s) exists: backup to %s" % (dest,dest+dtsfx))
					shutil.move(dest,dest+dtsfx)

			#---- exec file copy
			if(docopy):
				copy_rc=shutil.copy(src,dest)
				print("copy src(%s) to dest(%s), rc=%r" % (src,dest,copy_rc))
		except EnvironmentError as e:
			# Report filesystem errors as a failure status so that the
			# interactive pause below still happens.
			sys.stderr.write("update of %s failed: %r\n" % (dest,e))
			rc=1

	if(g_taiwa):
		print("Hit Eny key to End")
		yn = raw_input()

	sys.exit(rc)
