1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
import urllib2
import urllib
import re
import subprocess
import os, sys
import threading
import time
def timed_download(url, filename, timeout):
    """Download ``url`` to ``filename``, allowing wget at most ``timeout`` seconds.

    Args:
        url: Video page address, forwarded to ``download``.
        filename: Destination path, forwarded to ``download``.
        timeout: Whole number of seconds to wait before the transfer is
            terminated.

    Returns:
        ``(1, '')`` when ``download`` could not start a transfer,
        ``(0, '(duplicates)')`` when ``download`` reported that the file is
        already present, otherwise ``(0, title)`` -- both when the transfer
        finished on its own and when it was cut off at the time limit.
    """
    (p, title) = download(url, filename)
    if p is None:
        return (1, '')
    # ``download`` uses the integer 1 as its "file already exists" marker.
    if p == 1:
        return (0, '(duplicates)')

    # Check once per second whether wget is still running.
    for _ in range(timeout):
        if p.poll() is not None:
            break
        time.sleep(1)

    if p.poll() is None:
        print('terminated.')
        p.terminate()
        # Reap the child so it does not linger as a zombie process.
        p.wait()
    else:
        print('finished.')
    return (0, title)
def _decompress_body(body, content_encoding):
    """Undo the gzip/deflate coding named by a Content-Encoding header value."""
    import zlib  # local import: the module header does not import zlib

    encoding = (content_encoding or '').strip().lower()
    if encoding in ('gzip', 'x-gzip'):
        return zlib.decompress(body, 16 + zlib.MAX_WBITS)
    if encoding == 'deflate':
        try:
            return zlib.decompress(body)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            return zlib.decompress(body, -zlib.MAX_WBITS)
    return body


def _first_media_link(html):
    """Return ``(title, url)`` for the first entry found in ``html``, or None."""
    # NOTE(review): both regex literals were destroyed by HTML tag stripping in
    # the copy of this file that was reviewed (the first one entirely, the
    # second one lost its leading tag).  They are reconstructed here on the
    # assumption that flvcd.com returns a hidden "inf" form field whose value
    # holds alternating "<N>title" / "<U>url" lines -- confirm against the
    # upstream script or a live response.
    form_match = re.search(
        r'<input\s+type="hidden"\s+name="inf"\s+value="([^"]+)', html)
    if form_match is None:
        return None
    fields = re.findall(r'<[NU]>(.+)', form_match.group(1))
    # A usable entry needs both a title and a URL.
    if len(fields) < 2:
        return None
    return (fields[0], fields[1])


def download(url, filename):
    """Resolve a video page through flvcd.com and start fetching it with wget.

    The page address is submitted to flvcd.com's parse endpoint; the first
    (title, media URL) pair extracted from the response is handed to a
    background ``wget`` process that writes to ``filename``.

    Args:
        url: Address of the video page to resolve.
        filename: Destination path for the downloaded media.

    Returns:
        ``(1, 'exist')`` when ``filename`` already exists (nothing is started),
        ``(None, '')`` when no download link could be extracted, otherwise
        ``(process, title)`` where ``process`` is the running
        ``subprocess.Popen`` object for wget.
    """
    if os.path.exists(filename):
        return (1, 'exist')

    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebkit/535.1 (KHTML, like Gecko) Chrome/14.0.825.0 Safari/535.1'
    parse_url = ('http://www.flvcd.com/parse.php?flag=&format=&kw='
                 + urllib.quote(url))

    req = urllib2.Request(parse_url)
    req.add_header('host', 'www.flvcd.com')
    req.add_header('Referer', parse_url[:-4])
    req.add_header('User-Agent', user_agent)
    req.add_header('Accept-Language', 'en-us,en;q=0.5')
    req.add_header('Accept-Encoding', 'gzip, deflate')
    req.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
    req.add_header('Keep-Alive', '115')

    res = urllib2.urlopen(req)
    try:
        body = res.read()
        content_encoding = res.info().get('Content-Encoding', '')
    finally:
        # Always release the HTTP connection, even if reading fails.
        res.close()

    # The request advertises gzip/deflate, so the reply may be compressed.
    html = _decompress_body(body, content_encoding)

    link = _first_media_link(html)
    if link is None:
        return (None, '')
    (title, media_url) = link

    ret = subprocess.Popen(["wget", "-T", "60", "-O", filename,
                            "-U", user_agent, media_url])
    return (ret, title)
if __name__ == '__main__':
    # Command line: <video page url> <output file> <timeout in seconds>
    if len(sys.argv) < 4:
        sys.stderr.write('usage: %s URL FILENAME TIMEOUT_SECONDS\n' % sys.argv[0])
        sys.exit(2)
    timed_download(sys.argv[1], sys.argv[2], int(sys.argv[3]))