zlike

personal, not technical

视频下载脚本

| Comments

作为这儿的第一篇、也应该是最后一篇技术文章,先说明一下:下面这个调用优酷 API 的脚本已经失效了⋯⋯

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import urllib2
import sys
import time, md5
import xml.dom.minidom
import subprocess

def get_linked_video(url):
    """Resolve a video through the kuapi.youku.com REST API and fetch it with wget.

    Python 2 code: it relies on the file-level ``urllib2`` and ``md5`` imports.

    Steps:
      1. ``video.getvideoid`` turns ``url`` into a video id.
      2. ``video.getvideofile`` returns the title and the stream/segment list.
      3. ``wget`` saves the selected segment as
         ``<title>_part<segment id>.flv`` in the current directory.

    Both raw API responses are printed to stdout.

    Args:
        url: the video URL handed to the API; appended to the query string
            unescaped, exactly as given.

    Raises:
        ValueError: if the second response contains no segment inside a
            stream of type "flv" or "flvhd".
    """
    # Sample the clock once so the timestamp hashed into `auth` is the very
    # same value sent as `ctime`; two time.time() calls can straddle a second.
    ctime = str(int(time.time()))
    auth = md5.md5(ctime + ' XOA== MWZlNWE4Y2Q4OWQ0NjEyMWJjZTJmMWNiYTVhNzQwZGM=').hexdigest()

    api_url1 = ("http://kuapi.youku.com/api_rest?method=video.getvideoid&pid=XOA==&ctime="
                + ctime + "&auth=" + auth + "&url=" + url)

    f = urllib2.urlopen(api_url1)
    content = f.read(30000).decode("utf-8")

    print(content)

    # Hand the parser UTF-8 bytes (as the second request already did): a
    # unicode object with non-ASCII characters makes parseString fail on
    # Python 2.
    data = xml.dom.minidom.parseString(content.encode("utf-8"))

    videoId = ""
    for r in data.getElementsByTagName("response"):
        for e in r.childNodes:
            if e.nodeName == "videoId":
                videoId = e.childNodes[0].data
                break

    api_url2 = ("http://kuapi.youku.com/api_rest?method=video.getvideofile&pid=XOA==&ctime="
                + ctime + "&auth=" + auth + "&videoid=" + videoId)

    f = urllib2.urlopen(api_url2)
    content = f.read(30000).decode("utf-8")

    print(content)
    data = xml.dom.minidom.parseString(content.encode("utf-8"))
    response = data.getElementsByTagName(r"response")[0]

    title = ""
    download_url = None
    seg_index = None
    for r in response.childNodes:
        if r.nodeName == "title":
            if r.childNodes:
                title = r.childNodes[0].data
        # Sibling of <title>, not a child of it: the original nested this test
        # under the "title" branch, where it could never be true.
        elif r.nodeName == "streams":
            for s in r.childNodes:
                if s.nodeName != "stream":
                    continue
                if s.getAttribute("type") not in ("flv", "flvhd"):
                    continue
                for seg in s.childNodes:
                    if seg.nodeName == "seg":
                        # NOTE(review): only the last matching segment is
                        # kept, as in the original. The "_part<id>" filename
                        # suggests one download per segment may have been
                        # intended -- confirm before changing.
                        download_url = seg.getAttribute("url")
                        seg_index = seg.getAttribute("id")

    if download_url is None:
        raise ValueError("no flv/flvhd segment found in the API response")

    filename = title + "_part" + seg_index + ".flv"
    subprocess.call(["wget", "-T", "60", "-O", filename, "-U", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebkit/535.1 (KHTML, like Gecko) Chrome/14.0.825.0 Safari/535.1", download_url])

if __name__ == "__main__":
    # Script entry point: the single required argument is the video URL.
    # (The stray "</pre>" fused onto the call was HTML residue and a syntax
    # error.)
    if len(sys.argv) < 2:
        sys.exit("usage: %s <video-url>" % sys.argv[0])
    get_linked_video(sys.argv[1])

所以,如果有人用得到的话,下面这个借助 flvcd.com 解析的脚本应该是可以用的,而且万能(向伟大的 flvcd.com 致敬!)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/python
import urllib2
import urllib
import re
import subprocess
import os, sys
import threading
import time

def timed_download(url, filename, timeout):
    """Run download() and allow the spawned process at most `timeout` seconds.

    Args:
        url: passed through to download().
        filename: passed through to download().
        timeout: number of one-second polls before the process is terminated.

    Returns:
        (1, '') when download() returned no process,
        (0, '(duplicates)') when download() reported that `filename` exists,
        (0, title) otherwise -- including when the process was terminated
        because it outlived `timeout`.
    """
    (p, title) = download(url, filename)
    if p is None:
        return (1, '')
    if p == 1:
        return (0, '(duplicates)')

    # Poll once per second; leave early as soon as the process has exited.
    for _ in range(timeout):
        if p.poll() is not None:
            break
        time.sleep(1)

    if p.poll() is None:
        print('terminated.')
        p.terminate()
        # Reap the child so it does not linger as a zombie.
        p.wait()
    else:
        print('finished.')

    return (0, title)

# NOTE(review): both pattern literals were damaged when this listing was
# extracted from HTML (tag-like text was stripped, leaving an unterminated
# string in the first one). They are reconstructed from how the matches are
# consumed below -- one captured blob holding alternating title/URL entries --
# and must be confirmed against the markup flvcd.com actually returns.
_FLVCD_INF_FIELD = re.compile(r'<input\s+type="hidden"\s+name="inf"\s+value="([^"]+)')
_FLVCD_INF_ENTRY = re.compile(r'<[NU]>(.+)')


def _read_flvcd_body(res):
    """Return the response body, undoing gzip/deflate if the server applied it."""
    import zlib

    body = res.read()
    encoding = (res.info().get('Content-Encoding', '') or '').lower()
    if encoding == 'gzip':
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        body = zlib.decompress(body, 16 + zlib.MAX_WBITS)
    elif encoding == 'deflate':
        try:
            body = zlib.decompress(body)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            body = zlib.decompress(body, -zlib.MAX_WBITS)
    return body


def download(url, filename):
    """Resolve `url` through flvcd.com and start wget on the first result.

    Python 2 code: it relies on the file-level ``urllib2`` import and on
    ``urllib.quote``.

    Args:
        url: address of the video to resolve; URL-quoted into the query.
        filename: output path handed to ``wget -O``.

    Returns:
        (1, 'exist') if `filename` already exists (nothing is fetched),
        (None, '') if no title/URL pair could be extracted from the page,
        (process, title) otherwise, where `process` is the running
        ``subprocess.Popen`` for wget; the caller is responsible for
        waiting on it.
    """
    if os.path.exists(filename):
        return (1, 'exist')

    # Plain '&' separators: the '&amp;' in the published listing was HTML
    # escaping, which would have sent parameters named "amp;format"/"amp;kw".
    parse_url = 'http://www.flvcd.com/parse.php?flag=&format=&kw=' + urllib.quote(url)
    req = urllib2.Request(parse_url)
    req.add_header('host', 'www.flvcd.com')
    req.add_header('Referer', parse_url[:-4])
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebkit/535.1 (KHTML, like Gecko) Chrome/14.0.825.0 Safari/535.1')
    req.add_header('Accept-Language', 'en-us,en;q=0.5')
    req.add_header('Accept-Encoding', 'gzip, deflate')
    req.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
    req.add_header('Keep-Alive', '115')

    res = urllib2.urlopen(req)
    try:
        # The request advertises gzip/deflate, so the body may arrive
        # compressed; urllib2 does not decode it on its own.
        html = _read_flvcd_body(res)
    finally:
        res.close()

    firstmatch = _FLVCD_INF_FIELD.search(html)
    if firstmatch is None:
        return (None, '')

    # Entries alternate title, URL, title, URL, ...; only the first pair is used.
    result = _FLVCD_INF_ENTRY.findall(firstmatch.group(1))
    if len(result) < 2:
        return (None, '')
    title, media_url = result[0], result[1]

    ret = subprocess.Popen(["wget", "-T", "60", "-O", filename, "-U", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebkit/535.1 (KHTML, like Gecko) Chrome/14.0.825.0 Safari/535.1", media_url])
    return (ret, title)

if __name__ == '__main__':
    # Script entry point. Arguments: video URL, output filename, timeout in
    # seconds. (The stray "</pre>" fused onto the call was HTML residue and a
    # syntax error.)
    if len(sys.argv) < 4:
        sys.exit('usage: %s <video-url> <output-file> <timeout-seconds>' % sys.argv[0])
    timed_download(sys.argv[1], sys.argv[2], int(sys.argv[3]))