Tutorial: Multiple Threads Downloader Source Code

August 18, 2017 | Views: 3095

Begin Learning Cyber Security for FREE Now!

FREE REGISTRATION | Already a Member? Login Here

This time I will present source code, written in Python, that downloads a file using multiple threads. This source code is for educational purposes, and I hope it is helpful to you. If you find any bugs in this code or have questions, leave a message in the comments section.

`#!/usr/bin/env python
#coding=utf-8

import os
import sys
import time
import urllib
import urllib2
import threading

#############################################################################
#
# self-defined exception classes
#
#############################################################################
class ConnectionError(Exception):
    """Signals a connection-level failure.

    NOTE: on Python 3 this name shadows the builtin ``ConnectionError``
    inside this module.
    """


class URLUnreachable(Exception):
    """Signals that the target URL could not be reached or sized."""


class CanotDownload(Exception):
    """Signals that a download could not be carried out.

    The spelling of the name is kept as-is so existing references keep working.
    """

#############################################################################
#
# multiple threads download module starts here
#
#############################################################################
class HttpGetThread(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a part file.

    The part file is opened in append mode: bytes already present on disk are
    counted as downloaded and the request resumes right after them.

    Attributes read by callers:
        downloaded   -- number of bytes of this range stored in ``filename``
        totalLength  -- number of bytes in the assigned range
        percent      -- ``downloaded`` as a percentage of ``totalLength``
        headerrange  -- (first, last) byte offsets used for the latest request
        time         -- whole seconds since the current connection started
        error        -- the exception that left the range incomplete, else None

    Tunables (override on the class or on an instance before ``start()``):
        bufferSize   -- bytes requested per ``read`` call
        maxRetries   -- maximum number of connection attempts in ``run``
        retryDelay   -- seconds slept after a failed attempt

    NOTE(review): a server that ignores the ``Range`` header would send the
    whole body; that situation is not detected here.
    """

    bufferSize = 8192
    maxRetries = 10
    retryDelay = 1

    def __init__(self, name, url, filename, range=0):
        """Prepare the worker; no network access happens here.

        Args:
            name: label for the worker; ``str(name)`` becomes the thread name.
            url: address to download from.
            filename: path of the part file this worker appends to.
            range: inclusive ``(first_byte, last_byte)`` pair. The default of
                ``0`` is kept only for signature compatibility and is rejected.

        Raises:
            TypeError: if ``range`` is not an indexable pair.
        """
        threading.Thread.__init__(self, name=str(name))
        try:
            first, last = range[0], range[1]
        except TypeError:
            raise TypeError(
                "range must be a (first_byte, last_byte) pair, got %r" % (range,))
        self.url = url
        self.filename = filename
        self.range = range
        self.totalLength = last - first + 1
        self.time = 0
        self.error = None
        self._sync_with_disk()
        self.headerrange = (first + self.downloaded, last)

    def _sync_with_disk(self):
        """Set ``downloaded``/``percent`` from the current size of the part file."""
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # No part file yet: nothing has been downloaded.
            self.downloaded = 0
        self._update_percent()

    def _update_percent(self):
        """Recompute ``percent``; an empty range counts as fully done."""
        if self.totalLength > 0:
            self.percent = self.downloaded / float(self.totalLength) * 100
        else:
            self.percent = 100.0

    def _fetch_remaining(self, urlrequest):
        """Open one connection for the missing bytes and append them to the file.

        ``urlrequest`` is the module providing ``Request`` and ``urlopen``.
        Any exception raised by the request or the file write propagates.
        """
        self.headerrange = (self.range[0] + self.downloaded, self.range[1])
        request = urlrequest.Request(self.url)
        request.add_header('Range', 'bytes=%d-%d' % self.headerrange)
        conn = urlrequest.urlopen(request)
        try:
            started = time.time()
            with open(self.filename, 'ab') as part:
                while True:
                    data = conn.read(self.bufferSize)
                    if not data:
                        break
                    part.write(data)
                    self.downloaded += len(data)
                    self.time = int(time.time() - started)
                    self._update_percent()
        finally:
            conn.close()

    def run(self):
        """Download the missing part of the range, retrying on failure.

        Returns immediately when the part file already holds the whole range.
        Otherwise up to ``maxRetries`` connection attempts are made, each one
        resuming after the bytes already written. If the range is still
        incomplete afterwards, the reason is stored in ``self.error``.
        """
        # urllib2 only exists on Python 2; Python 3 offers the same API
        # under urllib.request.
        try:
            import urllib2 as urlrequest
        except ImportError:
            import urllib.request as urlrequest

        self._sync_with_disk()
        self.error = None
        failure = None
        for _ in range(self.maxRetries):
            if self.downloaded >= self.totalLength:
                break
            try:
                self._fetch_remaining(urlrequest)
                failure = None
            except Exception as err:
                # Thread boundary: remember the failure and try again later.
                failure = err
                time.sleep(self.retryDelay)

        if self.downloaded < self.totalLength:
            if failure is None:
                failure = IOError(
                    "download of %s ended with %d of %d bytes"
                    % (self.filename, self.downloaded, self.totalLength))
            self.error = failure

def Split(size, blocks):
    """Divide ``size`` bytes into ``blocks`` consecutive inclusive byte ranges.

    Every range except the last covers ``size // blocks`` bytes; the last one
    additionally absorbs the remainder and always ends at ``size - 1``.

    Args:
        size: total number of bytes.
        blocks: number of ranges to produce.

    Returns:
        A list of ``blocks`` tuples ``(first_byte, last_byte)``. When ``size``
        is smaller than ``blocks`` the leading ranges are empty
        (``first_byte > last_byte``).

    Raises:
        ValueError: if ``blocks`` is smaller than 1.
    """
    if blocks < 1:
        raise ValueError("blocks must be at least 1, got %r" % (blocks,))
    # Floor division keeps the offsets integral on Python 3 as well.
    blocksize = size // blocks
    ranges = [(i * blocksize, (i + 1) * blocksize - 1) for i in range(blocks - 1)]
    ranges.append((blocksize * (blocks - 1), size - 1))
    return ranges

def GetHttpFileSize(url):
    """Return the size in bytes that the server reports for ``url``.

    The size is taken from the ``Content-Length`` response header.

    Args:
        url: address to query.

    Returns:
        The reported size as an int, or 0 when the URL cannot be opened, the
        server answers with an HTTP error, or no usable ``Content-Length`` is
        present. Callers treat 0 as "unreachable".
    """
    # urllib2 only exists on Python 2; Python 3 offers the same API under
    # urllib.request. Unlike the legacy urllib.urlopen, this urlopen raises on
    # HTTP error statuses, so the length of an error page is never reported.
    try:
        import urllib2 as urlrequest
    except ImportError:
        import urllib.request as urlrequest

    try:
        conn = urlrequest.urlopen(url)
        try:
            reported = conn.info().get('Content-Length')
        finally:
            conn.close()
        if reported is None:
            return 0
        return int(reported.strip())
    except Exception:
        # Deliberate best effort: every failure is reported as size 0.
        return 0

def hasLive(ts):
    """Return True if at least one thread in ``ts`` is still running.

    Args:
        ts: iterable of ``threading.Thread`` objects.

    Returns:
        True as soon as a running thread is found, False otherwise (including
        for an empty iterable).
    """
    # is_alive() is available from Python 2.6 on; the camelCase isAlive()
    # alias was removed in Python 3.9.
    return any(t.is_alive() for t in ts)

def _remove_part_files(paths):
    """Delete every path, ignoring files that are missing or cannot be removed."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def _merge_part_files(paths, destination):
    """Concatenate the files in ``paths``, in order, into ``destination``."""
    import shutil

    with open(destination, 'wb') as merged:
        for path in paths:
            with open(path, 'rb') as part:
                # Streams in chunks instead of loading a whole part in memory.
                shutil.copyfileobj(part, merged)


def MyHttpGet(url, output=None, connections=4):
    """Download ``url`` with several parallel range requests and merge the parts.

    Each worker writes its byte range to ``<output>_<index>``; once all workers
    have finished, the parts are concatenated into ``output`` and deleted.
    Progress is written to standard output while the workers run.

    arguments:
    url, address of the file (the original author notes: in GBK encoding)
    output, path of the merged file, used as given without conversion;
            defaults to the last path segment of ``url``
    connections, integer, number of parallel workers (at least 1)

    Raises:
        ValueError: if ``connections`` is smaller than 1, or if no output name
            was given and none can be derived from ``url``.
        URLUnreachable: if the remote size is reported as 0.
        CanotDownload: if a worker stopped before its range was complete; the
            part files are kept so that a later call can resume.
        SystemExit: with status 1 after Ctrl-C (part files are removed first).
    """
    if connections < 1:
        raise ValueError("connections must be at least 1, got %r" % (connections,))

    length = GetHttpFileSize(url)
    sys.stdout.write('%s\n' % length)
    if length == 0:
        raise URLUnreachable(url)
    mb = length / 1024.0 / 1024.0

    filename = output or url.split('/')[-1]
    if not filename:
        raise ValueError("cannot derive an output filename from %r; pass output" % (url,))

    # Never use more workers than bytes, otherwise some ranges would be empty.
    blocks = min(connections, length)
    ranges = Split(length, blocks)
    names = ["%s_%d" % (filename, i) for i in range(blocks)]

    workers = []
    for index, (partName, byteRange) in enumerate(zip(names, ranges)):
        worker = HttpGetThread(index, url, partName, byteRange)
        # Daemon threads do not keep the process alive after sys.exit below.
        worker.daemon = True
        worker.start()
        workers.append(worker)

    startSize = sum(w.downloaded for w in workers)
    startTime = time.time()
    try:
        while True:
            # Sample liveness first so the last line printed shows final totals.
            live = hasLive(workers)
            done = sum(w.downloaded for w in workers)
            elapsed = time.time() - startTime
            if elapsed > 0:
                rate = (done - startSize) / float(elapsed) / 1024
            else:
                rate = 0.0
            # The leading carriage return redraws the progress line in place.
            progressStr = u'\rFilesize: %d(%.2fM) Downloaded: %.2f%% Avg rate: %.1fKB/s' % (
                length, mb, done / float(length) * 100, rate)
            sys.stdout.write(progressStr)
            sys.stdout.flush()
            if not live:
                break
            time.sleep(0.2)
    except KeyboardInterrupt:
        sys.stdout.write('\n')
        sys.stdout.write('Exit...\n')
        _remove_part_files(names)
        sys.exit(1)

    sys.stdout.write('\n')

    # Merging a short part would silently produce a corrupt file.
    incomplete = [w.filename for w in workers if w.downloaded < w.totalLength]
    if incomplete:
        raise CanotDownload(
            "incomplete part files (kept for resuming): %s" % ', '.join(incomplete))

    _merge_part_files(names, filename)
    _remove_part_files(names)

if __name__ == '__main__':
# Example invocation: downloads the URL below into 'my_download.file' using 4 connections. Edit the URL and the output name to fetch a different file.
MyHttpGet('http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg','my_download.file',4)`

Remember to leave a message in the comments if you have any questions or opinions. I will be back next time with more sample code.

Share with Friends
Facebook | Twitter | LinkedIn | Email
Use Cybytes and
Tip the Author!
Join
Share with Friends
Facebook | Twitter | LinkedIn | Email
Ready to share your knowledge and expertise?
2 Comments
  1. For another option, see hxxp://www.leeroy.me/download-a-file-using-multiple-threads-in-node-js/

Comment on This

You must be logged in to post a comment.

Our Revolution

We believe Cyber Security training should be free, for everyone, FOREVER. Everyone, everywhere, deserves the OPPORTUNITY to learn, begin and grow a career in this fascinating field. Therefore, Cybrary is a free community where people, companies and training come together to give everyone the ability to collaborate in an open source way that is revolutionizing the cyber security educational experience.

Cybrary On The Go

Get the Cybrary app for Android for online and offline viewing of our lessons.

Get it on Google Play
 

Support Cybrary

Donate Here to Get This Month's Donor Badge

 
Skip to toolbar

We recommend always using caution when following any link

Are you sure you want to continue?

Continue
Cancel