Python downloading files from url threading - not take
Python downloading files from url threading - remarkable, rather
Multithreading Downloader Class (Python recipe) by Itay Brandes ActiveState Code (http://code.activestate.com/recipes/578220/)
"""Smart Downloading Module. Written by Itay Brandes."""

import os
import shutil
import socket
import string
import sys
import tempfile
import time
import urllib2
import multiprocessing.dummy as multiprocessing
from ctypes import c_int
from random import choice

import dummy
from logger import log

# Counts the bytes already downloaded by all workers of the current
# DownloadFile_Parall call. Workers reach it through `dummy.shared_bytes_var`,
# which _initProcess installs when the pool starts.
shared_bytes_var = multiprocessing.Value(c_int, 0)


def _format_status(done, total):
    """Return the textual 'X MB / Y MB [bar] [pct%]' progress line."""
    # A total of 0 (unknown or empty file) must not raise ZeroDivisionError.
    fraction = float(done) / total if total else 0.0
    return r"%.2f MB / %.2f MB %s [%3.2f%%]" % (
        done / 1024.0 / 1024.0,
        total / 1024.0 / 1024.0,
        progress_bar(fraction),
        fraction * 100.0,
    )


def _write_status(status):
    """Write a status line in place on stdout."""
    # The trailing backspaces move the cursor back to the start of the line so
    # the next status overwrites this one.
    sys.stdout.write(status + chr(8) * len(status))
    sys.stdout.flush()


def _add_shared_bytes(delta):
    """Add `delta` to the shared byte counter, if a pool has installed one."""
    try:
        dummy.shared_bytes_var.value += delta
    except AttributeError:
        # DownloadFile was called directly (no pool initializer ran), so there
        # is no shared counter to maintain.
        pass


def DownloadFile(url, path, startByte=0, endByte=None, ShowProgress=True):
    '''
    Function downloads file.
    @param url: File url address.
    @param path: Destination file path.
    @param startByte: Start byte.
    @param endByte: End byte. Will work only if server supports HTTPRange headers.
    @param ShowProgress: If true, shows textual progress bar.

    @return path: Destination file path.
    '''
    url = url.replace(' ', '%20')
    headers = {}
    if endByte is not None:
        headers['Range'] = 'bytes=%d-%d' % (startByte, endByte)

    while True:
        req = urllib2.Request(url, headers=headers)
        try:
            urlObj = urllib2.urlopen(req, timeout=4)
            break
        except urllib2.HTTPError as e:
            if e.code != 416:
                raise
            # HTTP 416 Error: Requested Range Not Satisfiable. Happens when we ask
            # for a range that is not available on the server. It will happen when
            # the server will try to send us a .html page that means something like
            # "you opened too many connections to our server". If this happens, we
            # will wait for the other threads to finish their connections and try again.
            log.warning("Thread didn't got the file it was expecting. Retrying...")
            time.sleep(5)

    try:
        meta = urlObj.info()
        try:
            filesize = int(meta.getheaders("Content-Length")[0])
        except IndexError:
            log.warning("Server did not send Content-Length.")
            filesize = 0
            ShowProgress = False

        filesize_dl = 0
        block_sz = 8192
        with open(path, 'wb') as f:
            while True:
                try:
                    buff = urlObj.read(block_sz)
                except (socket.timeout, socket.error, urllib2.HTTPError):
                    # Roll back this part's contribution so the shared counter
                    # stays accurate for the remaining workers.
                    _add_shared_bytes(-filesize_dl)
                    raise

                if not buff:
                    break

                filesize_dl += len(buff)
                _add_shared_bytes(len(buff))
                f.write(buff)

                if ShowProgress:
                    _write_status(_format_status(filesize_dl, filesize))

        if ShowProgress:
            sys.stdout.write("\n\n")
    finally:
        urlObj.close()

    return path


def DownloadFile_Parall(url, path=None, processes=6, minChunkFile=1024**2, nonBlocking=False):
    '''
    Function downloads file parally.
    @param url: File url address.
    @param path: Destination file path. If empty, a random file in the
                 system temporary directory is used.
    @param processes: Number of processes to use in the pool.
    @param minChunkFile: Minimum chunk file in bytes.
    @param nonBlocking: If true, returns (mapObj, pool). A list of file parts
                        will be returned from the mapObj results, and the
                        developer must join them himself. Developer also must
                        close and join the pool.

    @return mapObj: Only if nonBlocking is True. A multiprocessing.pool.AsyncResult object.
    @return pool: Only if nonBlocking is True. A multiprocessing.pool object.
    @return path: Only if nonBlocking is False. Destination file path.
    '''
    from HTTPQuery import Is_ServerSupportHTTPRange

    shared_bytes_var.value = 0
    url = url.replace(' ', '%20')

    if not path:
        path = get_rand_filename(tempfile.gettempdir())
    target_dir = os.path.dirname(path)
    # dirname is '' for a bare file name; os.makedirs('') would raise.
    if target_dir and not os.path.exists(target_dir):
        os.makedirs(target_dir)

    log.debug("Downloading to %s..." % path)

    # Probe request: only the headers are needed to learn the file size.
    urlObj = urllib2.urlopen(url)
    try:
        try:
            filesize = int(urlObj.info().getheaders("Content-Length")[0])
        except IndexError:
            log.warning("Server did not send Content-Length.")
            filesize = 0  # unknown size: forces the single-part branch below
    finally:
        urlObj.close()

    if filesize // processes > minChunkFile and Is_ServerSupportHTTPRange(url):
        args = []
        pos = 0
        chunk = filesize // processes
        for i in range(processes):
            startByte = pos
            # Range ends are inclusive; clamp the last part to the final byte.
            endByte = min(pos + chunk, filesize - 1)
            args.append([url, path + ".%.3d" % i, startByte, endByte, False])
            pos += chunk + 1
    else:
        args = [[url, path + ".000", None, None, False]]

    log.debug("Running %d processes..." % processes)
    pool = multiprocessing.Pool(processes, initializer=_initProcess, initargs=(shared_bytes_var,))
    mapObj = pool.map_async(lambda x: DownloadFile(*x), args)
    if nonBlocking:
        return mapObj, pool

    try:
        while not mapObj.ready():
            _write_status(_format_status(shared_bytes_var.value, filesize))
            time.sleep(0.1)
        # Re-raises here if any worker failed.
        file_parts = mapObj.get()
    finally:
        pool.terminate()
        pool.join()

    combine_files(file_parts, path)
    return path


def combine_files(parts, path):
    '''
    Function combines file parts.
    @param parts: List of file paths. Each part is deleted after it is appended.
    @param path: Destination path.
    '''
    with open(path, 'wb') as output:
        for part in parts:
            with open(part, 'rb') as f:
                # Stream in fixed-size blocks instead of loading the whole part.
                shutil.copyfileobj(f, output)
            os.remove(part)


def progress_bar(progress, length=20):
    '''
    Function creates a textual progress bar.
    @param progress: Float number between 0 and 1 describes the progress.
                     Values outside that range are clamped.
    @param length: The length of the progress bar in chars. Default is 20.

    @return: The bar, e.g. "[#########---------]".
    '''
    length -= 2  # The bracket are 2 chars long.
    progress = min(max(progress, 0.0), 1.0)
    filled = int(progress * length)
    return "[" + "#" * filled + "-" * (length - filled) + "]"


def get_rand_filename(dir_=None):
    '''
    Function returns a fresh random filename.
    The file is created empty by tempfile.mkstemp to reserve the name.
    @param dir_: Directory for the file. Defaults to the working directory
                 at call time.

    @return: Path of the new file.
    '''
    if dir_ is None:
        dir_ = os.getcwd()
    fd, filename = tempfile.mkstemp('.tmp', '', dir_)
    # mkstemp returns an open descriptor; close it so it does not leak.
    os.close(fd)
    return filename


def _initProcess(x):
    """Pool initializer: publish the shared byte counter for the workers."""
    dummy.shared_bytes_var = x
-
-