# First of all, let's get the file list:
import os
# Kept only so that the module-level name stays defined for existing
# importers; getFileList no longer appends to it.
filelist = []


def getFileList(root_folder):
    '''
    Returns the list of files found under the specified folder.

    The folder is walked recursively with os.walk, and every entry that
    os.path.isfile confirms as a regular file is included as a full path
    (root joined with the file name).

    root_folder -- path of the directory to scan.

    Returns a new list on every call. A folder that yields nothing from
    os.walk (for example one that does not exist) gives an empty list.
    '''
    # A fresh local list per call: accumulating into the shared module-level
    # list made every repeated call return the earlier calls' paths again.
    found = []
    for root, dirs, files in os.walk(root_folder):
        for filename in files:
            filepath = os.path.join(root, filename)
            # os.walk can list names that are not regular files any more
            # (e.g. broken links), so each path is re-checked.
            if os.path.isfile(filepath):
                found.append(filepath)
    return found
# And a function to return the first line from a file:
def readFirstLine(filename):
    '''
    Returns as text the first line from file.

    filename -- path of the file, opened in text mode ("r").

    The line is returned as read, including its trailing newline when the
    file has one. An empty file yields an empty string. Errors raised by
    open() or by reading are not caught here.
    '''
    # "with" closes the file even if reading raises; readline() stops after
    # the first line instead of loading the whole file into memory.
    with open(filename, "r") as f:
        return f.readline()
# The following function works through a list of files and puts the result into a Queue.
def fileListProcessing(files, q):
    '''
    Reads the first line of each listed file and puts the collected lines
    on the queue as a single list, so the parent can gather the results of
    several processes from one place.

    files -- iterable of file paths, each passed to readFirstLine.
    q     -- object with a put() method that receives the result.

    If anything goes wrong while reading, an empty list is put on the queue
    instead and the original exception is re-raised.
    '''
    try:
        first_lines = [readFirstLine(path) for path in files]
    except:
        # Still put something on the queue before re-raising, so that a
        # put() happens on the failure path as well as on success.
        q.put([])
        raise
    else:
        q.put(first_lines)
# And here is the actual multiprocessing:
from multiprocessing import Queue, Process, cpu_count
def myMultiprocessing(folder, log_path="logfile.log"):
    '''
    Reads the first line of every file under a folder in parallel and
    writes the collected lines to a log file.

    The file list from getFileList is dealt round-robin into at most
    cpu_count() sublists. Each non-empty sublist is handed to its own
    Process running fileListProcessing, which reports back through a
    shared Queue.

    folder   -- root folder passed to getFileList.
    log_path -- file that receives the result list; defaults to
                "logfile.log", the path that was previously hard-coded.

    Returns None. The order of the lines in the log depends on the order
    in which the worker results are taken off the queue.
    '''
    files = getFileList(folder)
    q = Queue()
    # Queried once so every sublist is cut with the same stride.
    workers = cpu_count()
    procs = []
    for i in range(workers):
        # Every workers-th file starting at offset i: the same round-robin
        # split as testing "j % cpu_count() == i" for each index j.
        chunk = files[i::workers]
        if chunk:
            p = Process(target=fileListProcessing, args=(chunk, q))
            p.start()
            procs.append(p)
    # Collect the results: one get() per started process.
    all_results = []
    for _ in procs:
        all_results += q.get()
    # Join only after the queue has been drained; the multiprocessing docs
    # warn that joining a process which still has queued items can block.
    for p in procs:
        p.join()
    # Output results into the file: the list's text form followed by a
    # newline, written in a way that works on both Python 2 and Python 3.
    with open(log_path, "w") as log:
        log.write(str(all_results) + "\n")
if __name__ == "__main__":
    # Runs only when this file is executed as a script, not when it is
    # imported as a module.
    target_folder = "d:\\someFolder"
    myMultiprocessing(target_folder)
# This appears to be an example of multiprocessing in Python.