Module threadpool
[frames] | no frames]

Source Code for Module threadpool

  1  # -*- coding: UTF-8 -*- 
  2  """Easy to use object-oriented thread pool framework. 
  3   
  4  A thread pool is an object that maintains a pool of worker threads to perform 
  5  time consuming operations in parallel. It assigns jobs to the threads 
  6  by putting them in a work request queue, where they are picked up by the 
  7  next available thread. This then performs the requested operation in the 
  8  background and puts the results in another queue. 
  9   
 10  The thread pool object can then collect the results from all threads from 
 11  this queue as soon as they become available or after all threads have 
 12  finished their work. It's also possible to define callbacks to handle
 13  each result as it comes in. 
 14   
 15  The basic concept and some code was taken from the book "Python in a Nutshell, 
 16  2nd edition" by Alex Martelli, O'Reilly 2006, ISBN 0-596-10046-9, from section 
 17  14.5 "Threaded Program Architecture". I wrapped the main program logic in the 
 18  ThreadPool class, added the WorkRequest class and the callback system and 
 19  tweaked the code here and there. Kudos also to Florent Aide for the exception 
 20  handling mechanism. 
 21   
 22  Basic usage:: 
 23   
 24      >>> pool = ThreadPool(poolsize) 
 25      >>> requests = makeRequests(some_callable, list_of_args, callback) 
 26      >>> [pool.putRequest(req) for req in requests] 
 27      >>> pool.wait() 
 28   
 29  See the end of the module code for a brief, annotated usage example. 
 30   
 31  Website : http://chrisarndt.de/projects/threadpool/ 
 32   
 33  """ 
 34  __docformat__ = "restructuredtext en" 
 35   
 36  __all__ = [ 
 37      'makeRequests', 
 38      'NoResultsPending', 
 39      'NoWorkersAvailable', 
 40      'ThreadPool', 
 41      'WorkRequest', 
 42      'WorkerThread' 
 43  ] 
 44   
 45  __author__ = "Christopher Arndt" 
 46  __version__ = '1.3.2' 
 47  __license__ = "MIT license" 
 48   
 49   
 50  # standard library modules 
 51  import sys 
 52  import threading 
 53  import traceback 
 54   
 55  try: 
 56      import Queue            # Python 2 
 57  except ImportError: 
 58      import queue as Queue   # Python 3 
 59   
 60   
 61  # exceptions 
class NoResultsPending(Exception):
    """Signal that no submitted work request is still awaiting a result."""
65
class NoWorkersAvailable(Exception):
    """Signal that requests remain but no worker thread is left to run them."""
# internal module helper functions
def _handle_thread_exception(request, exc_info):
    """Print the traceback of an exception raised by a work request.

    This is the default ``exc_callback``. ``request`` is accepted only to
    match the callback signature and is not used; ``exc_info`` is unpacked
    into the arguments of ``traceback.print_exception``.

    """
    traceback.print_exception(*exc_info)
# utility functions
def makeRequests(callable_, args_list, callback=None,
                 exc_callback=_handle_thread_exception):
    """Build one ``WorkRequest`` per entry of ``args_list`` for ``callable_``.

    Each entry of ``args_list`` describes the arguments of one invocation:

    * a tuple: its first item is used as the positional arguments and its
      second item as the keyword-argument dictionary;
    * anything else: the entry itself is passed as the single positional
      argument, with no keyword arguments.

    ``callback`` and ``exc_callback`` are handed unchanged to every
    ``WorkRequest``; see its docstring for their meaning.

    Returns the list of created requests, in the order of ``args_list``.

    """
    work_requests = []
    for entry in args_list:
        # Decide how to call the target first, then build the request once.
        if isinstance(entry, tuple):
            positional, keywords = entry[0], entry[1]
        else:
            positional, keywords = [entry], None
        work_requests.append(
            WorkRequest(callable_, positional, keywords, callback=callback,
                        exc_callback=exc_callback)
        )
    return work_requests
# classes
class WorkerThread(threading.Thread):
    """Background thread connected to the requests/results queues.

    A worker thread sits in the background and picks up work requests from
    one queue and puts the results in another until it is dismissed.

    """

    def __init__(self, requests_queue, results_queue, poll_timeout=5, **kwds):
        """Set up thread in daemonic mode and start it immediately.

        ``requests_queue`` and ``results_queue`` are instances of
        ``Queue.Queue`` passed by the ``ThreadPool`` class when it creates a
        new worker thread.

        ``poll_timeout`` is the number of seconds the thread blocks on the
        requests queue before it re-checks whether it has been dismissed.

        Any additional keyword arguments are passed on to
        ``threading.Thread.__init__``.

        """
        threading.Thread.__init__(self, **kwds)
        # ``daemon`` attribute instead of the deprecated ``setDaemon()``.
        self.daemon = True
        self._requests_queue = requests_queue
        self._results_queue = results_queue
        self._poll_timeout = poll_timeout
        self._dismissed = threading.Event()
        self.start()

    def run(self):
        """Repeatedly process the job queue until told to exit."""
        while not self._dismissed.is_set():
            # Get the next work request. If none arrives within
            # self._poll_timeout seconds, go back to the top of the loop so
            # the thread gets a chance to notice that it was dismissed.
            try:
                request = self._requests_queue.get(True, self._poll_timeout)
            except Queue.Empty:
                continue
            if self._dismissed.is_set():
                # Dismissed while waiting: hand the request back so another
                # worker can pick it up, then exit.
                self._requests_queue.put(request)
                break
            try:
                result = request.callable(*request.args, **request.kwds)
                self._results_queue.put((request, result))
            except:
                # Deliberate catch-all: whatever the callable raises, the
                # request is flagged and an entry for it is still put on the
                # results queue, carrying the ``sys.exc_info()`` tuple.
                request.exception = True
                self._results_queue.put((request, sys.exc_info()))

    def dismiss(self):
        """Set a flag to tell the thread to exit when done with current job.
        """
        self._dismissed.set()
168 169
class WorkRequest:
    """A single deferred call, ready to be placed on the request queue.

    To create many requests for one callable with varying arguments, see the
    module function ``makeRequests``.

    """

    def __init__(self, callable_, args=None, kwds=None, requestID=None,
                 callback=None, exc_callback=_handle_thread_exception):
        """Describe a call to ``callable_`` and attach result callbacks.

        ``args`` is the list of positional arguments and ``kwds`` the
        dictionary of keyword arguments for the call; a false value for
        either is replaced by a new empty list / dictionary.

        ``callback``, if given, is called when the result of the request is
        picked up from the result queue. It must accept two positional
        arguments: the ``WorkRequest`` object and the result of the callable,
        in that order. Additional information for the callback can simply be
        attached to the request object.

        ``exc_callback`` is called instead when the callable raised an
        exception. It also takes two positional arguments: the
        ``WorkRequest`` and the tuple returned by ``sys.exc_info()``. The
        default handler prints the exception via
        ``traceback.print_exception``; pass ``None`` to have no exception
        handler.

        ``requestID``, if given, must be hashable: the value stored in
        ``self.requestID`` is ``hash(requestID)``, which ``ThreadPool`` uses
        as a dictionary key. Without it, ``id(self)`` is used. A ``TypeError``
        is raised for an unhashable ``requestID``.

        """
        if requestID is not None:
            try:
                self.requestID = hash(requestID)
            except TypeError:
                raise TypeError("requestID must be hashable.")
        else:
            self.requestID = id(self)
        self.callable = callable_
        self.args = args if args else []
        self.kwds = kwds if kwds else {}
        self.callback = callback
        self.exc_callback = exc_callback
        # Set to True by the worker thread when the callable raises.
        self.exception = False

    def __str__(self):
        """Return a short description: id, arguments and exception flag."""
        details = (self.requestID, self.args, self.kwds, self.exception)
        return "<WorkRequest id=%s args=%r kwargs=%r exception=%s>" % details
223
class ThreadPool:
    """A thread pool, distributing work requests and collecting results.

    See the module docstring for more information.

    Public attributes:

    * ``workers``: list of the currently active worker threads.
    * ``dismissedWorkers``: dismissed worker threads not yet joined.
    * ``workRequests``: dictionary mapping ``requestID`` to every request
      that was submitted and whose result has not been processed yet.

    """

    def __init__(self, num_workers, q_size=0, resq_size=0, poll_timeout=5):
        """Set up the thread pool and start num_workers worker threads.

        ``num_workers`` is the number of worker threads to start initially.

        If ``q_size > 0`` the size of the work *request queue* is limited and
        the thread pool blocks when the queue is full and it tries to put
        more work requests in it (see ``putRequest`` method), unless you also
        use a positive ``timeout`` value for ``putRequest``.

        If ``resq_size > 0`` the size of the *results queue* is limited and the
        worker threads will block when the queue is full and they try to put
        new results in it.

        ``poll_timeout`` is passed on to ``createWorkers``.

        .. warning::
            If you set both ``q_size`` and ``resq_size`` to ``!= 0`` there is
            the possibility of a deadlock, when the results queue is not
            pulled regularly and too many jobs are put in the work requests
            queue. To prevent this, always set ``timeout > 0`` when calling
            ``ThreadPool.putRequest()`` and catch ``Queue.Full`` exceptions.

        """
        self._requests_queue = Queue.Queue(q_size)
        self._results_queue = Queue.Queue(resq_size)
        self.workers = []
        self.dismissedWorkers = []
        self.workRequests = {}
        self.createWorkers(num_workers, poll_timeout)

    def createWorkers(self, num_workers, poll_timeout=5):
        """Add num_workers worker threads to the pool.

        ``poll_timeout`` sets the interval in seconds (int or float) for how
        often threads should check whether they are dismissed, while waiting
        for requests.

        """
        for _ in range(num_workers):
            self.workers.append(WorkerThread(self._requests_queue,
                self._results_queue, poll_timeout=poll_timeout))

    def dismissWorkers(self, num_workers, do_join=False):
        """Tell num_workers worker threads to quit after their current task.

        At most ``len(self.workers)`` threads are dismissed. If ``do_join``
        is true, each dismissed thread is joined before returning; otherwise
        the threads are appended to ``self.dismissedWorkers`` so that they
        can be joined later with ``joinAllDismissedWorkers``.

        """
        dismiss_list = []
        for _ in range(min(num_workers, len(self.workers))):
            worker = self.workers.pop()
            worker.dismiss()
            dismiss_list.append(worker)

        if do_join:
            for worker in dismiss_list:
                worker.join()
        else:
            self.dismissedWorkers.extend(dismiss_list)

    def joinAllDismissedWorkers(self):
        """Perform Thread.join() on all worker threads that have been dismissed.
        """
        for worker in self.dismissedWorkers:
            worker.join()
        self.dismissedWorkers = []

    def putRequest(self, request, block=True, timeout=None):
        """Put work request into work queue and save its id for later.

        ``block`` and ``timeout`` are passed to ``Queue.put`` of the request
        queue. The request is only registered in ``self.workRequests`` after
        it was queued successfully.

        """
        # NOTE: these checks are ``assert`` statements and therefore vanish
        # when Python runs with -O; they are kept so that callers still see
        # the same AssertionError as before.
        assert isinstance(request, WorkRequest)
        # don't reuse old work requests
        assert not getattr(request, 'exception', None)
        self._requests_queue.put(request, block, timeout)
        self.workRequests[request.requestID] = request

    def poll(self, block=False):
        """Process any new results in the queue.

        For every result taken from the results queue, the request's
        ``exc_callback`` is called if the request raised an exception and has
        such a callback; otherwise its ``callback`` is called, if any. The
        request is then removed from ``self.workRequests``, even when the
        callback itself raises (the exception is propagated to the caller).

        With ``block`` false, returns as soon as the results queue is empty.
        With ``block`` true, waits for each next result, so the call only
        ends by raising one of the exceptions below.

        Raises ``NoResultsPending`` when no submitted request is awaiting a
        result, and ``NoWorkersAvailable`` when ``block`` is true and there
        are no worker threads left.

        """
        while True:
            # still results pending?
            if not self.workRequests:
                raise NoResultsPending
            # are there still workers to process remaining requests?
            elif block and not self.workers:
                raise NoWorkersAvailable
            # Only the queue access is guarded: a Queue.Empty raised from
            # inside a user callback must not be mistaken for an empty queue.
            try:
                request, result = self._results_queue.get(block=block)
            except Queue.Empty:
                break
            try:
                if request.exception and request.exc_callback:
                    # an exception occurred and a handler is registered
                    request.exc_callback(request, result)
                elif request.callback:
                    # hand results to callback, if any
                    request.callback(request, result)
            finally:
                # Always forget the request, otherwise a failing callback
                # would leave it pending forever and wait() could never end.
                self.workRequests.pop(request.requestID, None)

    def wait(self):
        """Wait for results, blocking until all have arrived."""
        while True:
            try:
                self.poll(True)
            except NoResultsPending:
                break
################
# USAGE EXAMPLE
################

if __name__ == '__main__':
    import random
    import time

    # the work the threads will have to do (rather trivial in our example)
    def do_something(data):
        time.sleep(random.randint(1, 5))
        result = round(random.random() * data, 5)
        # just to show off, we throw an exception once in a while
        if result > 5:
            raise RuntimeError("Something extraordinary happened!")
        return result

    # this will be called each time a result is available
    def print_result(request, result):
        print("**** Result from request #%s: %r" % (request.requestID, result))

    # this will be called when an exception occurs within a thread
    # this example exception handler does little more than the default handler
    def handle_exception(request, exc_info):
        if not isinstance(exc_info, tuple):
            # Something is seriously wrong...
            print(request)
            print(exc_info)
            raise SystemExit
        print("**** Exception occured in request #%s: %s" % \
          (request.requestID, exc_info))

    # assemble the arguments for each job to a list...
    data = [random.randint(1, 10) for i in range(20)]
    # ... and build a WorkRequest object for each item in data
    requests = makeRequests(do_something, data, print_result, handle_exception)
    # to use the default exception handler, uncomment next line and comment out
    # the preceding one.
    #requests = makeRequests(do_something, data, print_result)

    # or the other form of args_lists accepted by makeRequests: ((,), {})
    data = [((random.randint(1, 10),), {}) for i in range(20)]
    requests.extend(
        makeRequests(do_something, data, print_result, handle_exception)
        # to use the default exception handler, uncomment next line and comment
        # out the preceding one.
        #makeRequests(do_something, data, print_result)
    )

    # we create a pool of 3 worker threads
    print("Creating thread pool with 3 worker threads.")
    main = ThreadPool(3)

    # then we put the work requests in the queue...
    for req in requests:
        main.putRequest(req)
        print("Work request #%s added." % req.requestID)
    # or shorter:
    # [main.putRequest(req) for req in requests]

    # ...and wait for the results to arrive in the result queue
    # by using ThreadPool.wait(). This would block until results for
    # all work requests have arrived:
    # main.wait()

    # instead we can poll for results while doing something else:
    i = 0
    while True:
        try:
            time.sleep(0.5)
            main.poll()
            print("Main thread working...")
            # active_count() replaces the deprecated activeCount() alias;
            # subtract one for the main thread itself.
            print("(active worker threads: %i)" % (threading.active_count()-1, ))
            if i == 10:
                print("**** Adding 3 more worker threads...")
                main.createWorkers(3)
            if i == 20:
                print("**** Dismissing 2 worker threads...")
                main.dismissWorkers(2)
            i += 1
        except KeyboardInterrupt:
            print("**** Interrupted!")
            break
        except NoResultsPending:
            print("**** No pending results.")
            break
    if main.dismissedWorkers:
        print("Joining all dismissed worker threads...")
        main.joinAllDismissedWorkers()