threading and multiprocessing
difference between threading and multiprocessing
- threading is limited by the GIL (for CPython), so only one thread can execute Python bytecode at a time.
- objects can be shared between threads (shared memory), but must be copied (pickled) between processes.
GIL (Global Interpreter Lock)
- What: a mutex that only allows one thread to access python objects at any time.
- Why: prevents race conditions and ensures thread safety for reference counting.
- Impact: hurts performance for CPU-bound programs (I/O-bound programs are largely unaffected, since the GIL is released during blocking I/O).
For example, this two-thread program is SLOWER than a single-threaded one.
# Demo: for a pure CPU-bound task, two threads are SLOWER than one,
# because the GIL lets only one thread execute Python bytecode at a time.
import time
from threading import Thread

COUNT = 50000000


def countdown(n):
    """Busy-loop n times — pure CPU work that never releases the GIL voluntarily."""
    while n > 0:
        n -= 1


if __name__ == "__main__":
    # two-thread version: both threads contend for the GIL, adding overhead
    t1 = Thread(target=countdown, args=(COUNT // 2,))
    t2 = Thread(target=countdown, args=(COUNT // 2,))
    start = time.time()
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    end = time.time()
    print('Time taken in seconds -', end - start)

    # single-thread version: same total work, no GIL contention
    start = time.time()
    countdown(COUNT)
    end = time.time()
    print('Time taken in seconds -', end - start)
multiprocessing
process start method
- 'spawn': default on Windows and macOS; safe, but slower (each child starts a fresh interpreter).
- 'fork': only available on Unix (default on Linux); faster, but unsafe when the parent process has threads.
import multiprocessing as mp
# Select how child processes are created. May be called at most once per
# program, before any Process/Pool is created (typically under
# `if __name__ == "__main__":`); calling it twice raises RuntimeError.
mp.set_start_method('spawn')
simple data-parallel:
from multiprocessing import Pool


def f(x):
    """Per-item work for the pool; replace with the real task."""
    do_some_thing(x)  # placeholder — assumed to be defined elsewhere


if __name__ == "__main__":
    # method 1: manage the pool lifetime by hand
    p = Pool(8)
    p.map(f, list(range(1024)))
    p.close()  # no more tasks will be submitted
    p.join()   # wait for the workers to finish

    # method 2: context manager — __exit__ calls terminate(), so only
    # use it when map() has already returned all results (as here)
    with Pool(8) as p:
        p.map(f, list(range(1024)))
with return values:
def f(x):
    """Identity function — stands in for real per-item work that returns a value."""
    return x


if __name__ == "__main__":
    with Pool(8) as p:
        # map() blocks until every result is in, and preserves input order
        res = p.map(f, list(range(1024)))
    # res: [0, 1, 2, ..., 1023]
sender-receiver model with Queue:
import time
from multiprocessing import Queue, Process


def sender(q):
    """Background producer: put 0, 1, 2, ... on q, one every 0.5 s, forever."""
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)


if __name__ == "__main__":
    q = Queue()
    p = Process(target=sender, args=(q,))
    p.start()

    # receiver (foreground): consume the first 10 values
    for _ in range(10):
        x = q.get()  # blocks until the sender has produced a value
        print(x)
        time.sleep(0.5)

    # the sender loops forever, so join() alone would never return —
    # terminate it, then join to reap the dead child process
    p.terminate()
    p.join()
A classy way:
import time
from multiprocessing import Queue, Process


class A:
    """Owns a background sender process and consumes its output in run()."""

    def __init__(self):
        # q is a multiprocessing.Queue, so it is usable from both processes
        self.q = Queue()

        # sender (background): a closure capturing self.
        # NOTE(review): a closure as Process target works only under the
        # 'fork' start method — under 'spawn' (Windows/macOS default) the
        # target must be picklable, and closures are not. Confirm the
        # intended platform.
        def sender():
            x = 0
            while True:
                self.q.put(x)
                x += 1
                time.sleep(0.5)

        self.p = Process(target=sender)
        self.p.start()

    def run(self):
        # receiver (foreground): consume the first 10 values
        for _ in range(10):
            x = self.q.get()
            print(x)
            time.sleep(0.5)
        # the sender never exits on its own, so terminate it
        self.p.terminate()


if __name__ == "__main__":
    a = A()
    a.run()
threading
sender-receiver model
import time
from queue import Queue
from threading import Thread, Event


class A:
    """Background sender thread + foreground receiver, stopped via an Event.

    interval: seconds the sender sleeps between items (0.5 keeps the
    original pacing; pass a smaller value to speed the demo up).
    """

    def __init__(self, interval=0.5):
        self.q = Queue()
        self.exit_event = Event()
        self.interval = interval

        # sender (background)
        def sender():
            x = 0
            while True:
                # threads have no terminate()/kill(), so the sender must
                # cooperate: check the exit flag and break out voluntarily
                if self.exit_event.is_set():
                    break
                self.q.put(x)
                x += 1
                time.sleep(self.interval)

        self.p = Thread(target=sender)
        self.p.start()

    def run(self, n=10):
        """Receive and print the first n values, then stop the sender.

        Returns the list of received values (in production order).
        """
        received = []
        # receiver (foreground)
        for _ in range(n):
            x = self.q.get()
            print(x)
            received.append(x)
            time.sleep(self.interval)
        # signal the sender to stop, then wait for it to exit
        self.exit_event.set()
        self.p.join()
        return received


if __name__ == "__main__":
    a = A()
    a.run()