threading and multiprocessing
difference between threading and multiprocessing
- threading is limited by the GIL (in CPython), so only one thread can execute Python bytecode at a time.
- objects can be shared between threads (shared memory), but must be copied (pickled) when sent between processes; see the sketch below.
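A minimal sketch of this difference (the list data and the helper append_one are just illustrative names):
from threading import Thread
from multiprocessing import Process
data = []
def append_one():
    data.append(1)
if __name__ == '__main__':
    t = Thread(target=append_one)
    t.start()
    t.join()
    print(data)  # [1] -> the thread mutated the parent's list in place
    p = Process(target=append_one)
    p.start()
    p.join()
    print(data)  # still [1] -> the child process only changed its own copy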
GIL (Global Interpreter Lock)
- What: a mutex that allows only one thread to execute Python bytecode (and touch Python objects) at a time.
- Why: it keeps CPython's reference counting thread-safe and prevents race conditions on interpreter state.
- Impact: hurts performance for CPU-bound programs. For example, the two-thread program below is SLOWER than the single-thread version.
# two-thread
import time
from threading import Thread
COUNT = 50000000
def countdown(n):
    while n > 0:
        n -= 1
t1 = Thread(target=countdown, args=(COUNT//2,))
t2 = Thread(target=countdown, args=(COUNT//2,))
start = time.time()
t1.start()
t2.start()
t1.join()
t2.join()
end = time.time()
print('Time taken in seconds -', end - start)
# single-thread
import time
COUNT = 50000000
def countdown(n):
    while n > 0:
        n -= 1
start = time.time()
countdown(COUNT)
end = time.time()
print('Time taken in seconds -', end - start)
multiprocessing
process start method
- 'spawn': default on Windows and macOS; safer but slower (each process starts a fresh interpreter).
- 'fork': only available on Unix and the default on Linux; faster but can be unsafe (e.g. with threads or locks held at fork time).
import multiprocessing as mp
mp.set_start_method('spawn')  # call at most once, before creating any Process/Pool
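An alternative is multiprocessing.get_context, which returns a context object with its own start method instead of changing the global default (a sketch):
import multiprocessing as mp
if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    p = ctx.Process(target=print, args=('hello from spawn',))
    p.start()
    p.join()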
simple data-parallel map:
from multiprocessing import Pool
def f(x):
    do_some_thing(x)  # placeholder for the real per-item work
# method 1
p = Pool(8)
p.map(f, list(range(1024)))
p.close()
p.join()
# method 2 (the with-block cleans up the pool automatically)
with Pool(8) as p:
    p.map(f, list(range(1024)))
with return values:
def f(x): 
    return x
with Pool(8) as p:
    res = p.map(f, list(range(1024)))
# res: [0, 1, 2, ..., 1023]
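Note: under 'spawn' (the default on Windows and macOS), every child re-imports the main module, so Pool/Process creation must sit behind an if __name__ == '__main__': guard. A minimal sketch (f here is just a placeholder):
from multiprocessing import Pool
def f(x):
    return x * x  # placeholder per-item work
if __name__ == '__main__':  # required under 'spawn'
    with Pool(8) as p:
        res = p.map(f, range(1024))
    print(res[:5])  # [0, 1, 4, 9, 16]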
sender-receiver model with Queue:
import time
from multiprocessing import Queue, Process
# sender (background)
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
q = Queue()
p = Process(target=sender, args=(q,))
p.start()
# receiver (foreground)
for _ in range(10):
    x = q.get()
    print(x)
    time.sleep(0.5)
p.terminate() # kill the sender (don't use join() since it loops forever)
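An alternative sketch: mark the sender as a daemon process, so it is killed automatically when the main process exits and no explicit terminate() is needed (assuming the sender is only useful while the main process runs):
import time
from multiprocessing import Queue, Process
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
if __name__ == '__main__':
    q = Queue()
    p = Process(target=sender, args=(q,), daemon=True)
    p.start()
    for _ in range(10):
        print(q.get())
        time.sleep(0.5)
    # no terminate() needed: daemon children are killed when the main process exits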
A class-based way:
import time
from multiprocessing import Queue, Process
class A:
    def __init__(self):
        self.q = Queue()
        # sender (background)
        def sender():
            x = 0
            while True:
                self.q.put(x)
                x += 1
                time.sleep(0.5)
        # note: a nested-function target only works with 'fork'; it can't be pickled under 'spawn'
        self.p = Process(target=sender)
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            x = self.q.get()
            print(x)
            time.sleep(0.5)
        self.p.terminate()
a = A()
a.run()
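Caveat: the nested sender function above cannot be pickled, so this class only works with the 'fork' start method. A sketch of a 'spawn'-compatible variant moves the sender to module level and passes the queue explicitly:
import time
from multiprocessing import Queue, Process
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
class A:
    def __init__(self):
        self.q = Queue()
        # sender (background)
        self.p = Process(target=sender, args=(self.q,))
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            print(self.q.get())
            time.sleep(0.5)
        self.p.terminate()
if __name__ == '__main__':
    a = A()
    a.run()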
threading
sender-receiver model
import time
from queue import Queue
from threading import Thread, Event
class A:
    def __init__(self):
        self.q = Queue()
        self.exit_event = Event()
        # sender (background)
        def sender():
            x = 0
            while True:
                # Thread has no terminate()/kill(), so we signal the exit manually with an Event
                if self.exit_event.is_set():
                    break
                self.q.put(x)
                x += 1
                time.sleep(0.5)
        self.p = Thread(target=sender)
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            x = self.q.get()
            print(x)
            time.sleep(0.5)
        # set exit event
        self.exit_event.set()
        self.p.join()
a = A()
a.run()
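If the receiver loop is the last thing the program does, a daemon thread is a simpler sketch: daemon threads are killed when the main thread exits, so no Event is needed (at the cost of no clean shutdown inside the sender):
import time
from queue import Queue
from threading import Thread
q = Queue()
def sender():
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
t = Thread(target=sender, daemon=True)
t.start()
for _ in range(10):
    print(q.get())
    time.sleep(0.5)
# the daemon thread is killed when the main thread exits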