threading and multiprocessing
difference between threading and multiprocessing
- threading is limited by the GIL (in CPython), so only one thread can execute Python bytecode at a time.
- objects can be shared between threads (shared memory), but must be copied (pickled) when sent between processes; see the sketch below.
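A minimal sketch of this difference (the list data and the helper append_one are just illustrative names):
from threading import Thread
from multiprocessing import Process
data = []
def append_one():
    data.append(1)
if __name__ == '__main__':
    t = Thread(target=append_one)
    t.start()
    t.join()
    print(data)  # [1] -> the thread mutated the parent's list in place
    p = Process(target=append_one)
    p.start()
    p.join()
    print(data)  # still [1] -> the child process only changed its own copy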
GIL (Global Interpreter Lock)
- What: a mutex that allows only one thread to execute Python bytecode (and touch Python objects) at a time.
- Why: it keeps CPython's reference counting thread-safe and prevents race conditions on interpreter state.
- Impact: hurts performance for CPU-bound programs. For example, the two-thread program below is SLOWER than the single-thread version.
# two-thread
import time
from threading import Thread
COUNT = 50000000
def countdown(n):
    while n > 0:
        n -= 1
t1 = Thread(target=countdown, args=(COUNT//2,))
t2 = Thread(target=countdown, args=(COUNT//2,))
start = time.time()
t1.start()
t2.start()
t1.join()
t2.join()
end = time.time()
print('Time taken in seconds -', end - start)
# single-thread
import time
COUNT = 50000000
def countdown(n):
    while n > 0:
        n -= 1
start = time.time()
countdown(COUNT)
end = time.time()
print('Time taken in seconds -', end - start)
multiprocessing
process start method
- 'spawn': default on Windows and macOS; safer but slower (each process starts a fresh interpreter).
- 'fork': only available on Unix and the default on Linux; faster but can be unsafe (e.g. with threads or locks held at fork time).
import multiprocessing as mp
mp.set_start_method('spawn')  # call at most once, before creating any Process/Pool
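An alternative is multiprocessing.get_context, which returns a context object with its own start method instead of changing the global default (a sketch):
import multiprocessing as mp
if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    p = ctx.Process(target=print, args=('hello from spawn',))
    p.start()
    p.join()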
simple data-parallel map:
from multiprocessing import Pool
def f(x):
    do_some_thing(x)  # placeholder for the real per-item work
# method 1
p = Pool(8)
p.map(f, list(range(1024)))
p.close()
p.join()
# method 2 (the with-block cleans up the pool automatically)
with Pool(8) as p:
    p.map(f, list(range(1024)))
with return values:
def f(x): 
    return x
with Pool(8) as p:
    res = p.map(f, list(range(1024)))
# res: [0, 1, 2, ..., 1023]
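Note: under 'spawn' (the default on Windows and macOS), every child re-imports the main module, so Pool/Process creation must sit behind an if __name__ == '__main__': guard. A minimal sketch (f here is just a placeholder):
from multiprocessing import Pool
def f(x):
    return x * x  # placeholder per-item work
if __name__ == '__main__':  # required under 'spawn'
    with Pool(8) as p:
        res = p.map(f, range(1024))
    print(res[:5])  # [0, 1, 4, 9, 16]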
sender-receiver model with Queue:
import time
from multiprocessing import Queue, Process
# sender (background)
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
q = Queue()
p = Process(target=sender, args=(q,))
p.start()
# receiver (foreground)
for _ in range(10):
    x = q.get()
    print(x)
    time.sleep(0.5)
p.terminate() # kill the sender (don't use join() since it loops forever)
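An alternative sketch: mark the sender as a daemon process, so it is killed automatically when the main process exits and no explicit terminate() is needed (assuming the sender is only useful while the main process runs):
import time
from multiprocessing import Queue, Process
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
if __name__ == '__main__':
    q = Queue()
    p = Process(target=sender, args=(q,), daemon=True)
    p.start()
    for _ in range(10):
        print(q.get())
        time.sleep(0.5)
    # no terminate() needed: daemon children are killed when the main process exits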
A class-based way:
import time
from multiprocessing import Queue, Process
class A:
    def __init__(self):
        self.q = Queue()
        # sender (background)
        def sender():
            x = 0
            while True:
                self.q.put(x)
                x += 1
                time.sleep(0.5)
        # note: a nested-function target only works with 'fork'; it can't be pickled under 'spawn'
        self.p = Process(target=sender)
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            x = self.q.get()
            print(x)
            time.sleep(0.5)
        self.p.terminate()
a = A()
a.run()
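Caveat: the nested sender function above cannot be pickled, so this class only works with the 'fork' start method. A sketch of a 'spawn'-compatible variant moves the sender to module level and passes the queue explicitly:
import time
from multiprocessing import Queue, Process
def sender(q):
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
class A:
    def __init__(self):
        self.q = Queue()
        # sender (background)
        self.p = Process(target=sender, args=(self.q,))
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            print(self.q.get())
            time.sleep(0.5)
        self.p.terminate()
if __name__ == '__main__':
    a = A()
    a.run()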
threading
sender-receiver model
import time
from queue import Queue
from threading import Thread, Event
class A:
    def __init__(self):
        self.q = Queue()
        self.exit_event = Event()
        # sender (background)
        def sender():
            x = 0
            while True:
                # Thread has no terminate()/kill(), so we signal the exit manually with an Event
                if self.exit_event.is_set():
                    break
                self.q.put(x)
                x += 1
                time.sleep(0.5)
        self.p = Thread(target=sender)
        self.p.start()
    def run(self):
        # receiver (foreground)
        for _ in range(10):
            x = self.q.get()
            print(x)
            time.sleep(0.5)
        # set exit event
        self.exit_event.set()
        self.p.join()
a = A()
a.run()
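If the receiver loop is the last thing the program does, a daemon thread is a simpler sketch: daemon threads are killed when the main thread exits, so no Event is needed (at the cost of no clean shutdown inside the sender):
import time
from queue import Queue
from threading import Thread
q = Queue()
def sender():
    x = 0
    while True:
        q.put(x)
        x += 1
        time.sleep(0.5)
t = Thread(target=sender, daemon=True)
t.start()
for _ in range(10):
    print(q.get())
    time.sleep(0.5)
# the daemon thread is killed when the main thread exits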