Exploring Operating Systems

Thread vs Process Creation

Let’s first take a look at thread creation.

Thread Creation

Let’s create a very simple program that creates a thread and then joins it.

#include <pthread.h>

/* Thread entry point: performs no work and finishes immediately with a NULL result. */
void *newThread(void *arg) {
    (void)arg; /* the start argument is intentionally unused */

    void *result = NULL;
    return result;
}

/*
 * Creates one thread running newThread and waits for it to finish.
 *
 * Returns 0 when the thread was created and joined successfully, and 1 if
 * either pthread call reported an error.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pthread_t thread;

    /* pthread_create reports failure through its return value (an error
     * number); on failure `thread` is not set and must not be joined. */
    if (pthread_create(&thread, NULL, &newThread, NULL) != 0) {
        return 1;
    }

    /* Block until the new thread has terminated. */
    if (pthread_join(thread, NULL) != 0) {
        return 1;
    }

    return 0;
}

Feel free to add some printf statements to make the output easier to follow, as shown below:

#include <stdio.h>
#include <pthread.h>

/* Thread entry point: announces on stdout that it is running, then ends with a NULL result. */
void *newthread(void *arg) {
    (void)arg; /* the start argument is intentionally unused */

    fputs("Thread running\n", stdout);
    return NULL;
}

/*
 * Creates one thread running newthread, waits for it to finish, and prints
 * progress messages along the way.
 *
 * Returns 0 on success and -1 if the thread could not be created or joined.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pthread_t thread;
    int rc;

    /* pthread functions return an error number directly instead of setting errno. */
    rc = pthread_create(&thread, NULL, newthread, NULL);
    if (rc != 0) {
        fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
        return -1;  // Exit with error if thread creation fails
    }

    printf("Main program waiting for thread to complete\n");

    /* Only claim the thread was joined when pthread_join actually succeeded. */
    rc = pthread_join(thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
        return -1;
    }
    printf("Thread joined\n");

    return 0;
}

To compile and run:

gcc threadCreation.c -o threadCreation -pthread
./threadCreation

Output will look like:

Main program waiting for thread to complete
Thread running
Thread joined

But this output does not tell us anything about the system calls being invoked here. Let’s use strace to trace the program.

strace ./threadCreation

It will give the output as below:

execve("./th", ["./th"], 0x7ffe22c1ed10 /* 75 vars */) = 0
brk(NULL)                               = 0x62f72cbf9000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffd43db1be0) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8979a49000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=90175, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 90175, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8979a32000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\237\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0I\17\357\204\3$\f\221\2039x\324\224\323\236S"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2220400, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2264656, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8979800000
mprotect(0x7f8979828000, 2023424, PROT_NONE) = 0
mmap(0x7f8979828000, 1658880, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f8979828000
mmap(0x7f89799bd000, 360448, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bd000) = 0x7f89799bd000
mmap(0x7f8979a16000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x215000) = 0x7f8979a16000
mmap(0x7f8979a1c000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8979a1c000
close(3)                                = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8979a2f000
arch_prctl(ARCH_SET_FS, 0x7f8979a2f740) = 0
set_tid_address(0x7f8979a2fa10)         = 74199
set_robust_list(0x7f8979a2fa20, 24)     = 0
rseq(0x7f8979a300e0, 0x20, 0, 0x53053053) = 0
mprotect(0x7f8979a16000, 16384, PROT_READ) = 0
mprotect(0x62f72bc04000, 4096, PROT_READ) = 0
mprotect(0x7f8979a83000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7f8979a32000, 90175)           = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f8979891870, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f8979842520}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f8978e00000
mprotect(0x7f8978e01000, 8388608, PROT_READ|PROT_WRITE) = 0
getrandom("\xf9\x22\xd5\xff\x89\x58\xec\x64", 8, GRND_NONBLOCK) = 8
brk(NULL)                               = 0x62f72cbf9000
brk(0x62f72cc1a000)                     = 0x62f72cc1a000
rt_sigprocmask(SIG_BLOCK, ~[], [], 8)   = 0
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f8979600910, parent_tid=0x7f8979600910, exit_signal=0, stack=0x7f8978e00000, stack_size=0x7fff00, tls=0x7f8979600640} => {parent_tid=[74200]}, 88) = 74200
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
Thread running
write(1, "Main program waiting for thread "..., 44Main program waiting for thread to complete
) = 44
write(1, "Thread joined\n", 14Thread joined
)         = 14
exit_group(0)                           = ?
+++ exited with 0 +++

When I first looked at the output from strace, I thought, “What kind of crap is this?” But believe me, I was never more wrong about anything than that.

Let’s try to decode the lines one by one and understand exactly what each of them is doing:

System Call Explanations

Here, the calls to mmap() and mprotect() set up the new thread’s stack:

mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f8978e00000
mprotect(0x7f8978e01000, 8388608, PROT_READ|PROT_WRITE) = 0

Let’s try to understand it more:

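Memory Mapping for Stack Creation

The mmap() call reserves 8392704 bytes (8 MiB plus one 4096-byte page) of private anonymous memory for the new thread’s stack, initially with PROT_NONE so that none of it is accessible.

Setting Permissions for Stack Use

The mprotect() call then makes 8388608 bytes (8 MiB) readable and writable, starting at 0x7f8978e01000, which is one page above the start of the mapping.

Purpose of the Guard Page

The lowest page of the mapping (starting at 0x7f8978e00000) is left as PROT_NONE, so a stack that grows past its 8 MiB limit faults immediately instead of silently overwriting neighbouring memory.

Security and Efficiency Considerations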

This setup demonstrates a common practice in memory management for stacks, balancing performance (immediate stack usage) and security (protection against stack overflows).

Signal Management with rt_sigprocmask

Before and after creating a new thread, the rt_sigprocmask system call is used to manage signal masks:

rt_sigprocmask(SIG_BLOCK, ~[], [], 8)   = 0

After the thread is created:

rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0

Thread Creation with clone3

The clone3 system call is used to create a new thread:

clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f8979600910, parent_tid=0x7f8979600910, exit_signal=0, stack=0x7f8978e00000, stack_size=0x7fff00, tls=0x7f8979600640} => {parent_tid=[74200]}, 88) = 74200

This ensures that the new thread is properly integrated into the parent’s environment, sharing the necessary resources while having its own stack and thread-specific storage. The use of clone3 over older variants like clone allows for more precise control over the creation of threads with modern flags and structures.

Process Creation

Let’s take the code below as an example:

#include <stddef.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>

/*
 * Forks a child process and has the parent wait for it.
 *
 * Child:  exits immediately with status 1.
 * Parent: returns 0 once the child has been reaped, or -1 if fork() or
 *         waitpid() fails (the reason is printed with perror).
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pid_t pid = fork();

    if (pid < 0) {
        /* fork() failed: no child was created. */
        perror("fork");
        return -1;
    }

    if (pid == 0) {
        /* Child process: nothing to do, exit with status 1. */
        return 1;
    }

    /* Parent process: reap the child so it does not linger as a zombie,
     * and do not report success if the wait itself failed. */
    if (waitpid(pid, NULL, 0) < 0) {
        perror("waitpid");
        return -1;
    }
    return 0;
}

Some of the Key Steps Involved here

Program Execution

execve("./process-creation", ["./process-creation"], 0x7ffc208c3b00 /* 75 vars */) = 0

Creation of Child Process

clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7e349f3f4a10) = 76678

Parent Waiting for Child

wait4(76678, NULL, 0, NULL)             = 76678

Signal Handling

--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=76678, si_uid=1000, si_status=1, si_utime=0, si_stime=0} ---