Let’s first take a look at thread creation.
Let’s write a very simple program that creates a thread and then joins it.
#include <pthread.h>
/*
 * Thread entry point for the minimal example.
 *
 * Performs no work: the argument is ignored and NULL is returned as the
 * thread's result.
 */
void *newThread(void *arg) {
    (void)arg; /* intentionally unused */

    return NULL;
}
/*
 * Creates a single thread running newThread() and waits for it to finish.
 *
 * Returns 0 when the thread was created and joined successfully, and 1 if
 * either pthread_create() or pthread_join() reported an error.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pthread_t thread;

    /*
     * pthread_create() returns 0 on success and an error number otherwise.
     * When it fails, `thread` has not been set, so it must never be handed
     * to pthread_join().
     */
    if (pthread_create(&thread, NULL, newThread, NULL) != 0) {
        return 1;
    }

    /* Block until the new thread has terminated; its result is not needed. */
    if (pthread_join(thread, NULL) != 0) {
        return 1;
    }

    return 0;
}
Feel free to add some printf
statements to make the output easier to follow, for example:
#include <stdio.h>
#include <pthread.h>
/*
 * Thread entry point: writes a one-line notice to stdout so the thread's
 * execution is visible in the program output, then finishes with a NULL
 * result. The argument is ignored.
 */
void *newthread(void *arg) {
    (void)arg; /* intentionally unused */

    fputs("Thread running\n", stdout);

    return NULL;
}
/*
 * Starts one thread running newthread(), waits for it to finish, and prints
 * progress messages along the way.
 *
 * Returns 0 on success, or -1 if the thread could not be created or joined.
 * Error diagnostics go to stderr so they are not mixed into the normal
 * progress output on stdout.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pthread_t thread;
    int rc;

    /* pthread_create() returns 0 on success, or an error number on failure. */
    rc = pthread_create(&thread, NULL, newthread, NULL);
    if (rc != 0) {
        fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
        return -1; // Exit with error if thread creation fails
    }

    printf("Main program waiting for thread to complete\n");

    /* Only claim the thread was joined if pthread_join() actually succeeded. */
    rc = pthread_join(thread, NULL);
    if (rc != 0) {
        fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
        return -1; // Exit with error if the join fails
    }

    printf("Thread joined\n");
    return 0;
}
To compile and run:
gcc threadCreation.c -o threadCreation -pthread
./threadCreation
Output will look like:
Main program waiting for thread to complete
Thread running
Thread joined
However, this output does not show which system calls are being invoked. Let’s use strace
to trace the program.
strace ./threadCreation
It will give the output as below:
execve("./th", ["./th"], 0x7ffe22c1ed10 /* 75 vars */) = 0
brk(NULL) = 0x62f72cbf9000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffd43db1be0) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8979a49000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=90175, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 90175, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f8979a32000
close(3) = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\237\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0I\17\357\204\3$\f\221\2039x\324\224\323\236S"..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0755, st_size=2220400, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2264656, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f8979800000
mprotect(0x7f8979828000, 2023424, PROT_NONE) = 0
mmap(0x7f8979828000, 1658880, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f8979828000
mmap(0x7f89799bd000, 360448, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bd000) = 0x7f89799bd000
mmap(0x7f8979a16000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x215000) = 0x7f8979a16000
mmap(0x7f8979a1c000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f8979a1c000
close(3) = 0
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f8979a2f000
arch_prctl(ARCH_SET_FS, 0x7f8979a2f740) = 0
set_tid_address(0x7f8979a2fa10) = 74199
set_robust_list(0x7f8979a2fa20, 24) = 0
rseq(0x7f8979a300e0, 0x20, 0, 0x53053053) = 0
mprotect(0x7f8979a16000, 16384, PROT_READ) = 0
mprotect(0x62f72bc04000, 4096, PROT_READ) = 0
mprotect(0x7f8979a83000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7f8979a32000, 90175) = 0
rt_sigaction(SIGRT_1, {sa_handler=0x7f8979891870, sa_mask=[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7f8979842520}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f8978e00000
mprotect(0x7f8978e01000, 8388608, PROT_READ|PROT_WRITE) = 0
getrandom("\xf9\x22\xd5\xff\x89\x58\xec\x64", 8, GRND_NONBLOCK) = 8
brk(NULL) = 0x62f72cbf9000
brk(0x62f72cc1a000) = 0x62f72cc1a000
rt_sigprocmask(SIG_BLOCK, ~[], [], 8) = 0
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f8979600910, parent_tid=0x7f8979600910, exit_signal=0, stack=0x7f8978e00000, stack_size=0x7fff00, tls=0x7f8979600640} => {parent_tid=[74200]}, 88) = 74200
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
Thread running
write(1, "Main program waiting for thread "..., 44Main program waiting for thread to complete
) = 44
write(1, "Thread joined\n", 14Thread joined
) = 14
exit_group(0) = ?
+++ exited with 0 +++
When I first looked at the output from strace,
I thought, “what kind of crap is this?” But believe me, I was never more wrong about anything.
Let’s decode each line and understand what exactly it is doing:
./th
with the environment variables listed in the memory address.**mmap(NULL, 8192, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) = 0x7d674df61000** |
**openat(AT_FDCWD, “/etc/ld.so.cache”, O_RDONLY | O_CLOEXEC) = 3** |
**newfstatat(3, “”, {st_mode=S_IFREG | 0644, st_size=90175, …}, AT_EMPTY_PATH) = 0** |
/etc/ld.so.cache
into memory for reading.**openat(AT_FDCWD, “/lib/x86_64-linux-gnu/libc.so.6”, O_RDONLY | O_CLOEXEC) = 3** |
**newfstatat(3, “”, {st_mode=S_IFREG | 0755, st_size=2220400, …}, AT_EMPTY_PATH) = 0** |
**mmap(NULL, 2264656, PROT_READ, MAP_PRIVATE | MAP_DENYWRITE, 3, 0) = 0x7d674dc00000** |
**mmap(NULL, 12288, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0) = 0x7d674df47000** |
**mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0) = 0x7d674d200000** |
strace
output, we see the exit code here before the process ends.

Here the calls to mmap()
and mprotect()
help set up the new thread’s stack:
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0) = 0x7f8978e00000
mprotect(0x7f8978e01000, 8388608, PROT_READ|PROT_WRITE) = 0
Let’s try to understand it more:
mmap(NULL, 8392704, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK, -1, 0)
- `NULL` indicates that the kernel should choose the address.
- The length is `8392704` bytes (8 MB + 4 KB).
- `PROT_NONE` means no access permissions are set initially.
- `MAP_PRIVATE` creates a private copy-on-write mapping; changes do not affect the original file.
- `MAP_ANONYMOUS` does not map any file, hence “anonymous”.
- `MAP_STACK` marks the region as intended for a stack (Linux-specific, for informational purposes or compatibility with other OSes).
- The call returns `0x7f8978e00000`, the start address of the new mapping.

mprotect(0x7f8978e01000, 8388608, PROT_READ|PROT_WRITE)
- The range starts at `0x7f8978e01000`, which is 4 KB after the start of the mapped region (just past the guard page).
- The length is `8388608` bytes, or 8 MB, leaving the first 4 KB as a guard page.
- `PROT_READ|PROT_WRITE` allows reading and writing in this region.
- The guard page (`0x7f8978e00000` to `0x7f8978e00fff`) remains with `PROT_NONE` permissions, so any access to it faults instead of silently touching neighbouring memory.

This setup demonstrates a common practice in memory management for stacks, balancing performance (immediate stack usage) and security (protection against stack overflows).
rt_sigprocmask
Before and after creating a new thread, the rt_sigprocmask
system call is used to manage signal masks:
rt_sigprocmask(SIG_BLOCK, ~[], [], 8) = 0
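- `SIG_BLOCK` adds the signals in the given set to the set of currently blocked signals.
- The `~[]` syntax indicates blocking all signals (`~` inverts the empty set).
- The previous mask was empty (`[]`), but all signals are blocked during the operation of `clone3`.

After the thread is created: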
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
- `SIG_SETMASK` replaces the current signal mask with the given set.
- The set is `[]` (the empty set), which means no signals are blocked.

clone3
The clone3
system call is used to create a new thread:
clone3({flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID, child_tid=0x7f8979600910, parent_tid=0x7f8979600910, exit_signal=0, stack=0x7f8978e00000, stack_size=0x7fff00, tls=0x7f8979600640} => {parent_tid=[74200]}, 88) = 74200
- `stack` points to the region allocated by the `mmap` call shown previously.
- `stack_size` is `0x7fff00` bytes.
- The call returns `74200`, which is the thread ID of the newly created thread.

This helps ensure that the new thread is properly integrated into the parent’s environment, sharing the necessary resources while having its own stack and thread-specific storage. Using `clone3` rather than older variants like `clone` allows more precise control over the creation of threads with modern flags and structures.
Now let’s look at process creation. Take the code below as an example:
#include <stddef.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>
/*
 * Forks once to demonstrate process creation.
 *
 * - If fork() fails, the error is reported with perror() and -1 is returned.
 * - The child returns 1 immediately.
 * - The parent waits for the child with waitpid() and then returns 0.
 */
int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    pid_t child = fork();

    /* Negative result: no child was created. */
    if (child < 0) {
        perror("fork");
        return -1;
    }

    /* Zero: this is the child process. */
    if (child == 0) {
        return 1;
    }

    /* Positive: this is the parent; the child's exit status is not inspected. */
    waitpid(child, NULL, 0);
    return 0;
}
Program Execution
execve("./process-creation", ["./process-creation"], 0x7ffc208c3b00 /* 75 vars */) = 0
Creation of Child Process
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7e349f3f4a10) = 76678
`CLONE_CHILD_CLEARTID` and `CLONE_CHILD_SETTID` set up thread ID handling, while `SIGCHLD` ensures the parent is notified when the child’s state changes.

Parent Waiting for Child
wait4(76678, NULL, 0, NULL) = 76678
Signal Handling
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=76678, si_uid=1000, si_status=1, si_utime=0, si_stime=0} ---