I'm working on an operating system programming assignment to understand pipes. M3.md The task involves using regular expressions to extract information from strace output and statistic system call execution times.
However, I'm encountering issues where the times are not being correctly calculated. I suspect the parse_strace_line function is failing to properly parse the strace output lines, especially the timestamp part (e.g., <0.000123>), leading to incorrect or missing time values in the statistics. The function might not be correctly handling the line format, edge cases like incomplete lines, or the regex pattern might not accurately capture the time component from the strace output.
My environment is Ubuntu running on WSL2.
Here is the code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <regex.h>
#define MAX_SYSCALLS 1024 // Maximum distinct syscalls we can track
#define TOP_N 5 // Number of top results to display
/* Syscall tracking structure
* name: syscall name (e.g., "open", "read")
* time: accumulated time spent in this syscall (seconds)
*/
typedef struct {
char name[64];
double time;
} syscall_stat;
/* Main statistics container
* stats: array of individual syscall metrics
* count: number of unique syscalls tracked
* total_time: sum of all syscall times for percentage calculation
*/
typedef struct {
syscall_stat stats[MAX_SYSCALLS];
int count;
double total_time;
} syscall_stats;
/* Parse a line of strace output with timing information (-T option)
* line: input string from strace output
* syscall_name: output parameter for extracted syscall name
* time: output parameter for extracted duration
* Returns: 1 if successful, 0 if line doesn't contain timing info
*
* Strace line format examples:
* openat(AT_FDCWD, "/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000014>
* read(0, "hello\n", 1024) = 6 <0.000010>
*/
int parse_strace_line(char* line, char* syscall_name, double* time) {
// Filter lines without timing information
if (strstr(line, " = ") == NULL || strstr(line, "<") == NULL) {
return 0;
}
// Regular expression pattern breakdown:
// ^(\w+) Capture group 1: syscall name at line start
// \(.*?\) Non-greedy match of arguments between parentheses
// \s+=\s+.*? Match return value portion
// <(\d+\.?\d*)> Capture group 2: time value between <>
regex_t regex;
regcomp(®ex, "^(\\w+)\\(.*?\\)\\s+=\\s+.*?<(\\d+\\.?\\d*)>$", REG_EXTENDED);
regmatch_t matches[3]; // Array for regex capture groups
if (regexec(®ex, line, 3, matches, 0) != 0) {
regfree(®ex);
return 0;
}
// Extract syscall name from first capture group
int name_len = matches[1].rm_eo - matches[1].rm_so;
strncpy(syscall_name, line + matches[1].rm_so, name_len);
syscall_name[name_len] = '\0';
// Convert time string to double
char* time_str = line + matches[2].rm_so;
*time = atof(time_str);
regfree(®ex);
return 1;
}
/* Update statistics with new syscall data
* stats: pointer to statistics container
* name: syscall name to add/update
* time: duration to accumulate
*
* If the syscall already exists, we sum the time.
* If new, we add it to the list (until MAX_SYSCALLS).
*/
void add_syscall(syscall_stats *stats, const char *name, double time) {
// Safety check against array overflow
if (stats->count >= MAX_SYSCALLS) {
fprintf(stderr, "Warning: Reached maximum tracked syscalls (%d)\n", MAX_SYSCALLS);
return;
}
// Check existing entries for duplicate syscall
for (int i = 0; i < stats->count; i++) {
if (strcmp(stats->stats[i].name, name) == 0) {
stats->stats[i].time += time;
stats->total_time += time;
return;
}
}
// Add new entry if space available
if (stats->count < MAX_SYSCALLS) {
strncpy(stats->stats[stats->count].name, name, sizeof(stats->stats[0].name));
stats->stats[stats->count].time = time;
stats->total_time += time;
stats->count++;
}
}
/* Display top N syscalls by accumulated time
* stats: statistics container with all data
* n: number of top results to show
*
* Uses bubble sort for simplicity with small datasets.
* Prints percentages relative to total tracked time.
* Flushes output with nulls for clean pipe termination.
*/
void print_top_syscalls(syscall_stats *stats, int n) {
if (stats->count == 0 || stats->total_time == 0) {
fprintf(stderr, "Error: No valid syscall data to display\n");
return;
}
// Simple O(n²) sort acceptable for MAX_SYSCALLS=1024
for (int i = 0; i < stats->count - 1; i++) {
for (int j = i+1; j < stats->count; j++) {
// Swap if later entry has higher time
if (stats->stats[j].time > stats->stats[i].time) {
syscall_stat temp = stats->stats[i];
stats->stats[i] = stats->stats[j];
stats->stats[j] = temp;
}
}
}
// Determine number of results to display
n = stats->count < n ? stats->count : n;
for (int i = 0; i < n; i++) {
double ratio = (stats->stats[i].time / stats->total_time) * 100;
printf("%s (%d%%)\n", stats->stats[i].name, (int)ratio);
}
/* Null-terminate output for clean pipe communication
* Some programs may read fixed buffer sizes - this ensures
* we flush the pipe completely and avoid hanging readers */
for (int i = 0; i < 80; i++) putchar('\0');
fflush(stdout);
}
/* Main execution flow:
* 1. Validate command line arguments
* 2. Set up parent/child process communication
* 3. Child: execute strace with command
* 4. Parent: process and analyze strace output
*/
int main(int argc, char *argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: %s COMMAND [ARGS...]\n", argv[0]);
fprintf(stderr, "Example: %s ls -l\n", argv[0]);
return 1;
}
// Create pipe for inter-process communication
int pipefd[2];
if (pipe(pipefd) == -1) {
perror("Failed to create pipe");
return 1;
}
pid_t pid = fork();
if (pid == -1) {
perror("Process fork failed");
return 1;
}
if (pid == 0) { // Child process - execute strace
// Redirect stdout to pipe write end
close(pipefd[0]);
dup2(pipefd[1], STDOUT_FILENO);
close(pipefd[1]);
// Build strace argument array:
// strace -T [user command] [args...]
char* strace_argv[argc + 2];
strace_argv[0] = "/usr/bin/strace";
strace_argv[1] = "-T"; // Enable timing reports
for (int i = 1; i < argc; i++) {
strace_argv[i+1] = argv[i];
}
strace_argv[argc+1] = NULL; // NULL-terminate array
// Execute strace with clean environment
execve("/usr/bin/strace", strace_argv, (char *[]){ "PATH=/bin:/usr/bin", NULL });
perror("Failed to execute strace");
return 1;
} else { // Parent process - analyze output
close(pipefd[1]); // Close unused write end
syscall_stats stats = {0}; // Initialize all fields to zero
char buffer[1024];
char* line_start = buffer;
ssize_t bytes_read;
// Read output in chunks until EOF
while ((bytes_read = read(pipefd[0],
line_start,
sizeof(buffer) - (line_start - buffer))) > 0) {
// Null-terminate the received data
buffer[bytes_read + (line_start - buffer)] = '\0';
line_start = buffer;
// Process complete lines (delimited by newlines)
char* line_end;
while ((line_end = strchr(line_start, '\n')) != NULL) {
*line_end = '\0'; // Temporarily terminate at newline
char syscall_name[64];
double time;
if (parse_strace_line(line_start, syscall_name, &time)) {
add_syscall(&stats, syscall_name, time);
}
line_start = line_end + 1; // Move to next line
}
// Handle partial line remaining in buffer
if (line_start != buffer) {
size_t remaining = buffer + sizeof(buffer) - line_start;
// Move partial line to buffer start for next read
memmove(buffer, line_start, remaining);
line_start = buffer + remaining;
}
}
// Cleanup and display results
close(pipefd[0]);
wait(NULL); // Wait for strace to terminate completely
print_top_syscalls(&stats, TOP_N);
}
return 0;
}
This is the command line to test:
root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# gcc -g -o sperf sperf.c -lm -lrt
root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# ./sperf echo "Hello WSL"
execve("/bin/echo", ["echo", "Hello WSL"], 0x7ffe6258ee20 /* 1 var */) = 0 <0.000363>
brk(NULL) = 0x55d4dee1c000 <0.000104>
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b684000 <0.000137>
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) <0.000115>
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000133>
fstat(3, {st_mode=S_IFREG|0644, st_size=36191, ...}) = 0 <0.000115>
mmap(NULL, 36191, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f483b67b000 <0.000097>
close(3) = 0 <0.000079>
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000108>
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\243\2\0\0\0\0\0"..., 832) = 832 <0.000094>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000101>
fstat(3, {st_mode=S_IFREG|0755, st_size=2125328, ...}) = 0 <0.000096>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000088>
mmap(NULL, 2170256, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f483b469000 <0.000107>
mmap(0x7f483b491000, 1605632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f483b491000 <0.000117>
mmap(0x7f483b619000, 323584, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b0000) = 0x7f483b619000 <0.000099>
mmap(0x7f483b668000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1fe000) = 0x7f483b668000 <0.000102>
mmap(0x7f483b66e000, 52624, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f483b66e000 <0.000103>
close(3) = 0 <0.000104>
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b466000 <0.000085>
arch_prctl(ARCH_SET_FS, 0x7f483b466740) = 0 <0.000049>
set_tid_address(0x7f483b466a10) = 1543 <0.000319>
set_robust_list(0x7f483b466a20, 24) = 0 <0.000098>
rseq(0x7f483b467060, 0x20, 0, 0x53053053) = 0 <0.000123>
mprotect(0x7f483b668000, 16384, PROT_READ) = 0 <0.000096>
mprotect(0x55d4de99a000, 4096, PROT_READ) = 0 <0.000093>
mprotect(0x7f483b6bc000, 8192, PROT_READ) = 0 <0.000103>
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 <0.000095>
munmap(0x7f483b67b000, 36191) = 0 <0.000118>
getrandom("\xc0\x8d\xa3\xbc\x88\x59\xbb\xd3", 8, GRND_NONBLOCK) = 8 <0.000098>
brk(NULL) = 0x55d4dee1c000 <0.000113>
brk(0x55d4dee3d000) = 0x55d4dee3d000 <0.000097>
fstat(1, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 <0.000092>
write(1, "Hello WSL\n", 10Received from pipe: Hello WSL
) = 10 <0.000216>
close(1) = 0 <0.000165>
close(2) = 0 <0.000092>
exit_group(0) = ?
+++ exited with 0 +++
Total count is zero
Here's what I tried and what I expected:
I used GDB debugging and noticed that the
while ((line = strchr(line_start, '\n')) != NULL)
loop only ran once when testing the command. I can't figure out why the output shows:
+++ exited with 0 +++
Total count is zero
I've also asked AI for help and modified parts of the parse_strace_line function and the regular expression pattern, but with little success.
What actually happened: The statistics remain empty (Total count is zero), indicating the parsing or data extraction failed.
What I expected: The code should correctly parse system call timestamps from strace output, accumulate their times, and calculate the ratio of each call's time to the total.
As a student, I'd greatly appreciate it if you can point out the errors in my code and explain how to correctly read system call times and convert them into the ratio.
\wor\sregcomp()andregexec()errors and report them, for starters.straceoutput? With-T, the exec time comes at the end. I might just usestrrchrto find the space before the time. Otherwise, your regex has to handle all syscall line formats. I've never used-Tbefore. I've always used-tttand taken the difference between lines. Not quite the same, but easier to parse because the time appears first. With just-T, time is last token and first token issyscall(...). So, easier without regex. So, again, which is more important: parsingstraceor using learning regex?