c - forkall 没有按预期工作

标签 c pthreads fork

我正在尝试编写为多线程应用程序设置检查点(checkpoint)的代码。由于 fork 只会在子进程中复制调用它的那个线程,不适用于此类应用程序,因此我改用 Solaris 提供的 forkall 函数来实现这一点。

以下是包含 checkpoint 和 restart_from_checkpoint 两个函数的代码及其用法示例。为了安全起见,我只在两次屏障(barrier)同步之间调用这些函数。

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <semaphore.h>
#include <stdint.h>
#include <pthread.h>

/* Number of worker threads; also the participant count given to `barrier`. */
#define NOFTHREADS 4

/* Forward declarations; both functions are defined after main. */
pid_t checkpoint();
void restart_from_checkpoint( pid_t pid );

/* Initialised in checkpoint(); the forked child waits on it and
   sig_handler posts it. */
sem_t sem;
/* Synchronises the NOFTHREADS workers inside threadFunc. */
pthread_barrier_t barrier;

/*
 * SIGUSR1 handler (installed in main): announces itself and posts `sem`,
 * which releases the thread blocked in checkpoint().
 *
 * NOTE(review): stdio calls are not on the POSIX async-signal-safe list.
 * The message is kept on stdio so it stays ordered with the rest of the
 * program's buffered output.
 */
void sig_handler(int signum)
{
    (void)signum;   /* main routes only SIGUSR1 here, so the number is unused */

    fputs( ">> sem_post!\n", stdout );
    sem_post( &sem );
}

/* Pid returned by checkpoint(); written by thread 0 in threadFunc. */
pid_t child_pid;
/* Pid of the original process, recorded in main before the workers start. */
pid_t par_pid;

/*
 * Worker body shared by all NOFTHREADS threads.
 *
 * pParam points to this thread's index (an int slot owned by main).
 * Each thread runs 20 iterations. On every even iteration all threads meet
 * at `barrier` twice; between the two waits thread 0 does the
 * checkpoint/restart work while the other threads wait at the second
 * barrier:
 *   - i == 6:  take a checkpoint and remember the child's pid in child_pid
 *   - i == 12: in the original process only, wake the checkpoint and exit
 *
 * Always returns NULL.
 */
void *threadFunc( void *pParam )
{
    /* main passes the address of an int, so read it back as an int. */
    const int tid = *(const int *)pParam;
    int i;

    for( i = 0; i < 20; i++ )
    {
        if ( i % 2 == 0 )
        {
            pthread_barrier_wait(&barrier);
            if ( tid == 0 && i == 6 )
            {
                child_pid = checkpoint();
            }

            /* getpid() is deliberately re-evaluated on every use: after
               checkpoint() this same code runs in both the original
               process and the forked copy. pid_t has no printf conversion
               of its own, hence the cast to long. */
            if ( tid == 0 )
            {
                printf( "p%ld: >> i = %d\n", (long)getpid(), i );
            }

            if ( tid == 0 && i == 12 && ( getpid() == par_pid ) )
            {
                restart_from_checkpoint( child_pid );
            }
            pthread_barrier_wait(&barrier);
        }
        printf( "p%ld: t%d: i%d\n", (long)getpid(), tid, i );
    }

    /* A start routine must return a value; nothing is reported to the joiner. */
    return NULL;
}

/*
 * Program entry: installs the SIGUSR1 handler, sets up `barrier` for
 * NOFTHREADS participants, records the original pid in par_pid, then starts
 * the workers and waits for all of them.
 *
 * Returns 0 on success, EXIT_FAILURE if any setup step or join fails.
 */
int main( int argc, char *argv[] )
{
  int i;
  int rc;
  int status = 0;
  pthread_t hThread[NOFTHREADS];
  int index[NOFTHREADS];

  (void)argc;
  (void)argv;

  if ( signal(SIGUSR1, sig_handler) == SIG_ERR )
  {
      perror( "signal" );
      return EXIT_FAILURE;
  }

  /* pthread_* calls return the error number directly instead of setting errno. */
  rc = pthread_barrier_init (&barrier, NULL, NOFTHREADS);
  if ( rc != 0 )
  {
      fprintf( stderr, "pthread_barrier_init failed: error %d\n", rc );
      return EXIT_FAILURE;
  }

  par_pid = getpid();

  for( i = 0; i < NOFTHREADS; i++ )
  {
      index[i] = i;
      rc = pthread_create( &hThread[i], NULL, threadFunc, &index[i] );
      if ( rc != 0 )
      {
          /* With a worker missing, the others would wait at the barrier
             forever and the join loop below would read an uninitialised
             handle, so give up here. */
          fprintf( stderr, "pthread_create failed for thread %d: error %d\n", i, rc );
          return EXIT_FAILURE;
      }
  }
  for( i = 0; i < NOFTHREADS; i++ )
  {
      rc = pthread_join( hThread[i], NULL );
      if ( rc != 0 )
      {
          fprintf( stderr, "pthread_join failed for thread %d: error %d\n", i, rc );
          status = EXIT_FAILURE;
      }
  }

  return status;
}

/*
 * Takes a checkpoint by forking a copy of the process with forkall().
 *
 * The parent returns immediately with the child's pid. The child blocks on
 * `sem` until sig_handler posts it (i.e. until SIGUSR1 arrives), then
 * returns and carries on from the call site.
 *
 * Returns the child's pid in the parent, 0 in the resumed child, and -1 if
 * the semaphore could not be initialised or forkall() failed.
 *
 * NOTE(review): stdio is not flushed before the fork, so any output still
 * sitting in stdio buffers exists in both processes afterwards.
 */
pid_t checkpoint(void)
{
    pid_t pid;

    if ( sem_init( &sem, 0, 0 ) == -1 )
    {
        perror("sem_init");
        return -1;
    }

    pid = forkall();
    switch (pid)
    {
    case -1:
        perror("forkall");
        break;
    case 0:         // child process: sleep here until asked to resume
        sem_wait( &sem );
        printf( ">> passed sem_wait!\n" );
        break;
    default:        // parent process: nothing more to do
        break;
    }

    /* Every path hands a defined value back; the caller stores it. */
    return pid;
}

/*
 * Resumes the checkpoint held by process `pid` and terminates the caller.
 *
 * Sends SIGUSR1 to `pid` and then exits with status 0. If `pid` does not
 * name a single process, or the signal cannot be sent, the problem is
 * reported on stderr and the function returns instead, so the caller keeps
 * running rather than exiting with nothing left to take over.
 */
void restart_from_checkpoint( pid_t pid )
{
    printf( ">> restart_from_checkpoint!\n" );

    /* kill() gives 0 and negative pids a special meaning (process groups,
       or every process the caller may signal); a failed checkpoint must
       never fan SIGUSR1 out like that. */
    if ( pid <= 0 )
    {
        fprintf( stderr, "restart_from_checkpoint: no valid checkpoint (pid %ld)\n",
                 (long)pid );
        return;
    }

    if ( kill( pid, SIGUSR1 ) == -1 )
    {
        perror( "kill" );
        return;
    }

    printf( ">> exiting!\n" );
    exit( 0 );
    /* Deliberately unreachable: exit() does not return. */
    printf( ">> should not had been printed!\n" );
}

下面是打印在屏幕上的输出...

p1159: >> i = 0
p1159: t0: i0
p1159: t0: i1
p1159: t1: i0
p1159: t1: i1
p1159: t2: i0
p1159: t2: i1
p1159: t3: i0
p1159: t3: i1
p1159: >> i = 2
p1159: t2: i2
p1159: t2: i3
p1159: t0: i2
p1159: t0: i3
p1159: t3: i2
p1159: t3: i3
p1159: t1: i2
p1159: t1: i3
p1159: >> i = 4
p1159: t0: i4
p1159: t0: i5
p1159: t2: i4
p1159: t2: i5
p1159: t1: i4
p1159: t1: i5
p1159: t3: i4
p1159: t3: i5
p1159: >> i = 6
p1159: t2: i6
p1159: t2: i7
p1159: t0: i6
p1159: t0: i7
p1159: t3: i6
p1159: t3: i7
p1159: t1: i6
p1159: t1: i7
p1159: >> i = 8
p1159: t1: i8
p1159: t1: i9
p1159: t2: i8
p1159: t2: i9
p1159: t3: i8
p1159: t3: i9
p1159: t0: i8
p1159: t0: i9
p1159: >> i = 10
p1159: t1: i10
p1159: t1: i11
p1159: t2: i10
p1159: t2: i11
p1159: t3: i10
p1159: t3: i11
p1159: t0: i10
p1159: t0: i11
p1159: >> i = 12
>> restart_from_checkpoint!
>> exiting!
p1159: >> i = 0
p1159: t0: i0
p1159: t0: i1
p1159: t1: i0
p1159: t1: i1
p1159: t2: i0
p1159: t2: i1
p1159: t3: i0
p1159: t3: i1
p1159: >> i = 2
p1159: t2: i2
p1159: t2: i3
p1159: t0: i2
p1159: t0: i3
p1159: t3: i2
p1159: t3: i3
p1159: t1: i2
p1159: t1: i3
p1159: >> i = 4
p1159: t0: i4
p1159: t0: i5
p1159: t2: i4
p1159: t2: i5
p1159: t1: i4
p1159: t1: i5
p1159: t3: i4
p1159: t3: i5
>> sem_post!
>> passed sem_wait!
p1160: >> i = 6
p1160: t0: i6
p1160: t0: i7
p1160: t2: i6
p1160: t2: i7
p1160: t3: i6
p1160: t3: i7
p1160: t1: i6
p1160: t1: i7
p1160: >> i = 8
p1160: t3: i8
p1160: t3: i9
p1160: t2: i8
p1160: t2: i9
p1160: t1: i8
p1160: t1: i9
p1160: t0: i8
p1160: t0: i9
p1160: >> i = 10
p1160: t3: i10
p1160: t3: i11
p1160: t1: i10
p1160: t1: i11
p1160: t0: i10
p1160: t0: i11
p1160: t2: i10
p1160: t2: i11
p1160: >> i = 12
p1160: t3: i12
p1160: t3: i13
p1160: t0: i12
p1160: t0: i13
p1160: t1: i12
p1160: t1: i13
p1160: t2: i12
p1160: t2: i13
p1160: >> i = 14
p1160: t1: i14
p1160: t1: i15
p1160: t2: i14
p1160: t2: i15
p1160: t0: i14
p1160: t0: i15
p1160: t3: i14
p1160: t3: i15
p1160: >> i = 16
p1160: t0: i16
p1160: t0: i17
p1160: t3: i16
p1160: t3: i17
p1160: t1: i16
p1160: t1: i17
p1160: t2: i16
p1160: t2: i17
p1160: >> i = 18
p1160: t1: i18
p1160: t1: i19
p1160: t2: i18
p1160: t2: i19
p1160: t0: i18
p1160: t0: i19
p1160: t3: i18
p1160: t3: i19

请注意,父进程 ID 为 1159,而子进程 ID 为 1160。现在我的问题是:为什么父进程退出之后,程序又从头重新输出了一遍,直到 i == 6(即调用 checkpoint 的位置)?请参见 ">> exiting!" 和 ">> sem_post!" 之间的输出。父进程不应该马上退出吗?我在这里做错了什么?

最佳答案

printf 函数不会立即(以同步方式)将文本输出到屏幕或文件。它会先把要打印的文本存入 libc 的缓冲区,缓冲区要到稍后某个时刻(行缓冲时遇到 '\n' 字符,或者缓冲区中的数据已满时)才会被刷新(写入屏幕或文件)。

fork 之后,所有缓冲区都会从父进程复制到子进程。如果此时缓冲区中还有尚未输出的文本,父子两个进程都会各自刷新自己的缓冲区,于是这些文本会被输出两次。

您应该考虑在 fork() 之前调用 fflush(),或者通过 setvbuf 设置不同的缓冲方式,例如禁用缓冲。

关于c - forkall 没有按预期工作,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/6712090/

相关文章:

c - 调用 pthread_exit 时如何从线程例程返回 void *

c - POSIX C 中 fork() 的重量更轻的替代品?

子进程因错误而终止

c++ - Clang 文档页面中的代码片段

c - 覆盖C中文件的内容

c++ - 客户端断开连接时 TCP 套接字选择服务器段错误

c - 3只猴子,打印值必须与信号量同步

c - execvp 和 fork 未按预期工作

objective-c - Mac环境下objective-c如何使用USB/HID接口(interface)

shell - 使用 SIGTSTP 挂起子进程后,shell 没有响应