linux内核是如何启动用户空间程序（shell篇）

首先在解释这个问题之前，分享一点基础知识：linux内核是一个操作系统内核，操作系统本身也是程序。用户空间程序也是一个程序。《linux内核是如何启动用户空间程序》这个标题似乎有点误导之意，但似乎无伤大雅。只要我们明白程序是被CPU识别执行的即可，哈哈。

小生在前面有两篇文章：

《linux内核如何启动用户空间进程【01】》

《linux内核如何启动用户空间进程【2】》

同样描述了linux内核是如何启动用户空间进程的，文章内容是基于linux内核start_kernel()函数启动过程的后期操作。但本文会从shell的角度出发，另有一番风味嘞。

站在用户角度来看，有多种方式启动应用程序。例如：可以从shell运行程序或双击应用程序图标来运行程序。不管以哪种方式启动应用程序，linux内核都会去处理这个应用程序的启动过程。

在本文中，将考虑从shell启动应用程序的方式。从shell启动一个应用程序的标准方法如下：启动一个终端模拟器应用程序，键入应用程序名称，并传递（或不传递）参数给这个应用程序，例如：

./demo

接下来，让我们思考一下从shell启动一个应用程序时linux内核发生了什么？当我们写程序名时shell做了什么？linux内核又做了什么？

我的默认shell是bash，所以本文将考虑bash shell如何启动程序。bash shell以及我们用C语言编写的程序都是从main()函数开始执行具体操作。如果查看bash shell的源代码，我们将在shell.c源代码文件中找到main()函数，代码如下（代码比较长，有删减）：

/* Shell entry point (abridged excerpt; most of the start-up code is omitted
   below).  The part shown here finishes the set-up that applies only to
   interactive shells (mail checking, history, tty state), deals with
   pretty-print mode, and then runs reader_loop () before handing
   last_command_exit_value to exit_shell ().

   The definition uses old-style (K&R) parameter declarations; the
   preprocessor selects a two-argument or a three-argument (argc, argv, env)
   form depending on NO_MAIN_ENV_ARG. */
#if defined (NO_MAIN_ENV_ARG)
/* systems without third argument to main() */
int
main (argc, argv)
     int argc;
     char **argv;
#else /* !NO_MAIN_ENV_ARG */
int
main (argc, argv, env)
     int argc;
     char **argv, **env;
#endif /* !NO_MAIN_ENV_ARG */
{
  register int i;
  int code, old_errexit_flag;
#if defined (RESTRICTED_SHELL)
  int saverst;
#endif
  volatile int locally_skip_execution;
  volatile int arg_index, top_level_arg_index;
#ifdef __OPENNT
  /* On __OPENNT, env is declared locally and taken from environ. */
  char **env;

  env = environ;
#endif /* __OPENNT */

  /* NOTE(review): USE_VAR is defined elsewhere; presumably it keeps these
     variables "used" so they stay valid across non-local jumps -- confirm
     against its definition. */
  USE_VAR(argc);
  USE_VAR(argv);
  USE_VAR(env);
  USE_VAR(code);
  USE_VAR(old_errexit_flag);
#if defined (RESTRICTED_SHELL)
  USE_VAR(saverst);
#endif

  /* (a large amount of code is omitted here) */
  /* (a large amount of code is omitted here) */
  /* (a large amount of code is omitted here) */
  
  /* Do the things that should be done only for interactive shells. */
  if (interactive_shell)
  {
      /* Set up for checking for presence of mail. */
      reset_mail_timer ();
      init_mail_dates ();

#if defined (HISTORY)
      /* Initialize the interactive history stuff. */
      bash_initialize_history ();
      /* Don't load the history from the history file if we've already
  saved some lines in this session (e.g., by putting `history -s xx'
  into one of the startup files). */
      if (shell_initialized == 0 && history_lines_this_session == 0)
 load_history ();
#endif /* HISTORY */

      /* Initialize terminal state for interactive shells after the
  .bash_profile and .bashrc are interpreted. */
      get_tty_state ();
  }

  /* The label exists only when ONESHOT is not defined; the statement that
     jumps to it is not visible in this excerpt. */
#if !defined (ONESHOT)
   read_and_execute:
#endif /* !ONESHOT */

  /* From here on the shell counts as initialized; the history load above is
     guarded by shell_initialized == 0. */
  shell_initialized = 1;

  /* Pretty-print mode is ignored (with a warning) in interactive shells. */
  if (pretty_print_mode && interactive_shell)
  {
    internal_warning (_("pretty-printing mode ignored in interactive shells"));
    pretty_print_mode = 0;
  }
  /* Otherwise, in pretty-print mode, pass the result of pretty_print_loop ()
     to exit_shell () instead of entering the reader loop. */
  if (pretty_print_mode)
    exit_shell (pretty_print_loop ());

  /* Read commands until exit condition. */
  reader_loop ();
  exit_shell (last_command_exit_value);
}

在bash的主线程循环开始工作之前，这个函数做了许多事情：

（1）检查和尝试去打开/dev/tty。

（2）检查在调试模式下运行的shell。

（3）解析命令行传递过来的参数。

（4）读取shell环境变量。

（5）加载.bashrc、.profile和其他的配置文件。

在以上操作后，将可以看到reader_loop()函数的调用。这个函数定义在（eval.c）源代码文件中，它代表主线程循环，总而言之，这个函数将读取并执行命令，如下代码：

/* Read-and-execute loop.  While EOF_Reached is zero, each iteration:
     1. records the jump target top_level and handles the case where control
        came back here through a jump (code != NOT_JUMPED);
     2. calls read_command (); and
     3. on success, passes the parsed command (global_command) to
        execute_command ().
   The global indirection_level is incremented on entry and decremented on
   exit.  Returns last_command_exit_value. */
int reader_loop ()
{
    int our_indirection_level;
    /* The pointer itself is volatile; it is read again after a jump back to
       top_level (see the DISCARD case and exec_done). */
    COMMAND * volatile current_command;

    USE_VAR(current_command);

    current_command = (COMMAND *)NULL;

    /* Remember the level for this invocation so it can be restored after a
       jump back to top_level. */
    our_indirection_level = ++indirection_level;

    if (just_one_command)
      reset_readahead_token ();

    while (EOF_Reached == 0)
    {
        int code;

        /* code is NOT_JUMPED on the direct path; any other value is handled
           by the switch below as a throw to top_level. */
        code = setjmp_nosigs (top_level);

  #if defined (PROCESS_SUBSTITUTION)
        unlink_fifo_list ();
  #endif /* PROCESS_SUBSTITUTION */

        /* XXX - why do we set this every time through the loop?  And why do
     it if SIGINT is trapped in an interactive shell? */
        if (interactive_shell && signal_is_ignored (SIGINT) == 0 && signal_is_trapped (SIGINT) == 0)
    set_signal_handler (SIGINT, sigint_sighandler);

        if (code != NOT_JUMPED)
    {
      indirection_level = our_indirection_level;

      switch (code)
        {
          /* Some kind of throw to top_level has occurred. */
        case FORCE_EOF:
        case ERREXIT:
        case EXITPROG:
          /* Terminate the loop: drop the command reference, flag EOF and
             continue at exec_done. */
          current_command = (COMMAND *)NULL;
          if (exit_immediately_on_error)
      variable_context = 0; /* not in a function */
          EOF_Reached = EOF;
          goto exec_done;

        case DISCARD:
          /* Make sure the exit status is reset to a non-zero value, but
       leave existing non-zero values (e.g., > 128 on signal)
       alone. */
          if (last_command_exit_value == 0)
      set_exit_status (EXECUTION_FAILURE);
          /* When subshell_environment is set, DISCARD also ends the loop. */
          if (subshell_environment)
      {
        current_command = (COMMAND *)NULL;
        EOF_Reached = EOF;
        goto exec_done;
      }
          /* Obstack free command elements, etc. */
          if (current_command)
      {
        dispose_command (current_command);
        current_command = (COMMAND *)NULL;
      }

          restore_sigmask ();
          break;

        default:
          /* Any other jump code is reported as CMDERR_BADJUMP. */
          command_error ("reader_loop", CMDERR_BADJUMP, code, 0);
        }
    }

        executing = 0;
        if (temporary_env)
    dispose_used_env_vars ();

  #if (defined (ultrix) && defined (mips)) || defined (C_ALLOCA)
        /* Attempt to reclaim memory allocated with alloca (). */
        (void) alloca (0);
  #endif

        /* A zero return from read_command () is the success path; the
           command it produced is taken from global_command. */
        if (read_command () == 0)
        {
          /* Non-interactive read-but-don't-execute mode: report success and
             discard the command without running it. */
          if (interactive_shell == 0 && read_but_dont_execute)
          {
            set_exit_status (EXECUTION_SUCCESS);
            dispose_command (global_command);
            global_command = (COMMAND *)NULL;
          }
          /* Intentional assignment (not ==): copy global_command into
             current_command and continue only when it is non-null. */
          else if (current_command = global_command)
          {
            global_command = (COMMAND *)NULL;
            /* When interactive and ps0_prompt is set, write the decoded
               prompt string to stderr before the command is executed. */
            if (interactive && ps0_prompt)
            {
              char *ps0_string;

              ps0_string = decode_prompt_string (ps0_prompt);
              if (ps0_string && *ps0_string)
                {
                  fprintf (stderr, "%s", ps0_string);
                  fflush (stderr);
                }
              free (ps0_string);
            }

          current_command_number++;

          executing = 1;
          stdin_redir = 0;

          execute_command (current_command);

          /* Also the target of the goto statements in the jump-handling
             switch near the top of the loop. */
exec_done:
          QUIT;

          if (current_command)
          {
            dispose_command (current_command);
            current_command = (COMMAND *)NULL;
          }
        }
      }
      else
      {
        /* Parse error, maybe discard rest of stream if not interactive. */
        if (interactive == 0)
          EOF_Reached = EOF;
      }
      /* With just_one_command set, the loop ends after a single pass. */
      if (just_one_command)
        EOF_Reached = EOF;
    }
    
    indirection_level--;
    return (last_command_exit_value);
  }

当reader_loop函数进行检查并读取给定的程序名称和参数时，它会从（execute_cmd.c）源代码文件调用execute_command()函数。execute_command函数的函数调用链如下：

execute_command
--> execute_command_internal
----> execute_simple_command
------> execute_disk_command
--------> shell_execve

在这个过程的最后，shell_execve()函数会调用execve()系统调用：

execve (command, args, env);

execve系统调用函数原型如下：

int execve(const char *filename, char *const argv [], char *const envp[]);

该系统调用会按照给定的文件名、参数和环境变量执行程序。在本文的例子中，它是第一个、也是唯一一个被用到的系统调用。

从上文分析可见，一个用户应用程序(本文是bash)的运行过程，到最后会进入linux的系统调用，下一步则是linux内核操作，这部分操作的入口则是execve()系统调用。execve系统调用本质则是调用do_execveat_common()函数（此后就详见《linux内核如何启动用户空间进程【2】》这篇文章了）。

可见，又回到了do_execveat_common()。有意思！

655100linux内核是如何启动用户空间程序（shell篇）

linux内核是如何启动用户空间程序（shell篇）

文章评论