Tinyhttpd源码解析

发表于 2019-05-28 更新于 2021-10-23 分类于源码解析

Tinyhttpd是一个使用C语言编写的超轻量级的http服务器软件，本文对 Tinyhttpd 源码进行了剖析，有助于理解 web 服务器的原理

Tinyhttpd是一个超轻量级的http服务器软件，使用C语言编写，代码仅有500行左右，实际生产环境中并不实用，但通过研究这短小精悍的500行代码，有助于我们了解web服务器的本质，弄清楚在访问一个网页的过程中，服务器都做了哪些工作。

源码下载＆编译

TinyHttpd的原版代码下载

https://sourceforge.net/projects/tinyhttpd/

下载完成，解压后会得到这样一些文件：

目录结构

主要需要看的是 httpd.c 里边的源码。
在这之前，先make把代码编译一下，看一下执行后的效果。

编译的时候会报错，主要是一些线程函数的原型和 Makefile 里的链接库错误，下面给出我的解决方案

把 Makefile 文件中的编译选项中的 -lsocket 去掉。
436行namelen和491行client_name_len的变量类型改为 socklen_t

accept_request用作线程的处理函数，函数原型需做出改变。

// Thread entry point: the handler is passed to pthread_create, so it takes and
// returns void *.
void *accept_request(void *);
void *accept_request(void * arg)
{
	// To change the original code as little as possible, the parameter is renamed to arg
    int client = *(int*)arg;
    ...
    return NULL;
}

496行client_sock参数类型错误，改为&client_sock
（可选）改端口号，TinyHttpd 默认的是动态绑定端口号，为方便后面使用改为固定 8080
u_short port = 8080; // main函数，480行

（可选）改IP地址

// startup 函数 433行
name.sin_addr.s_addr = inet_addr("127.0.0.1");

至此，程序应该能正常编译通过，下面是示例程序的运行效果，

浏览器输入http://127.0.0.1:8080/index.html（ip和端口号要以自己的实际情况为准）
示例1

示例2

ps: 此处如果第二个页面显示不出来，主要是由于缺少或找不到执行 perl 脚本的程序导致，安装 perl 并将 htdocs/color.cgi 的第一行改为自己电脑上的 perl 所在位置即可。

Tinyhttpd 运行流程

main函数，调用 startup 创建监听套接字开始监听，循环接收客户端连接。
接收到一个客户端的连接后，创建一个新的线程，进入 accept_request 函数与客户端进行交互。
根据请求的类型，判断后续处理的方式，如果是 POST 请求或带参数的 GET 请求，则采取 CGI 的处理方式（execute_cgi 函数），否则，采取普通网页的处理方式（serve_file函数）。
数据传输完毕，关闭连接，线程退出。

/*
 * Thread trampoline used by main().
 *
 * arg points to a heap-allocated int holding the connected socket. The value
 * is copied into this thread's own stack and the allocation is released
 * before accept_request() runs, so the handler never looks at memory that
 * the accept loop may be rewriting.
 */
static void *accept_request_thread(void *arg)
{
    int client = *(int *)arg;

    free(arg);
    return accept_request(&client);
}

/*
 * Program entry point: create the listening socket with startup(), then
 * accept connections forever and hand each one to its own detached thread.
 *
 * Returns 0, although the accept loop is not expected to terminate.
 */
int main(void)
{
    int server_sock = -1;
    u_short port = 8080;
    struct sockaddr_in client_name;
    pthread_t newthread;

    server_sock = startup(&port);
    printf("httpd running on port %d\n", port);

    while (1)
    {
        /* address_len is a value-result argument: accept() overwrites it,
         * so it has to be reset before every call. */
        socklen_t client_name_len = sizeof(client_name);
        int client_sock;
        int *client_arg;
        int rc;

        client_sock = accept(server_sock,
                             (struct sockaddr *)&client_name,
                             &client_name_len);
        if (client_sock == -1)
            error_die("accept");

        /* Give every thread its own copy of the descriptor. Passing the
         * address of a variable that the next accept() overwrites lets two
         * threads end up serving the same socket. */
        client_arg = malloc(sizeof *client_arg);
        if (client_arg == NULL)
        {
            perror("malloc");
            close(client_sock);
            continue;
        }
        *client_arg = client_sock;

        /* pthread_create reports failure through its return value, not
         * through errno, so perror() would print an unrelated message. */
        rc = pthread_create(&newthread, NULL,
                            accept_request_thread, client_arg);
        if (rc != 0)
        {
            fprintf(stderr, "pthread_create: %s\n", strerror(rc));
            free(client_arg);
            close(client_sock);
            continue;
        }

        /* Nobody joins the worker; detach it so its resources are
         * reclaimed when it returns. */
        pthread_detach(newthread);
    }

    close(server_sock);

    return(0);
}

/*
 * Return non-zero when url contains a ".." path segment, i.e. ".." that is
 * delimited by '/' or by the start/end of the string. Names that merely
 * contain two dots (for example "a..b.html") are not matched.
 */
static int url_has_parent_segment(const char *url)
{
    const char *p = url;

    while ((p = strstr(p, "..")) != NULL)
    {
        int starts_segment = (p == url) || (p[-1] == '/');
        int ends_segment = (p[2] == '\0') || (p[2] == '/');

        if (starts_segment && ends_segment)
            return 1;
        p += 2;
    }
    return 0;
}

/*
 * Thread routine that serves one HTTP request.
 *
 * arg points to an int holding the connected client socket. The request
 * line is parsed into a method and a URL; only GET and POST are handled.
 * POST requests, GET requests with a query string, and files with any
 * execute bit set are passed to execute_cgi(); everything else goes to
 * serve_file(). The socket is closed on every path before returning.
 *
 * Always returns NULL.
 */
void* accept_request(void* arg)
{
    int client = *(int*)arg;
    char buf[1024];
    int numchars;
    char method[255];
    char url[255];
    char path[512];
    size_t i, j;
    struct stat st;
    int cgi = 0;    /* non-zero when the request must be run as a CGI program */
    int found;
    char *query_string = NULL;

    numchars = get_line(client, buf, sizeof(buf));

    /* Method token. Stop at the terminating NUL as well as at whitespace so
     * a short or empty line never makes us read past the received text
     * (buf is used as a C string by the strcmp() calls below). */
    i = 0; j = 0;
    while ((j < sizeof(buf)) && (buf[j] != '\0') && !ISspace(buf[j])
           && (i < sizeof(method) - 1))
    {
        method[i] = buf[j];
        i++; j++;
    }
    method[i] = '\0';

    if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))
    {
        unimplemented(client);
        close(client);  /* this path used to leak the descriptor */
        return NULL;
    }

    if (strcasecmp(method, "POST") == 0)
        cgi = 1;

    /* URL token. The bound is tested before buf[j] is read. */
    i = 0;
    while ((j < sizeof(buf)) && ISspace(buf[j]))
        j++;
    while ((j < sizeof(buf)) && (buf[j] != '\0') && !ISspace(buf[j])
           && (i < sizeof(url) - 1))
    {
        url[i] = buf[j];
        i++; j++;
    }
    url[i] = '\0';

    /* For GET, split "path?query" in place; a query string implies CGI. */
    if (strcasecmp(method, "GET") == 0)
    {
        query_string = url;
        while ((*query_string != '?') && (*query_string != '\0'))
            query_string++;
        if (*query_string == '?')
        {
            cgi = 1;
            *query_string = '\0';
            query_string++;
        }
    }

    /* url is at most 254 characters, so path (512 bytes) has room for the
     * "htdocs" prefix and either index suffix appended below. */
    snprintf(path, sizeof(path), "htdocs%s", url);
    if (path[strlen(path) - 1] == '/')
        strcat(path, "index.html");

    /* Refuse URLs that would climb out of htdocs via "..". */
    found = !url_has_parent_segment(url) && (stat(path, &st) == 0);
    if (found && ((st.st_mode & S_IFMT) == S_IFDIR))
    {
        strcat(path, "/index.html");
        /* Re-stat the file itself: the directory's own mode normally has
         * execute bits set, which would make index.html look like a CGI
         * program. */
        found = (stat(path, &st) == 0);
    }

    if (!found)
    {
        /* Read and discard the remaining request headers. */
        while ((numchars > 0) && strcmp("\n", buf))
            numchars = get_line(client, buf, sizeof(buf));
        not_found(client);
    }
    else
    {
        if ((st.st_mode & S_IXUSR) ||
                (st.st_mode & S_IXGRP) ||
                (st.st_mode & S_IXOTH)    )
            cgi = 1;
        if (!cgi)
            serve_file(client, path);
        else
            execute_cgi(client, path, method, query_string);
    }

    close(client);
    return NULL;
}

对普通网页的处理

对于普通网页，TinyHttpd 会尝试打开该文件；
如果打开成功，会将响应头和文件内容一并发给客户端进行响应；
如果打开失败，会将网页找不到的错误发给客户端，这就是 404 Not Found 出现的原因。

/*
 * Send a regular file to the client.
 *
 * client   - connected client socket
 * filename - path of the file to send
 *
 * The remaining request headers are read and discarded first. If the file
 * cannot be opened, not_found() is called; otherwise headers() and cat()
 * are called with the open stream, which is then closed.
 */
void serve_file(int client, const char *filename)
{
    FILE *resource = NULL;
    int numchars = 1;
    char buf[1024];

    /* Seed buf with a non-empty line so the loop below runs at least once. */
    buf[0] = 'A'; buf[1] = '\0';
    while ((numchars > 0) && strcmp("\n", buf))
        numchars = get_line(client, buf, sizeof(buf));

    resource = fopen(filename, "r");
    if (resource == NULL)
    {
        /* Nothing was opened, so there is nothing to close:
         * fclose(NULL) is undefined behavior. */
        not_found(client);
        return;
    }

    headers(client, filename);  /* response headers */
    cat(client, resource);      /* response body */
    fclose(resource);
}

对 CGI程序的处理

对于可执行的CGI程序，TinyHttpd 会 fork 一个进程进行执行。
通过两个管道（cgi_output、cgi_input）进行父子进程通信；
子进程（CGI程序）将标准输入输出重定向至两个管道，这样的效果是：CGI程序中的标准输入会从 cgi_input 管道读取，标准输出会输出到 cgi_output 管道，所以在编写CGI程序时，只需要通过相应编程语言的输入输出函数进行操作，而不需要额外的接口。
父进程会将 cgi_output 管道的内容发送给客户端，并将客户端提交的数据（POST 请求体）写入 cgi_input 管道，发送给子进程。

CGI示意图

/*
 * Run a CGI program and relay its output to the client.
 *
 * client       - connected client socket
 * path         - path of the program to execute
 * method       - request method; "GET" is handled as GET, anything else
 *                is handled as POST
 * query_string - text after '?' in the URL, exported as QUERY_STRING for GET
 *
 * The request headers are consumed first; for POST a non-negative
 * Content-Length is required, otherwise bad_request() is sent. The program
 * runs in a forked child whose stdin/stdout are connected to two pipes: the
 * parent writes the POST body into cgi_input and copies everything read
 * from cgi_output to the client. If a pipe or the fork cannot be created,
 * cannot_execute() is sent and nothing else is written.
 */
void execute_cgi(int client, const char *path,
                 const char *method, const char *query_string)
{
    char buf[1024];
    int cgi_output[2];
    int cgi_input[2];
    pid_t pid;
    int status;
    int i;
    char c;
    int numchars = 1;
    int content_length = -1;

    buf[0] = 'A'; buf[1] = '\0';
    if (strcasecmp(method, "GET") == 0)
    {
        /* GET: read and discard the remaining headers. */
        while ((numchars > 0) && strcmp("\n", buf))
            numchars = get_line(client, buf, sizeof(buf));
    }
    else    /* POST */
    {
        numchars = get_line(client, buf, sizeof(buf));
        while ((numchars > 0) && strcmp("\n", buf))
        {
            /* Compare only the 15-character header name; atoi() skips the
             * optional whitespace that follows the colon. */
            if (strncasecmp(buf, "Content-Length:", 15) == 0)
                content_length = atoi(buf + 15);
            numchars = get_line(client, buf, sizeof(buf));
        }
        if (content_length < 0)
        {
            bad_request(client);
            return;
        }
    }

    if (pipe(cgi_output) < 0)
    {
        cannot_execute(client);
        return;
    }
    if (pipe(cgi_input) < 0)
    {
        close(cgi_output[0]);
        close(cgi_output[1]);
        cannot_execute(client);
        return;
    }

    if ( (pid = fork()) < 0 )
    {
        close(cgi_output[0]);
        close(cgi_output[1]);
        close(cgi_input[0]);
        close(cgi_input[1]);
        cannot_execute(client);
        return;
    }
    if (pid == 0)  /* child: CGI script */
    {
        char meth_env[255];
        char query_env[1024];   /* large enough for any query taken from a 255-byte URL */
        char length_env[255];

        dup2(cgi_output[1], 1);  /* stdout -> cgi_output */
        dup2(cgi_input[0], 0);   /* stdin  <- cgi_input */
        close(cgi_output[0]);
        close(cgi_input[1]);
        snprintf(meth_env, sizeof(meth_env), "REQUEST_METHOD=%s", method);
        putenv(meth_env);
        if (strcasecmp(method, "GET") == 0)
        {
            /* GET: the data travels in the QUERY_STRING variable. */
            snprintf(query_env, sizeof(query_env), "QUERY_STRING=%s",
                     query_string != NULL ? query_string : "");
            putenv(query_env);
        }
        else
        {
            /* POST: only the length goes into the environment; the data
             * itself arrives on stdin through the cgi_input pipe. */
            snprintf(length_env, sizeof(length_env),
                     "CONTENT_LENGTH=%d", content_length);
            putenv(length_env);
        }
        execl(path, path, (char *)NULL);
        /* Only reached when execl failed. _exit avoids flushing stdio
         * buffers inherited from the parent into the client's response. */
        _exit(1);
    }
    else
    {    /* parent */
        close(cgi_output[1]);
        close(cgi_input[0]);

        /* The status line is sent only once the child exists, so a failure
         * above can never produce "200 OK" followed by an error response. */
        sprintf(buf, "HTTP/1.0 200 OK\r\n");
        send(client, buf, strlen(buf), 0);

        if (strcasecmp(method, "POST") == 0)
        {
            for (i = 0; i < content_length; i++)
            {
                /* Stop if the client hangs up or the pipe cannot be
                 * written, instead of forwarding a stale byte. */
                if (recv(client, &c, 1, 0) <= 0)
                    break;
                if (write(cgi_input[1], &c, 1) != 1)
                    break;
            }
        }
        /* Close the write end now so a program that reads stdin until EOF
         * is not left waiting while we wait for its output. */
        close(cgi_input[1]);

        while (read(cgi_output[0], &c, 1) > 0)
            send(client, &c, 1, 0);

        close(cgi_output[0]);
        waitpid(pid, &status, 0);
    }
}

源码下载＆编译

Tinyhttpd 运行流程

对 普通网页 的处理

对 CGI程序 的处理

对普通网页的处理

对 CGI程序的处理