实现一个简单的 Web 服务器
HTTP 服务器 sim
方便起见, 将服务器称为 sim, 可以为本地目录的静态文件提供服务.
支持多个同时的连接( 由不同的客户端同时发起).
解析每个收到的 HTTP 请求, 并作出合适的 HTTP 回应.
只支持 GET 请求, 因此只需解析请求的第一行, 即请求行(request line).
如果可以, 回应中应包含请求的文件.
产品级的服务器要考虑多层次的安全问题, 对于文件存取, 资源定位, 要绝对小心.
服务器 sim 的架构
服务器 sim 为每个已连接的客户建立一个独立的结构.
这些独立的结构组成一个链表.
结构中存储的信息包括每个客户端的地址, 套接字, 收到的数据等.
编写了多个辅助函数用于维护这个链表.
如, 添加新客户端, 删除客户端, 等待客户数据, 通过套接字查找客户, 传送客户请求的文件, 发送错误消息.
服务器 sim 在主循环中等待新连接或新数据.
当收到新数据, 检查数据是否构成一个完整的 HTTP 请求.
如果收到了完整的请求, 服务器 sim 尝试发送请求的资源.
如果请求格式不正确或找不到请求的资源, 服务器 sim 向客户发送错误信息.
复杂的地方在于 多连接处理, HTTP 请求的解析过程, 错误处理.
服务器 sim 还要告诉客户其发送的资源的类型(media type).
内容类型
服务器 sim 应该在头部 Content-Type 中表明其所发送内容的正确类型(MIME type).
通过扩展名确定文件类型, 对于未知类型, 使用默认值 application/octet-stream.
扩展名 | 类型 |
---|---|
.css | text/css |
.csv | text/csv |
.gif | image/gif |
.htm | text/html |
.html | text/html |
.ico | image/x-icon |
.jpeg | image/jpeg |
.jpg | image/jpeg |
.js | application/javascript |
.json | application/json |
.png | image/png |
.pdf | application/pdf |
.svg | image/svg+xml |
.txt | text/plain |
未知类型, 默认值 | application/octet-stream |
代码 sim.http.c
三个文件 index.html, pet-small.png, test.hands-on.html, 存到 public 目录下, 用于测试.
- /*
- sim.http.c
- **$ ls
- network.h public sim.http.c
- **$ ls public
- index.html pet-small.png test.hands-on.html
- **$ gcc sim.http.c -o sim
- **$ ./sim
- ...
- 浏览器地址栏输入
- http://127.0.0.1:8080
- http://127.0.0.1:8080/test.hands-on.html
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdbool.h>
- #include "network.h"
- #define MAX_LISTEN 16 /* backlog value passed to listen() in create_socket() */
- #define MAX_REQUEST_SIZE 2047 /* most request bytes buffered per client; the request buffer holds one extra byte for '\0' */
- #define DATA_BUFF_SIZE 1024 /* size of the buffer serve_resource() uses for response headers and file chunks */
- #define ADDR_BUFF_SIZE 100 /* size of the buffers holding a client's textual address; also reused as the request-path length limit in serve_resource() */
- int create_socket(const char * host, const char * port); /* create, bind and listen; returns the listening socket */
- const char * get_content_type(const char * path); /* map the extension of path to a Content-Type value */
- struct client_info { /* State kept for one connected client; instances form a singly linked list. */
- socklen_t addr_length; /* size of addr; set to sizeof(addr) on allocation and passed to accept() */
- struct sockaddr_storage addr; /* peer address filled in by accept() */
- int socket; /* connected socket returned by accept() */
- char request[MAX_REQUEST_SIZE + 1]; /* bytes received so far; main() keeps it NUL-terminated */
- int bytes_recv; /* number of bytes currently stored in request */
- struct client_info * next; /* next client in the list, or NULL at the end */
- };
- struct client_info * get_client(struct client_info ** list_clients, int socket); /* find the client using socket, or allocate and prepend a new one */
- void drop_client(struct client_info ** list_clients, struct client_info * ptr_client); /* close the client's socket, unlink it from the list and free it */
- void get_client_addr(struct client_info * ptr_client, char * addr, int addr_size); /* write the client's numeric host address into addr */
- fd_set wait_on_clients(struct client_info * list_clients, int server); /* block in select() until the server or a client socket is readable */
- void send_400(struct client_info ** list_clients, struct client_info * ptr_client); /* send a 400 response, then drop the client */
- void send_404(struct client_info ** list_clients, struct client_info * ptr_client); /* send a 404 response, then drop the client */
- void serve_resource( /* send the file public<path> to the client, then drop the client */
- struct client_info ** list_clients,
- struct client_info * ptr_client, const char * path
- );
- int main(int argc, char ** argv) {
- struct client_info * list_clients = NULL;
- int server = create_socket(NULL, "8080");
- while (true) {
- fd_set reads = wait_on_clients(list_clients, server);
- if (FD_ISSET(server, &reads)) {
- struct client_info * client = get_client(&list_clients, -1);
- client->socket = accept(
- server,
- (struct sockaddr *) &(client->addr),
- &(client->addr_length)
- );
- if (client->socket < 0) {
- fprintf(stderr, "accept() failed, errno: %d, %s\n", errno, strerror(errno));
- return EXIT_FAILURE;
- }
- char buff_addr[ADDR_BUFF_SIZE];
- get_client_addr(client, buff_addr, ADDR_BUFF_SIZE);
- printf("New connection: %s ...\n", buff_addr);
- }
- struct client_info * client = list_clients;
- while (client) {
- struct client_info * next = client->next;
- if (FD_ISSET(client->socket, &reads)) {
- if (MAX_REQUEST_SIZE == client->bytes_recv) {
- send_400(&list_clients, client);
- continue;
- }
- int cnt_recv = recv(
- client->socket,
- client->request + client->bytes_recv,
- MAX_REQUEST_SIZE - client->bytes_recv,
- 0
- );
- if (cnt_recv < 1) {
- char buff_addr[ADDR_BUFF_SIZE];
- get_client_addr(client, buff_addr, ADDR_BUFF_SIZE);
- fprintf(stderr, "Unexpected disconnect: %s ...\n", buff_addr);
- drop_client(&list_clients, client);
- }
- else {
- client->bytes_recv += cnt_recv;
- client->request[client->bytes_recv] = '\0';
- char * end_request = strstr(client->request, "\r\n\r\n");
- if (end_request) {
- if (strncmp("GET /", client->request, 5)) {
- send_400(&list_clients, client);
- }
- else {
- char * path = client->request + 4;
- char * end_path = strstr(path, " ");
- if (end_path == NULL) {
- send_400(&list_clients, client);
- }
- else {
- *end_path = '\0';
- serve_resource(&list_clients, client, path);
- }
- }
- }
- }
- }
- client = next;
- }
- }
- printf("\nClosing socket ...\n");
- close(server);
- printf("Finished ...\n");
- return EXIT_SUCCESS;
- }
/*
 * Create a TCP/IPv4 socket, bind it to host:port and start listening.
 *
 * host: local address to bind to; main() passes NULL together with the
 *       AI_PASSIVE hint.
 * port: port number or service name, as a string.
 *
 * Returns the listening socket.  On any failure a message is written to
 * stderr and the process exits with EXIT_FAILURE.
 */
int create_socket(const char * host, const char * port) {
    printf("Configuring local address ...\n");
    struct addrinfo hints;
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_INET;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE;

    struct addrinfo * local_addr = NULL;
    int gai_rc = getaddrinfo(host, port, &hints, &local_addr);
    if (gai_rc != 0) {
        /* getaddrinfo() reports failure through its return value, not errno. */
        fprintf(stderr, "getaddrinfo() failed, error: %d, %s\n", gai_rc, gai_strerror(gai_rc));
        exit(EXIT_FAILURE);
    }

    printf("Creating socket ...\n");
    int sock_listen = socket(
        local_addr->ai_family, local_addr->ai_socktype, local_addr->ai_protocol
    );
    if (sock_listen < 0) {
        fprintf(stderr, "socket() failed, errno: %d, %s\n", errno, strerror(errno));
        exit(EXIT_FAILURE);
    }

    printf("Binding socket to local address ...\n");
    if (bind(sock_listen, local_addr->ai_addr, local_addr->ai_addrlen)) {
        fprintf(stderr, "bind() failed, errno: %d, %s\n", errno, strerror(errno));
        exit(EXIT_FAILURE);
    }
    freeaddrinfo(local_addr);

    printf("Listening ...\n");
    if (listen(sock_listen, MAX_LISTEN)) {
        fprintf(stderr, "listen failed, errno: %d, %s\n", errno, strerror(errno));
        exit(EXIT_FAILURE);
    }
    return sock_listen;
}
/*
 * Map a file path to the value to send in the Content-Type header.
 *
 * The extension is everything from the last '.' in path to the end of the
 * string, and it is compared case-sensitively against a fixed table.
 *
 * path: NUL-terminated file path; must not be NULL.
 *
 * Returns a pointer to a string literal (never NULL, never to be freed).
 * Paths with no '.' or with an unlisted extension yield
 * "application/octet-stream".
 */
const char * get_content_type(const char * path) {
    static const struct {
        const char * ext;
        const char * type;
    } known_types[] = {
        { ".css",  "text/css" },
        { ".csv",  "text/csv" },
        { ".gif",  "image/gif" },
        { ".htm",  "text/html" },
        { ".html", "text/html" },
        { ".ico",  "image/x-icon" },
        { ".jpeg", "image/jpeg" },
        { ".jpg",  "image/jpeg" },
        { ".js",   "application/javascript" },
        { ".json", "application/json" },
        { ".png",  "image/png" },
        { ".pdf",  "application/pdf" },
        { ".svg",  "image/svg+xml" },
        { ".txt",  "text/plain" },
    };

    const char * last_dot = strrchr(path, '.');
    if (last_dot) {
        for (size_t i = 0; i < sizeof known_types / sizeof known_types[0]; i++) {
            if (strcmp(last_dot, known_types[i].ext) == 0) {
                return known_types[i].type;
            }
        }
    }
    return "application/octet-stream";
}
- struct client_info * get_client(
- struct client_info ** list_clients, int socket
- ) {
- struct client_info * p = *list_clients;
- while (p) {
- if (p->socket == socket)
- break;
- p = p->next;
- }
- if (p) return p;
- p = calloc(1, sizeof(struct client_info));
- if (p == NULL) {
- fprintf(stderr, "out of memory ...\n");
- exit(EXIT_FAILURE);
- }
- p->addr_length = sizeof(p->addr);
- p->next = *list_clients;
- *list_clients = p;
- return p;
- }
- void drop_client(
- struct client_info ** list_clients, struct client_info * ptr_client
- ) {
- close(ptr_client->socket);
- struct client_info ** p = list_clients;
- while (*p) {
- if (*p == ptr_client) {
- *p = ptr_client->next;
- free(ptr_client);
- return;
- }
- p = &((*p)->next);
- }
- fprintf(stderr, "client not found ...\n");
- exit(EXIT_FAILURE);
- }
- void get_client_addr(struct client_info * ptr_client, char * addr, int addr_size) {
- char buff_addr[ADDR_BUFF_SIZE];
- getnameinfo(
- (struct sockaddr *) &(ptr_client->addr), ptr_client->addr_length,
- buff_addr, sizeof(buff_addr),
- NULL, 0,
- NI_NUMERICHOST
- );
- if (strlen(buff_addr) >= addr_size) {
- fprintf(stderr, "addr_size too small ...\n");
- exit(EXIT_FAILURE);
- }
- sprintf(addr, "%s", buff_addr);
- }
- fd_set wait_on_clients(struct client_info * list_clients, int server) {
- fd_set reads;
- FD_ZERO(&reads);
- FD_SET(server, &reads);
- int max_socket = server;
- struct client_info * p = list_clients;
- while (p) {
- FD_SET(p->socket, &reads);
- if (p->socket > max_socket)
- max_socket = p->socket;
- p = p->next;
- }
- if (select(max_socket+1, &reads, NULL, NULL, NULL) < 0) {
- fprintf(stderr, "select() failed, errno: %d, %s\n", errno, strerror(errno));
- exit(EXIT_FAILURE);
- }
- return reads;
- }
- void send_400(
- struct client_info ** list_clients, struct client_info * ptr_client
- ) {
- const char * res400 = "HTTP/1.1 400 Bad Request\r\n"
- "Connection: close\r\n"
- "Content-Length: 11\r\n\r\nBad Request";
- send(ptr_client->socket, res400, strlen(res400), 0);
- drop_client(list_clients, ptr_client);
- }
- void send_404(
- struct client_info ** list_clients, struct client_info * ptr_client
- ) {
- const char * res404 = "HTTP/1.1 404 Not Found\r\n"
- "Connection: close\r\n"
- "Content-Length: 9\r\n\r\nNot Found";
- send(ptr_client->socket, res404, strlen(res404), 0);
- drop_client(list_clients, ptr_client);
- }
- void serve_resource(
- struct client_info ** list_clients,
- struct client_info * ptr_client, const char * path
- ) {
- char buff_addr[ADDR_BUFF_SIZE];
- get_client_addr(ptr_client, buff_addr, ADDR_BUFF_SIZE);
- printf("Serve_resource %s %s ...\n", buff_addr, path);
- if (strcmp(path, "/") == 0)
- path = "/index.html";
- if (strlen(path) > ADDR_BUFF_SIZE) {
- send_400(list_clients, ptr_client);
- return;
- }
- if (strstr(path, "..")) {
- send_404(list_clients, ptr_client);
- return;
- }
- char full_path[128];
- sprintf(full_path, "public%s", path);
- FILE * fp = fopen(full_path, "rb");
- if (fp == NULL) {
- send_404(list_clients, ptr_client);
- return;
- }
- fseek(fp, 0L, SEEK_END);
- size_t res_content_len = ftell(fp);
- fseek(fp, 0L, SEEK_SET);
- const char * res_content_type = get_content_type(full_path);
- char buffer[DATA_BUFF_SIZE];
- sprintf(buffer, "HTTP/1.1 200 OK\r\n");
- send(ptr_client->socket, buffer, strlen(buffer), 0);
- sprintf(buffer, "Connection: close\r\n");
- send(ptr_client->socket, buffer, strlen(buffer), 0);
- sprintf(buffer, "Content-Length: %u\r\n", res_content_len);
- send(ptr_client->socket, buffer, strlen(buffer), 0);
- sprintf(buffer, "Content-Type: %s\r\n", res_content_type);
- send(ptr_client->socket, buffer, strlen(buffer), 0);
- sprintf(buffer, "\r\n");
- send(ptr_client->socket, buffer, strlen(buffer), 0);
- int cnt_read = fread(buffer, 1, DATA_BUFF_SIZE, fp);
- while (cnt_read) {
- send(ptr_client->socket, buffer, cnt_read, 0);
- cnt_read = fread(buffer, 1, DATA_BUFF_SIZE, fp);
- }
- fclose(fp);
- drop_client(list_clients, ptr_client);
- }
安全和健壮性
不信任连接的对端(peer), 是开发网络代码最重要的原则之一.
程序也不应假设对端发送的数据一定符合特定格式.
如果对于错误和异常的检测不够细心, 导致的漏洞很容易被利用.
写程序时, 无论使用的是哪种语言, 稍不注意就会产生 bug.
在使用 C 语言时, 要特别注意避免内存错误.
服务器软件的另一个议题是关于系统内文件的存取, 哪些文件可以被存取, 哪些不能.
恶意客户可能会尝试下载服务器系统中的任意文件.
一个关于安全的基本建议是, 使用非特权(non-privileged) 账户运行网络程序.
该账户只能存取实现服务器功能所需的最小的资源集.
这个建议无法替代编写安全的代码, 但却是保障系统安全的最后一道藩篱.
想让自己编写的代码堵住所有漏洞, 往往是不可能的.
操作系统也并不是总能提供充分的文档.
而操作系统的 APIs, 其行为方式经常是 非显然的(non-obvious) 和 非直觉的(non-intuitive).
总之, 要小心.
开源服务器
在互联网上部署 web 服务器, 建议采用开源实现.
如 Nginx, Apache, 都是性能良好, 跨平台, 安全, C 语言编写, 并完全免费的.
同时还拥有完备的文档, 寻求支持也很容易.