/* http_get - fetch the contents of an http URL
**
** Originally based on a simple version by Al Globus globus@nas.nasa.gov.
** Debugged and prettified by Jef Poskanzer jef@acme.com. Also includes
** ifdefs to handle https via OpenSSL.
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#ifdef USE_SSL
#include <ssl.h>
#endif
/* Forward declarations of the file-local functions defined further down. */
static void usage(void);
static void getURL(char* url, char* referer, char* user_agent, char* auth_token, char* cookie );
static void getURLbyParts(int protocol, char* host, int port, char* file, char* referer, char* user_agent, char* auth_token, char* cookie );
static void show_error(char* cause);
static void sigcatch(int sig);
static int b64_encode(unsigned char* ptr, int len, char* space, int size);
/* Globals, all set by main() before any network activity starts.
**
** argv0   - program name, used as the prefix of every diagnostic.
** verbose - non-zero makes the response headers get echoed to stdout.
** timeout - number of seconds handed to alarm() around the network steps.
** url     - the URL from the command line, quoted in error messages.
*/
static char* argv0;
static int verbose;
static int timeout;
static char* url;
/* Protocol symbols, passed as the "protocol" argument of getURLbyParts().
** PROTO_HTTPS only exists when the program is built with USE_SSL.
*/
#define PROTO_HTTP 0
#ifdef USE_SSL
#define PROTO_HTTPS 1
#endif
/* Header FSM states, used while scanning the response for the blank line
** that ends the headers.  Each state names what has been seen most recently:
**
** HDST_BOL    - nothing yet (initial state)
** HDST_TEXT   - an ordinary character within a line
** HDST_LF     - a line feed not preceded by a carriage return
** HDST_CR     - a carriage return
** HDST_CRLF   - a carriage return followed by a line feed
** HDST_CRLFCR - carriage return, line feed, carriage return
*/
#define HDST_BOL 0
#define HDST_TEXT 1
#define HDST_LF 2
#define HDST_CR 3
#define HDST_CRLF 4
#define HDST_CRLFCR 5
/* Program entry point: parse the command line, then fetch the URL.
**
** Options, all of which must come before the URL:
**   -v                    also echo the response headers to stdout
**   -t timeout            seconds to allow per network step (default 60);
**                         0 means no time limit, since alarm(0) sets no alarm
**   -r referer            value for the Referer header
**   -u user-agent         value for the User-Agent header (default "http_get")
**   -a username:password  credentials for HTTP Basic authorization
**   -c cookie             value for the Cookie header
**
** Exactly one URL must follow the options.  Any malformed command line ends
** in usage(), which does not return.  Exits with status 0 after the fetch.
*/
int main( int argc, char** argv )
{
    int argn;
    char* referer;
    char* user_agent;
    char* auth_token;
    char* cookie;

    argv0 = argv[0];
    argn = 1;
    verbose = 0;
    timeout = 60;
    referer = (char*) 0;
    user_agent = "http_get";
    auth_token = (char*) 0;
    cookie = (char*) 0;

    while ( argn < argc && argv[argn][0] == '-' && argv[argn][1] != '\0' )
    {
        if ( strcmp( argv[argn], "-v" ) == 0 )
        {
            verbose = 1;
        }
        else if ( strcmp( argv[argn], "-t" ) == 0 && argn + 1 < argc )
        {
            char* end;
            long secs;

            ++argn;
            secs = strtol( argv[argn], &end, 10 );
            /* Insist on a complete, non-negative number that fits in an
            ** int; a typo must not silently switch the timeout off.
            */
            if ( end == argv[argn] || *end != '\0' ||
                 secs < 0 || secs != (long) (int) secs )
                usage();
            timeout = (int) secs;
        }
        else if ( strcmp( argv[argn], "-r" ) == 0 && argn + 1 < argc )
        {
            ++argn;
            referer = argv[argn];
        }
        else if ( strcmp( argv[argn], "-u" ) == 0 && argn + 1 < argc )
        {
            ++argn;
            user_agent = argv[argn];
        }
        else if ( strcmp( argv[argn], "-a" ) == 0 && argn + 1 < argc )
        {
            ++argn;
            auth_token = argv[argn];
        }
        else if ( strcmp( argv[argn], "-c" ) == 0 && argn + 1 < argc )
        {
            ++argn;
            cookie = argv[argn];
        }
        else
        {
            usage();
        }
        ++argn;
    }

    /* Exactly one non-option argument, the URL, must remain. */
    if ( argn + 1 != argc )
        usage();
    url = argv[argn];

    /* The alarm() calls made during the fetch end up in sigcatch(). */
    (void) signal( SIGALRM, sigcatch );
    getURL( url, referer, user_agent, auth_token, cookie );
    exit( 0 );
}
/* Print the command-line synopsis to stderr and exit with status 1.
** Lists every option that main() accepts.  Does not return.
*/
static void usage(void)
{
    (void) fprintf( stderr, "usage: %s [-v] [-t timeout] [-r referer] [-u user-agent] [-a username:password] [-c cookie] url\n", argv0 );
    exit( 1 );
}
/* Split a URL into protocol, host, port and file, then fetch it.
**
** url must be of the form http://host-name[:port][/file-name] (or https://
** when built with USE_SSL).  A missing or empty port selects the protocol's
** default (80 for http, 443 for https); a missing file name selects "/".
** The remaining arguments are passed through to getURLbyParts() unchanged.
**
** Any URL that cannot be parsed - null, unsupported scheme, empty or
** over-long host name, bad port - is reported on stderr and the program
** exits with status 1.
*/
static void
getURL( char* url, char* referer, char* user_agent, char* auth_token, char* cookie )
{
    static const char http[] = "http://";
#ifdef USE_SSL
    static const char https[] = "https://";
#endif
    char host[2000];
    char* s;
    char* file;
    size_t proto_len;
    size_t host_len;
    int protocol;
    int port;

    if ( url == (char*) 0 )
    {
        (void) fprintf( stderr, "%s: null URL\n", argv0 );
        exit( 1 );
    }

    /* Identify the scheme; it also decides the default port. */
    if ( strncmp( url, http, sizeof(http) - 1 ) == 0 )
    {
        proto_len = sizeof(http) - 1;
        protocol = PROTO_HTTP;
        port = 80;
    }
#ifdef USE_SSL
    else if ( strncmp( url, https, sizeof(https) - 1 ) == 0 )
    {
        proto_len = sizeof(https) - 1;
        protocol = PROTO_HTTPS;
        port = 443;
    }
#endif
    else
    {
        (void) fprintf( stderr, "%s: non-http URL\n", argv0 );
        exit( 1 );
    }

    /* Get the host name: everything up to the first ':' or '/'. */
    for ( s = url + proto_len; *s != '\0' && *s != ':' && *s != '/'; ++s )
        ;
    host_len = (size_t) ( s - ( url + proto_len ) );
    if ( host_len == 0 )
    {
        (void) fprintf( stderr, "%s: missing host name in URL\n", argv0 );
        exit( 1 );
    }
    /* The host comes straight from the command line, so its length has to
    ** be checked against the local buffer before copying.
    */
    if ( host_len >= sizeof(host) )
    {
        (void) fprintf( stderr, "%s: host name too long\n", argv0 );
        exit( 1 );
    }
    memcpy( host, url + proto_len, host_len );
    host[host_len] = '\0';

    /* Get port number; "host:" with nothing after the colon keeps the
    ** default port.
    */
    if ( *s == ':' )
    {
        ++s;
        if ( *s != '\0' && *s != '/' )
        {
            char* end;
            long p;

            p = strtol( s, &end, 10 );
            if ( end == s || ( *end != '\0' && *end != '/' ) ||
                 p < 1 || p > 65535 )
            {
                (void) fprintf( stderr, "%s: bad port number in URL\n", argv0 );
                exit( 1 );
            }
            port = (int) p;
            s = end;
        }
    }

    /* Get the file name. */
    if ( *s == '\0' )
        file = "/";
    else
        file = s;

    getURLbyParts( protocol, host, port, file, referer, user_agent, auth_token, cookie );
}
static void
getURLbyParts( int protocol, char* host, int port, char* file, char* referer, char* user_agent, char* auth_token, char* cookie )
{
struct hostent *he;
struct servent *se;
struct protoent *pe;
struct sockaddr_in sin;
int sockfd;
#ifdef USE_SSL
SSL_CTX* ssl_ctx;
SSL* ssl;
#endif
char buf[10000];
int bytes, b, header_state;
(void) alarm( timeout );
he = gethostbyname( host );
if ( he == (struct hostent*) 0 )
{
(void) fprintf( stderr, "%s: unknown host - %s\n", argv0, host );
exit( 1 );
}
(void) alarm( timeout );
se = getservbyname( "telnet", "tcp" );
if ( se == (struct servent*) 0 )
{
(void) fprintf( stderr, "%s: unknown service\n", argv0 );
exit( 1 );
}
(void) alarm( timeout );
pe = getprotobyname( se->s_proto );
if ( pe == (struct protoent*) 0 )
{
(void) fprintf( stderr, "%s: unknown protocol\n", argv0 );
exit( 1 );
}
bzero( (caddr_t) &sin, sizeof(sin) );
sin.sin_family = he->h_addrtype;
(void) alarm( timeout );
sockfd = socket( he->h_addrtype, SOCK_STREAM, pe->p_proto );
if ( sockfd < 0 )
show_error( "socket" );
(void) alarm( timeout );
if ( bind( sockfd, (struct sockaddr*) &sin, sizeof(sin) ) < 0 )
show_error( "bind" );
bcopy( he->h_addr, &sin.sin_addr, he->h_length );
sin.sin_port = htons( port );
(void) alarm( timeout );
if ( connect( sockfd, (struct sockaddr*) &sin, sizeof(sin) ) < 0 )
show_error( "connect" );
#ifdef USE_SSL
if ( protocol == PROTO_HTTPS )
{
/* Make SSL connection. */
int r;
SSL_load_error_strings();
SSLeay_add_ssl_algorithms();
ssl_ctx = SSL_CTX_new( SSLv23_client_method() );
ssl = SSL_new( ssl_ctx );
SSL_set_fd( ssl, sockfd );
r = SSL_connect( ssl );
if ( r <= 0 )
{
(void) fprintf(
stderr, "%s: %s - SSL connection failed - %d\n",
argv0, url, r );
ERR_print_errors_fp( stderr );
exit( 1 );
}
}
#endif
/* Build request buffer, starting with the GET. */
(void) alarm( timeout );
bytes = snprintf( buf, sizeof(buf), "GET %s HTTP/1.0\r\n", file );
/* HTTP/1.1 host header - some servers want it even in HTTP/1.0. */
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Host: %s\r\n", host );
if ( referer != (char*) 0 )
/* Referer. */
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Referer: %s\r\n", referer );
/* User-agent. */
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "User-Agent: %s\r\n", user_agent );
/* Fixed headers. */
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept: */*\r\n" );
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Encoding: gzip, compress\r\n" );
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Language: en\r\n" );
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Charset: iso-8859-1,*,utf-8\r\n" );
if ( auth_token != (char*) 0 )
{
/* Basic Auth info. */
char token_buf[500];
token_buf[b64_encode( auth_token, strlen( auth_token ), token_buf, sizeof(token_buf) )] = '\0';
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Authorization: Basic %s\r\n", token_buf );
}
/* Cookie. */
if ( cookie != (char*) 0 )
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Cookie: %s\r\n", cookie );
/* Blank line. */
bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "\r\n" );
/* Now actually send it. */
#ifdef USE_SSL
if ( protocol == PROTO_HTTPS )
(void) SSL_write( ssl, buf, bytes );
else
(void) write( sockfd, buf, bytes );
#else
(void) write( sockfd, buf, bytes );
#endif
/* Get lines until a blank one. */
(void) alarm( timeout );
header_state = HDST_BOL;
for (;;)
{
#ifdef USE_SSL
if ( protocol == PROTO_HTTPS )
bytes = SSL_read( ssl, buf, sizeof(buf) );
else
bytes = read( sockfd, buf, sizeof(buf) );
#else
bytes = read( sockfd, buf, sizeof(buf) );
#endif
if ( bytes <= 0 )
break;
for ( b = 0; b < bytes; ++b )
{
if ( verbose )
(void) write( 1, &buf[b], 1 );
switch ( header_state )
{
case HDST_BOL:
switch ( buf[b] )
{
case '\n': header_state = HDST_LF; break;
case '\r': header_state = HDST_CR; break;
default: header_state = HDST_TEXT; break;
}
break;
case HDST_TEXT:
switch ( buf[b] )
{
case '\n': header_state = HDST_LF; break;
case '\r': header_state = HDST_CR; break;
}
break;
case HDST_LF:
switch ( buf[b] )
{
case '\n': goto end_of_headers;
case '\r': header_state = HDST_CR; break;
default: header_state = HDST_TEXT; break;
}
break;
case HDST_CR:
switch ( buf[b] )
{
case '\n': header_state = HDST_CRLF; break;
case '\r': goto end_of_headers;
default: header_state = HDST_TEXT; break;
}
break;
case HDST_CRLF:
switch ( buf[b] )
{
case '\n': goto end_of_headers;
case '\r': header_state = HDST_CRLFCR; break;
default: header_state = HDST_TEXT; break;
}
break;
case HDST_CRLFCR:
switch ( buf[b] )
{
case '\n': case '\r': goto end_of_headers;
default: header_state = HDST_TEXT; break;
}
break;
}
}
}
end_of_headers:
/* Dump out the rest of the headers buffer. */
++b;
(void) write( 1, &buf[b], bytes - b );
/* Copy the data. */
for (;;)
{
(void) alarm( timeout );
#ifdef USE_SSL
if ( protocol == PROTO_HTTPS )
bytes = SSL_read( ssl, buf, sizeof(buf) );
else
bytes = read( sockfd, buf, sizeof(buf) );
#else
bytes = read( sockfd, buf, sizeof(buf) );
#endif
if ( bytes == 0 )
break;
if ( bytes < 0 )
show_error( "read" );
(void) write( 1, buf, bytes );
}
#ifdef USE_SSL
if ( protocol == PROTO_HTTPS )
{
SSL_free( ssl );
SSL_CTX_free( ssl_ctx );
}
#endif
(void) close( sockfd );
}
/* Report a failed system call and exit with status 1.
**
** cause names the call that failed.  The message written to stderr is
** "<program>: <url> - <cause>: <errno text>", produced via perror(), so
** errno must still hold the error of the failed call.  The prefix is built
** with a bounded format: the URL comes from the command line and can be
** longer than the local buffer, in which case the prefix is truncated.
** Does not return.
*/
static void
show_error( char* cause )
{
    char buf[5000];

    (void) snprintf( buf, sizeof(buf), "%s: %s - %s", argv0, url, cause );
    perror( buf );
    exit( 1 );
}
/* Write a string to stderr from inside a signal handler.  A null pointer
** is ignored.  Only write() and strlen() are used, both of which are
** async-signal-safe.
*/
static void
sig_write( const char* text )
{
    if ( text != (const char*) 0 )
        (void) write( 2, text, strlen( text ) );
}

/* SIGALRM handler: report "<program>: <url> - timed out" on stderr and
** terminate with status 1.
**
** The handler can interrupt the program at any point, so it restricts
** itself to async-signal-safe calls: write() instead of stdio, and _exit()
** instead of exit().  sig is the signal number and is not used.
*/
static void
sigcatch( int sig )
{
    (void) sig;
    sig_write( argv0 );
    sig_write( ": " );
    sig_write( url );
    sig_write( " - timed out\n" );
    _exit( 1 );
}
/* Base-64 encoding. This encodes binary data as printable ASCII characters.
** Three 8-bit binary bytes are turned into four 6-bit values, like so:
**
** [11111111] [22222222] [33333333]
**
** [111111] [112222] [222233] [333333]
**
** Then the 6-bit values are represented using the characters "A-Za-z0-9+/".
*/

/* Maps a 6-bit value (0-63) to its base-64 character. */
static const char b64_encode_table[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', /* 0-7 */
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', /* 8-15 */
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', /* 16-23 */
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', /* 24-31 */
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', /* 32-39 */
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', /* 40-47 */
    'w', 'x', 'y', 'z', '0', '1', '2', '3', /* 48-55 */
    '4', '5', '6', '7', '8', '9', '+', '/'  /* 56-63 */
};

/* Inverse of b64_encode_table: maps a character code (0-255) to its 6-bit
** value, or -1 for characters outside the base-64 alphabet.
*/
static const int b64_decode_table[256] = {
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 00-0F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 10-1F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63, /* 20-2F */
    52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1, /* 30-3F */
    -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, /* 40-4F */
    15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1, /* 50-5F */
    -1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, /* 60-6F */
    41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1, /* 70-7F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 80-8F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* 90-9F */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* A0-AF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* B0-BF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* C0-CF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* D0-DF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, /* E0-EF */
    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* F0-FF */
};

/* Do base-64 encoding on a hunk of bytes. Return the actual number of
** bytes generated. Base-64 encoding takes up 4/3 the space of the original,
** plus a bit for end-padding. 3/2+5 gives a safe margin.
**
** ptr, len    - the bytes to encode
** space, size - output buffer and its capacity
**
** The output is NOT NUL-terminated.  It is padded with '=' to a multiple of
** four characters only when len is not a multiple of three.  At most size
** bytes are ever written; if the buffer is too small the output is cut
** short and is then not valid base-64.
*/
static int
b64_encode( unsigned char* ptr, int len, char* space, int size )
{
    int ptr_idx, space_idx, phase;
    int partial;

    space_idx = 0;
    phase = 0;
    /* Stopping as soon as the buffer is full guarantees that, in phases 1
    ** and 2, space[space_idx - 1] really is the half-finished character
    ** stored by the previous phase, and that there is room for one more.
    */
    for ( ptr_idx = 0; ptr_idx < len && space_idx < size; ++ptr_idx )
    {
        switch ( phase )
        {
            case 0:
                /* Top six bits, then the low two bits as the start of the
                ** second character.
                */
                space[space_idx++] = b64_encode_table[ptr[ptr_idx] >> 2];
                if ( space_idx < size )
                    space[space_idx++] =
                        b64_encode_table[( ptr[ptr_idx] & 0x3 ) << 4];
                phase = 1;
                break;
            case 1:
                /* Complete the second character, start the third. */
                partial = b64_decode_table[(unsigned char) space[space_idx - 1]];
                space[space_idx - 1] =
                    b64_encode_table[partial | ( ptr[ptr_idx] >> 4 )];
                space[space_idx++] =
                    b64_encode_table[( ptr[ptr_idx] & 0xf ) << 2];
                phase = 2;
                break;
            case 2:
                /* Complete the third character, emit the fourth. */
                partial = b64_decode_table[(unsigned char) space[space_idx - 1]];
                space[space_idx - 1] =
                    b64_encode_table[partial | ( ptr[ptr_idx] >> 6 )];
                space[space_idx++] = b64_encode_table[ptr[ptr_idx] & 0x3f];
                phase = 0;
                break;
            default:
                break;
        }
    }

    /* Pad with ='s, but only when the last group of three is incomplete. */
    if ( phase != 0 )
    {
        for ( ; phase < 3; ++phase )
            if ( space_idx < size )
                space[space_idx++] = '=';
    }
    return space_idx;
}