/* http_get - fetch the contents of an http URL
**
** Originally based on a simple version by Al Globus globus@nas.nasa.gov.
** Debugged and prettified by Jef Poskanzer jef@acme.com.  Also includes
** ifdefs to handle https via OpenSSL.
*/

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>

#ifdef USE_SSL
#include <ssl.h>
#endif

/* Forwards (function prototypes).  All of these are file-local. */

/* Print the command-line synopsis to stderr and exit with status 1. */
static void usage(void);
/* Split a URL into protocol, host, port and path, then fetch it. */
static void getURL(char* url, char* referer, char* user_agent, char* auth_token, char* cookie );
/* Connect to host:port, send the GET request and copy the reply to stdout. */
static void getURLbyParts(int protocol, char* host, int port, char* file, char* referer, char* user_agent, char* auth_token, char* cookie );
/* Report a failed call via perror(), prefixed with program name and URL, and exit. */
static void show_error(char* cause);
/* SIGALRM handler: report a timeout and exit. */
static void sigcatch(int sig);
/* Base-64 encode len bytes from ptr into space (capacity size); returns the count written. */
static int b64_encode(unsigned char* ptr, int len, char* space, int size);


/* Globals. */
/* Program name (argv[0]); prefixes every diagnostic. */
static char* argv0;
/* Non-zero when -v was given: response header bytes are echoed to stdout. */
static int verbose;
/* Seconds passed to alarm() before each network step; main() defaults it to 60, -t overrides. */
static int timeout;
/* The URL argument from the command line; used in error and timeout messages. */
static char* url;

/* Protocol symbols. */
#define PROTO_HTTP 0
#ifdef USE_SSL
#define PROTO_HTTPS 1
#endif

/* Header FSM states. */
#define HDST_BOL 0
#define HDST_TEXT 1
#define HDST_LF 2
#define HDST_CR 3
#define HDST_CRLF 4
#define HDST_CRLFCR 5


/* Program entry point: parse the command-line flags, then fetch the single
** URL argument and copy its body to standard output.
**
** Flags (each must precede the URL):
**   -v             also echo the response headers to stdout
**   -t timeout     seconds handed to alarm() before each network step
**                  (default 60; 0 leaves the alarm unarmed)
**   -r referer     value for the Referer request header
**   -u user-agent  value for the User-Agent request header
**                  (default "http_get")
**   -a user:pass   credentials sent as HTTP Basic authorization
**   -c cookie      value for the Cookie request header
**
** An unknown flag, a flag missing its argument, a malformed timeout, or
** anything other than exactly one URL prints the usage message and exits
** with status 1.  A completed fetch exits with status 0.
*/
int main( int argc, char** argv ) {

    int argn;
    char* referer;
    char* user_agent;
    char* auth_token;
    char* cookie;

    argv0 = argv[0];
    argn = 1;
    verbose = 0;
    timeout = 60;
    referer = (char*) 0;
    user_agent = "http_get";
    auth_token = (char*) 0;
    cookie = (char*) 0;

    /* Consume leading flags; a lone "-" is not treated as a flag. */
    while ( argn < argc && argv[argn][0] == '-' && argv[argn][1] != '\0' ) {

        if ( strcmp( argv[argn], "-v" ) == 0 ) {
            verbose = 1;
        }
        else if ( strcmp( argv[argn], "-t" ) == 0 && argn + 1 < argc ) {
            char* end;
            long secs;

            ++argn;
            /* Validate instead of atoi(): a typo must not silently turn
            ** into "no timeout", and a negative value must not become a
            ** huge unsigned alarm() argument.
            */
            secs = strtol( argv[argn], &end, 10 );
            if ( end == argv[argn] || *end != '\0' ||
                 secs < 0 || (long) (int) secs != secs ) {
                usage();
            }
            timeout = (int) secs;
        }
        else if ( strcmp( argv[argn], "-r" ) == 0 && argn + 1 < argc ) {
            ++argn;
            referer = argv[argn];
        }
        else if ( strcmp( argv[argn], "-u" ) == 0 && argn + 1 < argc ) {
            ++argn;
            user_agent = argv[argn];
        }
        else if ( strcmp( argv[argn], "-a" ) == 0 && argn + 1 < argc ) {
            ++argn;
            auth_token = argv[argn];
        }
        else if ( strcmp( argv[argn], "-c" ) == 0 && argn + 1 < argc ) {
            ++argn;
            cookie = argv[argn];
        }
        else {
            usage();
        }

        ++argn;

    } /* end while */

    /* Exactly one positional argument, the URL, must remain. */
    if ( argn >= argc ) {
        usage();
    }
    url = argv[argn];
    ++argn;
    if ( argn != argc ) {
        usage();
    }

    /* Every network step re-arms alarm(); sigcatch() reports the timeout. */
    (void) signal( SIGALRM, sigcatch );
    getURL(url, referer, user_agent, auth_token, cookie);

    exit(0);

}


/* Print the command-line synopsis to stderr and exit with status 1.
** The synopsis lists every flag that main() accepts.
*/
static void usage(void)
    {
    (void) fprintf( stderr, "usage:  %s [-v] [-t timeout] [-r referer] [-u user-agent] [-a username:password] [-c cookie] url\n", argv0 );
    exit(1);
    }


/* Split a URL into its parts and fetch it with getURLbyParts().
**
** url must be of the form http://host-name[:port][/file-name]
** (https:// is also accepted when built with USE_SSL).
**
**   url         the URL to fetch; a null pointer is reported and fatal
**   referer, user_agent, auth_token, cookie
**               passed through to getURLbyParts() unchanged
**
** The port defaults to 80 (443 for https) when it is absent or empty, and
** the file name defaults to "/" when the URL ends after the host or port.
** A URL with an unsupported scheme, a host name that does not fit the
** local buffer, or a port that is not a number in 1..65535 prints a
** diagnostic to stderr and exits with status 1.
*/
static void
getURL( char* url, char* referer, char* user_agent, char* auth_token, char* cookie )
    {
    static const char http[] = "http://";
#ifdef USE_SSL
    static const char https[] = "https://";
#endif
    char host[2000];
    char* s;
    char* end;
    char* file;
    size_t proto_len;
    size_t host_len;
    long port_num;
    int protocol;
    int port;

    if ( url == (char*) 0 )
        {
        (void) fprintf( stderr, "%s: null URL\n", argv0 );
        exit( 1 );
        }

    /* Identify the scheme. */
    if ( strncmp( http, url, sizeof(http) - 1 ) == 0 )
        {
        proto_len = sizeof(http) - 1;
        protocol = PROTO_HTTP;
        }
#ifdef USE_SSL
    else if ( strncmp( https, url, sizeof(https) - 1 ) == 0 )
        {
        proto_len = sizeof(https) - 1;
        protocol = PROTO_HTTPS;
        }
#endif
    else
        {
        (void) fprintf( stderr, "%s: non-http URL\n", argv0 );
        exit( 1 );
        }

    /* Get the host name: everything up to the first ':' or '/'. */
    for ( s = url + proto_len; *s != '\0' && *s != ':' && *s != '/'; ++s )
        ;
    host_len = (size_t) ( s - ( url + proto_len ) );
    /* The URL comes straight from the command line, so its length is
    ** unbounded; refuse anything that would overrun host[].
    */
    if ( host_len >= sizeof(host) )
        {
        (void) fprintf( stderr, "%s: host name too long\n", argv0 );
        exit( 1 );
        }
    memcpy( host, url + proto_len, host_len );
    host[host_len] = '\0';

    /* Get port number, starting from the scheme's default. */
    port = 80;
#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
        port = 443;
#endif
    if ( *s == ':' )
        {
        ++s;
        port_num = strtol( s, &end, 10 );
        /* The port must run up to the '/' or the end of the URL, and when
        ** digits are present they must name a valid TCP port.  An empty
        ** port ("host:/path") keeps the default.
        */
        if ( ( *end != '\0' && *end != '/' ) ||
             ( end != s && ( port_num < 1 || port_num > 65535 ) ) )
            {
            (void) fprintf( stderr, "%s: bad port number in URL\n", argv0 );
            exit( 1 );
            }
        if ( end != s )
            port = (int) port_num;
        s = end;
        }

    /* Get the file name: the rest of the URL, or "/" if nothing is left. */
    if ( *s == '\0' )
        file = "/";
    else
        file = s;

    getURLbyParts( protocol, host, port, file, referer, user_agent, auth_token, cookie );
    }


static void
getURLbyParts( int protocol, char* host, int port, char* file, char* referer, char* user_agent, char* auth_token, char* cookie )
    {
    struct hostent *he;
    struct servent *se;
    struct protoent *pe;
    struct sockaddr_in sin;
    int sockfd;
#ifdef USE_SSL
    SSL_CTX* ssl_ctx;
    SSL* ssl;
#endif
    char buf[10000];
    int bytes, b, header_state;

    (void) alarm( timeout );
    he = gethostbyname( host );
    if ( he == (struct hostent*) 0 )
	{
	(void) fprintf( stderr, "%s: unknown host - %s\n", argv0, host );
	exit( 1 );
	}
    (void) alarm( timeout );
    se = getservbyname( "telnet", "tcp" );
    if ( se == (struct servent*) 0 )
	{
	(void) fprintf( stderr, "%s: unknown service\n", argv0 );
	exit( 1 );
	}
    (void) alarm( timeout );
    pe = getprotobyname( se->s_proto );
    if ( pe == (struct protoent*) 0 )
	{
	(void) fprintf( stderr, "%s: unknown protocol\n", argv0 );
	exit( 1 );
	}
    bzero( (caddr_t) &sin, sizeof(sin) );
    sin.sin_family = he->h_addrtype;

    (void) alarm( timeout );
    sockfd = socket( he->h_addrtype, SOCK_STREAM, pe->p_proto );
    if ( sockfd < 0 )
	show_error( "socket" );

    (void) alarm( timeout );
    if ( bind( sockfd, (struct sockaddr*) &sin, sizeof(sin) ) < 0 )
	show_error( "bind" );
    bcopy( he->h_addr, &sin.sin_addr, he->h_length );

    sin.sin_port = htons( port );
    (void) alarm( timeout );
    if ( connect( sockfd, (struct sockaddr*) &sin, sizeof(sin) ) < 0 )
	show_error( "connect" );

#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	{
	/* Make SSL connection. */
	int r;
	SSL_load_error_strings();
	SSLeay_add_ssl_algorithms();
	ssl_ctx = SSL_CTX_new( SSLv23_client_method() );
	ssl = SSL_new( ssl_ctx );
	SSL_set_fd( ssl, sockfd );
	r = SSL_connect( ssl );
	if ( r <= 0 )
	    {
	    (void) fprintf(
		stderr, "%s: %s - SSL connection failed - %d\n",
		argv0, url, r );
	    ERR_print_errors_fp( stderr );
	    exit( 1 );
	    }
	}
#endif

    /* Build request buffer, starting with the GET. */
    (void) alarm( timeout );
    bytes = snprintf( buf, sizeof(buf), "GET %s HTTP/1.0\r\n", file );
    /* HTTP/1.1 host header - some servers want it even in HTTP/1.0. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Host: %s\r\n", host );
    if ( referer != (char*) 0 )
	/* Referer. */
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Referer: %s\r\n", referer );
    /* User-agent. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "User-Agent: %s\r\n", user_agent );
    /* Fixed headers. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept: */*\r\n" );
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Encoding: gzip, compress\r\n" );
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Language: en\r\n" );
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Accept-Charset: iso-8859-1,*,utf-8\r\n" );
    if ( auth_token != (char*) 0 )
	{
	/* Basic Auth info. */
	char token_buf[500];
	token_buf[b64_encode( auth_token, strlen( auth_token ), token_buf, sizeof(token_buf) )] = '\0';
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Authorization: Basic %s\r\n", token_buf );
	}
    /* Cookie. */
    if ( cookie != (char*) 0 )
	bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "Cookie: %s\r\n", cookie );
    /* Blank line. */
    bytes += snprintf( &buf[bytes], sizeof(buf) - bytes, "\r\n" );
    /* Now actually send it. */
#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	(void) SSL_write( ssl, buf, bytes );
    else
	(void) write( sockfd, buf, bytes );
#else
    (void) write( sockfd, buf, bytes );
#endif

    /* Get lines until a blank one. */
    (void) alarm( timeout );
    header_state = HDST_BOL;
    for (;;)
	{
#ifdef USE_SSL
	if ( protocol == PROTO_HTTPS )
	    bytes = SSL_read( ssl, buf, sizeof(buf) );
	else
	    bytes = read( sockfd, buf, sizeof(buf) );
#else
	bytes = read( sockfd, buf, sizeof(buf) );
#endif
	if ( bytes <= 0 )
	    break;
	for ( b = 0; b < bytes; ++b )
	    {
	    if ( verbose )
		(void) write( 1, &buf[b], 1 );
	    switch ( header_state )
		{
		case HDST_BOL:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_LF; break;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		case HDST_TEXT:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_LF; break;
		    case '\r': header_state = HDST_CR; break;
		    }
		break;

		case HDST_LF:
		switch ( buf[b] )
		    {
		    case '\n': goto end_of_headers;
		    case '\r': header_state = HDST_CR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CR:
		switch ( buf[b] )
		    {
		    case '\n': header_state = HDST_CRLF; break;
		    case '\r': goto end_of_headers;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CRLF:
		switch ( buf[b] )
		    {
		    case '\n': goto end_of_headers;
		    case '\r': header_state = HDST_CRLFCR; break;
		    default: header_state = HDST_TEXT; break;
		    }
		break;

		case HDST_CRLFCR:
		switch ( buf[b] )
		    {
		    case '\n': case '\r': goto end_of_headers;
		    default: header_state = HDST_TEXT; break;
		    }
		break;
		}
	    }
	}
    end_of_headers:
    /* Dump out the rest of the headers buffer. */
    ++b;
    (void) write( 1, &buf[b], bytes - b );

    /* Copy the data. */
    for (;;)
        {
	(void) alarm( timeout );
#ifdef USE_SSL
	if ( protocol == PROTO_HTTPS )
	    bytes = SSL_read( ssl, buf, sizeof(buf) );
	else
	    bytes = read( sockfd, buf, sizeof(buf) );
#else
	bytes = read( sockfd, buf, sizeof(buf) );
#endif
	if ( bytes == 0 )
	    break;
	if ( bytes < 0 )
	    show_error( "read" );
	(void) write( 1, buf, bytes );
        }
#ifdef USE_SSL
    if ( protocol == PROTO_HTTPS )
	{
	SSL_free( ssl );
	SSL_CTX_free( ssl_ctx );
	}
#endif  
    (void) close( sockfd );
    }


/* Report a failed system call and exit with status 1.
**
**   cause   short name of the operation that failed (e.g. "connect")
**
** The message "<program>: <url> - <cause>" is handed to perror(), which
** appends the text for the current errno.  The prefix is built with
** snprintf() so that a very long URL is truncated instead of overrunning
** the local buffer.
*/
static void
show_error( char* cause )
    {
    char buf[5000];
    (void) snprintf( buf, sizeof(buf), "%s: %s - %s", argv0, url, cause );
    perror( buf );
    exit( 1 );
    }


/* SIGALRM handler: report that the current network step timed out and
** terminate with status 1.
**
**   sig   the signal number (unused)
**
** Only async-signal-safe calls are made here - write() and _exit() in
** place of fprintf() and exit() - because the signal can interrupt the
** main flow at any point.  The text written to stderr is
** "<program>: <url> - timed out".
*/
static void
sigcatch( int sig )
    {
    static const char sep[] = ": ";
    static const char tail[] = " - timed out\n";

    (void) sig;
    (void) write( 2, argv0, strlen( argv0 ) );
    (void) write( 2, sep, sizeof(sep) - 1 );
    (void) write( 2, url, strlen( url ) );
    (void) write( 2, tail, sizeof(tail) - 1 );
    _exit( 1 );
    }


/* Base-64 encoding.  This encodes binary data as printable ASCII characters.
** Three 8-bit binary bytes are turned into four 6-bit values, like so:
**
**   [11111111]  [22222222]  [33333333]
**
**   [111111] [112222] [222233] [333333]
**
** Then the 6-bit values are represented using the characters "A-Za-z0-9+/".
*/

/* Maps a 6-bit value (0-63) to its base-64 character. */
static const char b64_encode_table[64] = {
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',  /* 0-7 */
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',  /* 8-15 */
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',  /* 16-23 */
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',  /* 24-31 */
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',  /* 32-39 */
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',  /* 40-47 */
    'w', 'x', 'y', 'z', '0', '1', '2', '3',  /* 48-55 */
    '4', '5', '6', '7', '8', '9', '+', '/'   /* 56-63 */
    };

/* Do base-64 encoding on a hunk of bytes.
**
**   ptr    the bytes to encode
**   len    how many bytes to encode
**   space  where the encoded characters are stored
**   size   capacity of space, in characters
**
** Returns the number of characters actually stored, which never exceeds
** size.  The output is NOT NUL-terminated; the caller adds the terminator.
** Base-64 encoding takes up 4/3 the space of the original, rounded up to a
** multiple of four by the '=' end-padding; 3/2+5 gives a safe margin.  If
** space is too small the output is cut short, but what is stored is always
** a correct prefix of the full encoding.
**
** The input is consumed three bytes at a time.  A final group of one or
** two bytes is padded with "==" or "=" respectively; input whose length is
** a multiple of three (including empty input) gets no padding.
*/
static int
b64_encode( unsigned char* ptr, int len, char* space, int size )
    {
    int ptr_idx, space_idx, avail, k;
    unsigned long group;
    char quad[4];

    space_idx = 0;
    for ( ptr_idx = 0; ptr_idx < len; ptr_idx += 3 )
        {
        /* Number of real input bytes in this group: 1, 2 or 3. */
        avail = len - ptr_idx;
        if ( avail > 3 )
            avail = 3;

        /* Pack the bytes into 24 bits; missing bytes count as zero. */
        group = (unsigned long) ptr[ptr_idx] << 16;
        if ( avail > 1 )
            group |= (unsigned long) ptr[ptr_idx + 1] << 8;
        if ( avail > 2 )
            group |= (unsigned long) ptr[ptr_idx + 2];

        /* Slice into four 6-bit values; positions with no input bits
        ** become padding.
        */
        quad[0] = b64_encode_table[( group >> 18 ) & 0x3f];
        quad[1] = b64_encode_table[( group >> 12 ) & 0x3f];
        quad[2] = ( avail > 1 ) ? b64_encode_table[( group >> 6 ) & 0x3f] : '=';
        quad[3] = ( avail > 2 ) ? b64_encode_table[group & 0x3f] : '=';

        for ( k = 0; k < 4 && space_idx < size; ++k )
            space[space_idx++] = quad[k];
        }
    return space_idx;
    }