archie/prospero/user/vcache/gopherget.c
2024-05-27 16:13:40 +02:00

471 lines
16 KiB
C

/*
* Copyright (c) 1992, 1993 by the University of Southern California
*
* For copying and distribution information, please see the file <usc-copyr.h>.
*/
/* gopherget.c
Part of the program "vcache" in the Prospero distribution.
This allows Prospero clients to use the GOPHER access method (i.e., to
retrieve files sitting on Gopher servers.)
Written: swa@isi.edu, 7/13/92 -- 7/15/92
Comments improved 8/6/92
Gopher TEXT access method made considerably looser 6/30/93
Got rid of necessity for writev. 9/15/93
*/
#include <usc-copyr.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <perrno.h>
#include <pmachine.h>
#ifdef HAVE_WRITEV
#include <sys/uio.h> /* for writev */
#endif
#include <pfs.h> /* for assert() and internal_error() */
#include "vcache_macros.h"
/* If it doesn't respond in 5 secs, it's probably gone away */
#define G_READ_TIMEOUT 5
#define G_OPEN_TIMEOUT 5
extern int pfs_debug, cache_verbose;
/* File-local helpers used by gopherget().  They are declared here, at file
   scope, because ISO C does not allow the `static' storage class on a
   function declaration that appears inside a block. */
static int vcache_open_tcp_stream(char host[], int port);
static int send_line(int remote_sock, char str[]);
static int (*get_retrieval_method(int gophertype))();

/* Use the Gopher protocol.  Get the file denoted by the SELECTOR_STRING
   from the Gopher server running at port PORT of HOST, and store that file
   in the local file named LOCAL.  GOPHERTYPE is the Gopher item type
   character; get_retrieval_method() maps it to the routine that receives
   the data.

   Return non-zero on error; 0 if OK.  If it returns non-zero, the existence
   and contents of the file LOCAL are indeterminate.  (If this presents a
   problem, we'll have to rewrite the code to do the unlinking). */
int
gopherget(char *host, char *local, char *selector_string, int port,
          int gophertype /* actually char, but make sure default
                            promotions work. */ )
{
    FILE *local_file;           /* file pointer for local destination */
    int remote_sock;            /* File Descriptor for remote socket. */
    int retval;                 /* error status return. */
    int (*retrieval_method)();  /* What method will we use to retrieve this
                                   file?  Null means none. */

    TRACE(5, "vcache: Attempting to retrieve remote document \"%s\" \n"
          "via Gopher from port %d of host %s\n", selector_string, port, host);

    /* Refuse unsupported item types before touching the file system or the
       network. */
    retrieval_method = get_retrieval_method(gophertype);
    if (!retrieval_method)
        return 1;               /* Failure. */

    /* Open Local Destination file.  Note that PFS has already been
       disabled. */
    local_file = fopen(local, "w");
    if (local_file == NULL) {
        ERRSYS("vcache: Couldn't create the local file %s:%s %s ", local);
        return 1;
    }

    /* Open TCP stream to remote gopher server. */
    TRACE(5,"Calling vcache_open_tcp_stream(host = %s, port = %d)\n",
          host, port);
    if ((remote_sock = vcache_open_tcp_stream(host, port)) < 0) {
        /* No need to set p_err_string, vcache_open_tcp_stream will */
        fclose(local_file);
        return 1;
    }

    TRACE(5,"Sending selector string to remote GOPHER: %s\n",
          selector_string);

    /* At this point, if any errors occur, we still try to clean up as
       neatly as possible: every step below runs regardless of earlier
       failures, and any failure makes the overall result non-zero. */
    retval = send_line(remote_sock, selector_string);
    if (!retval)
        retval = (*retrieval_method)(remote_sock, local_file);
    if (!retval && cache_verbose)
        puts("vcache: Retrieval completed.");

    if (fclose(local_file)) {
        TRACE(5, "vcache: Error attempting to close local file %s\n", local);
        retval = 1;
    }

    /* Close down both ends of the connection.  shutdown() allegedly
       performs actions that close() defers. */
    if (shutdown(remote_sock, 2)) {
        ERRSYS("vcache: shutdown(remote_sock, 2) failed:%s %s");
        retval = 1;
    }
    if (close(remote_sock)) {
        ERRSYS("vcache: close(remote_sock) failed:%s %s");
        retval = 1;
    }

    return retval ? 1 : 0;      /* 1 = failure, 0 = success */
}
/* Connect to port PORT of HOST over TCP by delegating to
   quick_open_tcp_stream(), passing G_OPEN_TIMEOUT as its time argument.
   Returns the socket descriptor on success, or -1 on failure.
   On failure p_err_string holds a "user" friendly error message; it is set
   by quick_open_tcp_stream(), not here. */
static int
vcache_open_tcp_stream(char host[],int port)
{
    int sock;   /* whatever quick_open_tcp_stream() hands back */

    TRACE(2,"vcache: quick_open_tcp_stream(host=%s;port=%d;time=%d)\n",
          host, port, G_OPEN_TIMEOUT);
    sock = quick_open_tcp_stream(host, port, G_OPEN_TIMEOUT);
    return sock;
}
/* Send STR, followed by CRLF, to the remote host on REMOTE_SOCK.  STR itself
   must NOT be crlf terminated.
   With HAVE_WRITEV the string and the terminator go out in one writev()
   call, which avoids both a copy and a second write.  Otherwise the line is
   assembled in a buffer kept between calls and sent with a single write().
   Either way exactly one send is attempted: selector strings are assumed
   short enough that a partial write never happens, so a short count is
   reported as an error rather than retried. */
/* Return nonzero if failure; 0 if success */
static int
send_line(remote_sock, str)
    int remote_sock;            /* A connected TCP socket */
    char str[];
{
    int want;                   /* # of bytes we mean to send. */
    int sent;                   /* # of bytes the system call reported. */
#ifdef HAVE_WRITEV
    struct iovec pieces[2];

    pieces[0].iov_base = str;
    pieces[0].iov_len = strlen(str);
    pieces[1].iov_base = "\r\n";
    pieces[1].iov_len = 2;
    want = pieces[0].iov_len + pieces[1].iov_len;
    sent = writev(remote_sock, pieces, 2);
#else
    static char *line = NULL;   /* reused from one call to the next */

    line = qsprintf_stcopyr(line, "%s\r\n", str);
    want = strlen(line);
    sent = write(remote_sock, line, want);
#endif /* HAVE_WRITEV */

    if (sent == want)
        return 0;

    ERRSYS("vcache: Error sending selector string %s to gopher server. "
           "\tExpected to send %d bytes; write() or writev() reported sending only %d. %s %s",
           str, want, sent);
    return 1;
}
/* Read a gopher text stream from in_sock and save it in out_file.
Gopher text lines are terminated with crlf.
Their last line consists of ".\r\n".
This is implemented as a simple DFA.
*/
/* 6/30/93:
We do full error recovery & don't complain by default, since I've now
learned that the format is followed rather loosely. Thus the LOOSE
definition. */
#define LOOSE
static
int
receive_gopher_text(in_sock, out)
int in_sock; /* Input socket for Gopher text */
FILE *out; /* Where to save the received text. */
{
char buf[BUFSIZ]; /* buffer for reading. */
register int numread; /* Number of bytes actually read. */
char *complaint = NULL; /* Might be set to a string that contains a
complaint about the remote format. This is
usually used for recoverable errors. */
enum states { BOL, /* Beginning of line */
GOT_BEG_DOT, /* Got '.' following BOL */
MID_LINE, /* middle of line */
GOT_DOT_CR, /* Got ".\r",Expect LF */
GOT_CR, /* Got CR; expect LF */
EOT, /* got ".\r\n" by itself */
#ifndef LOOSE
BAD /* something gross happened. We have
complained about it and are now just marking
time. */
#endif
} state;
state = BOL;
while((numread = quick_read(in_sock, buf, sizeof buf, G_READ_TIMEOUT)) > 0) {
register int i;
/* Process the characters we read. */
for (i = 0; i < numread; ++i) {
register char c = buf[i]; /* current character */
switch(state) {
case BOL:
#ifdef LOOSE
case_bol: /* a label to go to from EOT. */
#endif
switch(c) {
case '.':
state = GOT_BEG_DOT;
break;
case '\r':
state = GOT_CR;
break;
case '\n':
complaint = "Got LF without CR";
putc(c, out);
state = BOL;
break;
default:
putc(c, out);
state = MID_LINE;
break;
}
break;
case GOT_BEG_DOT:
if (c == '.') {
putc('.', out);
state = MID_LINE;
} else if (c == '\r') {
state = GOT_DOT_CR;
} else if (c == '\n') {
complaint = "Got LF without CR";
state = EOT;
} else {
complaint = "Got initial dot without following dot or EOL";
putc('.', out);
putc(c, out);
state = MID_LINE;
}
break;
case MID_LINE:
if (c == '\r') {
state = GOT_CR;
} else if (c == '\n') {
complaint = "Got LF without CR";
putc('\n', out);
state = BOL;
} else {
putc(c, out);
state = MID_LINE;
}
break;
case GOT_DOT_CR:
if (c != '\n') {
complaint = "Got . CR without LF";
#ifdef LOOSE
putc('.', out);
putc('\n', out);
goto case_bol;
#else
state = BAD;
#endif
} else {
state = EOT;
}
break;
case GOT_CR:
if (c != '\n') {
complaint = "Got CR without LF";
#ifdef LOOSE
putc('\n', out);
putc(c, out);
goto case_bol;
#else
state = BAD;
#endif
} else {
putc('\n', out);
state = BOL;
}
break;
case EOT:
/* If we get text after EOT, just keep on gobbling it up. This
is not a fatal error, merely a warning. */
complaint = "Got text after EOT";
#ifdef LOOSE
putc('.', out);
putc('\n', out);
goto case_bol;
#endif
break;
#ifndef LOOSE
case BAD: /* nothing more to do */
goto abort;
break;
#endif
default:
ERR("vcache: Internal error in line %d of file %s:\
unhandled case %s", __LINE__, __FILE__);
return(-1);
break;
}
}
}
abort:
if (complaint && cache_verbose)
ERR("vcache: we encountered incorrect formatting when\
reading from remote server: %s. \
This is not a serious problem by itself, and the retrieval did not\
necessarily fail, but you should notify your system maintainer %s", complaint);
if (numread < 0) { /* If read() returned an error */
ERRSYS("vcache: read from remote gopher server failed:%s %s");
return -1;
}
#ifndef LOOSE
if (state == EOT) /* normal completion (possibly with warnings,
but all errors seem recoverable.) */
#endif
return 0; /* success */
#ifndef LOOSE
ERR(stderr, "vcache: Premature end of transmission while \
reading text from remote gopher server. %s");
return -1;
#endif
}
/* Copy a gopher binary stream from in_sock to out, unmodified, until the
   remote end stops sending (quick_read() returns 0).
   Returns 0 once end of transmission is reached; returns -1 if a read or a
   write fails. */
static
int
receive_gopher_binary(in_sock, out)
    int in_sock;                /* Input socket for Gopher data */
    FILE *out;                  /* Where to save the received data. */
{
    char chunk[BUFSIZ];         /* holds one read's worth of data */
    int got;                    /* result of the most recent quick_read() */

    for (;;) {
        int done = 0;           /* bytes of this chunk already written */

        got = quick_read(in_sock, chunk, sizeof chunk, G_READ_TIMEOUT);
        if (got <= 0)
            break;

        /* We might be writing to a pipe, so a single fwrite() might not
           take the whole chunk.  Keep going until all of it is out. */
        while (done < got) {
            int wrote = fwrite(&chunk[done], sizeof chunk[0], got - done, out);

            if (wrote == 0) {   /* write error! */
                ERR("vcache: receive_gopher_binary(): write error %s");
                return -1;      /* error */
            }
            done += wrote;
        }
    }

    if (got == 0)               /* EOF! */
        return 0;               /* success */
    if (got < 0) {              /* If read() returned an error */
        ERRSYS("vcache: read from remote gopher server failed:%s %s");
        return -1;
    }
    /* should never get here! */
    ERR("vcache: Internal error at line %d of file %s. %s",
        __LINE__, __FILE__);
    return(-1);
}
/* Check the Gopher item type character to decide if a retrieval method is
supported. If there is no retrieval method, punt.
This table of retrieval methods comes from Edward Vielmetti's 13 July 1992
article in alt.gopher.
*/
/*
These use the Gopher TEXT retrieval method.
0 A_FILE Text file.
4 A_MACHEX Macintosh Binhex (.hqx) text file.
c A_CALENDAR Calendar text file.
e A_EVENT Event text file.
M A_MIME MIME (RFC 1341) text file.
These use the Gopher BINARY retrieval method.
9 A_UNIXBIN Binary file.
g A_GIF GIF (Graphics Interchange Format) binary file.
s A_SOUND Sound (8 bit u-law, no headers ?) binary file.
things to do with data as it's flying back at you:
- treat as binary, read until connection drops,
send to file A_UNIXBIN
send to process A_SOUND
send to screen A_GIF
- treat as text, read til "." alone on line,
send to screen A_FILE
send to process A_MIME
A_CALENDAR
A_EVENT
send to file A_MACHEX
*/
/* In addition, the gopher protocol.txt document describes two obsolete types
(use of these types is discouraged) which Ed has not mentioned in his
letter. I'll include them in the table
below, because we want to support as many types as possible. They are:
Type 5: DOS binary archive of some kind (binary)
Type 6: UUencoded UNIX file (text).
*/
/* Maps each supported Gopher item type character to the routine that
   receives items of that type.  Each type character appears once, so the
   order of the entries does not affect which entry a lookup finds.  The
   entry whose type_char is '\0' marks the end of the table. */
struct {
    char type_char;             /* Gopher item type character */
    int (*func)();              /* routine that receives this type */
    char *name;                 /* method name, shown in trace output */
} retrieval_method_tab[] = {
    /* Retrieved with the Gopher TEXT method. */
    {'0', receive_gopher_text, "GOPHER_TEXT"},
    {'4', receive_gopher_text, "GOPHER_TEXT"},
    {'6', receive_gopher_text, "GOPHER_TEXT"},
    {'c', receive_gopher_text, "GOPHER_TEXT"},
    {'e', receive_gopher_text, "GOPHER_TEXT"},
    {'M', receive_gopher_text, "GOPHER_TEXT"},

    /* Retrieved with the Gopher BINARY method. */
    {'5', receive_gopher_binary, "GOPHER_BINARY"},
    {'9', receive_gopher_binary, "GOPHER_BINARY"},
    {'g', receive_gopher_binary, "GOPHER_BINARY"},
    {'s', receive_gopher_binary, "GOPHER_BINARY"},

    /* End of table. */
    {'\0', NULL, NULL}
};
/* Look GOPHERTYPE up in retrieval_method_tab.
   Returns the receiving routine registered for that item type.  If the type
   is not in the table, reports the problem through ERR and returns NULL. */
static
int
(*get_retrieval_method(int gophertype))() /* actually char. */
{
    int slot = 0;

    /* Walk the table up to, but not including, its '\0' end marker. */
    while (retrieval_method_tab[slot].type_char != '\0') {
        if (retrieval_method_tab[slot].type_char == gophertype) {
            TRACE(5,"Using retrieval method: %s\n",
                  retrieval_method_tab[slot].name);
            return retrieval_method_tab[slot].func;
        }
        ++slot;
    }

    ERR("vcache: Can't retrieve document -- item type %c "
        "unsupported. %s", gophertype);
    return NULL;
}