#!/bin/bash
# NOTE: bash is required -- this script relies on bash features such as
# arrays ("declare -a") and the "function" keyword, which a plain POSIX
# sh (e.g. dash) doesn't provide.

# This is a *new* apt-proxy. It is based on ideas (and on some source
# snippets) stolen from Rusty Russell. This proxy is released under
# the GNU General Public License Version 2 and may be freely redistri-
# buted and modified as you like.
#
# However, this is alpha ware and not very well tested. Use it at your
# own risk!!! It may format your harddisks, make your CMOS battery
# explode, may give root access to script kiddies and it *also* may
# serve nice Debian boxes fetching .deb's:-)
#
# (C) 2001 by Jan-Benedict Glaw <jbglaw@lug-owl.de>
#

APT_PROXY_VERSION=0.1.0
#                   ^-- See? It's unstable!

# The first (optional) command line argument is the path of the log
# file. Without it, everything that would be logged is discarded.
APT_PROXY_LOGFILE="${1:-/dev/null}"

# Keep the original stdout reachable as fd 3 (it is used to communicate
# with the client), then send stdout and stderr to the logfile (or
# /dev/null). The file name is quoted so that a path containing blanks
# doesn't end up as an "ambiguous redirect".
exec 3>&1 1>>"${APT_PROXY_LOGFILE}" 2>&1

CONFIG_FILE=/etc/ap2.conf

# Per-source settings, all three indexed by the same number. They are
# expected to be filled in by ${CONFIG_FILE}, nothing in this script
# assigns them.
declare -a RSYNC_MATCH
declare -a RSYNC_BASEDIR
declare -a RSYNC_SOURCE

. "${CONFIG_FILE}"



#
# log() appends one line to the global log file ($APT_PROXY_LOGFILE).
# The line starts with the PID of this process and the current date,
# followed by all given strings separated by single blanks.
#
# log Hello this is some Text
# log "Hello this" "is some " "Text"
#
function log() {
	# The file name is quoted: a log path containing blanks would
	# otherwise be rejected as an "ambiguous redirect".
	echo "$$:($(date))" "$@" >> "${APT_PROXY_LOGFILE}"
}

#
# debug() is the chatty sibling of log(): the given strings are handed
# over to log(), but only while $DEBUG equals "yes". In every other
# case nothing is written and 0 is returned.
#
function debug() {
	[ "${DEBUG}" = "yes" ] || return 0
	log "$@"
}

#
# Writes a HTTP/1.0 response header to fd 3 (the client). The first
# argument has to be the HTTP status (code and text), all further
# arguments are treated as header lines. They mustn't ship the \r\n
# line ending themselves, it is appended here!
#
# Besides the arguments, $APT_PROXY_VERSION, $DEBUG, $REQUEST, $SIZE and
# $TRY_TO_KEEP_ALIVE are read: "Connection: Keep-Alive" is only sent if
# both $SIZE and $TRY_TO_KEEP_ALIVE are non-empty.
#
# write_header "404 Not Found" "X-Go-Away: Yes" "Content-Type: text/html"
#
function write_header() {
	(
		# All values are printed through '%s': header lines may
		# contain client supplied text (the request), and backslash
		# sequences like a literal "\r\n" in there must never be
		# expanded into real line breaks (header injection).
		printf 'HTTP/1.0 %s\r\n' "$1"
		printf 'Date: %s\r\n' "$(date -u -R)"
		printf 'Server: APT-Proxy2 %s (C) GPLv2 by Jan-Benedict Glaw <jbglaw@lug-owl.de>\r\n' "${APT_PROXY_VERSION}"
		if [ "${DEBUG}" = yes ]; then
			printf 'X-You-Requested: %s\r\n' "${REQUEST}"
			# NOTE(review): the program is usually called "fortune",
			# and the text is sent without any header name -- confirm
			# that this is intended.
			if command -v fortunes > /dev/null 2>&1; then
				printf '%s\r\n' "$(fortunes | tr -d '\012\015')"
			fi
		fi
		if [ -n "${SIZE}" ] && [ -n "${TRY_TO_KEEP_ALIVE}" ]; then
			printf 'Connection: Keep-Alive\r\n'
		fi

		# Drop the status, what is left are the extra header lines
		if [ $# -gt 0 ]; then
			shift
		fi
		for header_line in "$@"; do
			printf '%s\r\n' "${header_line}"
		done

		# The empty line terminates the header
		printf '\r\n'
	) >&3
}

#
# Tell the client that this proxy is misconfigured: a header and a small
# HTML page containing the explanation given as first argument are sent
# to fd 3, then the script is terminated with exit code 1. This function
# never returns.
#
function bad_config_message() {
	write_header "404 Bad config" "Content-Type: text/html" "X-Info: Your not yet done request cannot be served..."

	# The page goes straight to the client
	cat >&3 <<HERE
<HTML>
	<HEAD>
		<TITLE>
			HTTP Error 404 - Bad config
		</TITLE>
	</HEAD>
	<BODY>
		<H1>
			HTTP Error 404 - Bad config
		</H1>
		<P>
			$1 Sorry, I cannot proceed...
		</P>
		<HR>
		<SMALL>
			<CENTER>
				Now: $(date)
			</CENTER>
		</SMALL>
	</BODY>
</HTML>
HERE
	exit 1
}

#
# Check config: $MAXSOURCE has to be a number, and for every index from
# 0 up to $MAXSOURCE the entries of RSYNC_MATCH[], RSYNC_BASEDIR[] and
# RSYNC_SOURCE[] have to look sane. If an error is found, it is logged
# and reported through bad_config_message() -- which exits, so this
# function doesn't return then!
#
function check_all_config() {
	local idx match basedir rsync_src problem

	# No write_header() in here: bad_config_message() sends the header
	# itself, a second one would end up inside the response body.
	if [ -z "${MAXSOURCE}" ]; then
		log "Bad config: \$MAXSOURCE not set"
		bad_config_message "The \$MAXSOURCE variable is not set."
	fi

	# Anything but digits makes seq fail, and no source would get
	# checked at all.
	case "${MAXSOURCE}" in
		*[!0-9]*)
			log "Bad config: \$MAXSOURCE is not a number"
			bad_config_message "The \$MAXSOURCE variable is not a number."
			;;
	esac

	for idx in $(seq 0 "${MAXSOURCE}"); do
		match="${RSYNC_MATCH[${idx}]}"
		basedir="${RSYNC_BASEDIR[${idx}]}"
		rsync_src="${RSYNC_SOURCE[${idx}]}"
		problem=""

		# Every failed check overwrites the finding of the previous
		# one, so it's the last problem that gets reported.
		case "${match}" in
			/*) ;;
			*) problem="\$RSYNC_MATCH[${idx}] doesn't contain a leading '/'." ;;
		esac
		case "${match}" in
			*/) ;;
			*) problem="\$RSYNC_MATCH[${idx}] doesn't contain a trailing '/'." ;;
		esac
		case "${basedir}" in
			*/) ;;
			*) problem="\$RSYNC_BASEDIR[${idx}] doesn't contain a trailing '/'." ;;
		esac
		if [ ! -d "${basedir}" ]; then
			problem="\$RSYNC_BASEDIR[${idx}] is not a valid directory."
		fi
		case "${rsync_src}" in
			*/) ;;
			*) problem="\$RSYNC_SOURCE[${idx}] doesn't contain a trailing '/'." ;;
		esac
		case "${rsync_src}" in
			*::*) ;;
			*) problem="\$RSYNC_SOURCE[${idx}] doesn't contain '::' as a delimiter between rsync host and share." ;;
		esac

		if [ -n "${problem}" ]; then
			log "Bad config: ${problem}"
			# bad_config_message() will exit()
			bad_config_message "${problem}"
		fi
	done
}

#
# If there is something bad about $REQUEST, write a notice about this
# to the client and the log and exit -- we don't want to serve this
# client! All rules are evaluated one after another; if several of
# them hit, the message of the last one is reported in $BADNESS.
# Returns 0 (with $BADNESS unset) if the request looks sane.
#
function check_for_insane_request() {
	unset BADNESS

	# ".." is bad. We don't want to leave the cage:-)
	[[ "${REQUEST}" == *..* ]] && BADNESS="Please don't try to send '..'!"
	# "`" is bad. We don't want to execute arbitrary commands,
	# because we're no CGI nor telnet server:-)
	[[ "${REQUEST}" == *'`'* ]] && BADNESS="Please don't try to send '\`'!"
	# "%" is bad. We don't serve anything which needs special
	# encoding. The sane "_" is already filtered:-)
	[[ "${REQUEST}" == *%* ]] && BADNESS="Please don't try to send '%'-encoded file names!"
	# "\" is bad. Backslashes are always bad.
	[[ "${REQUEST}" == *'\'* ]] && BADNESS="Please don't try to send '\\'!"
	# "//" is bad. Maybe someone wants to break out if we
	# cut one "/"...
	[[ "${REQUEST}" == *//* ]] && BADNESS="Please don't send '//'!"
	# "/" is bad if it's at a request's end. We don't supply
	# directory indices...
	[[ "${REQUEST}" == */ ]] && BADNESS="Please don't try to get a directory index by appending a '/'!"
	# "/" has to be at the beginning of a valid request.
	[[ "${REQUEST}" == /* ]] || BADNESS="All requests need to start with '/'!"

	# Nothing found? Then we're done.
	[ -n "${BADNESS}" ] || return 0

	write_header "404 Not Found" "Content-Type: text/html" "X-Info: Your request $REQUEST is *bad*"
	log "Bad request: ${REQUEST}: ${BADNESS}"
	cat >&3 <<HERE
<HTML>
	<HEAD>
		<TITLE>
			HTTP Error 404 - Not Found
		</TITLE>
	</HEAD>
	<BODY>
		<H1>
			HTTP Error 404 - Not Found
		</H1>
		<P>
			${BADNESS} I'm not to serve you...
		</P>
		<HR>
		<SMALL>
			<CENTER>
				Now: $(date)
			</CENTER>
		</SMALL>
	</BODY>
</HTML>
HERE
	# Don't return if request was bad!
	exit 1
}

#
# Read one HTTP request (all lines up to the first empty one) from stdin
# and remember the interesting parts of it:
#
#	${REQUEST}		first word after "GET", with "%5f"/"%5F"
#				replaced by "_"
#	${TRY_TO_KEEP_ALIVE}	"yes" if "Connection: keep-alive" was sent
#	${IMS_DATE}		the If-Modified-Since date, converted to
#				seconds since the epoch by date(1)
#
# If no GET line was seen (eg. on EOF), the script exits with 0.
# Otherwise the request is handed over to check_for_insane_request(),
# which exits if it doesn't like it.
#
function get_one_request() {
	local lc_prefix lc_line

	# Read from stdin up to the empty line which indicates
	# End-Of-This_Request. "-r" keeps backslashes as they were sent, so
	# that check_for_insane_request() gets to see (and reject) them.
	while read -r PREFIX LINE; do
		debug "Got line: ${PREFIX} ${LINE}"

		# Strip CR/LF and trailing blanks, decode "%5f". All client
		# data is quoted: the shell mustn't glob it against the
		# current directory.
		LINE="$(printf '%s\n' "${LINE}" | tr -d '\012\015' | sed -e 's/%5[fF]/_/g' -e 's/[[:space:]]*$//')"
		lc_prefix="$(printf '%s\n' "${PREFIX}" | tr -d '\012\015' | tr 'A-Z' 'a-z')"
		lc_line="$(printf '%s\n' "${LINE}" | tr 'A-Z' 'a-z')"

		case "${lc_prefix}" in
			get)
				# Only the path, not the "HTTP/1.x" behind it
				REQUEST="${LINE%% *}"
				;;
			connection:)
				if [ "${lc_line}" = keep-alive ]; then
					TRY_TO_KEEP_ALIVE=yes
					log Got Keep-Alive request
				fi
				;;
			if-modified-since:)
				IMS_DATE="${LINE}"
				debug "Client requests IMS: ${IMS_DATE}"
				IMS_DATE="$(date -u -d "${IMS_DATE}" '+%s')"
				;;
			'')
				debug Finished processing request
				break
				;;
			*)
				;;
		esac
	done

	if [ -z "${REQUEST}" ]; then
		debug "Finished with this client, no more work to do"
		exit 0
	fi

	# Does the request try to exploit this script?
	check_for_insane_request
}

#
# This function looks up an index to the source arrays for further
# processing. It's based on the RSYNC_MATCH[] array: the first entry
# (index 0 up to $MAXSOURCE) which is a literal prefix of $REQUEST wins,
# its index is stored in $MIRROR.
#
# Returns 0 if a source was found, 1 (with $MIRROR untouched) if not.
#
function find_server() {
	local idx

	# Nothing configured at all?
	[ -z "${RSYNC_MATCH[0]}" ] && return 1

	for idx in $(seq 0 "${MAXSOURCE:--1}"); do
		# An empty entry would be a prefix of every request
		[ -n "${RSYNC_MATCH[${idx}]}" ] || continue

		case "${REQUEST}" in
			# Quoted: compare literally, don't treat the
			# configured prefix as a glob pattern.
			"${RSYNC_MATCH[${idx}]}"*)
				MIRROR="${idx}"
				return 0
				;;
		esac
	done

	# No useable mirror found:-(
	return 1
}

#
# Index files (Packages, Sources and Release, plain or with a ".gz"
# suffix) may change on the mirror at any time. If $REQUEST names such
# a file, $FILE_MIGHT_CHANGE is set to 1, otherwise it is unset.
#
function check_for_changing_file() {
	case "${REQUEST}" in
		*/Packages | */Packages.gz | \
		*/Sources  | */Sources.gz  | \
		*/Release  | */Release.gz)
			FILE_MIGHT_CHANGE=1
			;;
		*)
			unset FILE_MIGHT_CHANGE
			;;
	esac
}

#
# Cut the matching prefix (${RSYNC_MATCH[${MIRROR}]}) off ${REQUEST}
# and store what is left, minus one leading '/', in
# ${FN_UNDER_BASEDIR}. ${LEN_OF_MATCH} is set to the position of the
# first character behind the match.
#
function set_relative_filename() {
	local matched_prefix="${RSYNC_MATCH[${MIRROR}]}"

	# 1-based position of the first character behind the match
	LEN_OF_MATCH=$(( ${#matched_prefix} + 1 ))

	# The relative filename is the old request minus as many
	# characters as the match is long.
	FN_UNDER_BASEDIR="${REQUEST:${#matched_prefix}}"

	# Strip one leading '/', if there still is one
	FN_UNDER_BASEDIR="${FN_UNDER_BASEDIR#/}"
}

#
# Fill the ${DIRECTORY[]} array with all directories leading to
# ${FN_UNDER_BASEDIR}, each one relative to the base directory and one
# level deeper than the previous one. An empty string terminates the
# list. For "pool/main/p/pppd/x.deb" the result is
# "pool", "pool/main", "pool/main/p", "pool/main/p/pppd", "".
# For a file without any directory part, only DIRECTORY[0]="" is set.
#
function set_directories() {
	local dir_part component path_so_far="" index=0
	local -a components=()

	# Forget whatever the previous request left behind
	DIRECTORY=()

	dir_part="$(dirname -- "${FN_UNDER_BASEDIR}")"

	if [ "${dir_part}" != "." ]; then
		# The array is assigned directly. The path comes from the
		# client, so it must never be written into a file which is
		# sourced afterwards: a "$(command)" in there would get
		# executed. Splitting with read also keeps the shell from
		# globbing the path components.
		IFS=/ read -r -a components <<< "${dir_part}"

		for component in "${components[@]}"; do
			# Empty parts come from doubled or leading slashes
			[ -n "${component}" ] || continue

			path_so_far="${path_so_far:+${path_so_far}/}${component}"
			DIRECTORY[${index}]="${path_so_far}"
			index=$(( index + 1 ))
		done
	fi

	# The terminating, empty entry
	DIRECTORY[${index}]=""
}

# Derive everything needed to serve the current request. Requires
# ${MIRROR} and ${REQUEST}; after the function has returned, these
# variables are set accordingly:
#
#	${FILE_MIGHT_CHANGE}	"1" if the file is special (see
#				check_for_changing_file()), unset if it's
#				a normal file
#
#	${FN_UNDER_BASEDIR}	This is the path without leading '/' to the requested file within
#				${RSYNC_BASEDIR[$MIRROR]}, eg. pool/main/p/pppd/pppd-1.2.3_2.i386.deb
#
#	${DIRECTORY[]}		Array of all directories between
#				${RSYNC_BASEDIR[${MIRROR}]} and the relative
#				filename ${FN_UNDER_BASEDIR}, eg. "pool",
#				"pool/main", "pool/main/p",
#				"pool/main/p/pppd", ""
#
#	${FULL_FILE_NAME}	${RSYNC_BASEDIR[${MIRROR}]} directly followed
#				by ${FN_UNDER_BASEDIR}
#
# NOTE(review): ${FN_DURING_DOWNLOAD} (eg. /home/ftp/pub/mirror/debian/pool/main/p/pppd/pppd-1.2.3_2.i386.deb.download,
# a file which gets larger during download) used to be listed here, but
# nothing in this function sets it -- presumably it belongs to the
# download code which isn't written yet.
function set_variables() {
	check_for_changing_file		# -> ${FILE_MIGHT_CHANGE}
	set_relative_filename		# -> ${FN_UNDER_BASEDIR}
	set_directories			# -> ${DIRECTORY[]}, needs ${FN_UNDER_BASEDIR}

	FULL_FILE_NAME="${RSYNC_BASEDIR[${MIRROR}]}${FN_UNDER_BASEDIR}"
}

# Main loop: serve one request per iteration, as long as the client
# asked for a kept-alive connection and the size of the answer was
# known.
#
# Variable description:
# ~~~~~~~~~~~~~~~~~~~~~
# REQUEST		Holds the request after get_one_request()
#
# IMS_DATE		Holds the If-Modified-Since date if requested
#
# SIZE			Holds file size if known. Note that we need to
#			close the connection (and to refuse a keep-alive)
#			if we don't know the content size!
#
# FILE_MIGHT_CHANGE	Gets set to != "" if the requested file is volatile
#
# MIRROR		Index value to RSYNC_MATCH[], RSYNC_BASEDIR[] and
#			RSYNC_SOURCE[]
#
# TRY_TO_KEEP_ALIVE	Gets set to != "" if the client asked for keep-alive
#
while true; do
	# Nothing of the previous request may survive
	unset REQUEST IMS_DATE SIZE FILE_MIGHT_CHANGE MIRROR TRY_TO_KEEP_ALIVE

	# check_all_config() will exit() on error!
	check_all_config

	# get_one_request() will exit() on EOF
	get_one_request

	log "Request is: ${REQUEST}"

	# Which array index to use? -> $MIRROR
	find_server
	if [ -z "${MIRROR}" ]; then
		SIZE=0
		# It's "Content-Length" which tells the client that no body
		# follows; a kept-alive connection depends on it.
		write_header '404 Not Found' "X-Info: Didn't find a server for ${REQUEST}..." "Content-Length: ${SIZE}"
	else
		# Set all variables as needed
		set_variables
		if [ -z "${FILE_MIGHT_CHANGE}" ] && [ -f "${FULL_FILE_NAME}" ]; then
			# Simple file. Send it!
			SIZE="$(wc -c < "${FULL_FILE_NAME}")"
			# Some wc implementations pad their output with blanks
			SIZE="${SIZE//[[:space:]]/}"
			log "Request size is: $SIZE"
			LAST_MODIFIED_DATE="$(date -u -R -r "${FULL_FILE_NAME}")"
			write_header	"200 Here You have"		\
					"Content-Length: ${SIZE}"		\
					"Content-Type: text/plain"	\
					"Last-Modified: ${LAST_MODIFIED_DATE}"
			cat "${FULL_FILE_NAME}" >&3
		else
			# XXX Fetch file and stream it
			# NOTE(review): nothing at all is sent to the client
			# in this case; $SIZE stays unset, so the connection
			# is closed below.
			:
		fi
	fi

	# If size was unknown, close connection now...
	[ -z "${SIZE}" ] && exit 0
	# ...and also if the client didn't ask to keep it open.
	[ -z "${TRY_TO_KEEP_ALIVE}" ] && exit 0
	#exit 0
done


