dslinux/user/wget/src ChangeLog Makefile.in alloca.c ansi2knr.c cmpt.c config.h.in connect.c connect.h convert.c convert.h cookies.c cookies.h ftp-basic.c ftp-ls.c ftp-opie.c ftp.c ftp.h gen-md5.c gen-md5.h gen_sslfunc.c gen_sslfunc.h getopt.c getopt.h gnu-md5.c gnu-md5.h hash.c hash.h headers.c headers.h host.c host.h html-parse.c html-parse.h html-url.c http.c init.c init.h log.c main.c mswindows.c mswindows.h netrc.c netrc.h options.h progress.c progress.h rbuf.c rbuf.h recur.c recur.h res.c res.h retr.c retr.h safe-ctype.c safe-ctype.h snprintf.c sysdep.h url.c url.h utils.c utils.h version.c wget.h
amadeus
dslinux_amadeus at user.in-berlin.de
Thu Aug 31 11:32:45 CEST 2006
Update of /cvsroot/dslinux/dslinux/user/wget/src
In directory antilope:/tmp/cvs-serv14346/user/wget/src
Added Files:
ChangeLog Makefile.in alloca.c ansi2knr.c cmpt.c config.h.in
connect.c connect.h convert.c convert.h cookies.c cookies.h
ftp-basic.c ftp-ls.c ftp-opie.c ftp.c ftp.h gen-md5.c
gen-md5.h gen_sslfunc.c gen_sslfunc.h getopt.c getopt.h
gnu-md5.c gnu-md5.h hash.c hash.h headers.c headers.h host.c
host.h html-parse.c html-parse.h html-url.c http.c init.c
init.h log.c main.c mswindows.c mswindows.h netrc.c netrc.h
options.h progress.c progress.h rbuf.c rbuf.h recur.c recur.h
res.c res.h retr.c retr.h safe-ctype.c safe-ctype.h snprintf.c
sysdep.h url.c url.h utils.c utils.h version.c wget.h
Log Message:
Add some more applications
--- NEW FILE: safe-ctype.c ---
/* <ctype.h> replacement macros.
Copyright (C) 2000 Free Software Foundation, Inc.
Contributed by Zack Weinberg <zackw at stanford.edu>.
This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This is a compatible replacement of the standard C library's <ctype.h>
with the following properties:
- Implements all isxxx() macros required by C99.
- Also implements some character classes useful when
parsing C-like languages.
- Does not change behavior depending on the current locale.
- Behaves properly for all values in the range of a signed or
unsigned char. */
#include <config.h>
#include <safe-ctype.h>
#include <stdio.h> /* for EOF */
/* Shorthand */
#define bl _sch_isblank
#define cn _sch_iscntrl
#define di _sch_isdigit
#define is _sch_isidst
#define lo _sch_islower
#define nv _sch_isnvsp
#define pn _sch_ispunct
#define pr _sch_isprint
#define sp _sch_isspace
#define up _sch_isupper
#define vs _sch_isvsp
#define xd _sch_isxdigit
/* Masks. */
#define L lo|is |pr /* lower case letter */
#define XL lo|is|xd|pr /* lowercase hex digit */
#define U up|is |pr /* upper case letter */
#define XU up|is|xd|pr /* uppercase hex digit */
#define D di |xd|pr /* decimal digit */
#define P pn |pr /* punctuation */
#define _ pn|is |pr /* underscore */
#define C cn /* control character */
#define Z nv |cn /* NUL */
#define M nv|sp |cn /* cursor movement: \f \v */
#define V vs|sp |cn /* vertical space: \r \n */
#define T nv|sp|bl|cn /* tab */
#define S nv|sp|bl|pr /* space */
/* Are we ASCII? */
#if '\n' == 0x0A && ' ' == 0x20 && '0' == 0x30 \
&& 'A' == 0x41 && 'a' == 0x61 && '!' == 0x21 \
&& EOF == -1
/* Classification table: one bitmask of _sch_is* category flags per
   character code, indexed by the code itself (0..255).  The entries for
   codes 0..127 are spelled with the one- and two-letter mask shorthands
   #defined earlier in this file (Z, C, T, V, M, S, P, D, U, XU, L, XL, _);
   the trailing comment on each row names the eight characters it covers.
   Codes 128..255 are all 0, so every class test is false for them. */
const unsigned short _sch_istable[256] =
{
Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */
C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */
S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */
P, P, P, P, P, P, P, P, /* ( ) * + , - . / */
D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */
D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? */
P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */
U, U, U, U, U, U, U, U, /* H I J K L M N O */
U, U, U, U, U, U, U, U, /* P Q R S T U V W */
U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */
P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */
L, L, L, L, L, L, L, L, /* h i j k l m n o */
L, L, L, L, L, L, L, L, /* p q r s t u v w */
L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */
/* high half of unsigned char is locale-specific, so all tests are
false in "C" locale */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/* Lower-casing table, indexed by character code (0..255).  Codes for
   'A'..'Z' (65..90) map to 'a'..'z'; every other code, including the
   whole 128..255 range, maps to itself. */
const unsigned char _sch_tolower[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64,
/* 65..90: upper-case letters are folded to lower case */
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
91, 92, 93, 94, 95, 96,
/* 97..122: lower-case letters map to themselves */
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
123,124,125,126,127,
128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};
/* Upper-casing table, indexed by character code (0..255).  Codes for
   'a'..'z' (97..122) map to 'A'..'Z'; every other code, including the
   whole 128..255 range, maps to itself. */
const unsigned char _sch_toupper[256] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64,
/* 65..90: upper-case letters map to themselves */
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
91, 92, 93, 94, 95, 96,
/* 97..122: lower-case letters are folded to upper case */
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
123,124,125,126,127,
128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143,
144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159,
160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175,
176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191,
192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207,
208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223,
224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239,
240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255,
};
#else
#error "Unsupported host character set"
#endif /* not ASCII */
--- NEW FILE: rbuf.h ---
/* Declarations for rbuf.c.
Copyright (C) 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef RBUF_H
#define RBUF_H
#ifdef HAVE_SSL
# include <openssl/ssl.h>
#endif
/* Retrieval stream: a descriptor (plus, in SSL builds, an SSL handle)
   together with a fixed 4096-byte read-ahead buffer.  Characters are
   taken out of the buffer one at a time by the RBUF_READCHAR macro,
   which refills it through rbuf_read_bufferful when it runs empty. */
struct rbuf
{
int fd; /* the descriptor; this is what RBUF_FD yields */
#ifdef HAVE_SSL
SSL *ssl; /* the ssl structure -- replaces fd for ssl connections */
#endif /* HAVE_SSL */
char buffer[4096]; /* the input buffer */
char *buffer_pos; /* current position in the buffer */
size_t buffer_left; /* number of bytes left in the buffer:
buffer_left = buffer_end - buffer_pos */
int internal_dont_touch_this; /* used by RBUF_READCHAR macro: scratch
slot holding the value returned by
rbuf_read_bufferful */
};
/* Read one character from RBUF into *STORE.

   If the buffer still holds data (buffer_left != 0), the next buffered
   character is copied to *STORE, the position/count are advanced, and
   the macro evaluates to 1.

   Otherwise buffer_pos is reset to the start of the buffer and
   rbuf_read_bufferful (rbuf) is called; its result is parked in
   internal_dont_touch_this.  If that result is <= 0 the macro evaluates
   to it and *STORE is left untouched; if it is positive, the first
   character is copied to *STORE, buffer_left becomes result - 1, and
   the macro evaluates to 1.

   So the value follows the read(2) convention (positive on success,
   <= 0 passed through from the refill), except that success is always
   reported as 1.

   RBUF is expanded many times, so the argument must be free of side
   effects.  STORE is cast to (char *) before being written through.

   #### That return value is totally screwed up, and is a direct
   result of historical implementation of header code.  The macro
   should return the character or EOF, and in case of error store it
   to rbuf->err or something. */
#define RBUF_READCHAR(rbuf, store) \
((rbuf)->buffer_left \
? (--(rbuf)->buffer_left, \
*((char *) (store)) = *(rbuf)->buffer_pos++, 1) \
: ((rbuf)->buffer_pos = (rbuf)->buffer, \
((((rbuf)->internal_dont_touch_this \
= rbuf_read_bufferful (rbuf)) <= 0) \
? (rbuf)->internal_dont_touch_this \
: ((rbuf)->buffer_left = (rbuf)->internal_dont_touch_this - 1, \
*((char *) (store)) = *(rbuf)->buffer_pos++, \
1))))
/* Return the file descriptor of RBUF. */
#define RBUF_FD(rbuf) ((rbuf)->fd)
/* Return the SSL structure of RBUF (the `ssl' member exists only when HAVE_SSL is defined). */
#define RBUF_SSL(rbuf) ((rbuf)->ssl)
/* Function declarations */
void rbuf_initialize PARAMS ((struct rbuf *, int));
int rbuf_initialized_p PARAMS ((struct rbuf *));
void rbuf_uninitialize PARAMS ((struct rbuf *));
int rbuf_readchar PARAMS ((struct rbuf *, char *));
int rbuf_peek PARAMS ((struct rbuf *, char *));
int rbuf_flush PARAMS ((struct rbuf *, char *, int));
void rbuf_discard PARAMS ((struct rbuf *));
/* Internal, but used by the macro. */
int rbuf_read_bufferful PARAMS ((struct rbuf *));
#endif /* RBUF_H */
--- NEW FILE: Makefile.in ---
# Makefile for `wget' utility
# Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# In addition, as a special exception, the Free Software Foundation
# gives permission to link the code of its release of Wget with the
# OpenSSL project's "OpenSSL" library (or with modified versions of it
# that use the same license as the "OpenSSL" library), and distribute
# the linked executables. You must obey the GNU General Public License
# in all respects for all of the code used other than "OpenSSL". If you
# modify this file, you may extend this exception to your version of the
# file, but you are not obligated to do so. If you do not wish to do
# so, delete this exception statement from your version.
#
# Version: @VERSION@
#
# Extra flags for the flat-binary tool; presumably "-s" sets the stack
# size for the uClinux/DSLinux target -- confirm against the toolchain.
FLTFLAGS += -s 20000
SHELL = /bin/sh
top_builddir = ..
top_srcdir = @top_srcdir@
srcdir = @srcdir@
VPATH = @srcdir@
ANSI2KNR = @ANSI2KNR@
# Object-file suffix, used below as in `cmpt$o'.  @U@ is substituted by
# configure (presumably "_" when ansi2knr is in use, giving `._o', and
# empty otherwise, giving `.o').
o = .@U@o
prefix = @prefix@
exec_prefix = @exec_prefix@
bindir = @bindir@
sysconfdir = @sysconfdir@
localedir = $(prefix)/share/locale
DESTDIR =
CC = @CC@
CPPFLAGS = @CPPFLAGS@
# The following line is losing on some versions of make!
DEFS = @DEFS@ -DSYSTEM_WGETRC=\"$(sysconfdir)/wgetrc\" -DLOCALEDIR=\"$(localedir)\"
CFLAGS = @CFLAGS@
LDFLAGS = @LDFLAGS@
LIBS = @LIBS@
exeext = @exeext@
INCLUDES = -I. -I$(srcdir) @SSL_INCLUDES@
COMPILE = $(CC) $(INCLUDES) $(CPPFLAGS) $(DEFS) $(CFLAGS)
LINK = @LIBTOOL@ --mode=link $(CC) $(CFLAGS) $(LDFLAGS) -o $@
INSTALL = @INSTALL@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
RM = rm -f
ETAGS = etags
# Conditional compiles
ALLOCA = @ALLOCA@
MD5_OBJ = @MD5_OBJ@
OPIE_OBJ = @OPIE_OBJ@
SSL_OBJ = @SSL_OBJ@
GETOPT_OBJ = @GETOPT_OBJ@
OBJ = $(ALLOCA) cmpt$o connect$o convert$o cookies$o \
ftp$o ftp-basic$o ftp-ls$o $(OPIE_OBJ) $(GETOPT_OBJ) hash$o \
headers$o host$o html-parse$o html-url$o http$o init$o \
log$o main$o $(MD5_OBJ) netrc$o progress$o rbuf$o recur$o \
res$o retr$o safe-ctype$o snprintf$o $(SSL_OBJ) url$o \
utils$o version$o
# Clear the built-in suffix list, then register only the suffixes used
# here.  `._c' / `._o' are the de-ANSI-fied source and its object, used
# when the compiler needs ansi2knr.
.SUFFIXES:
.SUFFIXES: .c .o ._c ._o
.c.o:
	$(COMPILE) -c $<
.c._c: $(ANSI2KNR)
	$(ANSI2KNR) $< > $*.tmp && mv $*.tmp $@
# Compile a `._c' file by hard-linking it to a temporary `_NAME.c', since
# the compiler will not accept the `._c' extension directly.
._c._o:
	@echo $(COMPILE) -c $<
	@rm -f _$*.c
	@ln $< _$*.c && $(COMPILE) -c _$*.c && mv _$*.o $@ && rm _$*.c
.c._o: $(ANSI2KNR)
	$(ANSI2KNR) $< > $*.tmp && mv $*.tmp $*._c
	@echo $(COMPILE) -c $*._c
	@rm -f _$*.c
	@ln $*._c _$*.c && $(COMPILE) -c _$*.c && mv _$*.o $@ && rm _$*.c
# Dependencies for building
wget$(exeext): $(OBJ)
	$(LINK) $(OBJ) $(LIBS)
ansi2knr: ansi2knr.o
	$(CC) -o ansi2knr ansi2knr.o $(LIBS)
$(OBJ): $(ANSI2KNR)
#
# Dependencies for installing
#
install: install.bin
uninstall: uninstall.bin
install.bin: wget$(exeext)
	$(top_srcdir)/mkinstalldirs $(DESTDIR)$(bindir)
	$(INSTALL_PROGRAM) wget$(exeext) $(DESTDIR)$(bindir)/wget$(exeext)
uninstall.bin:
	$(RM) $(DESTDIR)$(bindir)/wget$(exeext)
#
# Dependencies for cleanup
#
clean:
	$(RM) *.o wget$(exeext) *~ *.bak core core.[0-9]* $(ANSI2KNR) *._o *._c
	$(RM) -r .libs
distclean: clean
	$(RM) Makefile config.h
realclean: distclean
	$(RM) TAGS
#
# Dependencies for maintenance
#
subdir = src
# Regenerate this Makefile from Makefile.in via the top-level config.status.
Makefile: Makefile.in ../config.status
	cd .. && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status
TAGS: *.c *.h
	-$(ETAGS) *.c *.h
# DO NOT DELETE THIS LINE -- make depend depends on it.
alloca$o:
cmpt$o: wget.h sysdep.h options.h safe-ctype.h
connect$o: wget.h sysdep.h options.h safe-ctype.h utils.h connect.h host.h
convert$o: wget.h convert.h url.h recur.h utils.h hash.h
cookies$o: wget.h sysdep.h options.h safe-ctype.h cookies.h hash.h url.h utils.h
ftp-basic$o: wget.h sysdep.h options.h safe-ctype.h utils.h rbuf.h connect.h \
host.h ftp.h
ftp-ls$o: wget.h sysdep.h options.h safe-ctype.h utils.h ftp.h rbuf.h host.h \
url.h
ftp-opie$o: wget.h sysdep.h options.h safe-ctype.h gen-md5.h
ftp$o: wget.h sysdep.h options.h safe-ctype.h utils.h url.h rbuf.h retr.h \
ftp.h host.h connect.h netrc.h
gen-md5$o: wget.h sysdep.h options.h safe-ctype.h gen-md5.h
gen_sslfunc$o: wget.h sysdep.h options.h safe-ctype.h utils.h connect.h host.h \
url.h
getopt$o: wget.h sysdep.h options.h safe-ctype.h getopt.h
gnu-md5$o: wget.h sysdep.h options.h safe-ctype.h gnu-md5.h
hash$o: wget.h sysdep.h options.h safe-ctype.h utils.h hash.h
headers$o: wget.h sysdep.h options.h safe-ctype.h connect.h host.h rbuf.h \
headers.h
host$o: wget.h sysdep.h options.h safe-ctype.h utils.h host.h url.h hash.h
html-parse$o: wget.h sysdep.h options.h safe-ctype.h html-parse.h
html-url$o: wget.h sysdep.h options.h safe-ctype.h html-parse.h url.h utils.h
http$o: wget.h sysdep.h options.h safe-ctype.h utils.h url.h host.h rbuf.h \
retr.h headers.h connect.h host.h netrc.h gen_sslfunc.h \
cookies.h gen-md5.h
init$o: wget.h sysdep.h options.h safe-ctype.h utils.h init.h host.h recur.h \
netrc.h cookies.h progress.h
log$o: wget.h sysdep.h options.h safe-ctype.h utils.h
main$o: wget.h sysdep.h options.h safe-ctype.h utils.h init.h retr.h rbuf.h \
recur.h host.h cookies.h url.h progress.h gen_sslfunc.h getopt.h
gnu-md5$o: wget.h sysdep.h options.h safe-ctype.h gnu-md5.h
mswindows$o: wget.h sysdep.h options.h safe-ctype.h utils.h url.h
netrc$o: wget.h sysdep.h options.h safe-ctype.h utils.h netrc.h init.h
progress$o: wget.h sysdep.h options.h safe-ctype.h progress.h utils.h retr.h \
rbuf.h
rbuf$o: wget.h sysdep.h options.h safe-ctype.h rbuf.h connect.h host.h \
gen_sslfunc.h
recur$o: wget.h sysdep.h options.h safe-ctype.h url.h recur.h utils.h retr.h \
rbuf.h ftp.h host.h hash.h
res$o: wget.h sysdep.h options.h safe-ctype.h utils.h hash.h url.h retr.h res.h
retr$o: wget.h sysdep.h options.h safe-ctype.h utils.h retr.h rbuf.h url.h \
recur.h ftp.h host.h connect.h hash.h
snprintf$o: safe-ctype.h
safe-ctype$o: safe-ctype.h
url$o: wget.h sysdep.h options.h safe-ctype.h utils.h url.h host.h hash.h
utils$o: wget.h sysdep.h options.h safe-ctype.h utils.h hash.h
version$o:
--- NEW FILE: safe-ctype.h ---
/* <ctype.h> replacement macros.
Copyright (C) 2000 Free Software Foundation, Inc.
Contributed by Zack Weinberg <zackw at stanford.edu>.
This file is part of the libiberty library.
Libiberty is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
Libiberty is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with libiberty; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This is a compatible replacement of the standard C library's <ctype.h>
with the following properties:
- Implements all isxxx() macros required by C99.
- Also implements some character classes useful when
parsing C-like languages.
- Does not change behavior depending on the current locale.
- Behaves properly for all values in the range of a signed or
unsigned char.
To avoid conflicts, this header defines the isxxx functions in upper
case, e.g. ISALPHA not isalpha. */
#ifndef SAFE_CTYPE_H
#define SAFE_CTYPE_H
/* Catch erroneous use of ctype macros. Files that really know what
they're doing can disable this check by defining the
I_REALLY_WANT_CTYPE_MACROS preprocessor constant. */
#ifndef I_REALLY_WANT_CTYPE_MACROS
/* We used to #define these to errors, but that loses when real
ctype.h is included, usually by a library's (OpenSSL's) header
which gets #included after wget.h. */
#undef isalpha
#undef isalnum
#undef isblank
#undef iscntrl
#undef isdigit
#undef isgraph
#undef islower
#undef isprint
#undef ispunct
#undef isspace
#undef isupper
#undef isxdigit
#endif /* I_REALLY_WANT_CTYPE_MACROS */
/* Categories.  Each basic category is a distinct single bit, so several
   can be OR-ed together into one mask; the "combination" enumerators at
   the bottom are such masks.  These values are what the entries of
   _sch_istable are tested against by the IS* macros in this header. */
enum {
/* In C99 */
_sch_isblank = 0x0001, /* space \t */
_sch_iscntrl = 0x0002, /* nonprinting characters */
_sch_isdigit = 0x0004, /* 0-9 */
_sch_islower = 0x0008, /* a-z */
_sch_isprint = 0x0010, /* any printing character including ' ' */
_sch_ispunct = 0x0020, /* all punctuation */
_sch_isspace = 0x0040, /* space \t \n \r \f \v */
_sch_isupper = 0x0080, /* A-Z */
_sch_isxdigit = 0x0100, /* 0-9A-Fa-f */
/* Extra categories useful to cpplib. */
_sch_isidst = 0x0200, /* A-Za-z_ */
_sch_isvsp = 0x0400, /* \n \r */
_sch_isnvsp = 0x0800, /* space \t \f \v \0 */
/* Combinations of the above. */
_sch_isalpha = _sch_isupper|_sch_islower, /* A-Za-z */
_sch_isalnum = _sch_isalpha|_sch_isdigit, /* A-Za-z0-9 */
_sch_isidnum = _sch_isidst|_sch_isdigit, /* A-Za-z0-9_ */
_sch_isgraph = _sch_isalnum|_sch_ispunct, /* isprint and not space */
_sch_iscppsp = _sch_isvsp|_sch_isnvsp /* isspace + \0 */
};
/* Character classification.  _sch_istable holds one category bitmask
   per character code. */
extern const unsigned short _sch_istable[256];
/* Non-zero iff character C belongs to at least one category in BIT.
   C is masked with 0xff before indexing, so any int (including a
   negative plain-char value) selects an entry inside the table.  The
   result is the matching bits, not a normalized 0/1. */
#define _sch_test(c, bit) (_sch_istable[(c) & 0xff] & (unsigned short)(bit))
/* Upper-case counterparts of the <ctype.h> predicates. */
#define ISALPHA(c) _sch_test(c, _sch_isalpha)
#define ISALNUM(c) _sch_test(c, _sch_isalnum)
#define ISBLANK(c) _sch_test(c, _sch_isblank)
#define ISCNTRL(c) _sch_test(c, _sch_iscntrl)
#define ISDIGIT(c) _sch_test(c, _sch_isdigit)
#define ISGRAPH(c) _sch_test(c, _sch_isgraph)
#define ISLOWER(c) _sch_test(c, _sch_islower)
#define ISPRINT(c) _sch_test(c, _sch_isprint)
#define ISPUNCT(c) _sch_test(c, _sch_ispunct)
#define ISSPACE(c) _sch_test(c, _sch_isspace)
#define ISUPPER(c) _sch_test(c, _sch_isupper)
#define ISXDIGIT(c) _sch_test(c, _sch_isxdigit)
/* Extra predicates with no <ctype.h> equivalent: identifier characters
   and the vertical / non-vertical whitespace split. */
#define ISIDNUM(c) _sch_test(c, _sch_isidnum)
#define ISIDST(c) _sch_test(c, _sch_isidst)
#define IS_VSPACE(c) _sch_test(c, _sch_isvsp)
#define IS_NVSPACE(c) _sch_test(c, _sch_isnvsp)
#define IS_SPACE_OR_NUL(c) _sch_test(c, _sch_iscppsp)
/* Character transformation.  Table lookups indexed by (c) & 0xff, so
   the argument is masked the same way as in _sch_test. */
extern const unsigned char _sch_toupper[256];
extern const unsigned char _sch_tolower[256];
#define TOUPPER(c) _sch_toupper[(c) & 0xff]
#define TOLOWER(c) _sch_tolower[(c) & 0xff]
#endif /* SAFE_CTYPE_H */
--- NEW FILE: ftp-ls.c ---
/* Parsing FTP `ls' output.
Copyright (C) 1995, 1996, 1997, 2000, 2001
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <sys/types.h>
#include <errno.h>
#include "wget.h"
#include "utils.h"
#include "ftp.h"
#include "url.h"
/* Translate the nine-character symbolic permission string S (for
   example "rwxr-xr-x") into its numeric mode (0755 for that example).
   Only the first nine characters are examined; a string shorter than
   that yields 0.  An `s' in an execute slot is counted as execute, but
   no setuid/setgid/sticky bits are ever produced, and ACL markers are
   ignored.  */
static int
symperms (const char *s)
{
  int mode = 0;
  const char *triple;

  if (strlen (s) < 9)
    return 0;

  /* Walk the owner, group and other triples in turn, shifting each
     one's three bits into the low end of MODE.  */
  for (triple = s; triple < s + 9; triple += 3)
    {
      int bits = 0;

      if (triple[0] == 'r')
        bits |= 4;
      if (triple[1] == 'w')
        bits |= 2;
      if (triple[2] == 'x' || triple[2] == 's')
        bits |= 1;

      mode = (mode << 3) | bits;
    }
  return mode;
}
/* Cleans a line of text so that it can be consistently parsed.  Strips
   one trailing <LF> and then one trailing <CR>, if present at the end
   of LINE, and replaces every <TAB> character with <SPACE>.  LINE is
   modified in place.  Returns the length of the modified line.

   The length is re-checked before each strip: a line consisting only
   of "\n" becomes empty after the first strip, and the <CR> test must
   not then look at the byte in front of the buffer.  */
static int
clean_line(char *line)
{
  int len = strlen (line);
  char *p;

  if (len > 0 && line[len - 1] == '\n')
    line[--len] = '\0';
  if (len > 0 && line[len - 1] == '\r')
    line[--len] = '\0';

  for (p = line; *p; p++)
    if (*p == '\t')
      *p = ' ';

  return len;
}
/* Convert the Un*x-ish style directory listing stored in FILE to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard Unix `ls -la'
   output (whatever that might be).  BSD (no group) and SYSV (with
   group) listings are handled.

   FILE is the path of the listing file; it is opened here and closed
   before returning.  If IGNORE_PERMS is non-zero the permission column
   is not parsed and fixed defaults are used instead (0755 for
   directories, 0644 for everything else).

   Returns the head of a doubly-linked list of heap-allocated fileinfo
   nodes (each with a heap-allocated name and, for symlinks, linkto), or
   NULL if FILE cannot be opened or no usable entry was found.  Lines
   that cannot be parsed, and the `.' / `..' entries, are skipped.

   The time stamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored.  */
static struct fileinfo *
ftp_parse_unix_ls (const char *file, int ignore_perms)
{
  FILE *fp;
  static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  int next, len, i, error, ignore;
  int year, month, day;		/* for time analysis */
  int hour, min, sec;
  struct tm timestruct, *tnow;
  time_t timenow;

  char *line, *tok;		/* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((line = read_whole_line (fp)))
    {
      len = clean_line (line);
      /* Skip if total... */
      if (!strncasecmp (line, "total", 5))
	{
	  xfree (line);
	  continue;
	}
      /* Get the first token (permissions). */
      tok = strtok (line, " ");
      if (!tok)
	{
	  xfree (line);
	  continue;
	}

      cur.name = NULL;
      cur.linkto = NULL;

      /* Decide whether we deal with a file or a directory. */
      switch (*tok)
	{
	case '-':
	  cur.type = FT_PLAINFILE;
	  DEBUGP (("PLAINFILE; "));
	  break;
	case 'd':
	  cur.type = FT_DIRECTORY;
	  DEBUGP (("DIRECTORY; "));
	  break;
	case 'l':
	  cur.type = FT_SYMLINK;
	  DEBUGP (("SYMLINK; "));
	  break;
	default:
	  cur.type = FT_UNKNOWN;
	  DEBUGP (("UNKNOWN; "));
	  break;
	}

      if (ignore_perms)
	{
	  switch (cur.type)
	    {
	    case FT_PLAINFILE:
	      cur.perms = 0644;
	      break;
	    case FT_DIRECTORY:
	      cur.perms = 0755;
	      break;
	    default:
	      /*cur.perms = 1023;*/	/* #### What is this? --hniksic */
	      cur.perms = 0644;
	    }
	  DEBUGP (("implicit perms %0o; ", cur.perms));
	}
      else
	{
	  /* Skip the type character; the nine permission characters
	     follow it.  */
	  cur.perms = symperms (tok + 1);
	  DEBUGP (("perms %0o; ", cur.perms));
	}

      error = ignore = 0;	/* Erroneous and ignoring entries are
				   treated equally for now.  */
      year = hour = min = sec = 0; /* Silence the compiler.  */
      month = day = 0;
      next = -1;
      /* While there are tokens on the line, parse them.  Next is the
	 number of tokens left until the filename.

	 Use the month-name token as the "anchor" (the place where the
	 position wrt the file name is "known").  When a month name is
	 encountered, `next' is set to 5.  Also, the preceding
	 characters are parsed to get the file size.

	 This tactic is quite dubious when it comes to
	 internationalization issues (non-English month names), but it
	 works for now.  */
      while ((tok = strtok (NULL, " ")))
	{
	  --next;
	  if (next < 0)		/* a month name was not encountered */
	    {
	      for (i = 0; i < 12; i++)
		if (!strcmp (tok, months[i]))
		  break;
	      /* If we got a month, it means the token before it is the
		 size, and the filename is three tokens away.  */
	      if (i != 12)
		{
		  /* tok[-1] is the separator strtok overwrote, so
		     tok - 2 is the last character of the size field;
		     read the digits backwards from there.  */
		  char *t = tok - 2;
		  long mul = 1;

		  for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
		    cur.size += mul * (*t - '0');
		  if (t == line)
		    {
		      /* Something is seriously wrong.  */
		      error = 1;
		      break;
		    }
		  month = i;
		  next = 5;
		  DEBUGP (("month: %s; ", months[month]));
		}
	    }
	  else if (next == 4)	/* days */
	    {
	      if (tok[1])	/* two-digit... */
		day = 10 * (*tok - '0') + tok[1] - '0';
	      else		/* ...or one-digit */
		day = *tok - '0';
	      DEBUGP (("day: %d; ", day));
	    }
	  else if (next == 3)
	    {
	      /* This ought to be either the time, or the year.  Let's
		 be flexible!

		 If we have a number x, it's a year.  If we have x:y,
		 it's hours and minutes.  If we have x:y:z, z are
		 seconds.  */
	      year = 0;
	      min = hour = sec = 0;
	      /* We must deal with digits.  */
	      if (ISDIGIT (*tok))
		{
		  /* Suppose it's year.  */
		  for (; ISDIGIT (*tok); tok++)
		    year = (*tok - '0') + 10 * year;
		  if (*tok == ':')
		    {
		      /* This means these were hours!  */
		      hour = year;
		      year = 0;
		      ++tok;
		      /* Get the minutes...  */
		      for (; ISDIGIT (*tok); tok++)
			min = (*tok - '0') + 10 * min;
		      if (*tok == ':')
			{
			  /* ...and the seconds.  */
			  ++tok;
			  for (; ISDIGIT (*tok); tok++)
			    sec = (*tok - '0') + 10 * sec;
			}
		    }
		}
	      if (year)
		DEBUGP (("year: %d (no tm); ", year));
	      else
		DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
	    }
	  else if (next == 2)	/* The file name */
	    {
	      int fnlen;
	      char *p;

	      /* Since the file name may contain a SPC, it is possible
		 for strtok to handle it wrong.  */
	      fnlen = strlen (tok);
	      if (fnlen < len - (tok - line))
		{
		  /* So we have a SPC in the file name.  Restore the
		     original.  */
		  tok[fnlen] = ' ';
		  /* If the file is a symbolic link, it should have a
		     ` -> ' somewhere.  */
		  if (cur.type == FT_SYMLINK)
		    {
		      p = strstr (tok, " -> ");
		      if (!p)
			{
			  error = 1;
			  break;
			}
		      cur.linkto = xstrdup (p + 4);
		      DEBUGP (("link to: %s\n", cur.linkto));
		      /* And separate it from the file name.  */
		      *p = '\0';
		    }
		}
	      /* If we have the filename, add it to the list of files or
		 directories.  */
	      /* "." and ".." are an exception!  */
	      if (!strcmp (tok, ".") || !strcmp (tok, ".."))
		{
		  DEBUGP (("\nIgnoring `.' and `..'; "));
		  ignore = 1;
		  break;
		}
	      /* Some FTP sites choose to have ls -F as their default
		 LIST output, which marks the symlinks with a trailing
		 `@', directory names with a trailing `/' and
		 executables with a trailing `*'.  This is no problem
		 unless encountering a symbolic link ending with `@',
		 or an executable ending with `*' on a server without
		 default -F output.  I believe these cases are very
		 rare.  */
	      fnlen = strlen (tok); /* re-calculate `fnlen' */
	      cur.name = (char *)xmalloc (fnlen + 1);
	      memcpy (cur.name, tok, fnlen + 1);
	      if (fnlen)
		{
		  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
		    {
		      cur.name[fnlen - 1] = '\0';
		      DEBUGP (("trailing `/' on dir.\n"));
		    }
		  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
		    {
		      cur.name[fnlen - 1] = '\0';
		      DEBUGP (("trailing `@' on link.\n"));
		    }
		  else if (cur.type == FT_PLAINFILE
			   && (cur.perms & 0111)
			   && cur.name[fnlen - 1] == '*')
		    {
		      cur.name[fnlen - 1] = '\0';
		      DEBUGP (("trailing `*' on exec.\n"));
		    }
		} /* if (fnlen) */
	      else
		error = 1;
	      break;
	    }
	  else
	    abort ();
	} /* while */

      /* An entry without a name, or a symlink without a target, is
	 unusable.  */
      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
	error = 1;

      DEBUGP (("\n"));

      if (error || ignore)
	{
	  DEBUGP (("Skipping.\n"));
	  FREE_MAYBE (cur.name);
	  FREE_MAYBE (cur.linkto);
	  xfree (line);
	  continue;
	}

      /* Append a heap copy of CUR to the list; L always points at the
	 most recently added node.  */
      if (!dir)
	{
	  l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
	  memcpy (l, &cur, sizeof (cur));
	  l->prev = l->next = NULL;
	}
      else
	{
	  cur.prev = l;
	  l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
	  l = l->next;
	  memcpy (l, &cur, sizeof (cur));
	  l->next = NULL;
	}
      /* Get the current time.  */
      timenow = time (NULL);
      tnow = localtime (&timenow);
      /* Build the time-stamp (the idea by zaga at fly.cc.fer.hr).  */
      timestruct.tm_sec   = sec;
      timestruct.tm_min   = min;
      timestruct.tm_hour  = hour;
      timestruct.tm_mday  = day;
      timestruct.tm_mon   = month;
      if (year == 0)
	{
	  /* Some listings will not specify the year if it is "obvious"
	     that the file was from the previous year.  E.g. if today
	     is 97-01-12, and you see a file of Dec 15th, its year is
	     1996, not 1997.  Thanks to Vladimir Volovich for
	     mentioning this!  */
	  if (month > tnow->tm_mon)
	    timestruct.tm_year = tnow->tm_year - 1;
	  else
	    timestruct.tm_year = tnow->tm_year;
	}
      else
	timestruct.tm_year = year;
      /* A year parsed from the listing is absolute; struct tm wants it
	 relative to 1900.  */
      if (timestruct.tm_year >= 1900)
	timestruct.tm_year -= 1900;
      timestruct.tm_wday  = 0;
      timestruct.tm_yday  = 0;
      timestruct.tm_isdst = -1;
      l->tstamp = mktime (&timestruct); /* store the time-stamp */

      xfree (line);
    }

  fclose (fp);
  return dir;
}
/* Parse the Windows NT (DOS-style) FTP directory listing stored in FILE.

   Every line is expected to have the shape

       mm-dd-yy  hh:mm[AP]M  <DIR>|size  name

   with the file name starting at column 39.  Returns the head of a
   doubly-linked list of fileinfo entries, or NULL if FILE cannot be
   opened or contains no parsable entry.  Lines that are too short or
   that lack one of the expected columns are skipped.  */
static struct fileinfo *
ftp_parse_winnt_ls (const char *file)
{
  FILE *fp;
  int len;
  int year, month, day;         /* for time analysis */
  int hour, min;
  struct tm timestruct;
  char *line, *tok;             /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((line = read_whole_line (fp)))
    {
      /* Nothing is owned by CUR yet; the skip path relies on this.  */
      cur.name = NULL;
      len = clean_line (line);

      /* Extracting name is a bit of black magic and we have to do it
         before `strtok' inserted extra \0 characters in the line
         string.  For the moment let us just suppose that the name starts
         at column 39 of the listing.  This way we could also recognize
         filenames that begin with a series of space characters (but who
         really wants to use such filenames anyway?).  */
      if (len < 40)
        goto skip_line;
      tok = line + 39;
      cur.name = xstrdup (tok);
      DEBUGP (("Name: '%s'\n", cur.name));

      /* First column: mm-dd-yy.  Should atoi() on the month fail, january
         will be assumed.  A missing field means the line is not a
         listing entry at all, so it is skipped.  */
      tok = strtok (line, "-");
      if (tok == NULL)
        goto skip_line;
      month = atoi (tok) - 1;
      if (month < 0)
        month = 0;
      tok = strtok (NULL, "-");
      if (tok == NULL)
        goto skip_line;
      day = atoi (tok);
      tok = strtok (NULL, " ");
      if (tok == NULL)
        goto skip_line;
      year = atoi (tok);
      /* Assuming the epoch starting at 1.1.1970 */
      if (year <= 70)
        year += 100;

      /* Second column: hh:mm[AP]M, listing does not contain value for
         seconds */
      tok = strtok (NULL, ":");
      if (tok == NULL)
        goto skip_line;
      hour = atoi (tok);
      tok = strtok (NULL, "M");
      if (tok == NULL)
        goto skip_line;
      min = atoi (tok);

      /* Adjust hour from AM/PM.  Just for the record, the sequence goes
         11:00AM, 12:00PM, 01:00PM ... 11:00PM, 12:00AM, 01:00AM .
         The token is "mmA" or "mmP"; only look at the third character
         when the first two exist, so we never read past the token.  */
      if (hour == 12)
        hour = 0;
      if (tok[0] && tok[1] && tok[2] == 'P')
        hour += 12;

      DEBUGP (("YYYY/MM/DD HH:MM - %d/%02d/%02d %02d:%02d\n",
               year+1900, month, day, hour, min));

      /* Build the time-stamp (copy & paste from above) */
      timestruct.tm_sec   = 0;
      timestruct.tm_min   = min;
      timestruct.tm_hour  = hour;
      timestruct.tm_mday  = day;
      timestruct.tm_mon   = month;
      timestruct.tm_year  = year;
      timestruct.tm_wday  = 0;
      timestruct.tm_yday  = 0;
      timestruct.tm_isdst = -1;
      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));

      /* Third column: Either file length, or <DIR>.  We also set the
         permissions (guessed as 0644 for plain files and 0755 for
         directories as the listing does not give us a clue) and filetype
         here.  strtok never returns an empty token, so a NULL check is
         all that is needed.  */
      tok = strtok (NULL, " ");
      if (tok == NULL)
        goto skip_line;
      if (*tok == '<')
        {
          cur.type  = FT_DIRECTORY;
          cur.size  = 0;
          cur.perms = 0755;
          DEBUGP (("Directory\n"));
        }
      else
        {
          cur.type  = FT_PLAINFILE;
          cur.size  = atoi (tok);
          cur.perms = 0644;
          DEBUGP (("File, size %ld bytes\n", cur.size));
        }

      cur.linkto = NULL;

      /* And put everything into the linked list */
      if (!dir)
        {
          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
          cur.prev = l;
          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
          l->next = NULL;
        }

      xfree (line);
      continue;

    skip_line:
      /* Malformed or too-short line: release what this iteration
         allocated and move on to the next line.  */
      DEBUGP (("Skipping.\n"));
      FREE_MAYBE (cur.name);
      xfree (line);
    }

  fclose (fp);
  return dir;
}
/* Converts VMS symbolic permissions to number-style ones, e.g. string
   RWED,RWE,RE to 755.  "D" (delete) is taken to be equal to "W"
   (write).  Inspired by a patch of Stoyan Lekov <lekov at eda.bg>.

   S is a NUL-terminated string of the letters R, W, E, D separated by
   commas; each comma shifts the accumulated value left by three bits so
   that the next group lands in the next octal digit.  Any other
   character is reported through DEBUGP and otherwise ignored.  An empty
   string yields 0.  */
static int
vmsperms (const char *s)
{
  int perms = 0;

  /* Test the terminator before looking at a character, so an empty
     string is never read past its end.  */
  for (; *s; ++s)
    {
      switch (*s) {
        case ',': perms <<= 3; break;
        case 'R': perms  |= 4; break;
        case 'W': perms  |= 2; break;
        case 'D': perms  |= 2; break;
        case 'E': perms  |= 1; break;
        default:  DEBUGP(("wrong VMS permissons!\n"));
      }
    }
  return perms;
}
/* Parse the VMS FTP directory listing stored in FILE.

   The first three lines (blank, "Directory ...", blank) are discarded.
   Every following entry has the columns

       NAME.EXT;VER  blocks  DD-MMM-YYYY hh:mm[:ss]  [owner]  (S,O,G,W)

   where the name may sit alone on its own line with the remaining
   columns on the next one.  Parsing stops at the first empty line or at
   the first line whose date/time columns cannot be found.  Returns the
   head of a doubly-linked list of fileinfo entries, or NULL if FILE
   cannot be opened or no entry could be parsed.  File sizes are always
   reported as 0 because the listing only gives a size in blocks.  */
static struct fileinfo *
ftp_parse_vms_ls (const char *file)
{
  FILE *fp;
  /* #### A third copy of more-or-less the same array ? */
  static const char *months[] = {
    "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
    "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
  };
  int i;
  int year, month, day;         /* for time analysis */
  int hour, min, sec;
  struct tm timestruct;
  char *line, *tok;             /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "rb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Skip the three header lines: an empty line, the
     "Directory PUB$DEVICE[PUB]" line and another empty line.  */
  for (i = 0; i < 3; i++)
    {
      line = read_whole_line (fp);
      if (line)
        xfree (line);
    }

  /* Line loop to end of file: */
  while ((line = read_whole_line (fp)))
    {
      char *p;
      size_t namelen;
      int is_dir;

      /* Nothing is owned by CUR yet; the exit paths rely on this.  */
      cur.name = NULL;

      i = clean_line (line);
      if (!i)
        goto stop_listing;

      /* First column: Name.  A bit of black magic again.  The name may be
         either ABCD.EXT or ABCD.EXT;NUM and it might be on a separate
         line.  Therefore we will first try to get the complete name
         until the first space character; if it fails, we assume that the
         name occupies the whole line.  After that we search for the
         version separator ";", we remove it and check the extension of
         the file; extension .DIR denotes directory.  */
      tok = strtok (line, " ");
      if (tok == NULL)
        tok = line;
      DEBUGP (("file name: '%s'\n", tok));
      for (p = tok; *p && *p != ';'; p++)
        ;
      if (*p == ';')
        *p = '\0';

      /* Only look at the last four characters when the name has that
         many; shorter names cannot carry a ".DIR" extension.  */
      namelen = strlen (tok);
      is_dir = (namelen >= 4 && !strcmp (tok + namelen - 4, ".DIR"));
      if (is_dir)
        tok[namelen - 4] = '\0';
      cur.name = xstrdup (tok);
      DEBUGP (("Name: '%s'\n", cur.name));

      /* If the name ends on .DIR or .DIR;#, it's a directory.  We also set
         the file size to zero as the listing does tell us only the size
         in filesystem blocks - for an integrity check (when mirroring,
         for example) we would need the size in bytes.  */
      if (is_dir)
        {
          cur.type = FT_DIRECTORY;
          DEBUGP (("Directory\n"));
        }
      else
        {
          cur.type = FT_PLAINFILE;
          DEBUGP (("File\n"));
        }
      cur.size = 0;

      /* Second column, if exists, or the first column of the next line
         contain file size in blocks.  We will skip it.  */
      tok = strtok (NULL, " ");
      if (tok == NULL)
        {
          DEBUGP (("Getting additional line\n"));
          xfree (line);
          line = read_whole_line (fp);
          if (!line)
            {
              DEBUGP (("empty line read, leaving listing parser\n"));
              goto stop_listing;
            }
          i = clean_line (line);
          if (!i || (tok = strtok (line, " ")) == NULL)
            {
              DEBUGP (("confusing VMS listing item, leaving listing parser\n"));
              goto stop_listing;
            }
        }
      DEBUGP (("second token: '%s'\n", tok));

      /* Third/Second column: Date DD-MMM-YYYY.  */
      tok = strtok (NULL, "-");
      if (tok == NULL)
        {
          DEBUGP (("nonsense in VMS listing, skipping this line\n"));
          goto stop_listing;
        }
      DEBUGP (("day: '%s'\n", tok));
      day = atoi (tok);
      tok = strtok (NULL, "-");
      if (!tok)
        {
          /* If the server produces garbage like
             'EA95_0PS.GZ;1      No privilege for attempted operation'
             the first strtok(NULL, "-") will return everything until the
             end of the line and only the next strtok() call will return
             NULL.
             NOTE(review): the message says "skipping this line", yet the
             parser has always stopped here; that behavior is kept.  */
          DEBUGP (("nonsense in VMS listing, skipping this line\n"));
          goto stop_listing;
        }
      for (i = 0; i < 12; i++)
        if (!strcmp (tok, months[i]))
          break;
      /* Unknown months are mapped to January */
      month = i % 12;
      tok = strtok (NULL, " ");
      if (tok == NULL)
        {
          DEBUGP (("nonsense in VMS listing, skipping this line\n"));
          goto stop_listing;
        }
      year = atoi (tok) - 1900;
      DEBUGP (("date parsed\n"));

      /* Fourth/Third column: Time hh:mm[:ss] */
      tok = strtok (NULL, " ");
      if (tok == NULL)
        {
          DEBUGP (("nonsense in VMS listing, skipping this line\n"));
          goto stop_listing;
        }
      hour = min = sec = 0;
      p = tok;
      hour = atoi (p);
      for (; *p && *p != ':'; ++p)
        ;
      if (*p)
        min = atoi (++p);
      for (; *p && *p != ':'; ++p)
        ;
      if (*p)
        sec = atoi (++p);

      DEBUGP (("YYYY/MM/DD HH:MM:SS - %d/%02d/%02d %02d:%02d:%02d\n",
               year+1900, month, day, hour, min, sec));

      /* Build the time-stamp (copy & paste from above) */
      timestruct.tm_sec   = sec;
      timestruct.tm_min   = min;
      timestruct.tm_hour  = hour;
      timestruct.tm_mday  = day;
      timestruct.tm_mon   = month;
      timestruct.tm_year  = year;
      timestruct.tm_wday  = 0;
      timestruct.tm_yday  = 0;
      timestruct.tm_isdst = -1;
      cur.tstamp = mktime (&timestruct); /* store the time-stamp */
      DEBUGP (("Timestamp: %ld\n", cur.tstamp));

      /* Skip the fifth column */
      tok = strtok (NULL, " ");

      /* Sixth column: Permissions */
      tok = strtok (NULL, ","); /* Skip the VMS-specific SYSTEM permissions */
      tok = strtok (NULL, ")");
      if (tok == NULL)
        {
          DEBUGP (("confusing VMS permissions, skipping line\n"));
          goto skip_entry;
        }
      /* Permissions have the format "RWED,RWED,RE" */
      cur.perms = vmsperms (tok);
      DEBUGP (("permissions: %s -> 0%o\n", tok, cur.perms));

      cur.linkto = NULL;

      /* And put everything into the linked list */
      if (!dir)
        {
          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
          cur.prev = l;
          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
          l->next = NULL;
        }

      xfree (line);
      continue;

    skip_entry:
      /* Drop this entry only and go on with the next line.  */
      FREE_MAYBE (cur.name);
      FREE_MAYBE (line);
      continue;

    stop_listing:
      /* Release whatever this iteration still owns and leave the parser;
         LINE is NULL when the follow-up read hit end of file.  */
      FREE_MAYBE (cur.name);
      FREE_MAYBE (line);
      break;
    }

  fclose (fp);
  return dir;
}
/* Dispatch to the listing parser that matches SYSTEM_TYPE.  The system
   type should be based on the "SYST" response of the FTP server.  Unix,
   VMS and MacOS listings go straight to their parser; a Windows NT
   listing is first probed to see whether the server is imitating the
   Unix format.  Any other system type is reported and handed to the Unix
   parser.  Returns whatever the chosen parser returns, or NULL if the
   probe of a Windows NT listing cannot open FILE.  */
struct fileinfo *
ftp_parse_ls (const char *file, const enum stype system_type)
{
  if (system_type == ST_UNIX)
    return ftp_parse_unix_ls (file, FALSE);

  if (system_type == ST_VMS)
    return ftp_parse_vms_ls (file);

  if (system_type == ST_MACOS)
    return ftp_parse_unix_ls (file, TRUE);

  if (system_type == ST_WINNT)
    {
      /* Peek at the first byte of the listing to detect whether it is
         simulating the UNIX format.  */
      FILE *probe;
      int first;

      probe = fopen (file, "rb");
      if (!probe)
        {
          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
          return NULL;
        }
      first = fgetc (probe);
      fclose (probe);

      /* A leading digit '0'-'9' means native WINNT format; anything else
         (including an empty file) is treated as a Unix-style listing.  */
      if (first >= '0' && first <= '9')
        return ftp_parse_winnt_ls (file);
      return ftp_parse_unix_ls (file, TRUE);
    }

  logprintf (LOG_NOTQUIET, _("\
Unsupported listing type, trying Unix listing parser.\n"));
  return ftp_parse_unix_ls (file, FALSE);
}
/* Stuff for creating FTP index. */
/* The function creates an HTML index containing references to given
directories and files on the appropriate host. The references are
FTP. */
uerr_t
ftp_index (const char *file, struct url *u, struct fileinfo *f)
{
FILE *fp;
char *upwd;
char *htclfile; /* HTML-clean file name */
if (!opt.dfp)
{
fp = fopen (file, "wb");
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
return FOPENERR;
}
}
else
fp = opt.dfp;
if (u->user)
{
char *tmpu, *tmpp; /* temporary, clean user and passwd */
tmpu = url_escape (u->user);
tmpp = u->passwd ? url_escape (u->passwd) : NULL;
upwd = (char *)xmalloc (strlen (tmpu)
+ (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
xfree (tmpu);
FREE_MAYBE (tmpp);
}
else
upwd = xstrdup ("");
fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
fprintf (fp, "<html>\n<head>\n<title>");
fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
fprintf (fp, _("Index of /%s on %s:%d"), u->dir, u->host, u->port);
fprintf (fp, "</h1>\n<hr>\n<pre>\n");
while (f)
{
fprintf (fp, " ");
if (f->tstamp != -1)
{
/* #### Should we translate the months? Or, even better, use
ISO 8601 dates? */
static char *months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
struct tm *ptm = localtime ((time_t *)&f->tstamp);
fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
ptm->tm_mday);
if (ptm->tm_hour)
fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
else
fprintf (fp, " ");
}
else
fprintf (fp, _("time unknown "));
switch (f->type)
{
case FT_PLAINFILE:
fprintf (fp, _("File "));
break;
case FT_DIRECTORY:
fprintf (fp, _("Directory "));
break;
case FT_SYMLINK:
fprintf (fp, _("Link "));
break;
default:
fprintf (fp, _("Not sure "));
break;
}
htclfile = html_quote_string (f->name);
fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
if (*u->dir != '/')
putc ('/', fp);
fprintf (fp, "%s", u->dir);
if (*u->dir)
putc ('/', fp);
fprintf (fp, "%s", htclfile);
if (f->type == FT_DIRECTORY)
putc ('/', fp);
fprintf (fp, "\">%s", htclfile);
if (f->type == FT_DIRECTORY)
putc ('/', fp);
fprintf (fp, "</a> ");
if (f->type == FT_PLAINFILE)
fprintf (fp, _(" (%s bytes)"), legible (f->size));
else if (f->type == FT_SYMLINK)
fprintf (fp, "-> %s", f->linkto ? f->linkto : "(nil)");
putc ('\n', fp);
xfree (htclfile);
f = f->next;
}
fprintf (fp, "</pre>\n</body>\n</html>\n");
xfree (upwd);
if (!opt.dfp)
fclose (fp);
else
fflush (fp);
return FTPOK;
}
--- NEW FILE: cookies.h ---
/* Support for cookies.
Copyright (C) 2001, 2002 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Include guard: the declarations below are seen at most once per
   translation unit.  */
#ifndef COOKIES_H
#define COOKIES_H
/* The cookie jar is only forward-declared here, so code that includes
   this header can pass pointers to a jar around but cannot look at its
   members.  */
struct cookie_jar;
/* All prototypes are wrapped in PARAMS, a macro this header does not
   define itself (presumably it hides the argument list from pre-ANSI
   compilers -- TODO confirm against its definition).  */
/* Takes no arguments and returns a pointer to a cookie jar.  */
struct cookie_jar *cookie_jar_new PARAMS ((void));
/* Takes a jar pointer and returns nothing; presumably the counterpart
   of cookie_jar_new -- confirm in cookies.c.  */
void cookie_jar_delete PARAMS ((struct cookie_jar *));
/* Takes a jar, a string, an int and two more strings.  NOTE(review):
   the parameters are unnamed here; see the definition in cookies.c for
   their meaning.  */
void cookie_jar_process_set_cookie PARAMS ((struct cookie_jar *, const char *,
int, const char *, const char *));
/* Returns a char pointer built from a jar, a string, an int, a string
   and an int.  NOTE(review): parameter meaning and ownership of the
   returned string are not visible here -- confirm in cookies.c.  */
char *cookie_jar_generate_cookie_header PARAMS ((struct cookie_jar *,
const char *, int,
const char *, int));
/* Both take a jar and a string (presumably a file name -- TODO confirm).  */
void cookie_jar_load PARAMS ((struct cookie_jar *, const char *));
void cookie_jar_save PARAMS ((struct cookie_jar *, const char *));
#endif /* COOKIES_H */
--- NEW FILE: rbuf.c ---
/* Buffering read.
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This is a simple implementation of buffering IO-read functions. */
#include <config.h>
#include <stdio.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include "wget.h"
#include "rbuf.h"
#include "connect.h"
#ifdef HAVE_SSL
#include <openssl/bio.h>
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <openssl/pem.h>
#include "gen_sslfunc.h" /* for ssl_iread */
#endif /* HAVE_SSL */
/* Set up RBUF for reading from descriptor FD: the buffer starts out
   empty, with the read position at its beginning.  */
void
rbuf_initialize (struct rbuf *rbuf, int fd)
{
  /* Empty buffer: nothing left to consume, position at the start.  */
  rbuf->buffer_left = 0;
  rbuf->buffer_pos = rbuf->buffer;
#ifdef HAVE_SSL
  /* pointing ssl to NULL results in an unchanged behaviour */
  rbuf->ssl = NULL;
#endif /* HAVE_SSL */
  rbuf->fd = fd;
}
/* Return 1 if RBUF currently has a descriptor attached, 0 if its
   descriptor is the -1 marker.  */
int
rbuf_initialized_p (struct rbuf *rbuf)
{
  if (rbuf->fd == -1)
    return 0;
  return 1;
}
/* Mark RBUF as uninitialized by storing -1 as its descriptor; this is
   the value rbuf_initialized_p tests for.  Nothing else in RBUF is
   touched.  */
void
rbuf_uninitialize (struct rbuf *rbuf)
{
rbuf->fd = -1;
}
/* Read up to one full buffer of data into RBUF's buffer and return the
   result of the underlying read call.  With SSL support compiled in and
   an SSL handle set on RBUF the data comes from ssl_iread, otherwise
   from iread on RBUF's descriptor.  The buffer position and count are
   not updated here.  */
int
rbuf_read_bufferful (struct rbuf *rbuf)
{
#ifdef HAVE_SSL
  if (rbuf->ssl != NULL)
    return ssl_iread (rbuf->ssl, rbuf->buffer, sizeof (rbuf->buffer));
#endif /* HAVE_SSL */
  return iread (rbuf->fd, rbuf->buffer, sizeof (rbuf->buffer));
}
/* Currently unused -- see RBUF_READCHAR.  The whole definition is
   compiled out by the #if 0 below.  */
#if 0
/* Function version of RBUF_READCHAR: expands the macro on RBUF and STORE
   and returns its value.  */
int
rbuf_readchar (struct rbuf *rbuf, char *store)
{
return RBUF_READCHAR (rbuf, store);
}
#endif
/* Store the next character of RBUF in *STORE without consuming it.
   When the buffer is empty it is refilled first; if that read returns
   zero or a negative value, the value is passed on to the caller and
   *STORE is left alone.  Returns 1 on success.  */
int
rbuf_peek (struct rbuf *rbuf, char *store)
{
  if (rbuf->buffer_left == 0)
    {
      int nread;

      /* Restart at the beginning of the (empty) buffer.  */
      rbuf->buffer_pos = rbuf->buffer;
#ifdef HAVE_SSL
      if (rbuf->ssl != NULL)
        nread = ssl_iread (rbuf->ssl, rbuf->buffer, sizeof (rbuf->buffer));
      else
#endif /* HAVE_SSL */
        nread = iread (rbuf->fd, rbuf->buffer, sizeof (rbuf->buffer));
      if (nread <= 0)
        return nread;
      rbuf->buffer_left = nread;
    }

  /* Look, but do not advance buffer_pos or decrease buffer_left.  */
  *store = rbuf->buffer_pos[0];
  return 1;
}
/* Drain up to MAXSIZE buffered bytes from RBUF.  The bytes are copied
   to WHERE unless WHERE is NULL, in which case they are just dropped.
   Returns the number of bytes taken out of the buffer, which is 0 when
   the buffer is empty.  */
int
rbuf_flush (struct rbuf *rbuf, char *where, int maxsize)
{
  int howmuch;

  if (rbuf->buffer_left == 0)
    return 0;

  howmuch = MINVAL (rbuf->buffer_left, maxsize);
  if (where != NULL)
    memcpy (where, rbuf->buffer_pos, howmuch);

  /* Account for the bytes just consumed.  */
  rbuf->buffer_pos += howmuch;
  rbuf->buffer_left -= howmuch;
  return howmuch;
}
/* Throw away whatever RBUF still has buffered: the read position goes
   back to the start of the buffer and the byte count drops to zero.  */
void
rbuf_discard (struct rbuf *rbuf)
{
  rbuf->buffer_pos = rbuf->buffer;
  rbuf->buffer_left = 0;
}
--- NEW FILE: utils.c ---
/* Various functions of utilitarian nature.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...2050 lines suppressed...]
errno = saved_errno;
return 0;
}
#else /* not USE_SIGNAL_TIMEOUT */
#ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based timeouts:
   FUN is simply invoked with ARG and TIMEOUT is ignored, so the call can
   never time out and 0 is always returned.  Not defined under Windows,
   because Windows has its own version of run_with_timeout that uses
   threads.  */
int
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* deliberately unused in the stub */
  (*fun) (arg);
  return 0;
}
#endif /* not WINDOWS */
#endif /* not USE_SIGNAL_TIMEOUT */
--- NEW FILE: gnu-md5.c ---
/* md5.c - Functions to compute MD5 message digest of files or memory blocks
according to the definition of MD5 in RFC 1321 from April 1992.
Copyright (C) 1995, 1996 Free Software Foundation, Inc.
This file is part of the GNU C library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
/* Written by Ulrich Drepper <drepper at gnu.ai.mit.edu>, 1995. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
/* Wget */
/*#if STDC_HEADERS || defined _LIBC*/
# include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
/*#else*/
/*# ifndef HAVE_MEMCPY*/
/*# define memcpy(d, s, n) bcopy ((s), (d), (n))*/
/*# endif*/
/*#endif*/
#include "wget.h"
#include "gnu-md5.h"
#ifdef _LIBC
# include <endian.h>
# if __BYTE_ORDER == __BIG_ENDIAN
# define WORDS_BIGENDIAN 1
# endif
#endif
#ifdef WORDS_BIGENDIAN
# define SWAP(n) \
(((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
#else
# define SWAP(n) (n)
#endif
/* This array contains the bytes used to pad the buffer to the next
64-byte boundary. (RFC 1321, 3.1: Step 1) */
static const unsigned char fillbuf[64] = { 0x80, 0 /* , 0, 0, ... */ };
/* Reset CTX to the start of a new MD5 computation: no bytes counted, an
   empty internal buffer, and the four chaining values loaded with their
   initial constants (RFC 1321, 3.3: Step 3).  */
void
md5_init_ctx (struct md5_ctx *ctx)
{
  ctx->buflen = 0;
  ctx->total[0] = 0;
  ctx->total[1] = 0;

  ctx->A = 0x67452301;
  ctx->B = 0xefcdab89;
  ctx->C = 0x98badcfe;
  ctx->D = 0x10325476;
}
/* Put result from CTX in first 16 bytes following RESBUF. The result
must be in little endian byte order.
IMPORTANT: On some systems it is required that RESBUF is correctly
aligned for a 32 bits value. */
void *
md5_read_ctx (const struct md5_ctx *ctx, void *resbuf)
{
((md5_uint32 *) resbuf)[0] = SWAP (ctx->A);
((md5_uint32 *) resbuf)[1] = SWAP (ctx->B);
((md5_uint32 *) resbuf)[2] = SWAP (ctx->C);
((md5_uint32 *) resbuf)[3] = SWAP (ctx->D);
return resbuf;
}
/* Finish the computation in CTX: pad the bytes still waiting in the
   internal buffer, append the total message length in bits, run the
   result through md5_process_block and write the digest to RESBUF via
   md5_read_ctx, whose return value is returned.

   IMPORTANT: On some systems it is required that RESBUF is correctly
   aligned for a 32 bits value.  */
void *
md5_finish_ctx (struct md5_ctx *ctx, void *resbuf)
{
  /* Bytes that were buffered but not yet processed.  */
  md5_uint32 bytes = ctx->buflen;
  size_t pad;
  size_t tail;

  /* Add them to the 64-bit byte counter, carrying into the high word.  */
  ctx->total[0] += bytes;
  if (ctx->total[0] < bytes)
    ++ctx->total[1];

  /* Pad up to 56 bytes modulo 64, leaving 8 bytes for the length.  */
  if (bytes >= 56)
    pad = 64 + 56 - bytes;
  else
    pad = 56 - bytes;
  memcpy (&ctx->buffer[bytes], fillbuf, pad);
  tail = bytes + pad;

  /* Put the 64-bit file length in *bits* at the end of the buffer.  */
  *(md5_uint32 *) &ctx->buffer[tail] = SWAP (ctx->total[0] << 3);
  *(md5_uint32 *) &ctx->buffer[tail + 4] = SWAP ((ctx->total[1] << 3) |
                                                 (ctx->total[0] >> 29));

  /* Process last bytes.  */
  md5_process_block (ctx->buffer, tail + 8, ctx);

  return md5_read_ctx (ctx, resbuf);
}
/* Unused in Wget */
#if 0
/* Compute MD5 message digest for bytes read from STREAM. The
resulting message digest number will be written into the 16 bytes
beginning at RESBLOCK.  Returns 1 if a read returns no data and the
stream's error indicator is set, 0 otherwise.  (This function sits in
an #if 0 region and is not compiled.)  */
int
md5_stream (FILE *stream, void *resblock)
{
/* Important: BLOCKSIZE must be a multiple of 64. */
#define BLOCKSIZE 4096
struct md5_ctx ctx;
char buffer[BLOCKSIZE + 72];
size_t sum;
/* Initialize the computation context. */
md5_init_ctx (&ctx);
/* Iterate over full file contents. */
while (1)
{
/* We read the file in blocks of BLOCKSIZE bytes. One call of the
computation function processes the whole buffer so that with the
next round of the loop another block can be read. */
size_t n;
sum = 0;
/* Read block. Take care for partial reads: keep calling fread until
BLOCKSIZE bytes are collected or a call returns nothing. */
do
{
n = fread (buffer + sum, 1, BLOCKSIZE - sum, stream);
sum += n;
}
while (sum < BLOCKSIZE && n != 0);
if (n == 0 && ferror (stream))
return 1;
/* If end of file is reached, end the loop. SUM then holds the size of
the last, partial block. */
if (n == 0)
break;
/* Process buffer with BLOCKSIZE bytes. Note that
BLOCKSIZE % 64 == 0
*/
md5_process_block (buffer, BLOCKSIZE, &ctx);
}
/* Add the last bytes if necessary. */
if (sum > 0)
md5_process_bytes (buffer, sum, &ctx);
/* Construct result in desired memory. */
md5_finish_ctx (&ctx, resblock);
return 0;
}
/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The
result is always in little endian byte order, so that a byte-wise
output yields to the wanted ASCII representation of the message
digest.  Returns the value of md5_finish_ctx called with RESBLOCK.
(This function sits in an #if 0 region and is not compiled.)  */
void *
md5_buffer (const char *buffer, size_t len, void *resblock)
{
struct md5_ctx ctx;
/* Initialize the computation context. */
md5_init_ctx (&ctx);
/* Feed the whole buffer; md5_process_bytes keeps any trailing partial
block in the context. */
md5_process_bytes (buffer, len, &ctx);
/* Put result in desired memory area. */
return md5_finish_ctx (&ctx, resblock);
}
#endif /* 0 */
/* Feed LEN bytes starting at BUFFER into the computation held in CTX.
   The input need not be a multiple of 64 bytes: whole blocks are passed
   to md5_process_block and whatever remains is kept in CTX's internal
   buffer for the next call or for md5_finish_ctx.  */
void
md5_process_bytes (const void *buffer, size_t len, struct md5_ctx *ctx)
{
  const char *src = (const char *) buffer;

  /* When we already have some bits in our internal buffer concatenate
     both inputs first.  */
  if (ctx->buflen != 0)
    {
      size_t pending = ctx->buflen;
      size_t room = 128 - pending;
      size_t take = room > len ? len : room;
      size_t filled = pending + take;

      memcpy (&ctx->buffer[pending], src, take);
      ctx->buflen += take;

      if (filled > 64)
        {
          size_t whole = filled & ~63;
          size_t rest = filled & 63;

          md5_process_block (ctx->buffer, whole, ctx);
          /* The regions in the following copy operation cannot overlap.  */
          memcpy (ctx->buffer, &ctx->buffer[whole], rest);
          ctx->buflen = rest;
        }

      src += take;
      len -= take;
    }

  /* Process available complete blocks.  */
  if (len > 64)
    {
      size_t whole = len & ~63;

      md5_process_block (src, whole, ctx);
      src += whole;
      len &= 63;
    }

  /* Move remaining bytes in internal buffer.  */
  if (len > 0)
    {
      memcpy (ctx->buffer, src, len);
      ctx->buflen = len;
    }
}
/* These are the four functions used in the four steps of the MD5 algorithm
and defined in the RFC 1321. The first function is a little bit optimized
(as found in Colin Plumbs public domain implementation). */
/* #define FF(b, c, d) ((b & c) | (~b & d)) */
#define FF(b, c, d) (d ^ (b & (c ^ d)))
#define FG(b, c, d) FF (d, b, c)
#define FH(b, c, d) (b ^ c ^ d)
#define FI(b, c, d) (c ^ (b | ~d))
/* Process LEN bytes of BUFFER, accumulating context into CTX.
It is assumed that LEN % 64 == 0.

BUFFER is read as a sequence of 32-bit words.  LEN is first added to
the two-word byte counter in CTX; then every 64-byte block goes through
the four rounds below, and the resulting A, B, C, D are stored back
into CTX.  */
void
md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
{
/* The 16 words of the current block after SWAP; filled during round 1
and reused by rounds 2 to 4. */
md5_uint32 correct_words[16];
const md5_uint32 *words = (md5_uint32 *)buffer;
size_t nwords = len / sizeof (md5_uint32);
const md5_uint32 *endp = words + nwords;
/* Work on local copies of the chaining values. */
md5_uint32 A = ctx->A;
md5_uint32 B = ctx->B;
md5_uint32 C = ctx->C;
md5_uint32 D = ctx->D;
/* First increment the byte count. RFC 1321 specifies the possible
length of the file up to 2^64 bits. Here we only compute the
number of bytes. Do a double word increment. */
ctx->total[0] += len;
if (ctx->total[0] < len)
++ctx->total[1];
/* Process all bytes in the buffer with 64 bytes in each round of
the loop. */
while (words < endp)
{
md5_uint32 *cwp = correct_words;
/* Remember the values this block started with; they are added back
in after the four rounds. */
md5_uint32 A_save = A;
md5_uint32 B_save = B;
md5_uint32 C_save = C;
md5_uint32 D_save = D;
/* First round: using the given function, the context and a constant
the next context is computed. Because the algorithms processing
unit is a 32-bit word and it is determined to work on words in
little endian byte order we perhaps have to change the byte order
before the computation. To reduce the work for the next steps
we store the swapped words in the array CORRECT_WORDS. */
#define OP(a, b, c, d, s, T) \
do \
{ \
a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \
++words; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
/* Before we start, a word about the strange constants.
They are defined in RFC 1321 as
T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
*/
/* Round 1. Each OP consumes the next input word, so WORDS advances
by 16 words (one block) here. */
OP (A, B, C, D, 7, 0xd76aa478);
OP (D, A, B, C, 12, 0xe8c7b756);
OP (C, D, A, B, 17, 0x242070db);
OP (B, C, D, A, 22, 0xc1bdceee);
OP (A, B, C, D, 7, 0xf57c0faf);
OP (D, A, B, C, 12, 0x4787c62a);
OP (C, D, A, B, 17, 0xa8304613);
OP (B, C, D, A, 22, 0xfd469501);
OP (A, B, C, D, 7, 0x698098d8);
OP (D, A, B, C, 12, 0x8b44f7af);
OP (C, D, A, B, 17, 0xffff5bb1);
OP (B, C, D, A, 22, 0x895cd7be);
OP (A, B, C, D, 7, 0x6b901122);
OP (D, A, B, C, 12, 0xfd987193);
OP (C, D, A, B, 17, 0xa679438e);
OP (B, C, D, A, 22, 0x49b40821);
/* For the second to fourth round we have the possibly swapped words
in CORRECT_WORDS. Redefine the macro to take an additional first
argument specifying the function to use. K is the index of the word
in CORRECT_WORDS. */
#undef OP
#define OP(f, a, b, c, d, k, s, T) \
do \
{ \
a += f (b, c, d) + correct_words[k] + T; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* Round 2. */
OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
OP (FG, D, A, B, C, 6, 9, 0xc040b340);
OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
OP (FG, D, A, B, C, 10, 9, 0x02441453);
OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
/* Round 3. */
OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
OP (FH, D, A, B, C, 8, 11, 0x8771f681);
OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
OP (FH, B, C, D, A, 6, 23, 0x04881d05);
OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);
/* Round 4. */
OP (FI, A, B, C, D, 0, 6, 0xf4292244);
OP (FI, D, A, B, C, 7, 10, 0x432aff97);
OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
OP (FI, C, D, A, B, 6, 15, 0xa3014314);
OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
OP (FI, B, C, D, A, 9, 21, 0xeb86d391);
/* Add the starting values of the context. */
A += A_save;
B += B_save;
C += C_save;
D += D_save;
}
/* Put checksum in context given as argument. */
ctx->A = A;
ctx->B = B;
ctx->C = C;
ctx->D = D;
}
--- NEW FILE: res.c ---
/* Support for Robot Exclusion Standard (RES).
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of Wget.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This file implements the Robot Exclusion Standard (RES).
RES is a simple protocol that enables site admins to signal to
web crawlers that certain parts of the site should not be
accessed. All the admin needs to do is create a "robots.txt" file
in the web server root, and use simple commands to allow or
disallow access to certain parts of the site.
The first specification was written by Martijn Koster in 1994, and
is still available at <http://www.robotstxt.org/wc/norobots.html>.
In 1996, Martijn wrote an Internet Draft specifying an improved RES
specification; however, that work was apparently abandoned since
the draft has expired in 1997 and hasn't been replaced since. The
draft is available at
<http://www.robotstxt.org/wc/norobots-rfc.html>.
This file implements RES as specified by the draft. Note that this
only handles the "robots.txt" support. The META tag that controls
whether the links should be followed is handled in `html-url.c'.
Known deviations:
* The end-of-line comment recognition is more in the spirit of the
Bourne Shell (as specified by RES-1994). That means that
"foo#bar" is taken literally, whereas "foo #bar" is interpreted
as "foo". The Draft apparently specifies that both should be
interpreted as "foo".
* We don't recognize sole CR as the line ending.
* We don't implement an expiry mechanism for /robots.txt specs. I
consider it unnecessary for a relatively short-lived
application such as Wget. Besides, it is highly questionable
whether anyone deploys the recommended expiry scheme for
robots.txt.
Entry points are functions res_parse, res_parse_from_file,
res_match_path, res_register_specs, res_get_specs, and
res_retrieve_file. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <errno.h>
#include <assert.h>
#include "wget.h"
#include "utils.h"
#include "hash.h"
#include "url.h"
#include "retr.h"
#include "res.h"
/* One Allow/Disallow rule collected from a robots.txt file. */
struct path_info {
/* Rule path; add_path stores it without its leading slash. */
char *path;
/* Non-zero if a matching URL may be retrieved, zero if it is rejected. */
int allowedp;
/* Non-zero if the rule was read under a record that named "wget"
explicitly, zero if it came from a "*" record. */
int user_agent_exact_p;
};
/* The set of rules parsed from one robots.txt, kept as a growable
array in the order the rules were encountered. */
struct robot_specs {
/* Number of entries of PATHS that are in use. */
int count;
/* Number of entries PATHS currently has room for. */
int size;
/* The rules themselves; NULL until the first rule is added. */
struct path_info *paths;
};
/* Parsing the robot spec. */
/* Classify the user-agent value AGENT, which is LENGTH characters
   long and need not be zero-terminated.

   *MATCHES is set to 1 when the value is either the wildcard "*" or
   the name "wget" (compared case-insensitively), and to 0 otherwise.
   *EXACT_MATCH is set to 1 only for "wget", and to 0 otherwise.  */
static void
match_user_agent (const char *agent, int length,
		  int *matches, int *exact_match)
{
  int wildcard = (length == 1 && *agent == '*');
  int named = 0;

  /* The wildcard takes precedence, so the name test is only needed
     when the value is not "*".  */
  if (!wildcard)
    named = BOUNDED_EQUAL_NO_CASE (agent, agent + length, "wget") ? 1 : 0;

  *matches = (wildcard || named);
  *exact_match = named;
}
/* Add a path specification between PATH_B and PATH_E as one of the
paths in SPECS. */
static void
add_path (struct robot_specs *specs, const char *path_b, const char *path_e,
int allowedp, int exactp)
{
struct path_info pp;
if (path_b < path_e && *path_b == '/')
/* Our path representation doesn't use a leading slash, so remove
one from theirs. */
++path_b;
pp.path = strdupdelim (path_b, path_e);
pp.allowedp = allowedp;
pp.user_agent_exact_p = exactp;
++specs->count;
if (specs->count > specs->size)
{
if (specs->size == 0)
specs->size = 1;
else
specs->size <<= 1;
specs->paths = xrealloc (specs->paths,
specs->size * sizeof (struct path_info));
}
specs->paths[specs->count - 1] = pp;
}
/* Rebuild SPECS->paths so that it contains only the rules whose
   user_agent_exact_p is non-zero, preserving their relative order.
   Rules that are dropped have their path string freed, since nothing
   else refers to it once the old array is gone.  On return both
   SPECS->count and SPECS->size equal the number of surviving rules.  */
static void
prune_non_exact (struct robot_specs *specs)
{
  struct path_info *newpaths;
  int i, j, cnt;

  /* First pass: count the survivors so the new array can be
     allocated at its exact size.  */
  cnt = 0;
  for (i = 0; i < specs->count; i++)
    if (specs->paths[i].user_agent_exact_p)
      ++cnt;

  newpaths = xmalloc (cnt * sizeof (struct path_info));

  /* Second pass: move the survivors and release the rest.  */
  for (i = 0, j = 0; i < specs->count; i++)
    if (specs->paths[i].user_agent_exact_p)
      newpaths[j++] = specs->paths[i];
    else
      xfree (specs->paths[i].path);
  assert (j == cnt);

  xfree (specs->paths);
  specs->paths = newpaths;
  specs->count = cnt;
  specs->size = cnt;
}
/* Helper macros for res_parse.  They deliberately refer to local
variables of the expanding function (`lineend', `field_b' and
`field_e') and are therefore only usable inside it. */
/* Non-zero once P has reached or passed the logical end of the
current line. */
#define EOL(p) ((p) >= lineend)
/* Advance P over whitespace without running past the end of the
line.  P is evaluated several times and is modified, so it must be a
plain pointer variable. */
#define SKIP_SPACE(p) do { \
while (!EOL (p) && ISSPACE (*p)) \
++p; \
} while (0)
/* Non-zero if the field name delimited by field_b/field_e equals
STRING_LITERAL; the comparison is delegated to
BOUNDED_EQUAL_NO_CASE. */
#define FIELD_IS(string_literal) \
BOUNDED_EQUAL_NO_CASE (field_b, field_e, string_literal)
/* Parse textual RES specs beginning with SOURCE of length LENGTH.
   Return a newly allocated specs object ready to be fed to
   res_match_path.

   The parsing itself is trivial, but creating a correct SPECS object
   is trickier than it seems, because RES is surprisingly byzantine if
   you attempt to implement it correctly.

   A "record" is a block of one or more `User-Agent' lines followed by
   one or more `Allow' or `Disallow' lines.  A record is accepted by
   Wget if one of the `User-Agent' lines was "wget", or if the user
   agent line was "*".

   After all the lines have been read, we examine whether an exact
   ("wget") user-agent field was specified.  If so, we delete all the
   lines read under "User-Agent: *" blocks because we have our own
   Wget-specific blocks.  This enables the admin to say:

       User-Agent: *
       Disallow: /

       User-Agent: google
       User-Agent: wget
       Disallow: /cgi-bin

   This means that to Wget and to Google, /cgi-bin is disallowed,
   whereas for all other crawlers, everything is disallowed.
   res_parse is implemented so that the order of records doesn't
   matter.  In the case above, the "User-Agent: *" could have come
   after the other one.  */
struct robot_specs *
res_parse (const char *source, int length)
{
  int line_count = 1;

  const char *p = source;
  const char *end = source + length;

  /* non-zero if last applicable user-agent field matches Wget. */
  int user_agent_applies = 0;

  /* non-zero if last applicable user-agent field *exactly* matches
     Wget.  */
  int user_agent_exact = 0;

  /* whether we ever encountered exact user agent. */
  int found_exact = 0;

  /* count of allow/disallow lines in the current "record", i.e. after
     the last `user-agent' instructions.  */
  int record_count = 0;

  struct robot_specs *specs = xmalloc (sizeof (struct robot_specs));
  memset (specs, '\0', sizeof (struct robot_specs));

  while (1)
    {
      const char *lineend, *lineend_real;
      const char *field_b, *field_e;
      const char *value_b, *value_e;

      if (p == end)
	break;
      /* lineend_real points just past the newline (or at END for an
	 unterminated last line); it is where the next line starts.  */
      lineend_real = memchr (p, '\n', end - p);
      if (lineend_real)
	++lineend_real;
      else
	lineend_real = end;
      lineend = lineend_real;

      /* Before doing anything else, check whether the line is empty
	 or comment-only. */
      SKIP_SPACE (p);
      if (EOL (p) || *p == '#')
	goto next;

      /* Make sure the end-of-line comments are respected by setting
	 lineend to a location preceding the first comment.  Real line
	 ending remains in lineend_real.  */
      for (lineend = p; lineend < lineend_real; lineend++)
	if ((lineend == p || ISSPACE (*(lineend - 1)))
	    && *lineend == '#')
	  break;

      /* Ignore trailing whitespace in the same way. */
      while (lineend > p && ISSPACE (*(lineend - 1)))
	--lineend;

      assert (!EOL (p));

      field_b = p;
      while (!EOL (p) && (ISALNUM (*p) || *p == '-'))
	++p;
      field_e = p;

      SKIP_SPACE (p);
      if (field_b == field_e || EOL (p) || *p != ':')
	{
	  /* The message ends with a newline so that it does not run
	     into whatever is logged next.  */
	  DEBUGP (("Ignoring malformed line %d\n", line_count));
	  goto next;
	}
      ++p;			/* skip ':' */
      SKIP_SPACE (p);

      value_b = p;
      while (!EOL (p))
	++p;
      value_e = p;

      /* Finally, we have a syntactically valid line. */
      if (FIELD_IS ("user-agent"))
	{
	  /* We have to support several cases:

	     --previous records--

	     User-Agent: foo
	     User-Agent: Wget
	     User-Agent: bar
	     ... matching record ...

	     User-Agent: baz
	     User-Agent: qux
	     ... non-matching record ...

	     User-Agent: *
	     ... matching record, but will be pruned later ...

	     We have to respect `User-Agent' at the beginning of each
	     new record simply because we don't know if we're going to
	     encounter "Wget" among the agents or not.  Hence,
	     match_user_agent is called when record_count != 0.

	     But if record_count is 0, we have to keep calling it
	     until it matches, and if that happens, we must not call
	     it any more, until the next record.  Hence the other part
	     of the condition.  */
	  if (record_count != 0 || user_agent_applies == 0)
	    match_user_agent (value_b, value_e - value_b,
			      &user_agent_applies, &user_agent_exact);
	  if (user_agent_exact)
	    found_exact = 1;
	  record_count = 0;
	}
      else if (FIELD_IS ("allow"))
	{
	  if (user_agent_applies)
	    {
	      add_path (specs, value_b, value_e, 1, user_agent_exact);
	    }
	  ++record_count;
	}
      else if (FIELD_IS ("disallow"))
	{
	  if (user_agent_applies)
	    {
	      int allowed = 0;
	      if (value_b == value_e)
		/* Empty "disallow" line means everything is
		   *allowed*!  */
		allowed = 1;
	      add_path (specs, value_b, value_e, allowed, user_agent_exact);
	    }
	  ++record_count;
	}
      else
	{
	  DEBUGP (("Ignoring unknown field at line %d\n", line_count));
	  goto next;
	}

    next:
      p = lineend_real;
      ++line_count;
    }

  if (found_exact)
    {
      /* We've encountered an exactly matching user-agent.  Throw out
	 all the stuff with user-agent: *.  */
      prune_non_exact (specs);
    }
  else if (specs->size > specs->count)
    {
      /* add_path normally over-allocates specs->paths.  Reallocate it
	 to the correct size in order to conserve some memory.  */
      specs->paths = xrealloc (specs->paths,
			       specs->count * sizeof (struct path_info));
      specs->size = specs->count;
    }

  return specs;
}
/* Like res_parse, but take the input from the file FILENAME: the
   file is loaded with read_file, parsed, and released again.

   Returns the parsed specs, or NULL if the file could not be read;
   in that case a diagnostic including strerror (errno) is logged.  */
struct robot_specs *
res_parse_from_file (const char *filename)
{
  struct robot_specs *specs;
  struct file_memory *fm = read_file (filename);
  if (!fm)
    {
      /* Newline-terminated so the message does not run into the next
	 line of log output.  */
      logprintf (LOG_NOTQUIET, "Cannot open %s: %s\n",
		 filename, strerror (errno));
      return NULL;
    }
  specs = res_parse (fm->content, fm->length);
  read_file_free (fm);
  return specs;
}
/* Release SPECS together with everything it owns: the path string of
   every rule, the rule array (if one was ever allocated), and the
   specs structure itself.  */
static void
free_specs (struct robot_specs *specs)
{
  int idx = 0;

  while (idx < specs->count)
    {
      xfree (specs->paths[idx].path);
      ++idx;
    }
  FREE_MAYBE (specs->paths);
  xfree (specs);
}
/* Matching of a path according to the specs. */
/* If C is '%' and (ptr[1], ptr[2]) form a hexadecimal number, and if
that number is not a numerical representation of '/', decode C and
advance the pointer.
C must be a modifiable char variable holding *PTR, and PTR a
modifiable pointer variable: on a successful decode C receives the
decoded character and PTR is moved forward by two, so that the
caller's own single increment steps over the whole %XX sequence.
An encoded '/' is left untouched.  Both arguments are evaluated more
than once. */
#define DECODE_MAYBE(c, ptr) do { \
if (c == '%' && ISXDIGIT (ptr[1]) && ISXDIGIT (ptr[2])) \
{ \
char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]); \
if (decoded != '/') \
{ \
c = decoded; \
ptr += 2; \
} \
} \
} while (0)
/* The inner matching engine.  Returns non-zero when RECORD_PATH is a
   prefix of URL_PATH, comparing character by character after each
   side has been run through DECODE_MAYBE (so "%7E" and "~" compare
   equal, while an encoded slash stays encoded).  The matching rules
   are described in section 3.2.2 of
   <http://www.robotstxt.org/wc/norobots-rfc.txt>.  */
static int
matches (const char *record_path, const char *url_path)
{
  const char *rule = record_path;
  const char *url = url_path;

  /* Running out of rule text means every rule character matched.  */
  while (*rule != '\0')
    {
      char rule_ch = *rule;
      char url_ch = *url;

      /* The URL ended before the rule did: no match.  */
      if (url_ch == '\0')
	return 0;

      DECODE_MAYBE (rule_ch, rule);
      DECODE_MAYBE (url_ch, url);
      if (rule_ch != url_ch)
	return 0;

      ++rule;
      ++url;
    }
  return 1;
}
/* Iterate through all paths in SPECS. For the first one that
matches, return its allow/reject status. If none matches,
retrieval is by default allowed. */
int
res_match_path (const struct robot_specs *specs, const char *path)
{
int i;
if (!specs)
return 1;
for (i = 0; i < specs->count; i++)
if (matches (specs->paths[i].path, path))
{
int allowedp = specs->paths[i].allowedp;
DEBUGP (("%s path %s because of rule `%s'.\n",
allowedp ? "Allowing" : "Rejecting",
path, specs->paths[i].path));
return allowedp;
}
return 1;
}
/* Registering the specs. */
/* Table of registered specs, keyed by "host:port" strings.  It is
created lazily by res_register_specs and stays NULL until then. */
static struct hash_table *registered_specs;
/* Stolen from cookies.c. */
/* Build the string "HOST:PORT" and store a pointer to it in RESULT.
The storage comes from alloca, so it disappears when the function
that expanded the macro returns; callers that need to keep the string
must copy it.  NOTE(review): the terminating NUL is presumably written
by number_to_string -- confirm against its definition. */
#define SET_HOSTPORT(host, port, result) do { \
int HP_len = strlen (host); \
result = alloca (HP_len + 1 + numdigit (port) + 1); \
memcpy (result, host, HP_len); \
result[HP_len] = ':'; \
number_to_string (result + HP_len + 1, port); \
} while (0)
/* Register RES specs that belong to the server on HOST:PORT.  They
   will later be retrievable using res_get_specs.

   The table of registered specs is created on first use.  If specs
   were already registered for HOST:PORT, the old ones are freed and
   the existing key string is reused for the new entry; otherwise a
   fresh copy of the "host:port" key is stored.  The table takes
   ownership of SPECS.  */
void
res_register_specs (const char *host, int port, struct robot_specs *specs)
{
  struct robot_specs *old;
  char *hp, *hp_old;
  SET_HOSTPORT (host, port, hp);

  if (!registered_specs)
    registered_specs = make_nocase_string_hash_table (0);

  /* Required to shut up the compiler. */
  old = NULL;
  hp_old = NULL;

  /* The addresses of HP_OLD and OLD must be passed: handed over by
     value, neither variable could ever be filled in, so the old specs
     would leak and the new ones would be stored under a NULL key.
     NOTE(review): assumes hash_table_get_pair stores the original key
     and value through its last two arguments -- confirm in hash.h.  */
  if (hash_table_get_pair (registered_specs, hp, &hp_old, &old))
    {
      /* Re-registering the very same object must not free it.  */
      if (old && old != specs)
	free_specs (old);
      hash_table_put (registered_specs, hp_old, specs);
    }
  else
    {
      hash_table_put (registered_specs, xstrdup (hp), specs);
    }
}
/* Look up the specs registered for HOST:PORT.  Returns whatever the
   table lookup yields for the "host:port" key, or NULL when nothing
   has been registered at all.  */
struct robot_specs *
res_get_specs (const char *host, int port)
{
  char *hostport;

  SET_HOSTPORT (host, port, hostport);
  if (registered_specs)
    return hash_table_get (registered_specs, hostport);
  return NULL;
}
/* Loading the robots file.  */

#define RES_SPECS_LOCATION "/robots.txt"

/* Download the robots.txt that sits at the root of the server which
   serves URL.

   On success the name of the downloaded file is stored in *FILE and
   a non-zero value is returned.  On failure zero is returned and
   *FILE is NULL; a file name that retrieve_url may have allocated
   before failing is freed here, so the caller never has to.  */
int
res_retrieve_file (const char *url, char **file)
{
  char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
  uerr_t status;
  int ok;

  logputs (LOG_VERBOSE, _("Loading robots.txt; please ignore errors.\n"));
  *file = NULL;
  status = retrieve_url (robots_url, file, NULL, NULL, NULL);
  xfree (robots_url);

  ok = (status == RETROK);
  if (!ok && *file != NULL)
    {
      xfree (*file);
      *file = NULL;
    }
  return ok;
}
/* hash_table_map callback used by res_cleanup: frees one entry's key
   string and its specs.  Always returns 0.  ARG_IGNORED is unused.  */
static int
cleanup_hash_table_mapper (void *key, void *value, void *arg_ignored)
{
  struct robot_specs *entry_specs = value;

  xfree (key);
  free_specs (entry_specs);
  return 0;
}
/* Free every registered specs object together with its key, destroy
   the table, and reset it to NULL.  Does nothing if no specs were
   ever registered.  */
void
res_cleanup (void)
{
  if (registered_specs == NULL)
    return;

  hash_table_map (registered_specs, cleanup_hash_table_mapper, NULL);
  hash_table_destroy (registered_specs);
  registered_specs = NULL;
}
--- NEW FILE: ansi2knr.c ---
/* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises. All rights reserved. */
/*$Id: ansi2knr.c,v 1.2 2006-08-31 09:32:39 dslinux_amadeus Exp $*/
/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
/*
ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY. No author or distributor accepts responsibility to anyone for the
consequences of using it or for whether it serves any particular purpose or
works at all, unless he says so in writing. Refer to the GNU General Public
License (the "GPL") for full details.
Everyone is granted permission to copy, modify and redistribute ansi2knr,
but only under the conditions described in the GPL. A copy of this license
is supposed to have been given to you along with ansi2knr so you can know
your rights and responsibilities. It should be in a file named COPYLEFT,
or, if there is no file named COPYLEFT, a file named COPYING. Among other
things, the copyright notice and this notice must be preserved on all
copies.
We explicitly state here what we believe is already implied by the GPL: if
the ansi2knr program is distributed as a separate set of sources and a
separate executable file which are aggregated on a storage medium together
with another program, this in itself does not bring the other program under
the GPL, nor does the mere fact that such a program or the procedures for
constructing it invoke the ansi2knr executable bring any other part of the
program under the GPL.
*/
/*
* Usage:
ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
* --filename provides the file name for the #line directive in the output,
* overriding input_file (if present).
* If no input_file is supplied, input is read from stdin.
* If no output_file is supplied, output goes to stdout.
* There are no error messages.
*
* ansi2knr recognizes function definitions by seeing a non-keyword
* identifier at the left margin, followed by a left parenthesis,
* with a right parenthesis as the last character on the line,
* and with a left brace as the first token on the following line
* (ignoring possible intervening comments), except that a line
* consisting of only
* identifier1(identifier2)
* will not be considered a function definition unless identifier2 is
* the word "void", and a line consisting of
* identifier1(identifier2, <<arbitrary>>)
* will not be considered a function definition.
* ansi2knr will recognize a multi-line header provided
* that no intervening line ends with a left or right brace or a semicolon.
* These algorithms ignore whitespace and comments, except that
* the function name must be the first thing on the line.
* The following constructs will confuse it:
* - Any other construct that starts at the left margin and
* follows the above syntax (such as a macro or function call).
* - Some macros that tinker with the syntax of function headers.
*/
/*
* The original and principal author of ansi2knr is L. Peter Deutsch
* <ghost at aladdin.com>. Other authors are noted in the change history
* that follows (in reverse chronological order):
lpd 1999-04-12 added minor fixes from Pavel Roskin
<pavel_roskin at geocities.com> for clean compilation with
gcc -W -Wall
lpd 1999-03-22 added hack to recognize lines consisting of
identifier1(identifier2, xxx) as *not* being procedures
lpd 1999-02-03 made indentation of preprocessor commands consistent
lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
endless loop; quoted strings within an argument list
confused the parser
lpd 1999-01-24 added a check for write errors on the output,
suggested by Jim Meyering <meyering at ascend.com>
lpd 1998-11-09 added further hack to recognize identifier(void)
as being a procedure
lpd 1998-10-23 added hack to recognize lines consisting of
identifier1(identifier2) as *not* being procedures
lpd 1997-12-08 made input_file optional; only closes input and/or
output file if not stdin or stdout respectively; prints
usage message on stderr rather than stdout; adds
--filename switch (changes suggested by
<ceder at lysator.liu.se>)
lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
compilers that don't understand void, as suggested by
Tom Lane
lpd 1996-01-15 changed to require that the first non-comment token
on the line following a function header be a left brace,
to reduce sensitivity to macros, as suggested by Tom Lane
<tgl at sss.pgh.pa.us>
lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
undefined preprocessor symbols as 0; changed all #ifdefs
for configuration symbols to #ifs
lpd 1995-04-05 changed copyright notice to make it clear that
including ansi2knr in a program does not bring the entire
program under the GPL
lpd 1994-12-18 added conditionals for systems where ctype macros
don't handle 8-bit characters properly, suggested by
Francois Pinard <pinard at iro.umontreal.ca>;
removed --varargs switch (this is now the default)
lpd 1994-10-10 removed CONFIG_BROKETS conditional
lpd 1994-07-16 added some conditionals to help GNU `configure',
suggested by Francois Pinard <pinard at iro.umontreal.ca>;
properly erase prototype args in function parameters,
contributed by Jim Avera <jima at netcom.com>;
correct error in writeblanks (it shouldn't erase EOLs)
lpd 1989-xx-xx original version
*/
/* Most of the conditionals here are to make ansi2knr work with */
/* or without the GNU configure machinery. */
#if HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <ctype.h>
#if HAVE_CONFIG_H
/*
For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
This will define HAVE_CONFIG_H and so, activate the following lines.
*/
# if STDC_HEADERS || HAVE_STRING_H
# include <string.h>
# else
# include <strings.h>
# endif
#else /* not HAVE_CONFIG_H */
/* Otherwise do it the hard way */
# ifdef BSD
# include <strings.h>
# else
# ifdef VMS
extern int strlen(), strncmp();
# else
# include <string.h>
# endif
# endif
#endif /* not HAVE_CONFIG_H */
#if STDC_HEADERS
# include <stdlib.h>
#else
/*
malloc and free should be declared in stdlib.h,
but if you've got a K&R compiler, they probably aren't.
*/
# ifdef MSDOS
# include <malloc.h>
# else
# ifdef VMS
extern char *malloc();
extern void free();
# else
extern char *malloc();
extern int free();
# endif
# endif
#endif
/* Define NULL (for *very* old compilers). */
#ifndef NULL
# define NULL (0)
#endif
/*
* The ctype macros don't always handle 8-bit characters correctly.
* Compensate for this here.
*
* is_ascii(c) is a guard placed in front of every ctype call below:
* it is the constant 1 when STDC_HEADERS is set or isascii is not
* available, and isascii(c) otherwise.
*/
#ifdef isascii
# undef HAVE_ISASCII /* just in case */
# define HAVE_ISASCII 1
#else
#endif
#if STDC_HEADERS || !HAVE_ISASCII
# define is_ascii(c) 1
#else
# define is_ascii(c) isascii(c)
#endif
/* Guarded versions of isspace/isalpha/isalnum. */
#define is_space(c) (is_ascii(c) && isspace(c))
#define is_alpha(c) (is_ascii(c) && isalpha(c))
#define is_alnum(c) (is_ascii(c) && isalnum(c))
/* Scanning macros */
/* isidchar: CH may appear inside an identifier (alphanumeric or '_').
isidfirstchar: CH may start an identifier (alphabetic or '_'). */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
/* Forward references */
/* These are old-style declarations without parameter lists; the
functions themselves are defined further down in this file. */
char *skipspace();
char *scanstring();
int writeblanks();
int test1();
int convert1();
/*
 * The main program.
 *
 * Parses the command line (see the usage text), opens the input and
 * output files if they were named, and then copies the input to the
 * output line by line.  Lines are accumulated in a fixed-size buffer
 * until test1 can classify them; recognized function definitions are
 * rewritten by convert1, everything else is copied unchanged.
 *
 * Exits with status 1 on an unrecognized switch, on failure to open a
 * file or allocate the buffer, and on an output error; returns 0
 * otherwise.
 */
int
main(argc, argv)
    int argc;
    char *argv[];
{
    FILE *in = stdin;
    FILE *out = stdout;
    char *filename = 0;
    char *program_name = argv[0];
    char *output_name = 0;
#define bufsize 5000			/* arbitrary size */
    char *buf;
    char *line;
    char *more;
    char *usage =
      "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
    /*
     * In previous versions, ansi2knr recognized a --varargs switch.
     * If this switch was supplied, ansi2knr would attempt to convert
     * a ... argument to va_alist and va_dcl; if this switch was not
     * supplied, ansi2knr would simply drop any such arguments.
     * Now, ansi2knr always does this conversion, and we only
     * check for this switch for backward compatibility.
     */
    int convert_varargs = 1;
    int output_error;

    while ( argc > 1 && argv[1][0] == '-' ) {
	if ( !strcmp(argv[1], "--varargs") ) {
	    convert_varargs = 1;
	    argc--;
	    argv++;
	    continue;
	}
	if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
	    filename = argv[2];
	    argc -= 2;
	    argv += 2;
	    continue;
	}
	fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
		argv[1]);
	/* The usage text is data, not a format string. */
	fputs(usage, stderr);
	exit(1);
    }
    switch ( argc )
       {
    default:
	fputs(usage, stderr);
	exit(0);
    case 3:
	output_name = argv[2];
	out = fopen(output_name, "w");
	if ( out == NULL ) {
	    fprintf(stderr, "%s: Cannot open output file %s\n",
		    program_name, output_name);
	    exit(1);
	}
	/* falls through */
    case 2:
	in = fopen(argv[1], "r");
	if ( in == NULL ) {
	    fprintf(stderr, "%s: Cannot open input file %s\n",
		    program_name, argv[1]);
	    exit(1);
	}
	if ( filename == 0 )
	    filename = argv[1];
	/* falls through */
    case 1:
	break;
       }
    if ( filename )
	fprintf(out, "#line 1 \"%s\"\n", filename);
    buf = malloc(bufsize);
    if ( buf == NULL )
       {
	fprintf(stderr, "Unable to allocate read buffer!\n");
	exit(1);
       }
    line = buf;
    while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
       {
test:	line += strlen(line);
	switch ( test1(buf) )
	   {
	case 2:			/* a function header */
	    convert1(buf, out, 1, convert_varargs);
	    break;
	case 1:			/* a function */
	    /* Check for a { at the start of the next line. */
	    /* The following line(s) are read in after the NUL that */
	    /* ends the header, so buf and more are separate strings. */
	    more = ++line;
f:	    if ( line >= buf + (bufsize - 1) ) /* overflow check */
		goto wl;
	    if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
		goto wl;
	    switch ( *skipspace(more, 1) )
	       {
	    case '{':
		/* Definitely a function header. */
		convert1(buf, out, 0, convert_varargs);
		fputs(more, out);
		break;
	    case 0:
		/* The next line was blank or a comment: */
		/* keep scanning for a non-comment. */
		line += strlen(line);
		goto f;
	    default:
		/* buf isn't a function header, but */
		/* more might be. */
		fputs(buf, out);
		/* Move the text at more down to the start of buf. */
		/* Both lie in the same buffer and may overlap, so */
		/* strcpy must not be used; copying forward byte by */
		/* byte is safe because the destination is below */
		/* the source. */
		{   char *dst = buf;
		    char *src = more;
		    while ( (*dst++ = *src++) != 0 )
			;
		}
		line = buf;
		goto test;
	       }
	    break;
	case -1:		/* maybe the start of a function */
	    if ( line != buf + (bufsize - 1) ) /* overflow check */
		continue;
	    /* falls through */
	default:		/* not a function */
wl:	    fputs(buf, out);
	    break;
	   }
	line = buf;
       }
    if ( line != buf )
	fputs(buf, out);
    free(buf);
    if ( output_name ) {
	output_error = ferror(out);
	output_error |= fclose(out);
    } else {		/* out == stdout */
	fflush(out);
	output_error = ferror(out);
    }
    if ( output_error ) {
	fprintf(stderr, "%s: error writing to %s\n", program_name,
		(output_name ? output_name : "stdout"));
	exit(1);
    }
    if ( in != stdin )
	fclose(in);
    return 0;
}
/*
 * Skip over whitespace and comments, in either direction.
 * dir is 1 to move forward and -1 to move backward.  Returns a
 * pointer to the first character that is neither whitespace nor part
 * of a comment.  If a NUL is met inside a comment (i.e. the comment
 * is not complete within the string), a pointer to that NUL is
 * returned instead.
 */
char *
skipspace(p, dir)
    register char *p;
    register int dir;			/* 1 for forward, -1 for backward */
{
    while ( 1 )
       {
	while ( is_space(*p) )
	    p += dir;
	/* Anything other than a comment delimiter ends the scan. */
	if ( *p != '/' || p[dir] != '*' )
	    return p;
	p += 2 * dir;			/* step over the delimiter */
	while ( *p != '*' || p[dir] != '/' )
	   {
	    if ( *p == 0 )
		return p;		/* multi-line comment?? */
	    p += dir;
	   }
	p += 2 * dir;			/* step over the other delimiter */
       }
}
/*
 * Scan over a quoted string, in either direction.
 * p points at one of the string's double quotes: the opening one
 * when dir is 1, the closing one when dir is -1.  Returns a pointer
 * to the character just beyond the quote at the other end of the
 * string, in the direction of travel.
 *
 * A quote inside the string is escaped when it is preceded in memory
 * by an odd number of backslashes.  This is independent of the
 * scanning direction, and "\\" followed by a quote is an escaped
 * backslash and then a real terminator.
 */
char *
scanstring(p, dir)
    register char *p;
    register int dir;
{
    for (p += dir; ; p += dir)
       {
	register char *q;

	if (*p != '"')
	    continue;
	/* Count the run of backslashes that ends just before p. */
	for (q = p; q[-1] == '\\'; q--)
	    ;
	if (((p - q) & 1) == 0)
	    return p + dir;
       }
}
/*
 * Overwrite the characters from start up to, but not including, end
 * with spaces.  Carriage returns and newlines are left in place so
 * that the line structure of the text is preserved.
 * Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
    char *cur = start;

    while ( cur < end )
       {
	switch ( *cur )
	   {
	case '\r':
	case '\n':
	    break;			/* keep end-of-line characters */
	default:
	    *cur = ' ';
	   }
	cur++;
       }
    return 0;
}
/*
* Test whether the string in buf is a function definition.
* The string may contain and/or end with a newline.
* Return as follows:
* 0 - definitely not a function definition;
* 1 - definitely a function definition;
* 2 - definitely a function prototype (NOT USED);
* -1 - may be the beginning of a function definition,
* append another line and look again.
* The reason we don't attempt to convert function prototypes is that
* Ghostscript's declaration-generating macros look too much like
* prototypes, and confuse the algorithms.
*
* The decision is based on the last significant character of the
* text (ignoring trailing whitespace and comments) and on the shape
* of what follows the identifier at the left margin.
*/
int
test1(buf)
char *buf;
{ register char *p = buf;
char *bend;
char *endfn;
int contin;
if ( !isidfirstchar(*p) )
return 0; /* no name at left margin */
/* Find the last character that is not whitespace or comment. */
bend = skipspace(buf + strlen(buf) - 1, -1);
/* contin is the provisional verdict; it may still be overridden */
/* by the checks below. */
switch ( *bend )
{
case ';': contin = 0 /*2*/; break;
case ')': contin = 1; break;
case '{': return 0; /* not a function */
case '}': return 0; /* not a function */
default: contin = -1;
}
/* Skip the identifier at the left margin; endfn marks its end. */
while ( isidchar(*p) )
p++;
endfn = p;
p = skipspace(p, 1);
if ( *p++ != '(' )
return 0; /* not a function */
p = skipspace(p, 1);
if ( *p == ')' )
return 0; /* no parameters */
/* Check that the apparent function name isn't a keyword. */
/* We only need to check for keywords that could be followed */
/* by a left parenthesis (which, unfortunately, is most of them). */
{ static char *words[] =
{ "asm", "auto", "case", "char", "const", "double",
"extern", "float", "for", "if", "int", "long",
"register", "return", "short", "signed", "sizeof",
"static", "switch", "typedef", "unsigned",
"void", "volatile", "while", 0
};
char **key = words;
char *kp;
unsigned len = endfn - buf;
while ( (kp = *key) != 0 )
{ if ( strlen(kp) == len && !strncmp(kp, buf, len) )
return 0; /* name is a keyword */
key++;
}
}
{
char *id = p;
int len;
/*
* Check for identifier1(identifier2) and not
* identifier1(void), or identifier1(identifier2, xxxx).
*/
while ( isidchar(*p) )
p++;
len = p - id;
p = skipspace(p, 1);
if (*p == ',' ||
(*p == ')' && (len != 4 || strncmp(id, "void", 4)))
)
return 0; /* not a function */
}
/*
* If the last significant character was a ), we need to count
* parentheses, because it might be part of a formal parameter
* that is a procedure.
*/
if (contin > 0) {
int level = 0;
for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
/* More '(' than ')' so far: the header continues on a later line. */
if (level > 0)
contin = -1;
}
return contin;
}
/* Convert a recognized function definition or header to K&R syntax. */
/*
* buf holds the text of the header, starting with the function name;
* it is modified in place while being converted.  The result is
* written to out.  If header is non-zero, only "name();" followed by
* the line endings found in the argument text is written; otherwise
* the K&R parameter name list and the parameter declarations are
* written.  If convert_varargs is non-zero, a trailing "..." becomes
* va_alist / va_dcl; otherwise it is blanked out.
* Returns 0 on success.  If the break table cannot be allocated, buf
* is written to out unchanged and -1 is returned.
*/
int
convert1(buf, out, header, convert_varargs)
char *buf;
FILE *out;
int header; /* Boolean */
int convert_varargs; /* Boolean */
{ char *endfn;
register char *p;
/*
* The breaks table contains pointers to the beginning and end
* of each argument.
*/
char **breaks;
unsigned num_breaks = 2; /* for testing */
char **btop;
char **bp;
char **ap;
char *vararg = 0;
/* Pre-ANSI implementations don't agree on whether strchr */
/* is called strchr or index, so we open-code it here. */
/* On exit endfn points just past the first '('. */
for ( endfn = buf; *(endfn++) != '('; )
;
top: p = endfn;
breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
if ( breaks == NULL )
{ /* Couldn't allocate break table, give up */
fprintf(stderr, "Unable to allocate break table!\n");
fputs(buf, out);
return -1;
}
btop = breaks + num_breaks * 2 - 2;
bp = breaks;
/* Parse the argument list */
/* Each iteration handles one argument and records two pointers: */
/* where the argument's text starts and where its name starts. */
do
{ int level = 0;
char *lp = NULL;
char *rp = NULL;
char *end = NULL;
if ( bp >= btop )
{ /* Filled up break table. */
/* Allocate a bigger one and start over. */
free((char *)breaks);
num_breaks <<= 1;
goto top;
}
*bp++ = p;
/* Find the end of the argument */
/* lp and rp remember the outermost parenthesis pair inside it. */
for ( ; end == NULL; p++ )
{ switch(*p)
{
case ',':
if ( !level ) end = p;
break;
case '(':
if ( !level ) lp = p;
level++;
break;
case ')':
if ( --level < 0 ) end = p;
else rp = p;
break;
case '/':
if (p[1] == '*')
p = skipspace(p, 1) - 1;
break;
case '"':
p = scanstring(p, 1) - 1;
break;
default:
;
}
}
/* Erase any embedded prototype parameters. */
if ( lp && rp )
writeblanks(lp + 1, rp);
p--; /* back up over terminator */
/* Find the name being declared. */
/* This is complicated because of procedure and */
/* array modifiers. */
for ( ; ; )
{ p = skipspace(p - 1, -1);
switch ( *p )
{
case ']': /* skip array dimension(s) */
case ')': /* skip procedure args OR name */
{ int level = 1;
/* Walk backward to the matching opening bracket. */
while ( level )
switch ( *--p )
{
case ']': case ')':
level++;
break;
case '[': case '(':
level--;
break;
case '/':
if (p > buf && p[-1] == '*')
p = skipspace(p, -1) + 1;
break;
case '"':
p = scanstring(p, -1) + 1;
break;
default: ;
}
}
if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
{ /* We found the name being declared */
while ( !isidfirstchar(*p) )
p = skipspace(p, 1) + 1;
goto found;
}
break;
default:
goto found;
}
}
/* p now points at the last character of the name, or at the */
/* last '.' of a "..." argument. */
found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
{ if ( convert_varargs )
{ *bp++ = "va_alist";
vararg = p-2;
}
else
{ p++;
if ( bp == breaks + 1 ) /* sole argument */
writeblanks(breaks[0], p);
else
writeblanks(bp[-1] - 1, p);
bp--;
}
}
else
{ while ( isidchar(*p) ) p--;
*bp++ = p+1;
}
p = end;
}
while ( *p++ == ',' );
/* The final entry points just past the closing parenthesis. */
*bp = p;
/* Make a special check for 'void' arglist */
if ( bp == breaks+2 )
{ p = skipspace(breaks[0], 1);
if ( !strncmp(p, "void", 4) )
{ p = skipspace(p+4, 1);
if ( p == breaks[2] - 1 )
{ bp = breaks; /* yup, pretend arglist is empty */
writeblanks(breaks[0], p + 1);
}
}
}
/* Put out the function name and left parenthesis. */
p = buf;
while ( p != endfn ) putc(*p, out), p++;
/* Put out the declaration. */
if ( header )
{ fputs(");", out);
/* Keep the line endings so that line numbers stay the same. */
for ( p = breaks[0]; *p; p++ )
if ( *p == '\r' || *p == '\n' )
putc(*p, out);
}
else
{ for ( ap = breaks+1; ap < bp; ap += 2 )
{ p = *ap;
while ( isidchar(*p) )
putc(*p, out), p++;
if ( ap < bp - 1 )
fputs(", ", out);
}
fputs(") ", out);
/* Put out the argument declarations */
/* The character before each later break (the ',' or ')' that */
/* ended an argument) is overwritten with ';'. */
for ( ap = breaks+2; ap <= bp; ap += 2 )
(*ap)[-1] = ';';
if ( vararg != 0 )
{ *vararg = 0;
fputs(breaks[0], out); /* any prior args */
fputs("va_dcl", out); /* the final arg */
fputs(bp[0], out);
}
else
fputs(breaks[0], out);
}
free((char *)breaks);
return 0;
}
--- NEW FILE: gnu-md5.h ---
/* md5.h - Declaration of functions and data types used for MD5 sum
computing library functions.
Copyright (C) 1995, 1996 Free Software Foundation, Inc.
NOTE: The canonical source of this file is maintained with the GNU C
Library. Bugs can be reported to bug-glibc at prep.ai.mit.edu.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#ifndef _MD5_H
#define _MD5_H 1
#include <stdio.h>
#if defined HAVE_LIMITS_H || _LIBC
# include <limits.h>
#endif
/* The following contortions are an attempt to use the C preprocessor
to determine an unsigned integral type that is 32 bits wide. An
alternative approach is to use autoconf's AC_CHECK_SIZEOF macro, but
doing that would require that the configure script compile and *run*
the resulting executable. Locally running cross-compiled executables
is usually not possible.
The result is the typedef md5_uint32: u_int32_t under _LIBC,
otherwise the first of unsigned int, unsigned short and unsigned long
whose maximum value equals UINT_MAX_32_BITS. */
#ifdef _LIBC
# include <sys/types.h>
typedef u_int32_t md5_uint32;
#else
# if defined __STDC__ && __STDC__
# define UINT_MAX_32_BITS 4294967295U
# else
# define UINT_MAX_32_BITS 0xFFFFFFFF
# endif
/* If UINT_MAX isn't defined, assume it's a 32-bit type.
This should be valid for all systems GNU cares about because
that doesn't include 16-bit systems, and only modern systems
(that certainly have <limits.h>) have 64+-bit integral types. */
# ifndef UINT_MAX
# define UINT_MAX UINT_MAX_32_BITS
# endif
# if UINT_MAX == UINT_MAX_32_BITS
typedef unsigned int md5_uint32;
# else
# if USHRT_MAX == UINT_MAX_32_BITS
typedef unsigned short md5_uint32;
# else
# if ULONG_MAX == UINT_MAX_32_BITS
typedef unsigned long md5_uint32;
# else
/* The following line is intended to evoke an error.
Using #error is not portable enough. */
"Cannot determine unsigned 32-bit data type."
# endif
# endif
# endif
#endif
/* Structure to save state of computation between the single steps. */
struct md5_ctx
{
/* A, B, C, D: presumably the four chaining variables of RFC 1321
(the header cites RFC 1321 for initialization) -- confirm in the
implementation. */
md5_uint32 A;
md5_uint32 B;
md5_uint32 C;
md5_uint32 D;
/* NOTE(review): total[] looks like a two-word running length and
buflen/buffer like the pending, not yet processed input; the
header itself does not say -- verify against the implementation. */
md5_uint32 total[2];
md5_uint32 buflen;
char buffer[128];
};
/*
* The following three functions build up the low level used in
* the functions `md5_stream' and `md5_buffer'.
*/
/* Initialize structure containing state of computation.
(RFC 1321, 3.3: Step 3) */
extern void md5_init_ctx PARAMS ((struct md5_ctx *ctx));
/* Starting with the result of former calls of this function (or the
initialization function) update the context for the next LEN bytes
starting at BUFFER.
It is necessary that LEN is a multiple of 64!!! */
extern void md5_process_block PARAMS ((const void *buffer, size_t len,
struct md5_ctx *ctx));
/* Starting with the result of former calls of this function (or the
initialization function) update the context for the next LEN bytes
starting at BUFFER.
It is NOT required that LEN is a multiple of 64. */
extern void md5_process_bytes PARAMS ((const void *buffer, size_t len,
struct md5_ctx *ctx));
/* Process the remaining bytes in the buffer and put result from CTX
in first 16 bytes following RESBUF. The result is always in little
endian byte order, so that a byte-wise output yields the wanted
ASCII representation of the message digest.
IMPORTANT: On some systems it is required that RESBUF is correctly
aligned for a 32 bits value. */
extern void *md5_finish_ctx PARAMS ((struct md5_ctx *ctx, void *resbuf));
/* Put result from CTX in first 16 bytes following RESBUF. The result is
always in little endian byte order, so that a byte-wise output yields
the wanted ASCII representation of the message digest.
IMPORTANT: On some systems it is required that RESBUF is correctly
aligned for a 32 bits value. */
extern void *md5_read_ctx PARAMS ((const struct md5_ctx *ctx, void *resbuf));
/* Compute MD5 message digest for bytes read from STREAM. The
resulting message digest number will be written into the 16 bytes
beginning at RESBLOCK. */
extern int md5_stream PARAMS ((FILE *stream, void *resblock));
/* Compute MD5 message digest for LEN bytes beginning at BUFFER. The
result is always in little endian byte order, so that a byte-wise
output yields the wanted ASCII representation of the message
digest. */
extern void *md5_buffer PARAMS ((const char *buffer, size_t len,
void *resblock));
#endif
--- NEW FILE: http.c ---
/* HTTP support.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...2493 lines suppressed...]
{
char *wwwauth = NULL;
if (!strncasecmp (au, "Basic", 5))
wwwauth = basic_authentication_encode (user, passwd, "Authorization");
if (!strncasecmp (au, "NTLM", 4))
wwwauth = basic_authentication_encode (user, passwd, "Authorization");
#ifdef USE_DIGEST
else if (!strncasecmp (au, "Digest", 6))
wwwauth = digest_authentication_encode (au, user, passwd, method, path);
#endif /* USE_DIGEST */
return wwwauth;
}
/* Release the address list referenced by pc_last_host_ip, if one is
   set.  The pointer itself is left unchanged.  */
void
http_cleanup (void)
{
  if (!pc_last_host_ip)
    return;

  address_list_release (pc_last_host_ip);
}
--- NEW FILE: res.h ---
/* Declarations for res.c.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of Wget.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef RES_H
#define RES_H
/* Only forward-declared in this header, so code including it handles
   `struct robot_specs' exclusively through pointers.  */
struct robot_specs;
/* NOTE(review): parameter meanings are not visible in this header.
   The (const char *, int) pairs are presumably either a buffer with
   its length or a host with its port -- confirm each in res.c.  */
struct robot_specs *res_parse PARAMS ((const char *, int));
struct robot_specs *res_parse_from_file PARAMS ((const char *));
int res_match_path PARAMS ((const struct robot_specs *, const char *));
void res_register_specs PARAMS ((const char *, int, struct robot_specs *));
struct robot_specs *res_get_specs PARAMS ((const char *, int));
int res_retrieve_file PARAMS ((const char *, char **));
void res_cleanup PARAMS ((void));
#endif /* RES_H */
--- NEW FILE: headers.h ---
/* Declarations for `headers.c'.
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef HEADERS_H
#define HEADERS_H
/* Status codes.  NOTE(review): presumably the values returned by
   header_get -- confirm in headers.c.  */
enum {
HG_OK, HG_ERROR, HG_EOF
};
/* Flags accepted as the last argument of header_get.  */
enum header_get_flags { HG_NONE = 0,
HG_NO_CONTINUATIONS = 0x2 };
int header_get PARAMS ((struct rbuf *, char **, enum header_get_flags));
/* The third argument is a callback taking a string and an opaque
   pointer; the fourth argument is presumably the pointer handed to
   that callback -- confirm in headers.c.  */
int header_process PARAMS ((const char *, const char *,
int (*) (const char *, void *),
void *));
/* The next three have the same signature as the header_process
   callback, so they are presumably intended to be passed as it.  */
int header_extract_number PARAMS ((const char *, void *));
int header_strdup PARAMS ((const char *, void *));
int header_exists PARAMS ((const char *, void *));
int skip_lws PARAMS ((const char *));
#endif /* HEADERS_H */
--- NEW FILE: url.h ---
/* Declarations for url.c.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef URL_H
#define URL_H
/* Default port definitions */
#define DEFAULT_HTTP_PORT 80
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
/* Note: the ordering here is related to the order of elements in
`supported_schemes' in url.c.
SCHEME_HTTPS exists only when HAVE_SSL is defined, so the numeric
values of SCHEME_FTP and SCHEME_INVALID depend on that setting. */
enum url_scheme {
SCHEME_HTTP,
#ifdef HAVE_SSL
SCHEME_HTTPS,
#endif
SCHEME_FTP,
SCHEME_INVALID
};
/* Structure containing info on a URL.  The string members are plain
   `char *'; NOTE(review): which of them may be NULL and who owns the
   storage is not visible here -- confirm in url.c.  */
struct url
{
char *url; /* Original URL */
enum url_scheme scheme; /* URL scheme */
char *host; /* Extracted hostname */
int port; /* Port number */
/* URL components (URL-quoted). */
char *path;
char *params;
char *query;
char *fragment;
/* Extracted path info (unquoted). */
char *dir;
char *file;
/* Username and password (unquoted). */
char *user;
char *passwd;
};
/* Function declarations.
   NOTE(review): argument and return semantics are not documented in
   this header; consult url.c before relying on any of them.  */
char *url_escape PARAMS ((const char *));
/* The `int *' argument is presumably an out-parameter whose value can
   be passed to url_error -- confirm.  */
struct url *url_parse PARAMS ((const char *, int *));
const char *url_error PARAMS ((int));
char *url_full_path PARAMS ((const struct url *));
void url_set_dir PARAMS ((struct url *, const char *));
void url_set_file PARAMS ((struct url *, const char *));
void url_free PARAMS ((struct url *));
/* Note that this function shares its name with `enum url_scheme'.  */
enum url_scheme url_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *));
int scheme_default_port PARAMS ((enum url_scheme));
void scheme_disable PARAMS ((enum url_scheme));
char *url_string PARAMS ((const struct url *, int));
char *url_file_name PARAMS ((const struct url *));
char *uri_merge PARAMS ((const char *, const char *));
int mkalldirs PARAMS ((const char *));
char *rewrite_shorthand_url PARAMS ((const char *));
int schemes_are_similar_p PARAMS ((enum url_scheme a, enum url_scheme b));
#endif /* URL_H */
--- NEW FILE: options.h ---
/* struct options.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Needed for FILE (see the `dfp' member below). */
#include <stdio.h>
/* All run-time settings, gathered in one structure.  A single
   instance named `opt' is declared after the structure.  Members
   without a comment in the original are annotated below with hedged
   notes inferred from their names only.  */
struct options
{
int verbose; /* Are we verbose? */
int quiet; /* Are we quiet? */
int ntry; /* Number of tries per URL */
int retry_connrefused; /* Treat CONNREFUSED as non-fatal. */
int background; /* Whether we should work in background. */
int kill_longer; /* Do we reject messages with *more*
data than specified in
content-length? */
int ignore_length; /* Do we heed content-length at all? */
int recursive; /* Are we recursive? */
int spanhost; /* Do we span across hosts in
recursion? */
int relative_only; /* Follow only relative links. */
int no_parent; /* Restrict access to the parent
directory. */
int reclevel; /* Maximum level of recursion */
int dirstruct; /* Do we build the directory structure
as we go along? */
int no_dirstruct; /* Do we hate dirstruct? */
int cut_dirs; /* Number of directory components to cut. */
int add_hostdir; /* Do we add hostname directory? */
int noclobber; /* Disables clobbering of existing
data. */
char *dir_prefix; /* The top of directory tree */
char *lfilename; /* Log filename */
char *input_filename; /* Input filename */
int force_html; /* Is the input file an HTML file? */
int spider; /* Is Wget in spider mode? */
char **accepts; /* List of patterns to accept. */
char **rejects; /* List of patterns to reject. */
char **excludes; /* List of excluded FTP directories. */
char **includes; /* List of FTP directories to
follow. */
char **domains; /* See host.c */
char **exclude_domains; /* NOTE(review): presumably the counterpart
of `domains'; see host.c. */
int dns_cache; /* whether we cache DNS lookups. */
char **follow_tags; /* List of HTML tags to recursively follow. */
char **ignore_tags; /* List of HTML tags to ignore if recursing. */
int follow_ftp; /* Are FTP URL-s followed in recursive
retrieving? */
int retr_symlinks; /* Whether we retrieve symlinks in
FTP. */
char *output_document; /* The output file to which the
documents will be printed. */
int od_known_regular; /* whether output_document is a
regular file we can manipulate,
i.e. not `-' or a device file. */
FILE *dfp; /* The file pointer to the output
document. */
int always_rest; /* Always use REST. */
char *ftp_acc; /* FTP username */
char *ftp_pass; /* FTP password */
int netrc; /* Whether to read .netrc. */
int ftp_glob; /* FTP globbing */
int ftp_pasv; /* Passive FTP. */
char *http_user; /* HTTP user. */
char *http_passwd; /* HTTP password. */
char *user_header; /* User-defined header(s). */
int http_keep_alive; /* whether we use keep-alive */
int use_proxy; /* Do we use proxy? */
int allow_cache; /* Do we allow server-side caching? */
/* NOTE(review): presumably one proxy setting per URL scheme. */
char *http_proxy, *ftp_proxy, *https_proxy;
char **no_proxy; /* NOTE(review): presumably hosts for which no
proxy is used -- confirm. */
char *base_href; /* NOTE(review): presumably the base for
resolving relative links -- confirm. */
char *progress_type; /* progress indicator type. */
char *proxy_user; /* NOTE(review): presumably the proxy user
name -- confirm. */
char *proxy_passwd; /* NOTE(review): presumably the proxy
password -- confirm. */
double read_timeout; /* The read/write timeout. */
double dns_timeout; /* The DNS timeout. */
double connect_timeout; /* The connect timeout. */
int random_wait; /* vary from 0 .. wait secs by random()? */
double wait; /* The wait period between retrievals. */
double waitretry; /* The wait period between retries. - HEH */
int use_robots; /* Do we heed robots.txt? */
long limit_rate; /* Limit the download rate to this
many bps. */
LARGE_INT quota; /* Maximum file size to download and
store. */
int numurls; /* Number of successfully downloaded
URLs */
int server_response; /* Do we print server response? */
int save_headers; /* Do we save headers together with
file? */
#ifdef ENABLE_DEBUG
int debug; /* Debugging on/off */
#endif
int timestamping; /* Whether to use time-stamping. */
int backup_converted; /* Do we save pre-converted files as *.orig? */
int backups; /* Are numeric backups made? */
char *useragent; /* Naughty User-Agent, which can be
set to something other than
Wget. */
char *referer; /* Naughty Referer, which can be
set to something other than
NULL. */
int convert_links; /* Will the links be converted
locally? */
int remove_listing; /* Do we remove .listing files
generated by FTP? */
int htmlify; /* Do we HTML-ify the OS-dependent
listings? */
char *dot_style; /* NOTE(review): presumably selects a preset
for the dot settings below -- confirm. */
long dot_bytes; /* How many bytes in a printing
dot. */
int dots_in_line; /* How many dots in one line. */
int dot_spacing; /* How many dots between spacings. */
int delete_after; /* Whether the files will be deleted
after download. */
int html_extension; /* Use ".html" extension on all text/html? */
int page_requisites; /* Whether we need to download all files
necessary to display a page properly. */
char *bind_address; /* What local IP address to bind to. */
#ifdef HAVE_SSL
char *sslcadir; /* CA directory (hash files) */
char *sslcafile; /* CA File to use */
char *sslcertfile; /* external client cert to use. */
char *sslcertkey; /* the keyfile for this certificate
(if not internal) included in the
certfile. */
int sslcerttype; /* 0 = PEM / 1=ASN1 (DER) */
int sslcheckcert; /* 0 do not check / 1 check server cert */
char *sslegdsock; /* optional socket of the egd daemon */
int sslprotocol; /* 0 = auto / 1 = v2 / 2 = v3 / 3 = TLSv1 */
#endif /* HAVE_SSL */
int cookies; /* NOTE(review): presumably whether cookies
are used at all -- confirm. */
char *cookies_input; /* NOTE(review): presumably a file to load
cookies from -- confirm. */
char *cookies_output; /* NOTE(review): presumably a file to save
cookies to -- confirm. */
char *post_data; /* POST query string */
char *post_file_name; /* File to post */
enum {
restrict_unix,
restrict_windows
} restrict_files_os; /* file name restriction ruleset. */
int restrict_files_ctrl; /* non-zero if control chars in URLs
are restricted from appearing in
generated file names. */
int strict_comments; /* whether strict SGML comments are
enforced. */
};
/* The one global options object; only declared here, so its
   definition lives in some other translation unit.  */
extern struct options opt;
--- NEW FILE: connect.c ---
/* Establishing and handling network connections.
Copyright (C) 1995, 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <assert.h>
#ifndef WINDOWS
# include <sys/socket.h>
# include <netdb.h>
# include <netinet/in.h>
# ifndef __BEOS__
# include <arpa/inet.h>
# endif
#endif /* not WINDOWS */
#include <errno.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_SYS_SELECT_H
# include <sys/select.h>
#endif /* HAVE_SYS_SELECT_H */
#include "wget.h"
#include "utils.h"
#include "host.h"
#include "connect.h"
#ifndef errno
extern int errno;
#endif
/* Variables shared by bindport and acceptport: */
/* Listening ("master") socket created by bindport and closed by
   closeport; -1 while no such socket is open.  */
static int msock = -1;
/* Passed to accept() in acceptport as the peer-address argument.
   NOTE(review): nothing in this file assigns it, so it appears to
   stay NULL for the life of the program -- confirm that is intended. */
static struct sockaddr *addr;
/* Result of resolving opt.bind_address, filled in by
   resolve_bind_address.  Only meaningful once bind_address_resolved
   is non-zero.  */
static ip_address bind_address;
static int bind_address_resolved;
/* Resolve opt.bind_address and cache the first resulting address in
   bind_address, setting bind_address_resolved on success.  Does
   nothing when no bind address was requested or when a previous call
   already succeeded.  If the lookup fails, a message is logged and
   bind_address_resolved stays 0, so a later call will try again.  */
static void
resolve_bind_address (void)
{
  struct address_list *resolved;

  if (opt.bind_address == NULL)
    return;
  if (bind_address_resolved)
    return;

  resolved = lookup_host (opt.bind_address, 1);
  if (resolved)
    {
      address_list_copy_one (resolved, 0, &bind_address);
      address_list_release (resolved);
      bind_address_resolved = 1;
      return;
    }

  logprintf (LOG_NOTQUIET,
	     _("Unable to convert `%s' to a bind address. Reverting to ANY.\n"),
	     opt.bind_address);
}
/* Argument bundle for connect_with_timeout_callback: the three
   arguments to pass to connect(), plus a slot that receives connect's
   return value.  */
struct cwt_context {
  int fd;
  const struct sockaddr *addr;
  socklen_t addrlen;
  int result;
};

/* Callback with the `void (*) (void *)' shape expected by
   run_with_timeout.  ARG must point to a struct cwt_context; connect()
   is called with its fd/addr/addrlen and the return value is stored
   in its `result' member.  */
static void
connect_with_timeout_callback (void *arg)
{
  struct cwt_context *call = arg;
  int status;

  status = connect (call->fd, call->addr, call->addrlen);
  call->result = status;
}
/* Like connect, but specifies a timeout. If connecting takes longer
than TIMEOUT seconds, -1 is returned and errno is set to
ETIMEDOUT. */
static int
connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
double timeout)
{
struct cwt_context ctx;
ctx.fd = fd;
ctx.addr = addr;
ctx.addrlen = addrlen;
if (run_with_timeout (timeout, connect_with_timeout_callback, &ctx))
{
errno = ETIMEDOUT;
return -1;
}
if (ctx.result == -1 && errno == EINTR)
errno = ETIMEDOUT;
return ctx.result;
}
/* Host name shown in the "Connecting to ..." message printed by
   connect_to_one.  Kept in a file-level variable rather than being
   threaded through every call down to connect_to_one.  */
static const char *connection_host_name;

/* Set or clear the remembered host name.  Calls must alternate: a
   non-NULL HOST may only be stored while nothing is remembered, and
   NULL may only be stored while a name is remembered.  Either
   violation trips an assertion.  The pointer is stored as is, not
   copied.  */
void
set_connection_host_name (const char *host)
{
  if (host == NULL)
    assert (connection_host_name != NULL);
  else
    assert (connection_host_name == NULL);

  connection_host_name = host;
}
/* Connect to a remote host whose address has been resolved.

   ADDR and PORT identify the peer.  Unless SILENT is non-zero,
   progress ("Connecting to ...", "connected." / "failed: ...") is
   logged at LOG_VERBOSE.  If a bind address was configured and can be
   resolved, the socket is bound to it before connecting.

   Returns the connected socket descriptor, or a negative value on
   failure.  On failure errno is the same after the function returns
   as it was before the failure message was logged.  */
int
connect_to_one (ip_address *addr, unsigned short port, int silent)
{
  wget_sockaddr remote;
  int fd;

  /* Set port and protocol.  */
  wget_sockaddr_set_address (&remote, ip_default_family, port, addr);

  if (!silent)
    {
      char *addr_text = pretty_print_address (addr);
      /* Show the host name too, but only when one is known and it
	 differs from the printed address.  */
      if (connection_host_name == NULL
	  || 0 == strcmp (connection_host_name, addr_text))
	logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
		   addr_text, port);
      else
	logprintf (LOG_VERBOSE, _("Connecting to %s[%s]:%hu... "),
		   connection_host_name, addr_text, port);
    }

  /* Make an internet socket, stream type.  */
  fd = socket (ip_default_family, SOCK_STREAM, 0);
  if (fd >= 0)
    {
      int usable = 1;

      /* For very small rate limits, shrink the receive buffer (and
	 hence, hopefully, the kernel window) to the size of the limit,
	 but never below 512 bytes.  That way we don't sleep for more
	 than 1s between network reads.  */
      if (opt.limit_rate && opt.limit_rate < 8192)
	{
	  int bufsize = opt.limit_rate;
	  if (bufsize < 512)
	    bufsize = 512;
#ifdef SO_RCVBUF
	  setsockopt (fd, SOL_SOCKET, SO_RCVBUF,
		      (char *)&bufsize, sizeof (bufsize));
#endif
	  /* When opt.limit_rate support is added for writing, as with
	     `--post-file', SO_SNDBUF should be set here as well.  */
	}

      resolve_bind_address ();
      if (bind_address_resolved)
	{
	  /* Bind the client side to the requested address.  */
	  wget_sockaddr local;
	  wget_sockaddr_set_address (&local, ip_default_family, 0,
				     &bind_address);
	  if (bind (fd, &local.sa, sockaddr_len ()))
	    usable = 0;
	}

      /* Connect the socket to the remote host.  */
      if (usable
	  && connect_with_timeout (fd, &remote.sa, sockaddr_len (),
				   opt.connect_timeout) < 0)
	usable = 0;

      if (!usable)
	{
	  CLOSE (fd);
	  fd = -1;
	}
    }

  if (fd >= 0)
    {
      /* Success.  */
      if (!silent)
	logprintf (LOG_VERBOSE, _("connected.\n"));
      DEBUGP (("Created socket %d.\n", fd));
    }
  else
    {
      /* Logging may disturb errno; hand the caller the value that was
	 current when the failure was reported.  */
      int saved_errno = errno;
      if (!silent)
	logprintf (LOG_VERBOSE, "failed: %s.\n", strerror (saved_errno));
      errno = saved_errno;
    }

  return fd;
}
/* Try the addresses in AL one after another until one of them
   accepts a connection on PORT.

   Only the index range reported by address_list_get_bounds is tried.
   Every address that fails is flagged with address_list_set_faulty
   before moving on.  SILENT is passed through to connect_to_one.

   Returns the connected socket, or -1 if no address could be
   reached.  */
int
connect_to_many (struct address_list *al, unsigned short port, int silent)
{
  int first, limit, idx;

  address_list_get_bounds (al, &first, &limit);

  idx = first;
  while (idx < limit)
    {
      ip_address candidate;
      int fd;

      address_list_copy_one (al, idx, &candidate);
      fd = connect_to_one (&candidate, port, silent);
      if (fd >= 0)
	return fd;

      /* This one failed; remember that and go on to the next.  */
      address_list_set_faulty (al, idx);
      ++idx;
    }

  return -1;
}
/* Report whether SOCK still looks like a live connection.

   With select() available, SOCK is polled for readability with a
   one-microsecond timeout.  A timeout (nothing to read, no EOF) is
   taken to mean the connection is still open and 1 is returned; any
   other select() result yields 0.  The technique is from Andrew
   Maholski's code in the Unix Socket FAQ.

   Without select() there is no way to tell, so 1 is returned.  */
int
test_socket_open (int sock)
{
#ifdef HAVE_SELECT
  fd_set readable;
  struct timeval brief;
  int ready;

  FD_ZERO (&readable);
  FD_SET (sock, &readable);

  /* Wait one microsecond.  */
  brief.tv_usec = 1;
  brief.tv_sec = 0;

  ready = select (sock + 1, &readable, NULL, NULL, &brief);

  /* Only a timeout counts as "still connected".  */
  return ready == 0;
#else
  /* Without select, it's hard to know for sure.  */
  return 1;
#endif
}
/* Bind the local port PORT.  This does all the necessary work, which
   is creating a socket, setting SO_REUSEADDR option on it, then
   calling bind() and listen().  If *PORT is 0, a random port is
   chosen by the system, and its value is stored to *PORT.  The
   internal variable MSOCK is set to the value of the ensuing master
   socket.  Call acceptport() to block for and accept a connection.

   FAMILY is the address family used to create the socket.

   Returns BINDOK on success.  On failure returns CONSOCKERR (socket
   creation or SO_REUSEADDR failed), BINDERR, CONPORTERR (getsockname
   failed) or LISTENERR.  On every failure path the socket, if it was
   created, is closed again and MSOCK is left at -1.  */
uerr_t
bindport (unsigned short *port, int family)
{
  int optval = 1;
  wget_sockaddr srv;

  memset (&srv, 0, sizeof (wget_sockaddr));

  msock = socket (family, SOCK_STREAM, 0);
  if (msock < 0)
    {
      msock = -1;
      return CONSOCKERR;
    }

#ifdef SO_REUSEADDR
  if (setsockopt (msock, SOL_SOCKET, SO_REUSEADDR,
		  (char *)&optval, sizeof (optval)) < 0)
    {
      /* Don't leak the descriptor: release it like the other error
	 paths below do.  */
      CLOSE (msock);
      msock = -1;
      return CONSOCKERR;
    }
#endif

  resolve_bind_address ();
  /* NOTE(review): the socket is created with FAMILY but the address
     is built with ip_default_family, and the port is run through
     htons here whereas connect_to_one passes its port to the same
     helper unconverted.  Both look inconsistent for a non-zero *PORT
     or a non-default FAMILY -- confirm against
     wget_sockaddr_set_address before changing.  */
  wget_sockaddr_set_address (&srv, ip_default_family, htons (*port),
			     bind_address_resolved ? &bind_address : NULL);
  if (bind (msock, &srv.sa, sockaddr_len ()) < 0)
    {
      CLOSE (msock);
      msock = -1;
      return BINDERR;
    }
  DEBUGP (("Master socket fd %d bound.\n", msock));

  if (!*port)
    {
      /* The system picked the port; find out which one.  */
      socklen_t sa_len = sockaddr_len ();
      if (getsockname (msock, &srv.sa, &sa_len) < 0)
	{
	  CLOSE (msock);
	  msock = -1;
	  return CONPORTERR;
	}
      *port = wget_sockaddr_get_port (&srv);
      DEBUGP (("using port %i.\n", *port));
    }

  if (listen (msock, 1) < 0)
    {
      CLOSE (msock);
      msock = -1;
      return LISTENERR;
    }

  return BINDOK;
}
#ifdef HAVE_SELECT
/* Wait until descriptor FD becomes available, giving up after
   MAXTIME seconds (fractions are honoured).  "Available" means
   readable when WRITEP is 0 and writable otherwise.  select() calls
   interrupted by a signal (EINTR) are transparently repeated.

   Returns 1 if FD is available, 0 on timeout (with errno set to
   ETIMEDOUT for the caller's convenience) and -1 on error.  */
int
select_fd (int fd, double maxtime, int writep)
{
  fd_set watched;
  struct timeval tmout;
  long whole_secs = (long)maxtime;
  int ready;

  FD_ZERO (&watched);
  FD_SET (fd, &watched);

  /* Split MAXTIME into whole seconds and leftover microseconds.  */
  tmout.tv_sec = whole_secs;
  tmout.tv_usec = 1000000L * (maxtime - whole_secs);

  for (;;)
    {
      if (writep)
	ready = select (fd + 1, NULL, &watched, NULL, &tmout);
      else
	ready = select (fd + 1, &watched, NULL, NULL, &tmout);

      if (ready >= 0 || errno != EINTR)
	break;
    }

  if (ready == 0)
    errno = ETIMEDOUT;

  return ready;
}
#endif /* HAVE_SELECT */
/* Accept a connection on MSOCK and store the new descriptor in
   *SOCK.  MSOCK must have been set up by bindport() beforehand.

   When select() is available and OPT.CONNECT_TIMEOUT is non-zero, the
   function first waits at most that many seconds for MSOCK to become
   readable and fails if it does not.  Otherwise it blocks in accept().

   Returns ACCEPTOK on success and ACCEPTERR on timeout or error.
   *SOCK is written only if accept() was actually called.  */
uerr_t
acceptport (int *sock)
{
  socklen_t addrlen = sockaddr_len ();
  int accepted;

#ifdef HAVE_SELECT
  if (opt.connect_timeout
      && select_fd (msock, opt.connect_timeout, 0) <= 0)
    return ACCEPTERR;
#endif

  accepted = accept (msock, addr, &addrlen);
  *sock = accepted;
  if (accepted < 0)
    return ACCEPTERR;

  DEBUGP (("Created socket fd %d.\n", *sock));
  return ACCEPTOK;
}
/* Close SOCK together with the master socket MSOCK most recently
   created by bindport().  Passing -1 for SOCK closes MSOCK only.
   MSOCK is -1 afterwards in every case.  */
void
closeport (int sock)
{
  if (sock != -1)
    CLOSE (sock);

  if (msock != -1)
    {
      CLOSE (msock);
      msock = -1;
    }
}
/* Store in *IP the local address of the connection on FD.

   Returns 1 on success and 0 if getsockname() fails.  A socket whose
   address family is neither AF_INET nor (with ENABLE_IPV6) AF_INET6
   aborts the program.  */
int
conaddr (int fd, ip_address *ip)
{
  wget_sockaddr local;
  socklen_t local_len = sizeof (local);

  if (getsockname (fd, &local.sa, &local_len) < 0)
    return 0;

#ifdef ENABLE_IPV6
  if (local.sa.sa_family == AF_INET6)
    {
      memcpy (ip, &local.sin6.sin6_addr, 16);
      return 1;
    }
#endif

  if (local.sa.sa_family == AF_INET)
    {
      map_ipv4_to_ip ((ip4_address *)&local.sin.sin_addr, ip);
      return 1;
    }

  abort ();
  return 0;
}
/* Read at most LEN bytes from FD into BUF.

   Behaves like read(), with two additions: reads interrupted by a
   signal (EINTR) are retried, and, when select() is available and
   OPT.READ_TIMEOUT is non-zero, the call first waits at most that
   long for FD to become readable.  If that wait does not report FD
   as readable, -1 is returned without reading.

   Otherwise returns whatever READ returned.  */
int
iread (int fd, char *buf, int len)
{
  int nread;

#ifdef HAVE_SELECT
  if (opt.read_timeout
      && select_fd (fd, opt.read_timeout, 0) <= 0)
    return -1;
#endif

  for (;;)
    {
      nread = READ (fd, buf, len);
      if (nread != -1 || errno != EINTR)
	return nread;
    }
}
/* Write all LEN bytes of BUF to FD.

   Unlike iread(), this keeps writing until everything has been sent
   or a write fails, so callers need not compare the return value with
   LEN; they should simply check for -1.  Writes interrupted by a
   signal (EINTR) are retried.  When select() is available and
   OPT.READ_TIMEOUT is non-zero, each write is preceded by a wait of
   at most that long for FD to become writable; if the wait does not
   report FD as writable, -1 is returned.

   The return value is that of the last WRITE performed, or 0 when
   LEN is not positive.  */
int
iwrite (int fd, char *buf, int len)
{
  int written = 0;
  char *cursor = buf;
  int remaining = len;

  /* `write' may accept fewer bytes than requested, so keep going
     until nothing remains.  */
  while (remaining > 0)
    {
#ifdef HAVE_SELECT
      if (opt.read_timeout
	  && select_fd (fd, opt.read_timeout, 1) <= 0)
	return -1;
#endif
      do
	written = WRITE (fd, cursor, remaining);
      while (written == -1 && errno == EINTR);

      if (written <= 0)
	return written;

      cursor += written;
      remaining -= written;
    }

  return written;
}
--- NEW FILE: gen_sslfunc.c ---
/* SSL support.
Copyright (C) 2000 Free Software Foundation, Inc.
Contributed by Christian Fraenkel.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#ifdef HAVE_SSL
#include <assert.h>
#include <errno.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <openssl/bio.h>
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <openssl/pem.h>
#include <openssl/rand.h>
#include "wget.h"
#include "utils.h"
#include "connect.h"
#include "url.h"
#ifndef errno
extern int errno;
#endif
/* Seed OpenSSL's pseudo-random number generator.

   Sources are tried in this order, stopping as soon as RAND_status()
   reports that the generator is seeded:
     1. the random file named by RAND_file_name();
     2. the EGD socket in opt.sslegdsock, if one is configured;
     3. on Windows only, the screen contents;
     4. as a last resort, up to 500 bytes from random_number().
   If the generator is still not seeded after all that, a message is
   logged and the HTTPS scheme is disabled.

   The whole body is compiled only for SSLEAY_VERSION_NUMBER >=
   0x00905100; with older libraries this function does nothing.  */
void
ssl_init_prng (void)
{
  /* It is likely that older versions of OpenSSL will fail on
     non-Linux machines because this code is unable to seed the PRNG
     on older versions of the library.  */

#if SSLEAY_VERSION_NUMBER >= 0x00905100
  char rand_file[256];
  int maxrand = 500;

  /* First, seed from a file specified by the user.  This will be
     $RANDFILE, if set, or ~/.rnd.  RAND_file_name returns NULL when
     it cannot produce a name, in which case RAND_FILE holds nothing
     usable and must not be passed on.  */
  if (RAND_file_name (rand_file, sizeof (rand_file)) != NULL)
    /* Seed at most 16k (value borrowed from curl) from random file. */
    RAND_load_file (rand_file, 16384);

  if (RAND_status ())
    return;

  /* Get random data from EGD if opt.sslegdsock was set.  */
  if (opt.sslegdsock && *opt.sslegdsock)
    RAND_egd (opt.sslegdsock);

  if (RAND_status ())
    return;

#ifdef WINDOWS
  /* Under Windows, we can try to seed the PRNG using screen content.
     This may or may not work, depending on whether we'll calling Wget
     interactively.  */
  RAND_screen ();
  if (RAND_status ())
    return;
#endif

  /* Still not enough randomness, presumably because neither random
     file nor EGD have been available.  Use the stupidest possible
     method -- seed OpenSSL's PRNG with the system's PRNG.  This is
     insecure in the cryptographic sense, but people who care about
     security will use /dev/random or their own source of randomness
     anyway.  */
  while (RAND_status () == 0 && maxrand-- > 0)
    {
      unsigned char rnd = random_number (256);
      RAND_seed (&rnd, sizeof (rnd));
    }

  if (RAND_status () == 0)
    {
      logprintf (LOG_NOTQUIET,
		 _("Could not seed OpenSSL PRNG; disabling SSL.\n"));
      scheme_disable (SCHEME_HTTPS);
    }
#endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */
}
/* Certificate verification callback, installed with
   SSL_CTX_set_verify in init_ssl.

   OK is the verdict passed in for the certificate being checked
   (non-zero means it was accepted), and CTX carries the error code
   when it was rejected.  The verdict is returned unchanged, with one
   exception: a rejection caused by
   X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT is turned into an
   acceptance.

   NOTE(review): accepting self-signed certificates means such a
   server passes verification even when certificate checking is
   requested.  The original code was itself unsure about this
   ("Unsure if we should handle that this way"); behavior is kept.  */
int
verify_callback (int ok, X509_STORE_CTX *ctx)
{
  if (ok == 0)
    {
      switch (ctx->error)
	{
	case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT:
	  /* Unsure if we should handle that this way.  */
	  ok = 1;
	  break;
	case X509_V_ERR_CERT_NOT_YET_VALID:
	case X509_V_ERR_CERT_HAS_EXPIRED:
	  /* The certificate is not valid; keep the rejection.  */
	default:
	  break;
	}
    }
  return ok;
}
/* Drain OpenSSL's error queue, handing a textual form of every
   queued error to DEBUGP.  Returns the number of errors taken off
   the queue.  */
int
ssl_printerrors (void)
{
  char text[1024];
  unsigned long code;
  int count;

  memset (text, 0, sizeof (text));

  for (count = 0; (code = ERR_get_error ()) != 0; ++count)
    {
      DEBUGP (("OpenSSL: %s\n", ERR_error_string (code, text)));
    }

  return count;
}
/* Create an SSL context, store it in *CTX and set some defaults for it.

   The protocol is chosen from opt.sslprotocol (1 = SSLv2, 2 = SSLv3,
   3 = TLSv1, anything else = SSLv23).  Peer verification is enabled
   only when opt.sslcheckcert is set and a CA file or CA directory was
   configured.  If a client certificate and/or key file is configured,
   both are loaded; when only one of the two is given it is used for
   both purposes.

   Returns 0 on success, SSLERRCTXCREATE if the method or the context
   could not be created, SSLERRCERTFILE / SSLERRCERTKEY if loading the
   client certificate / private key failed.  */
uerr_t
init_ssl (SSL_CTX **ctx)
{
  SSL_METHOD *meth = NULL;
  int verify;
  int can_validate;

  SSL_library_init ();
  SSL_load_error_strings ();
  SSLeay_add_all_algorithms ();
  SSLeay_add_ssl_algorithms ();

  switch (opt.sslprotocol)
    {
    default:
      meth = SSLv23_client_method ();
      break;
    case 1 :
      meth = SSLv2_client_method ();
      break;
    case 2 :
      meth = SSLv3_client_method ();
      break;
    case 3 :
      meth = TLSv1_client_method ();
      break;
    }
  if (meth == NULL)
    {
      ssl_printerrors ();
      return SSLERRCTXCREATE;
    }

  *ctx = SSL_CTX_new (meth);
  /* It is the freshly created context that must be checked here, not
     METH again: a NULL context would otherwise be handed to the calls
     below.  */
  if (*ctx == NULL)
    {
      ssl_printerrors ();
      return SSLERRCTXCREATE;
    }

  /* Can we validate the server Cert ? */
  if (opt.sslcadir != NULL || opt.sslcafile != NULL)
    {
      /* The result of loading is not inspected; validation is
         considered possible as soon as a location was configured.  */
      SSL_CTX_load_verify_locations (*ctx, opt.sslcafile, opt.sslcadir);
      can_validate = 1;
    }
  else
    {
      can_validate = 0;
    }

  if (!opt.sslcheckcert)
    {
      /* check cert but ignore error, do not break handshake on error */
      verify = SSL_VERIFY_NONE;
    }
  else
    {
      if (!can_validate)
        {
          logprintf (LOG_NOTQUIET, "Warrining validation of Server Cert not possible!\n");
          verify = SSL_VERIFY_NONE;
        }
      else
        {
          /* break handshake if server cert is not valid but allow NO-Cert mode */
          verify = SSL_VERIFY_PEER;
        }
    }
  SSL_CTX_set_verify (*ctx, verify, verify_callback);

  if (opt.sslcertfile != NULL || opt.sslcertkey != NULL)
    {
      int ssl_cert_type;

      if (!opt.sslcerttype)
        ssl_cert_type = SSL_FILETYPE_PEM;
      else
        ssl_cert_type = SSL_FILETYPE_ASN1;

      /* When only one of certificate/key was given, use that file for
         both.  */
      if (opt.sslcertkey == NULL)
        opt.sslcertkey = opt.sslcertfile;
      if (opt.sslcertfile == NULL)
        opt.sslcertfile = opt.sslcertkey;

      if (SSL_CTX_use_certificate_file (*ctx, opt.sslcertfile, ssl_cert_type) <= 0)
        {
          ssl_printerrors ();
          return SSLERRCERTFILE;
        }
      if (SSL_CTX_use_PrivateKey_file (*ctx, opt.sslcertkey , ssl_cert_type) <= 0)
        {
          ssl_printerrors ();
          return SSLERRCERTKEY;
        }
    }
  return 0; /* Succeded */
}
/* Shut down and release the SSL connection CON.  A NULL CON is
   ignored.  When the first SSL_shutdown call returns 0 it is called a
   second time before the object is freed.  */
void
shutdown_ssl (SSL* con)
{
  if (con != NULL)
    {
      int first_try = SSL_shutdown (con);

      if (first_try == 0)
        SSL_shutdown (con);
      SSL_free (con);
    }
}
/* Set up an SSL structure for CTX in *CON and perform the handshake
   on FD.

   Returns 0 if everything went right.
   Returns 1 if something went wrong ----- TODO: More exit codes

   When object creation, SSL_set_fd or SSL_connect fails, any SSL
   object created here is released again and *CON is left NULL, so
   the caller has nothing to free.  If SSL_connect reports success but
   the connection state is not SSL_ST_OK, non-zero is returned with
   *CON still set.  */
int
connect_ssl (SSL **con, SSL_CTX *ctx, int fd)
{
  if (NULL == (*con = SSL_new (ctx)))
    {
      ssl_printerrors ();
      return 1;
    }
  if (!SSL_set_fd (*con, fd))
    {
      ssl_printerrors ();
      /* Do not leak the object created above; mirror the cleanup done
         for a failed SSL_connect below.  */
      SSL_free (*con);
      *con = NULL;
      return 1;
    }
  SSL_set_connect_state (*con);
  switch (SSL_connect (*con))
    {
    case 1 :
      return (*con)->state != SSL_ST_OK;
    default:
      ssl_printerrors ();
      shutdown_ssl (*con);
      *con = NULL;
      return 1;
    case 0 :
      ssl_printerrors ();
      SSL_free (*con);
      *con = NULL;
      return 1;
    }
  /* Not reached: every switch branch returns.  */
  return 0;
}
/* Release the SSL context CTX by handing it to SSL_CTX_free.  */
void
free_ssl_ctx (SSL_CTX * ctx)
{
SSL_CTX_free (ctx);
}
/* SSL counterpart of iread: read at most LEN bytes from the SSL
   connection CON into BUF using SSL_read.

   With HAVE_SELECT, if opt.read_timeout is set and OpenSSL has no
   pending bytes, the underlying descriptor is first waited on with
   select_fd; -1 is returned when that wait does not succeed.
   Otherwise the result of SSL_read is returned, retrying while it
   reports -1 with errno set to EINTR.  */
int
ssl_iread (SSL *con, char *buf, int len)
{
  int fd;
  int nread;

  BIO_get_fd (con->rbio, &fd);
#ifdef HAVE_SELECT
  if (opt.read_timeout && !SSL_pending (con)
      && select_fd (fd, opt.read_timeout, 0) <= 0)
    return -1;
#endif
  for (;;)
    {
      nread = SSL_read (con, buf, len);
      if (nread != -1 || errno != EINTR)
        break;
    }
  return nread;
}
/* SSL counterpart of iwrite: write LEN bytes from BUF to the SSL
   connection CON using SSL_write.

   SSL_write may accept fewer than LEN bytes, so the outer loop keeps
   going until everything was written or a write fails; the inner loop
   retries a write that was interrupted (EINTR).  With HAVE_SELECT and
   opt.read_timeout set, each round first waits on the descriptor with
   select_fd and returns -1 if that wait does not succeed.

   The value returned is the result of the last SSL_write call (0 if
   LEN was not positive to begin with).  */
int
ssl_iwrite (SSL *con, char *buf, int len)
{
  int fd;
  int written = 0;

  BIO_get_fd (con->rbio, &fd);
  for (; len > 0; buf += written, len -= written)
    {
#ifdef HAVE_SELECT
      if (opt.read_timeout && select_fd (fd, opt.read_timeout, 1) <= 0)
        return -1;
#endif
      do
        written = SSL_write (con, buf, len);
      while (written == -1 && errno == EINTR);

      /* Leave without advancing BUF/LEN when the write failed.  */
      if (written <= 0)
        break;
    }
  return written;
}
#endif /* HAVE_SSL */
--- NEW FILE: hash.h ---
/* Hash table declarations.
Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Include guard for the hash table declarations.  */
#ifndef HASH_H
#define HASH_H
/* From XEmacs, and hence from Dragon book. */
#define GOOD_HASH 65599 /* prime number just over 2^16; Dragon book, p. 435 */
/* HASHn combines n values into a single number: the combination of
the first n-1 values is multiplied by GOOD_HASH and the last value is
added.  Arguments are expanded once each.  */
#define HASH2(a,b) (GOOD_HASH * (a) + (b))
#define HASH3(a,b,c) (GOOD_HASH * HASH2 (a,b) + (c))
#define HASH4(a,b,c,d) (GOOD_HASH * HASH3 (a,b,c) + (d))
#define HASH5(a,b,c,d,e) (GOOD_HASH * HASH4 (a,b,c,d) + (e))
#define HASH6(a,b,c,d,e,f) (GOOD_HASH * HASH5 (a,b,c,d,e) + (f))
#define HASH7(a,b,c,d,e,f,g) (GOOD_HASH * HASH6 (a,b,c,d,e,f) + (g))
#define HASH8(a,b,c,d,e,f,g,h) (GOOD_HASH * HASH7 (a,b,c,d,e,f,g) + (h))
#define HASH9(a,b,c,d,e,f,g,h,i) (GOOD_HASH * HASH8 (a,b,c,d,e,f,g,h) + (i))
/* The table type is opaque to users of this header; only pointers to
it are passed around.  */
struct hash_table;
/* Constructor.  The arguments are an int (presumably the expected
number of entries -- confirm against hash.c), the hashing function and
the key comparison function.  */
struct hash_table *hash_table_new PARAMS ((int,
unsigned long (*) (const void *),
int (*) (const void *,
const void *)));
void hash_table_destroy PARAMS ((struct hash_table *));
/* Lookup, insertion and removal by key.  */
void *hash_table_get PARAMS ((const struct hash_table *, const void *));
int hash_table_get_pair PARAMS ((const struct hash_table *, const void *,
void *, void *));
int hash_table_contains PARAMS ((const struct hash_table *, const void *));
void hash_table_put PARAMS ((struct hash_table *, const void *, void *));
int hash_table_remove PARAMS ((struct hash_table *, const void *));
void hash_table_clear PARAMS ((struct hash_table *));
/* Iteration: the callback takes three pointers; the trailing void *
of hash_table_map is presumably forwarded to it -- confirm against
hash.c.  */
void hash_table_map PARAMS ((struct hash_table *,
int (*) (void *, void *, void *),
void *));
int hash_table_count PARAMS ((const struct hash_table *));
/* Ready-made hash/compare functions and table constructors for
string keys and for pointer keys.  */
unsigned long string_hash PARAMS ((const void *));
int string_cmp PARAMS ((const void *, const void *));
struct hash_table *make_string_hash_table PARAMS ((int));
struct hash_table *make_nocase_string_hash_table PARAMS ((int));
unsigned long ptrhash PARAMS ((const void *));
int ptrcmp PARAMS ((const void *, const void *));
#endif /* HASH_H */
--- NEW FILE: netrc.c ---
/* Read and parse the .netrc file to get hosts, accounts, and passwords.
Copyright (C) 1996, Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This file used to be kept in synch with the code in Fetchmail, but
the latter has diverged since. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <sys/types.h>
#include <errno.h>
#include "wget.h"
#include "utils.h"
#include "netrc.h"
#include "init.h"
#ifndef errno
extern int errno;
#endif
/* Name of the netrc file that search_netrc looks for inside the
user's home directory.  */
#define NETRC_FILE_NAME ".netrc"
/* Head of the account list parsed from the netrc file; it is filled
in by search_netrc the first time it runs with opt.netrc set.  */
acc_t *netrc_list;
/* Forward declaration; the parser is defined further down in this
file.  */
static acc_t *parse_netrc PARAMS ((const char *));
/* Return the correct user and password, given the host, user (as
given in the URL), and password (as given in the URL). May return
NULL.
If SLACK_DEFAULT is set, allow looking for a "default" account.
You will typically turn it off for HTTP. */
void
search_netrc (const char *host, const char **acc, const char **passwd,
int slack_default)
{
acc_t *l;
static int processed_netrc;
if (!opt.netrc)
return;
/* Find ~/.netrc. */
if (!processed_netrc)
{
char *home = home_dir ();
netrc_list = NULL;
processed_netrc = 1;
if (home)
{
int err;
struct stat buf;
char *path = (char *)alloca (strlen (home) + 1
+ strlen (NETRC_FILE_NAME) + 1);
sprintf (path, "%s/%s", home, NETRC_FILE_NAME);
xfree (home);
err = stat (path, &buf);
if (err == 0)
netrc_list = parse_netrc (path);
}
}
/* If nothing to do... */
if (!netrc_list)
return;
/* Acc and password found; all OK. */
if (*acc && *passwd)
return;
/* Some data not given -- try finding the host. */
for (l = netrc_list; l; l = l->next)
{
if (!l->host)
continue;
else if (!strcasecmp (l->host, host))
break;
}
if (l)
{
if (*acc)
{
/* Looking for password in .netrc. */
if (!strcmp (l->acc, *acc))
*passwd = l->passwd; /* usernames match; password OK */
else
*passwd = NULL; /* usernames don't match */
}
else /* NOT *acc */
{
/* If password was given, use it. The account is l->acc. */
*acc = l->acc;
if (l->passwd)
*passwd = l->passwd;
}
return;
}
else
{
if (!slack_default)
return;
if (*acc)
return;
/* Try looking for the default account. */
for (l = netrc_list; l; l = l->next)
if (!l->host)
break;
if (!l)
return;
*acc = l->acc;
if (!*passwd)
*passwd = l->passwd;
return;
}
}
#ifdef STANDALONE
#include <assert.h>
/* Normally, these functions would be defined by your package. */
# define xmalloc malloc
# define xfree free
# define xstrdup strdup
# define xrealloc realloc
/* Read a line from FP.  The function reallocs the storage as needed
   to accommodate any length of line.  Reallocs are done
   exponentially, doubling the storage after each overflow to
   minimize the number of calls to realloc() and fgets().  The newline
   character at the end of line is retained.

   After end-of-file is encountered without anything being read, NULL
   is returned.  NULL is also returned on error, including failure to
   allocate memory.  To distinguish a read error from end-of-file, use
   the stdio function ferror().

   The returned string is allocated with xmalloc and must be released
   by the caller with xfree.  */
char *
read_whole_line (FILE *fp)
{
  int length = 0;
  int bufsize = 81;
  char *line = (char *)xmalloc (bufsize);

  /* In this standalone build xmalloc is plain malloc and may fail.  */
  if (!line)
    return NULL;

  while (fgets (line + length, bufsize - length, fp))
    {
      int chunk = (int) strlen (line + length);
      char *bigger;

      /* fgets() succeeded but the chunk looks empty: the data starts
         with a NUL byte.  Stop here instead of indexing line[-1].  */
      if (chunk == 0)
        break;
      length += chunk;
      if (line[length - 1] == '\n')
        break;
      /* fgets() guarantees to read the whole line, or to use up the
         space we've given it.  We can double the buffer
         unconditionally.  */
      bufsize <<= 1;
      bigger = xrealloc (line, bufsize);
      if (!bigger)
        {
          /* The old buffer is still ours; release it.  */
          xfree (line);
          return NULL;
        }
      line = bigger;
    }
  if (length == 0 || ferror (fp))
    {
      xfree (line);
      return NULL;
    }
  if (length + 1 < bufsize)
    {
      /* Relieve the memory from our exponential greediness.  We say
         `length + 1' because the terminating \0 is not included in
         LENGTH.  We don't need to zero-terminate the string ourselves,
         though, because fgets() does that.  If shrinking fails, the
         larger buffer is still valid and is returned as is.  */
      char *trimmed = xrealloc (line, length + 1);
      if (trimmed)
        line = trimmed;
    }
  return line;
}
#endif /* STANDALONE */
/* Maybe add *NEWENTRY to the account information list, *LIST.

   An entry is pushed onto the front of the list only if it has an
   account name.  An entry without one is discarded: its strings are
   released and the structure itself is reused.  A NULL *NEWENTRY just
   causes a fresh structure to be allocated.

   In any event, on return *NEWENTRY points to a zeroed, ready-to-use
   acc_t and *LIST is the (possibly updated) list head.  */
static void
maybe_add_to_list (acc_t **newentry, acc_t **list)
{
  acc_t *a, *l;

  a = *newentry;
  l = *list;

  /* We need an account name in order to add the entry to the list. */
  if (a && ! a->acc)
    {
      /* Free any allocated space.  a->acc is known to be NULL here,
         and host/passwd may be NULL as well (default entry, or no
         password given), so only release what was actually set.  */
      if (a->host)
        xfree (a->host);
      if (a->passwd)
        xfree (a->passwd);
    }
  else
    {
      if (a)
        {
          /* Add the current machine into our list. */
          a->next = l;
          l = a;
        }
      /* Allocate a new acc_t structure. */
      a = (acc_t *)xmalloc (sizeof (acc_t));
    }

  /* Zero the structure, so that it is ready to use. */
  memset (a, 0, sizeof(*a));

  /* Return the new pointers. */
  *newentry = a;
  *list = l;
  return;
}
/* Parser helper: drop the first character of the null-terminated
   STRING by moving the rest of it, terminator included, one position
   to the left.  An empty string is left untouched.  Used when
   processing \ and " constructs in the netrc file.  */
static void
shift_left(char *string)
{
  size_t remaining = strlen (string);

  /* Bytes 1..REMAINING (the last one being the terminator) slide down
     onto bytes 0..REMAINING-1.  */
  if (remaining > 0)
    memmove (string, string + 1, remaining);
}
/* Parse a .netrc file (as described in the ftp(1) manual page).

   PATH names the file to read.  The result is a list of acc_t
   entries in the order in which they appear in the file; entries for
   which no login was given are dropped.  NULL is returned when the
   file cannot be opened (a message is printed to stderr) or when it
   yields no usable entry.

   Tokens are separated by whitespace; a token may be enclosed in
   double quotes, a backslash escapes the following character, and `#'
   starts a comment.  After a "macdef" token everything up to the next
   empty line is skipped.  */
static acc_t *
parse_netrc (const char *path)
{
  FILE *fp;
  char *line, *p, *tok, *premature_token;
  acc_t *current, *retval;
  int ln, quote;

  /* The latest token we've seen in the file. */
  enum
  {
    tok_nothing, tok_account, tok_login, tok_macdef, tok_machine, tok_password
  } last_token = tok_nothing;

  current = retval = NULL;

  fp = fopen (path, "r");
  if (!fp)
    {
      fprintf (stderr, _("%s: Cannot read %s (%s).\n"), exec_name,
               path, strerror (errno));
      return retval;
    }

  /* Initialize the file data. */
  ln = 0;
  premature_token = NULL;

  /* While there are lines in the file... */
  while ((line = read_whole_line (fp)))
    {
      ln ++;

      /* Parse the line. */
      p = line;
      quote = 0;

      /* Skip leading whitespace. */
      while (*p && ISSPACE (*p))
        p ++;

      /* If the line is empty, then end any macro definition. */
      if (last_token == tok_macdef && !*p)
        /* End of macro if the line is empty. */
        last_token = tok_nothing;

      /* If we are defining macros, then skip parsing the line. */
      while (*p && last_token != tok_macdef)
        {
          /* Skip any whitespace. */
          while (*p && ISSPACE (*p))
            p ++;

          /* Discard end-of-line comments; also, stop processing if
             the above `while' merely skipped trailing whitespace. */
          if (*p == '#' || !*p)
            break;

          /* If the token starts with quotation mark, note this fact,
             and squash the quotation character */
          if (*p == '"')
            {
              quote = 1;
              shift_left (p);
            }

          tok = p;

          /* Find the end of the token, handling quotes and escapes. */
          while (*p && (quote ? *p != '"' : !ISSPACE (*p)))
            {
              if (*p == '\\')
                shift_left (p);
              /* A backslash that was the very last character of the
                 line leaves P on the terminating NUL; do not step
                 past the end of the buffer in that case.  */
              if (*p)
                p ++;
            }

          /* If field was quoted, squash the trailing quotation mark
             and reset quote flag. */
          if (quote)
            {
              shift_left (p);
              quote = 0;
            }

          /* Null-terminate the token, if it isn't already. */
          if (*p)
            *p ++ = '\0';

          switch (last_token)
            {
            case tok_login:
              if (current)
                current->acc = xstrdup (tok);
              else
                premature_token = "login";
              break;

            case tok_machine:
              /* Start a new machine entry. */
              maybe_add_to_list (&current, &retval);
              current->host = xstrdup (tok);
              break;

            case tok_password:
              if (current)
                current->passwd = xstrdup (tok);
              else
                premature_token = "password";
              break;

              /* We handle most of tok_macdef above. */
            case tok_macdef:
              if (!current)
                premature_token = "macdef";
              break;

              /* We don't handle the account keyword at all. */
            case tok_account:
              if (!current)
                premature_token = "account";
              break;

              /* We handle tok_nothing below this switch. */
            case tok_nothing:
              break;
            }

          if (premature_token)
            {
              fprintf (stderr,
                       _("%s: %s:%d: warning: \"%s\" token appears before any machine name\n"),
                       exec_name, path, ln, premature_token);
              premature_token = NULL;
            }

          if (last_token != tok_nothing)
            /* We got a value, so reset the token state. */
            last_token = tok_nothing;
          else
            {
              /* Fetch the next token. */
              if (!strcmp (tok, "account"))
                last_token = tok_account;
              else if (!strcmp (tok, "default"))
                {
                  /* The default entry has no host; it starts a new
                     entry right away.  */
                  maybe_add_to_list (&current, &retval);
                }
              else if (!strcmp (tok, "login"))
                last_token = tok_login;
              else if (!strcmp (tok, "macdef"))
                last_token = tok_macdef;
              else if (!strcmp (tok, "machine"))
                last_token = tok_machine;
              else if (!strcmp (tok, "password"))
                last_token = tok_password;
              else
                fprintf (stderr, _("%s: %s:%d: unknown token \"%s\"\n"),
                         exec_name, path, ln, tok);
            }
        }

      xfree (line);
    }

  fclose (fp);

  /* Finalize the last machine entry we found.  This leaves CURRENT
     pointing at a fresh, empty structure that is not needed.  */
  maybe_add_to_list (&current, &retval);
  xfree (current);

  /* Reverse the order of the list so that it appears in file order. */
  current = retval;
  retval = NULL;

  while (current)
    {
      acc_t *saved_reference;

      /* Change the direction of the pointers. */
      saved_reference = current->next;
      current->next = retval;

      /* Advance to the next node. */
      retval = current;
      current = saved_reference;
    }

  return retval;
}
/* Release every node of the netrc list starting at L, together with
   the host, account and password strings each node owns.  A NULL
   list is accepted.  */
void
free_netrc(acc_t *l)
{
  acc_t *following;

  for (; l != NULL; l = following)
    {
      /* Remember the successor before the node goes away.  */
      following = l->next;
      FREE_MAYBE (l->host);
      FREE_MAYBE (l->acc);
      FREE_MAYBE (l->passwd);
      xfree (l);
    }
}
#ifdef STANDALONE
#include <sys/types.h>
#include <sys/stat.h>
/* Standalone driver: parse the netrc file named by the first argument
   and print its entries.  Without a second argument every entry is
   printed as "HOST ACCOUNT [PASSWORD]" ("DEFAULT" standing in for the
   default entry).  With a HOSTNAME argument, only the account (and
   password) of the first entry that matches it, or that has no host,
   is printed; the exit status is then 0 if such an entry was found
   and 1 otherwise.  */
int
main (int argc, char **argv)
{
  struct stat sb;
  char *program_name, *file, *target;
  acc_t *entry;

  if (argc < 2 || argc > 3)
    {
      fprintf (stderr, _("Usage: %s NETRC [HOSTNAME]\n"), argv[0]);
      exit (1);
    }

  program_name = argv[0];
  file = argv[1];
  target = argv[2];

  if (stat (file, &sb) != 0)
    {
      fprintf (stderr, _("%s: cannot stat %s: %s\n"), program_name, file,
               strerror (errno));
      exit (1);
    }

  for (entry = parse_netrc (file); entry != NULL; entry = entry->next)
    {
      /* Skip if we have a target and this isn't it.  */
      if (target != NULL && entry->host != NULL
          && strcmp (target, entry->host) != 0)
        continue;

      if (target == NULL)
        {
          /* Print the host name if we have no target.  */
          fputs (entry->host != NULL ? entry->host : "DEFAULT", stdout);
          fputc (' ', stdout);
        }

      /* Print the account name.  */
      fputs (entry->acc, stdout);

      if (entry->passwd != NULL)
        {
          /* Print the password, if there is any.  */
          fputc (' ', stdout);
          fputs (entry->passwd, stdout);
        }
      fputc ('\n', stdout);

      /* Exit if we found the target.  */
      if (target != NULL)
        exit (0);
    }

  /* Exit with failure if we had a target, success otherwise.  */
  exit (target != NULL ? 1 : 0);
}
#endif /* STANDALONE */
--- NEW FILE: gen_sslfunc.h ---
/* SSL support.
Copyright (C) 2000 Free Software Foundation, Inc.
Contributed by Christian Fraenkel.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Include guard for the SSL helper declarations.  */
#ifndef GEN_SSLFUNC_H
#define GEN_SSLFUNC_H
/* NOTE(review): the OpenSSL header is pulled in only with HAVE_SSL,
while the prototypes below use its types unconditionally -- confirm
that this header is only included from HAVE_SSL code.  */
#ifdef HAVE_SSL
# include <openssl/ssl.h>
#endif
/* Seed the OpenSSL random number generator.  */
void ssl_init_prng PARAMS ((void));
/* Create and configure an SSL context.  NOTE(review): the definition
in gen_sslfunc.c is written with return type uerr_t, not int.  */
int init_ssl PARAMS ((SSL_CTX **));
/* Create an SSL object for a context and perform the handshake on the
given descriptor; 0 means success.  */
int connect_ssl PARAMS ((SSL **, SSL_CTX *, int));
/* Shut down and free an SSL connection; NULL is ignored.  */
void shutdown_ssl PARAMS ((SSL*));
/* Free an SSL context.  */
void free_ssl_ctx PARAMS ((SSL_CTX *));
/* Verification callback passed to SSL_CTX_set_verify.  */
int verify_callback PARAMS ((int, X509_STORE_CTX *));
/* Read from / write to an SSL connection (connection, buffer,
length).  */
int ssl_iread PARAMS ((SSL *, char *, int));
int ssl_iwrite PARAMS ((SSL *, char *, int));
/* Pass queued OpenSSL errors to the debug log; returns how many.  */
int ssl_printerrors PARAMS ((void));
#endif /* GEN_SSLFUNC_H */
--- NEW FILE: connect.h ---
/* Declarations for connect.
Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Include guard for the connection declarations.  */
#ifndef CONNECT_H
#define CONNECT_H
/* host.h is included for the address types used in the prototypes
below (presumably ip_address and struct address_list -- confirm).  */
#include "host.h"
/* Function declarations */
int connect_to_one PARAMS ((ip_address *, unsigned short, int));
int connect_to_many PARAMS ((struct address_list *, unsigned short, int));
void set_connection_host_name PARAMS ((const char *));
int test_socket_open PARAMS ((int));
/* Callers in gen_sslfunc.c pass (descriptor, opt.read_timeout, flag),
with flag 0 before reading and 1 before writing, and treat a result
<= 0 as failure.  */
int select_fd PARAMS ((int, double, int));
uerr_t bindport PARAMS ((unsigned short *, int));
uerr_t acceptport PARAMS ((int *));
void closeport PARAMS ((int));
int conaddr PARAMS ((int, ip_address *));
/* Plain-descriptor read and write; ssl_iread and ssl_iwrite in
gen_sslfunc.c are described there as the SSL versions of these.  */
int iread PARAMS ((int, char *, int));
int iwrite PARAMS ((int, char *, int));
#endif /* CONNECT_H */
--- NEW FILE: url.c ---
/* URL handling.
Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...2102 lines suppressed...]
/* Now run all the tests with a leading slash before the test case,
to prove that the slash is being preserved. */
for (i = 0; i < countof (tests); i++)
{
char *test, *expected_result;
int expected_change = tests[i].should_modify;
test = xmalloc (1 + strlen (tests[i].test) + 1);
sprintf (test, "/%s", tests[i].test);
expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
sprintf (expected_result, "/%s", tests[i].result);
run_test (test, expected_result, expected_change);
xfree (test);
xfree (expected_result);
}
}
#endif
--- NEW FILE: netrc.h ---
/* Declarations for netrc.c
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* Include guard for the netrc declarations.  */
#ifndef NETRC_H
#define NETRC_H
/* One account entry read from a netrc file; entries are chained into
a singly linked list through NEXT.  */
typedef struct _acc_t
{
char *host; /* NULL if this is the default machine
entry. */
char *acc; /* Account name, taken from the "login" token. */
char *passwd; /* NULL if there is no password. */
struct _acc_t *next; /* Next entry, or NULL at the end of the list. */
} acc_t;
/* Look up account and password for a host: (host, in/out account,
in/out password, whether the default entry may be used).  */
void search_netrc PARAMS((const char *, const char **, const char **, int));
/* Free a list of entries together with the strings they hold.  */
void free_netrc PARAMS((acc_t *l));
#endif /* NETRC_H */
--- NEW FILE: hash.c ---
/* Hash tables.
Copyright (C) 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <stdlib.h>
#include <assert.h>
#include "wget.h"
#include "utils.h"
#include "hash.h"
#ifdef STANDALONE
/* In the standalone build, map the allocation wrappers straight onto
   the C library.  */
# undef xmalloc
# undef xrealloc
# undef xfree
# define xmalloc malloc
# define xrealloc realloc
# define xfree free
# undef TOLOWER
/* ASCII-only lower-casing: 'A'..'Z' are moved up by the distance
   between the two cases ('a' - 'A'); everything else is returned
   unchanged.  Note that X is evaluated more than once.  */
# define TOLOWER(x) ('A' <= (x) && (x) <= 'Z' ? (x) + ('a' - 'A') : (x))
#endif
/* INTERFACE:
Hash tables are an implementation technique used to implement
mapping between objects. Assuming a good hashing function is used,
they provide near-constant-time access and storing of information.
Duplicate keys are not allowed.
This file defines the following entry points: hash_table_new
creates a hash table, and hash_table_destroy deletes it.
hash_table_put establishes a mapping between a key and a value.
hash_table_get retrieves the value that corresponds to a key.
hash_table_contains queries whether a key is stored in a table at
all. hash_table_remove removes a mapping that corresponds to a
key. hash_table_map allows you to map through all the entries in a
hash table. hash_table_clear clears all the entries from the hash
table.
The number of mappings in a table is not limited, except by the
amount of memory. As you add new elements to a table, it regrows
as necessary. If you have an idea about how many elements you will
store, you can provide a hint to hash_table_new().
The hashing and equality functions depend on the type of key and
are normally provided by the user. For the special (and frequent)
case of using string keys, you can use the pre-canned
make_string_hash_table(), which provides an efficient string
hashing function, and a string equality wrapper around strcmp().
When specifying your hash and test functions, make sure the
following holds true:
- The test function returns non-zero for keys that are considered
"equal", zero otherwise.
- The hash function returns a number that represents the
"distinctness" of the object. In more precise terms, it means
that for any two objects that test "equal" under the test
function, the hash function MUST produce the same result.
This does not mean that each distinct object must produce a
distinct value, only that non-distinct objects must produce the
same values! For instance, a hash function that returns 0 for
any given object is a perfectly valid (albeit extremely bad) hash
function. A hash function that hashes a string by adding up all
its characters is another example of a valid (but quite bad) hash
function.
The above stated rule is quite easy to enforce. For example, if
your testing function compares strings case-insensitively, all
your function needs to do is lower-case the string characters
before calculating a hash. That way you have easily guaranteed
that case differences will not result in a different hash.
- If you care about performance, choose a hash function with as
good "spreading" as possible. A good hash function will react to
even a small change in its input with a completely different
resulting hash. Finally, don't make the hash function itself
overly slow, because you'll be incurring a non-negligible
overhead to reads and writes to the hash table.
Note that neither keys nor values are copied when inserted into the
hash table, so they must exist for the lifetime of the table. This
means that e.g. the use of static strings is OK, but objects with a
shorter life-time need to be copied (with strdup() or the like in
the case of strings) before being inserted. */
/* IMPLEMENTATION:
All the hash mappings (key-value pairs of pointers) are stored in a
contiguous array. The position of each mapping is determined by
the hash value of its key and the size of the table: location :=
hash(key) % size. If two different keys end up on the same
position (hash collision), the one that came second is placed at
the next empty position following the occupied place. This
collision resolution technique is called "linear probing".
There are more advanced collision resolution mechanisms (quadratic
probing, double hashing), but we don't use them because they incur
more non-sequential access to the array, which results in worse
cache behavior. Linear probing works well as long as the
fullness/size ratio is kept below 75%. We make sure to regrow or
rehash the hash table whenever this threshold is exceeded.
Collisions make deletion tricky because finding collisions again
relies on new empty spots not being created. That's why
hash_table_remove is careful to rehash the mappings that follow the
deleted one. */
/* When hash table's fullness exceeds this threshold, the hash table
is resized. */
#define HASH_FULLNESS_THRESHOLD 0.75
/* The hash table size is multiplied by this factor with each resize.
This guarantees infrequent resizes. */
#define HASH_RESIZE_FACTOR 2
/* One slot of the table: a key pointer and the value pointer stored
under it.  A slot whose key is NULL counts as empty (see NON_EMPTY).  */
struct mapping {
void *key;
void *value;
};
/* The table itself: the user-supplied hashing and comparison
functions, bookkeeping counters, and the array of slots.  */
struct hash_table {
unsigned long (*hash_function) PARAMS ((const void *));
int (*test_function) PARAMS ((const void *, const void *));
int size; /* size of the array */
int count; /* number of non-empty, non-deleted
fields. */
int resize_threshold; /* after size exceeds this number of
entries, resize the table. */
int prime_offset; /* the offset of the current prime in
the prime table. */
struct mapping *mappings; /* the array of mapping pairs. */
};
/* We use NULL key to mark a mapping as empty.  It is consequently
   illegal to store NULL keys.  MP may be any pointer expression.  */
#define NON_EMPTY(mp) ((mp)->key != NULL)
/* "Next" mapping is the mapping after MP, but wrapping back to
   MAPPINGS when MP would reach MAPPINGS+SIZE.  The arguments are
   parenthesized so that expressions can be passed; MP and MAPPINGS
   are evaluated more than once.  */
#define NEXT_MAPPING(mp, mappings, size) ((mp) != (mappings) + ((size) - 1) \
                                          ? (mp) + 1 : (mappings))
/* Loop over non-empty mappings starting at MP.  MP must be an lvalue;
   it is advanced by the loop and, when the loop runs to completion,
   is left pointing at the first empty mapping found.  */
#define LOOP_NON_EMPTY(mp, mappings, size) \
  for (; NON_EMPTY (mp); (mp) = NEXT_MAPPING (mp, mappings, size))
/* #### We might want to multiply with the "golden ratio" here to get
   better randomness for keys that do not result from a good hash
   function.  This is currently not a problem in Wget because we only
   use the string hash tables.  */
/* Index of the slot where KEY belongs in table HT: its hash value
   modulo the table size.  */
#define HASH_POSITION(ht, key) ((ht)->hash_function (key) % (ht)->size)
/* Find a prime near, but greater than or equal to SIZE. Of course,
the primes are not calculated, but looked up from a table. The
table does not contain all primes in range, just a selection useful
for this purpose.
PRIME_OFFSET is a minor optimization: if specified, it starts the
search for the prime number beginning with the specific offset in
the prime number table. The final offset is stored in the same
variable. */
/* Look up the first tabulated prime that is not smaller than SIZE,
   scanning the table from index *PRIME_OFFSET onward.  On success the
   index following the returned prime is stored back to *PRIME_OFFSET.
   If the table is exhausted the program is aborted. */
static int
prime_size (int size, int *prime_offset)
{
  static const unsigned long primes [] = {
    13, 19, 29, 41, 59, 79, 107, 149, 197, 263, 347, 457, 599, 787, 1031,
    1361, 1777, 2333, 3037, 3967, 5167, 6719, 8737, 11369, 14783,
    19219, 24989, 32491, 42257, 54941, 71429, 92861, 120721, 156941,
    204047, 265271, 344857, 448321, 582821, 757693, 985003, 1280519,
    1664681, 2164111, 2813353, 3657361, 4754591, 6180989, 8035301,
    10445899, 13579681, 17653589, 22949669, 29834603, 38784989,
    50420551, 65546729, 85210757, 110774011, 144006217, 187208107,
    243370577, 316381771, 411296309, 534685237, 695090819, 903618083,
    1174703521, 1527114613, 1985248999,
    (unsigned long)0x99d43ea5, (unsigned long)0xc7fa5177
  };
  int idx = *prime_offset;

  while (idx < countof (primes))
    {
      if (primes[idx] >= size)
	{
	  /* Remember the *following* slot: the next request is
	     expected to be for a larger SIZE, so this prime can never
	     be the answer again.  A caller for which that does not
	     hold can reset *prime_offset itself. */
	  *prime_offset = idx + 1;
	  return primes[idx];
	}
      ++idx;
    }

  /* No tabulated prime is large enough. */
  abort ();
  return 0;
}
/* Create a hash table with hash function HASH_FUNCTION and test
function TEST_FUNCTION. The table is empty (its count is 0), but
pre-allocated to store at least ITEMS items.
ITEMS is the number of items that the table can accept without
needing to resize. It is useful when creating a table that is to
be immediately filled with a known number of items. In that case,
the regrows are a waste of time, and specifying ITEMS correctly
will avoid them altogether.
Note that hash tables grow dynamically regardless of ITEMS. The
only use of ITEMS is to preallocate the table and avoid unnecessary
dynamic regrows. Don't bother making ITEMS prime because it is not
used as the table size directly. To start with a small table that grows as
needed, simply specify zero ITEMS.
If HASH_FUNCTION is not provided, identity table is assumed,
i.e. key pointers are compared as keys. If you want strings with
equal contents to hash the same, use make_string_hash_table. */
/* Allocate and initialize an empty hash table able to hold ITEMS
   entries before its first regrow.  A NULL HASH_FUNCTION or
   TEST_FUNCTION selects ptrhash or ptrcmp respectively. */
struct hash_table *
hash_table_new (int items,
		unsigned long (*hash_function) (const void *),
		int (*test_function) (const void *, const void *))
{
  struct hash_table *table
    = (struct hash_table *)xmalloc (sizeof (struct hash_table));
  int wanted, nslots;

  if (hash_function)
    table->hash_function = hash_function;
  else
    table->hash_function = ptrhash;

  if (test_function)
    table->test_function = test_function;
  else
    table->test_function = ptrcmp;

  /* Pick an array size large enough that ITEMS entries stay under the
     fullness threshold, rounded up to a prime from the table. */
  table->prime_offset = 0;
  wanted = 1 + items / HASH_FULLNESS_THRESHOLD;
  nslots = prime_size (wanted, &table->prime_offset);

  table->size = nslots;
  table->resize_threshold = nslots * HASH_FULLNESS_THRESHOLD;
  /*assert (table->resize_threshold >= items);*/

  /* All-zero slots have NULL keys, i.e. they are empty. */
  table->mappings = xmalloc (nslots * sizeof (struct mapping));
  memset (table->mappings, '\0', nslots * sizeof (struct mapping));
  table->count = 0;

  return table;
}
/* Free the data associated with hash table HT. */
/* Release the mapping array and then the table structure itself.
   Keys and values stored in the table are not touched. */
void
hash_table_destroy (struct hash_table *ht)
{
  struct mapping *slots = ht->mappings;

  xfree (slots);
  xfree (ht);
}
/* The heart of most functions in this file -- find the mapping whose
KEY is equal to key, using linear probing. Returns the mapping
that matches KEY, or the first empty mapping if none matches. */
/* Probe linearly from KEY's home position until either a slot whose
   key tests equal to KEY or an empty slot is reached, and return that
   slot.  Callers distinguish the two cases with NON_EMPTY. */
static inline struct mapping *
find_mapping (const struct hash_table *ht, const void *key)
{
  struct mapping *base = ht->mappings;
  int nslots = ht->size;
  struct mapping *slot = base + HASH_POSITION (ht, key);

  while (NON_EMPTY (slot) && !ht->test_function (key, slot->key))
    slot = NEXT_MAPPING (slot, base, nslots);

  return slot;
}
/* Get the value that corresponds to the key KEY in the hash table HT.
If no value is found, return NULL. Note that NULL is a legal value
for value; if you are storing NULLs in your hash table, you can use
hash_table_contains to be sure that a (possibly NULL) value exists
in the table. Or, you can use hash_table_get_pair instead of this
function. */
/* Return the value stored under KEY in HT, or NULL when KEY is not
   present.  A stored NULL value is indistinguishable from absence
   here. */
void *
hash_table_get (const struct hash_table *ht, const void *key)
{
  struct mapping *slot = find_mapping (ht, key);

  return NON_EMPTY (slot) ? slot->value : NULL;
}
/* Like hash_table_get, but writes out the pointers to both key and
value. Returns non-zero on success. */
/* Look LOOKUP_KEY up in HT.  If found, store the key pointer actually
   held by the table through ORIG_KEY and the value through VALUE
   (both are treated as `void **' and either may be NULL to skip it),
   and return 1.  Return 0 if the key is absent. */
int
hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
		     void *orig_key, void *value)
{
  struct mapping *slot = find_mapping (ht, lookup_key);

  if (!NON_EMPTY (slot))
    return 0;

  if (orig_key)
    *(void **)orig_key = slot->key;
  if (value)
    *(void **)value = slot->value;
  return 1;
}
/* Return 1 if HT contains KEY, 0 otherwise. */
/* Tell whether KEY is present in HT: non-zero if so, zero if not. */
int
hash_table_contains (const struct hash_table *ht, const void *key)
{
  struct mapping *slot = find_mapping (ht, key);

  /* find_mapping lands on an empty slot exactly when KEY is absent. */
  return NON_EMPTY (slot);
}
/* Grow hash table HT as necessary, and rehash all the key-value
mappings. */
/* Replace HT's mapping array with a larger one (next suitable prime
   at least HASH_RESIZE_FACTOR times the old size) and re-insert every
   existing mapping at its new position. */
static void
grow_hash_table (struct hash_table *ht)
{
  struct mapping *old_slots = ht->mappings;
  struct mapping *old_limit = old_slots + ht->size;
  struct mapping *src, *fresh;
  int grown = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);

#if 0
  printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
	  ht->size, grown,
	  100.0 * ht->count / ht->size,
	  100.0 * ht->count / grown);
#endif

  /* HASH_POSITION reads ht->size, so the table must describe the new
     array before any mapping is re-inserted. */
  ht->size = grown;
  ht->resize_threshold = grown * HASH_FULLNESS_THRESHOLD;

  fresh = xmalloc (grown * sizeof (struct mapping));
  memset (fresh, '\0', grown * sizeof (struct mapping));
  ht->mappings = fresh;

  for (src = old_slots; src < old_limit; src++)
    {
      struct mapping *dst;

      if (!NON_EMPTY (src))
	continue;

      /* Keys coming out of the old array are already unique, so it
	 suffices to find the first free slot in the probe sequence. */
      dst = fresh + HASH_POSITION (ht, src->key);
      while (NON_EMPTY (dst))
	dst = NEXT_MAPPING (dst, fresh, grown);
      *dst = *src;
    }

  xfree (old_slots);
}
/* Put VALUE in the hash table HT under the key KEY. This regrows the
table if necessary. */
/* Store VALUE under KEY in HT.  If an equal key is already present,
   both its key pointer and its value are overwritten; otherwise a new
   entry is added, growing the table first when the count has reached
   the resize threshold. */
void
hash_table_put (struct hash_table *ht, const void *key, void *value)
{
  struct mapping *slot = find_mapping (ht, key);

  if (!NON_EMPTY (slot))
    {
      /* New entry.  Growing relocates everything, so the target slot
	 has to be looked up again afterwards. */
      if (ht->count >= ht->resize_threshold)
	{
	  grow_hash_table (ht);
	  slot = find_mapping (ht, key);
	}
      ++ht->count;
    }

  /* Same stores for both the update and the insert case. */
  slot->key = (void *)key;	/* const? */
  slot->value = value;
}
/* Remove a mapping that matches KEY from HT. Return 0 if there was
no such entry; return 1 if an entry was removed. */
/* Delete the entry matching KEY from HT.  Returns 1 if an entry was
   deleted and 0 if KEY was not present.

   Deleting creates a hole in a probe sequence, so every mapping in
   the run of occupied slots following the hole is re-examined and, if
   it is no longer reachable from its home position, moved to the
   first free slot on its probe path.  (The alternative, tombstones,
   would make removal cheaper but slow down lookups and insertions.) */
int
hash_table_remove (struct hash_table *ht, const void *key)
{
  struct mapping *base = ht->mappings;
  int nslots = ht->size;
  struct mapping *slot = find_mapping (ht, key);

  if (!NON_EMPTY (slot))
    return 0;

  slot->key = NULL;
  --ht->count;

  for (slot = NEXT_MAPPING (slot, base, nslots);
       NON_EMPTY (slot);
       slot = NEXT_MAPPING (slot, base, nslots))
    {
      const void *moving = slot->key;
      struct mapping *probe = base + HASH_POSITION (ht, moving);

      /* Walk the probe path of MOVING; stop at the entry itself
	 (pointer identity is enough, the key came from the table) or
	 at the first empty slot. */
      while (NON_EMPTY (probe) && probe->key != moving)
	probe = NEXT_MAPPING (probe, base, nslots);

      if (!NON_EMPTY (probe))
	{
	  /* The entry was cut off by a hole: relocate it and leave a
	     new hole behind. */
	  *probe = *slot;
	  slot->key = NULL;
	}
    }

  return 1;
}
/* Clear HT of all entries. After calling this function, the count
and the fullness of the hash table will be zero. The size will
remain unchanged. */
/* Empty HT without changing the size of its array: the count is reset
   and every slot is zeroed, which marks it empty. */
void
hash_table_clear (struct hash_table *ht)
{
  ht->count = 0;
  memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
}
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
called with three arguments: the key, the value, and MAPARG.
It is undefined what happens if you add or remove entries in the
hash table while hash_table_map is running. The exception is the
entry you're currently mapping over; you may remove or change that
entry. */
/* Call MAPFUN (key, value, MAPARG) for each occupied slot of HT, in
   array order.  Iteration stops as soon as MAPFUN returns non-zero. */
void
hash_table_map (struct hash_table *ht,
		int (*mapfun) (void *, void *, void *),
		void *maparg)
{
  struct mapping *slot;
  struct mapping *limit = ht->mappings + ht->size;

  for (slot = ht->mappings; slot < limit; slot++)
    {
      void *seen;

      if (!NON_EMPTY (slot))
	continue;

      /* If the callback removed the current entry, hash_table_remove
	 may have shifted a different mapping into this very slot;
	 keep visiting the slot until its key stops changing or it
	 becomes empty. */
      do
	{
	  seen = slot->key;
	  if (mapfun (seen, slot->value, maparg))
	    return;
	}
      while (slot->key != seen && NON_EMPTY (slot));
    }
}
/* Return the number of elements in the hash table. This is not the
same as the physical size of the hash table, which is always
greater than the number of elements. */
/* Report how many entries HT currently holds (its COUNT field, not
   the size of the underlying array). */
int
hash_table_count (const struct hash_table *ht)
{
  int entries = ht->count;

  return entries;
}
/* Functions from this point onward are meant for convenience and
don't strictly belong to this file. However, this is as good a
place for them as any. */
/*
* Support for hash tables whose keys are strings.
*
*/
/* 31 bit hash function. Taken from Gnome's glib, modified to use
standard C types.
We used to use the popular hash function from the Dragon Book, but
this one seems to perform much better. */
/* Hash the NUL-terminated string KEY: start from the first character
   and, for each following one, compute h = h * 31 + c (written as a
   shift and a subtraction) in `unsigned int' arithmetic.  The empty
   string hashes to 0. */
unsigned long
string_hash (const void *key)
{
  const char *s = key;
  unsigned int acc = *s;

  if (acc == 0)
    return acc;

  while (*++s != '\0')
    acc = (acc << 5) - acc + *s;

  return acc;
}
/* Frontend for strcmp usable for hash tables. */
/* Equality predicate for string keys: 1 when S1 and S2 have identical
   contents according to strcmp, 0 otherwise. */
int
string_cmp (const void *s1, const void *s2)
{
  const char *left = (const char *)s1;
  const char *right = (const char *)s2;

  return strcmp (left, right) == 0;
}
/* Return a hash table preallocated to store at least ITEMS items and
suitable for use with strings as keys. */
/* Create a hash table keyed by string contents (string_hash and
   string_cmp), sized for ITEMS entries. */
struct hash_table *
make_string_hash_table (int items)
{
  struct hash_table *table = hash_table_new (items, string_hash, string_cmp);

  return table;
}
/*
* Support for hash tables whose keys are strings, but which are
* compared case-insensitively.
*
*/
/* Like string_hash, but produce the same hash regardless of the case. */
/* Same recurrence as string_hash, but every character is passed
   through TOLOWER first, so strings differing only in case hash
   identically. */
static unsigned long
string_hash_nocase (const void *key)
{
  const char *s = key;
  unsigned int acc = TOLOWER (*s);

  if (acc == 0)
    return acc;

  for (++s; *s != '\0'; ++s)
    acc = (acc << 5) - acc + TOLOWER (*s);

  return acc;
}
/* Like string_cmp, but doing case-insensitive comparison. */
/* Equality predicate for case-insensitive string keys: 1 when
   strcasecmp considers S1 and S2 equal, 0 otherwise. */
static int
string_cmp_nocase (const void *s1, const void *s2)
{
  const char *left = (const char *)s1;
  const char *right = (const char *)s2;

  return strcasecmp (left, right) == 0;
}
/* Like make_string_hash_table, but uses string_hash_nocase and
string_cmp_nocase. */
/* Create a hash table keyed by strings compared without regard to
   case (string_hash_nocase and string_cmp_nocase), sized for ITEMS
   entries. */
struct hash_table *
make_nocase_string_hash_table (int items)
{
  struct hash_table *table
    = hash_table_new (items, string_hash_nocase, string_cmp_nocase);

  return table;
}
/* Hashing of pointers. Used for hash tables that are keyed by
pointer identity. (Common Lisp calls them EQ hash tables, and Java
calls them IdentityHashMaps.) */
/* Hash a pointer by identity: the pointer value is converted to
   `unsigned long' and scrambled by a fixed sequence of
   "add left-shifted copy, xor right-shifted copy" rounds.  When
   SIZEOF_LONG is greater than 4, a second sequence with wider shifts
   is applied as well. */
unsigned long
ptrhash (const void *ptr)
{
  /* Each row is { left shift for +=, right shift for ^= }. */
  static const unsigned char rounds[][2] = {
    { 12, 22 }, { 4, 9 }, { 10, 2 }, { 7, 12 }
  };
#if SIZEOF_LONG > 4
  static const unsigned char wide_rounds[][2] = {
    { 44, 54 }, { 36, 41 }, { 42, 34 }, { 39, 44 }
  };
#endif
  unsigned long mix = (unsigned long)ptr;
  unsigned int r;

  for (r = 0; r < sizeof (rounds) / sizeof (rounds[0]); r++)
    {
      mix += (mix << rounds[r][0]);
      mix ^= (mix >> rounds[r][1]);
    }
#if SIZEOF_LONG > 4
  for (r = 0; r < sizeof (wide_rounds) / sizeof (wide_rounds[0]); r++)
    {
      mix += (mix << wide_rounds[r][0]);
      mix ^= (mix >> wide_rounds[r][1]);
    }
#endif
  return mix;
}
/* Equality predicate for identity-keyed tables: 1 when both pointers
   are the same address, 0 otherwise. */
int
ptrcmp (const void *ptr1, const void *ptr2)
{
  if (ptr1 == ptr2)
    return 1;
  return 0;
}
#if 0
/* Currently unused: hashing of integers. */
/* Scramble the integer KEY with the same four
   "add left-shifted copy, xor right-shifted copy" rounds that ptrhash
   starts with, carried out in `unsigned int' arithmetic. */
unsigned long
inthash (unsigned int key)
{
  /* Each row is { left shift for +=, right shift for ^= }. */
  static const unsigned char rounds[][2] = {
    { 12, 22 }, { 4, 9 }, { 10, 2 }, { 7, 12 }
  };
  unsigned int r;

  for (r = 0; r < sizeof (rounds) / sizeof (rounds[0]); r++)
    {
      key += (key << rounds[r][0]);
      key ^= (key >> rounds[r][1]);
    }
  return key;
}
#endif
#ifdef STANDALONE
#include <stdio.h>
#include <string.h>
/* hash_table_map callback for the standalone test: print one
   "key: value" line (both treated as strings) and bump the int that
   COUNT points to.  Always returns 0 so the mapping continues. */
int
print_hash_table_mapper (void *key, void *value, void *count)
{
  int *seen = (int *)count;

  *seen += 1;
  printf ("%s: %s\n", (const char *)key, (char *)value);
  return 0;
}
/* Dump every mapping of SHT to stdout and check that the number of
   mappings visited agrees with the table's count. */
void
print_hash (struct hash_table *sht)
{
  int visited = 0;

  hash_table_map (sht, print_hash_table_mapper, &visited);
  assert (visited == sht->count);
}
/* Standalone exerciser: read lines from stdin, insert each previously
   unseen line as a key, remove again those whose (trimmed) length is
   a multiple of 5, and finally print the table's count and size. */
int
main (void)
{
  struct hash_table *table = make_string_hash_table (0);
  char buf[80];

  while ((fgets (buf, sizeof (buf), stdin)))
    {
      int n = strlen (buf);

      /* Skip inputs of at most one character (e.g. a bare newline). */
      if (n > 1)
	{
	  /* Drop the last character read. */
	  buf[--n] = '\0';

	  if (!hash_table_contains (table, buf))
	    hash_table_put (table, strdup (buf), "here I am!");
#if 1
	  if (n % 5 == 0)
	    {
	      char *stored_key;

	      /* Fetch the table's own copy of the key so it can be
		 freed once the entry is gone. */
	      if (hash_table_get_pair (table, buf, &stored_key, NULL))
		{
		  hash_table_remove (table, buf);
		  xfree (stored_key);
		}
	    }
#endif
	}
    }
#if 0
  print_hash (table);
#endif
#if 1
  printf ("%d %d\n", table->count, table->size);
#endif
  return 0;
}
#endif
--- NEW FILE: recur.c ---
/* Handling of recursive HTTP retrieving.
Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include "wget.h"
#include "url.h"
#include "recur.h"
#include "utils.h"
#include "retr.h"
#include "ftp.h"
#include "host.h"
#include "hash.h"
#include "res.h"
#include "convert.h"
#ifndef errno
extern int errno;
#endif
extern char *version_string;
extern LARGE_INT total_downloaded_bytes;
extern struct hash_table *dl_url_file_map;
extern struct hash_table *downloaded_html_set;
/* Functions for maintaining the URL queue. */
/* One pending download in the URL queue.  Elements form a singly
   linked list through NEXT.  retrieve_tree stores heap-allocated
   copies in URL and REFERER (REFERER may be NULL) and frees them
   itself after dequeuing. */
struct queue_element {
const char *url; /* the URL to download */
const char *referer; /* the referring document */
int depth; /* the depth */
unsigned int html_allowed :1; /* whether the document is allowed to
be treated as HTML. */
struct queue_element *next; /* next element in queue */
};
/* FIFO queue of URLs: elements are appended at TAIL and removed from
   HEAD.  COUNT is the current number of elements; MAXCOUNT is the
   largest value COUNT has reached (used only in debug output). */
struct url_queue {
struct queue_element *head;
struct queue_element *tail;
int count, maxcount;
};
/* Create a URL queue. */
/* Allocate a URL queue with all fields zeroed, i.e. no elements and
   zero counters. */
static struct url_queue *
url_queue_new (void)
{
  struct url_queue *fresh = xmalloc (sizeof (*fresh));

  memset (fresh, '\0', sizeof (*fresh));
  return fresh;
}
/* Delete a URL queue. */
/* Free the queue structure itself.  Elements still linked into the
   queue are not released here; the caller must drain it first. */
static void
url_queue_delete (struct url_queue *queue)
{
  xfree (queue);
}
/* Enqueue a URL in the queue. The queue is FIFO: the items will be
retrieved ("dequeued") from the queue in the order they were placed
into it. */
/* Append a new element carrying URL, REFERER, DEPTH and HTML_ALLOWED
   to the tail of QUEUE.  The URL and REFERER pointers are stored as
   given, not copied.  Also maintains the element count and its
   high-water mark. */
static void
url_enqueue (struct url_queue *queue,
	     const char *url, const char *referer, int depth, int html_allowed)
{
  struct queue_element *node = xmalloc (sizeof (*node));

  node->next = NULL;
  node->url = url;
  node->referer = referer;
  node->depth = depth;
  node->html_allowed = html_allowed;

  queue->count += 1;
  if (queue->maxcount < queue->count)
    queue->maxcount = queue->count;

  DEBUGP (("Enqueuing %s at depth %d\n", url, depth));
  DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));

  /* Link behind the current tail, if there is one, and make the new
     node the tail; an empty queue also gets it as its head. */
  if (queue->tail != NULL)
    queue->tail->next = node;
  queue->tail = node;
  if (queue->head == NULL)
    queue->head = node;
}
/* Take a URL out of the queue. Return 1 if this operation succeeded,
or 0 if the queue is empty. */
/* Detach the element at the head of QUEUE, hand its fields back
   through URL, REFERER, DEPTH and HTML_ALLOWED, and free the element.
   Returns 1 on success and 0 (leaving the outputs untouched) when the
   queue is empty. */
static int
url_dequeue (struct url_queue *queue,
	     const char **url, const char **referer, int *depth,
	     int *html_allowed)
{
  struct queue_element *first = queue->head;

  if (first == NULL)
    return 0;

  /* Unlink; removing the last element also clears the tail. */
  queue->head = first->next;
  if (queue->head == NULL)
    queue->tail = NULL;

  *url = first->url;
  *referer = first->referer;
  *depth = first->depth;
  *html_allowed = first->html_allowed;

  queue->count -= 1;

  DEBUGP (("Dequeuing %s at depth %d\n", first->url, first->depth));
  DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));

  xfree (first);
  return 1;
}
/* Forward declarations of the two filtering predicates defined after
   retrieve_tree; both return non-zero when the URL in question may be
   followed. */
static int download_child_p PARAMS ((const struct urlpos *, struct url *, int,
struct url *, struct hash_table *));
static int descend_redirect_p PARAMS ((const char *, const char *, int,
struct url *, struct hash_table *));
/* Retrieve a part of the web beginning with START_URL. This used to
be called "recursive retrieval", because the old function was
recursive and implemented depth-first search. retrieve_tree on the
other hand implements breadth-first traversal of the tree, which
results in much nicer ordering of downloads.
The algorithm this function uses is simple:
1. put START_URL in the queue.
2. while there are URLs in the queue:
3. get next URL from the queue.
4. download it.
5. if the URL is HTML and its depth does not exceed maximum depth,
get the list of URLs embedded therein.
6. for each of those URLs do the following:
7. if the URL is not one of those downloaded before, and if it
satisfies the criteria specified by the various command-line
options, add it to the queue. */
uerr_t
retrieve_tree (const char *start_url)
{
uerr_t status = RETROK;
/* The queue of URLs we need to load. */
struct url_queue *queue;
/* The URLs we do not wish to enqueue, because they are already in
the queue, but haven't been downloaded yet. */
struct hash_table *blacklist;
int up_error_code;
struct url *start_url_parsed = url_parse (start_url, &up_error_code);
if (!start_url_parsed)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url,
url_error (up_error_code));
return URLERROR;
}
queue = url_queue_new ();
blacklist = make_string_hash_table (0);
/* Enqueue the starting URL. Use start_url_parsed->url rather than
just URL so we enqueue the canonical form of the URL. */
url_enqueue (queue, xstrdup (start_url_parsed->url), NULL, 0, 1);
string_set_add (blacklist, start_url_parsed->url);
while (1)
{
int descend = 0;
char *url, *referer, *file = NULL;
int depth, html_allowed;
boolean dash_p_leaf_HTML = FALSE;
if (opt.quota && total_downloaded_bytes > opt.quota)
break;
if (status == FWRITEERR)
break;
/* Get the next URL from the queue... */
if (!url_dequeue (queue,
(const char **)&url, (const char **)&referer,
&depth, &html_allowed))
break;
/* ...and download it. Note that this download is in most cases
unconditional, as download_child_p already makes sure a file
doesn't get enqueued twice -- and yet this check is here, and
not in download_child_p. This is so that if you run `wget -r
URL1 URL2', and a random URL is encountered once under URL1
and again under URL2, but at a different (possibly smaller)
depth, we want the URL's children to be taken into account
the second time. */
if (dl_url_file_map && hash_table_contains (dl_url_file_map, url))
{
file = xstrdup (hash_table_get (dl_url_file_map, url));
DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
url, file));
if (html_allowed
&& downloaded_html_set
&& string_set_contains (downloaded_html_set, file))
descend = 1;
}
else
{
int dt = 0;
char *redirected = NULL;
int oldrec = opt.recursive;
opt.recursive = 0;
status = retrieve_url (url, &file, &redirected, referer, &dt);
opt.recursive = oldrec;
if (html_allowed && file && status == RETROK
&& (dt & RETROKF) && (dt & TEXTHTML))
descend = 1;
if (redirected)
{
/* We have been redirected, possibly to another host, or
different path, or wherever. Check whether we really
want to follow it. */
if (descend)
{
if (!descend_redirect_p (redirected, url, depth,
start_url_parsed, blacklist))
descend = 0;
else
/* Make sure that the old pre-redirect form gets
blacklisted. */
string_set_add (blacklist, url);
}
xfree (url);
url = redirected;
}
}
if (descend
&& depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
{
if (opt.page_requisites
&& (depth == opt.reclevel || depth == opt.reclevel + 1))
{
/* When -p is specified, we are allowed to exceed the
maximum depth, but only for the "inline" links,
i.e. those that are needed to display the page.
Originally this could exceed the depth at most by
one, but we allow one more level so that the leaf
pages that contain frames can be loaded
correctly. */
dash_p_leaf_HTML = TRUE;
}
else
{
/* Either -p wasn't specified or it was and we've
already spent the two extra (pseudo-)levels that it
affords us, so we need to bail out. */
DEBUGP (("Not descending further; at depth %d, max. %d.\n",
depth, opt.reclevel));
descend = 0;
}
}
/* If the downloaded document was HTML, parse it and enqueue the
links it contains. */
if (descend)
{
int meta_disallow_follow = 0;
struct urlpos *children
= get_urls_html (file, url, &meta_disallow_follow);
if (opt.use_robots && meta_disallow_follow)
{
free_urlpos (children);
children = NULL;
}
if (children)
{
struct urlpos *child = children;
struct url *url_parsed = url_parsed = url_parse (url, NULL);
assert (url_parsed != NULL);
for (; child; child = child->next)
{
if (child->ignore_when_downloading)
continue;
if (dash_p_leaf_HTML && !child->link_inline_p)
continue;
if (download_child_p (child, url_parsed, depth, start_url_parsed,
blacklist))
{
url_enqueue (queue, xstrdup (child->url->url),
xstrdup (url), depth + 1,
child->link_expect_html);
/* We blacklist the URL we have enqueued, because we
don't want to enqueue (and hence download) the
same URL twice. */
string_set_add (blacklist, child->url->url);
}
}
url_free (url_parsed);
free_urlpos (children);
}
}
if (opt.delete_after || (file && !acceptable (file)))
{
/* Either --delete-after was specified, or we loaded this
otherwise rejected (e.g. by -R) HTML file just so we
could harvest its hyperlinks -- in either case, delete
the local file. */
DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
opt.delete_after ? "--delete-after" :
"recursive rejection criteria"));
logprintf (LOG_VERBOSE,
(opt.delete_after
? _("Removing %s.\n")
: _("Removing %s since it should be rejected.\n")),
file);
if (unlink (file))
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
register_delete_file (file);
}
xfree (url);
FREE_MAYBE (referer);
FREE_MAYBE (file);
}
/* If anything is left of the queue due to a premature exit, free it
now. */
{
char *d1, *d2;
int d3, d4;
while (url_dequeue (queue,
(const char **)&d1, (const char **)&d2, &d3, &d4))
{
xfree (d1);
FREE_MAYBE (d2);
}
}
url_queue_delete (queue);
if (start_url_parsed)
url_free (start_url_parsed);
string_set_free (blacklist);
if (opt.quota && total_downloaded_bytes > opt.quota)
return QUOTEXC;
else if (status == FWRITEERR)
return FWRITEERR;
else
return RETROK;
}
/* Based on the context provided by retrieve_tree, decide whether a
URL is to be descended to. This is only ever called from
retrieve_tree, but is in a separate function for clarity.
The most expensive checks (such as those for robots) are memoized
by storing these URLs to BLACKLIST. This may or may not help. It
will help if those URLs are encountered many times. */
static int
download_child_p (const struct urlpos *upos, struct url *parent, int depth,
struct url *start_url_parsed, struct hash_table *blacklist)
{
struct url *u = upos->url;
const char *url = u->url;
int u_scheme_like_http;
DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
if (string_set_contains (blacklist, url))
{
DEBUGP (("Already on the black list.\n"));
goto out;
}
/* Several things to check for:
1. if scheme is not http, and we don't load it
2. check for relative links (if relative_only is set)
3. check for domain
4. check for no-parent
5. check for excludes && includes
6. check for suffix
7. check for same host (if spanhost is unset), with possible
gethostbyname baggage
8. check for robots.txt
Addendum: If the URL is FTP, and it is to be loaded, only the
domain and suffix settings are "stronger".
Note that .html files will get loaded regardless of suffix rules
(but that is remedied later with unlink) unless the depth equals
the maximum depth.
More time- and memory- consuming tests should be put later on
the list. */
/* Determine whether URL under consideration has a HTTP-like scheme. */
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
/* 1. Schemes other than HTTP are normally not recursed into. */
if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
{
DEBUGP (("Not following non-HTTP schemes.\n"));
goto out;
}
/* 2. If it is an absolute link and they are not followed, throw it
out. */
if (u_scheme_like_http)
if (opt.relative_only && !upos->link_relative_p)
{
DEBUGP (("It doesn't really look like a relative link.\n"));
goto out;
}
/* 3. If its domain is not to be accepted/looked-up, chuck it
out. */
if (!accept_domain (u))
{
DEBUGP (("The domain was not accepted.\n"));
goto out;
}
/* 4. Check for parent directory.
If we descended to a different host or changed the scheme, ignore
opt.no_parent. Also ignore it for documents needed to display
the parent page when in -p mode. */
if (opt.no_parent
&& schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
&& 0 == strcasecmp (u->host, start_url_parsed->host)
&& u->port == start_url_parsed->port
&& !(opt.page_requisites && upos->link_inline_p))
{
if (!frontcmp (start_url_parsed->dir, u->dir))
{
DEBUGP (("Going to \"%s\" would escape \"%s\" with no_parent on.\n",
u->dir, start_url_parsed->dir));
goto out;
}
}
/* 5. If the file does not match the acceptance list, or is on the
rejection list, chuck it out. The same goes for the directory
exclusion and inclusion lists. */
if (opt.includes || opt.excludes)
{
if (!accdir (u->dir, ALLABS))
{
DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
goto out;
}
}
/* 6. Check for acceptance/rejection rules. We ignore these rules
for directories (no file name to match) and for HTML documents,
which might lead to other files that do need to be downloaded.
That is, unless we've exhausted the recursion depth anyway. */
if (u->file[0] != '\0'
&& !(has_html_suffix_p (u->file)
&& depth != INFINITE_RECURSION
&& depth < opt.reclevel - 1))
{
if (!acceptable (u->file))
{
DEBUGP (("%s (%s) does not match acc/rej rules.\n",
url, u->file));
goto out;
}
}
/* 7. */
if (schemes_are_similar_p (u->scheme, parent->scheme))
if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
{
DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
u->host, parent->host));
goto out;
}
/* 8. */
if (opt.use_robots && u_scheme_like_http)
{
struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs)
{
char *rfile;
if (res_retrieve_file (url, &rfile))
{
specs = res_parse_from_file (rfile);
xfree (rfile);
}
else
{
/* If we cannot get real specs, at least produce
dummy ones so that we can register them and stop
trying to retrieve them. */
specs = res_parse ("", 0);
}
res_register_specs (u->host, u->port, specs);
}
/* Now that we have (or don't have) robots.txt specs, we can
check what they say. */
if (!res_match_path (specs, u->path))
{
DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
string_set_add (blacklist, url);
goto out;
}
}
/* The URL has passed all the tests. It can be placed in the
download queue. */
DEBUGP (("Decided to load it.\n"));
return 1;
out:
DEBUGP (("Decided NOT to load it.\n"));
return 0;
}
/* This function determines whether we will consider downloading the
children of a URL whose download resulted in a redirection,
possibly to another host, etc. It is needed very rarely, and thus
it is merely a simple-minded wrapper around download_child_p. */
/* Decide whether to descend into a document that was reached through
   a redirect from ORIGINAL to REDIRECTED.  Both URLs are parsed, the
   redirect target is wrapped in a zeroed temporary urlpos, and the
   decision is delegated to download_child_p with ORIGINAL acting as
   the parent.  Returns download_child_p's verdict. */
static int
descend_redirect_p (const char *redirected, const char *original, int depth,
		    struct url *start_url_parsed, struct hash_table *blacklist)
{
  struct url *from_url, *to_url;
  struct urlpos *fake_link;
  int verdict;

  from_url = url_parse (original, NULL);
  assert (from_url != NULL);

  to_url = url_parse (redirected, NULL);
  assert (to_url != NULL);

  /* All link attributes other than the URL itself stay zero. */
  fake_link = xmalloc (sizeof (struct urlpos));
  memset (fake_link, 0, sizeof (*fake_link));
  fake_link->url = to_url;

  verdict = download_child_p (fake_link, from_url, depth,
			      start_url_parsed, blacklist);

  url_free (from_url);
  url_free (to_url);
  xfree (fake_link);

  if (verdict == 0)
    DEBUGP (("Redirection \"%s\" failed the test.\n", redirected));

  return verdict;
}
--- NEW FILE: sysdep.h ---
/* Dirty system-dependent hacks.
Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This file is included by wget.h. Random .c files need not include
it. */
#ifndef SYSDEP_H
#define SYSDEP_H
/* We need these to be playing with various stuff. */
#ifdef TIME_WITH_SYS_TIME
# include <sys/time.h>
# include <time.h>
#else /* not TIME_WITH_SYS_TIME_H */
#ifdef HAVE_SYS_TIME_H
# include <sys/time.h>
#else /* not HAVE_SYS_TIME_H */
# include <time.h>
#endif /* HAVE_SYS_TIME_H */
#endif /* TIME_WITH_SYS_TIME_H */
#include <sys/types.h>
#include <sys/stat.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>
#endif
#ifdef WINDOWS
/* Windows doesn't have some functions. Include mswindows.h so we get
their declarations, as well as some additional declarations and
macros. This must come first, so it can set things up. */
#include <mswindows.h>
#endif /* WINDOWS */
/* Watcom-specific stuff. In practice this is probably specific to
Windows, although Watcom exists under other OS's too. For that
reason, we keep this here. */
#ifdef __WATCOMC__
/* Watcom has its own alloca() defined in malloc.h malloc.h needs to
be included in the sources to prevent 'undefined external' errors
at the link phase. */
# include <malloc.h>
/* io.h defines unlink() and chmod(). We put this here because it's
way too obscure to require all the .c files to observe. */
# include <io.h>
#endif /* __WATCOMC__ */
/* Needed for compilation under OS/2: */
#ifdef __EMX__
#ifndef S_ISLNK
# define S_ISLNK(m) 0
#endif
#ifndef lstat
# define lstat stat
#endif
#endif /* __EMX__ */
/* Reportedly, stat() macros are broken on some old systems. Those
systems will have to fend for themselves, as I will not introduce
new code to handle it.
However, I will add code for *missing* macros, and the following
are missing from many systems. */
/* Fallback file-type test macros for systems whose <sys/stat.h> does
   not provide them.  S_ISDIR and S_ISREG prefer the standard S_IFMT /
   S_IFDIR / S_IFREG names, matching S_ISLNK; the underscore-prefixed
   spellings are kept as a fallback for environments that define only
   those.  Each macro takes a st_mode value.  */
#ifndef S_ISLNK
# define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
#endif
#ifndef S_ISDIR
# if defined S_IFMT && defined S_IFDIR
#  define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
# else
#  define S_ISDIR(m) (((m) & (_S_IFMT)) == (_S_IFDIR))
# endif
#endif
#ifndef S_ISREG
# if defined S_IFMT && defined S_IFREG
#  define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
# else
#  define S_ISREG(m) (((m) & _S_IFMT) == _S_IFREG)
# endif
#endif
/* Bletch! SPARC compiler doesn't define sparc (needed by
arpa/nameser.h) when in -Xc mode. Luckily, it always defines
__sparc. */
#ifdef __sparc
#ifndef sparc
#define sparc
#endif
#endif
#ifdef __BEOS__
# undef READ
# undef WRITE
# define READ(fd, buf, cnt) recv((fd), (buf), (cnt), 0)
# define WRITE(fd, buf, cnt) send((fd), (buf), (cnt), 0)
#endif
/* mswindows.h defines these. */
#ifndef READ
# define READ(fd, buf, cnt) read ((fd), (buf), (cnt))
#endif
#ifndef WRITE
# define WRITE(fd, buf, cnt) write ((fd), (buf), (cnt))
#endif
#ifndef REALCLOSE
# define REALCLOSE(x) close (x)
#endif
/* Close descriptor X with REALCLOSE, then report it through DEBUGP.
   X is evaluated exactly once and stored in a local, so an argument
   with side effects or containing operators behaves the same whether
   or not DEBUGP expands to real code.  */
#define CLOSE(x)				\
  do {						\
    int close_fd_ = (x);			\
    REALCLOSE (close_fd_);			\
    DEBUGP (("Closing fd %d\n", close_fd_));	\
  } while (0)
/* Define a large integral type useful for storing large sizes that
exceed sizes of one download, such as when printing the sum of all
downloads. Note that this has nothing to do with large file
support, yet.
We use a 64-bit integral type where available, `double' otherwise.
It's hard to print LARGE_INT's portably, but fortunately it's
rarely needed. */
#if SIZEOF_LONG >= 8
/* Long is large enough: use it. */
typedef long LARGE_INT;
# define LARGE_INT_FMT "%ld"
#else
# if SIZEOF_LONG_LONG >= 8
/* Long long is large enough: use it. */
typedef long long LARGE_INT;
# define LARGE_INT_FMT "%lld"
# else
/* Large integer type unavailable; use `double' instead. */
typedef double LARGE_INT;
# define LARGE_INT_FMT "%.0f"
# endif
#endif
/* These are defined in cmpt.c if missing, therefore it's generally
safe to declare their parameters. */
#ifndef HAVE_STRERROR
char *strerror ();
#endif
#ifndef HAVE_STRCASECMP
int strcasecmp ();
#endif
#ifndef HAVE_STRNCASECMP
int strncasecmp ();
#endif
#ifndef HAVE_STRSTR
char *strstr ();
#endif
#ifndef HAVE_STRPTIME
char *strptime ();
#endif
#ifndef HAVE_SNPRINTF
int snprintf ();
#endif
#ifndef HAVE_VSNPRINTF
int vsnprintf ();
#endif
#ifndef HAVE_USLEEP
int usleep PARAMS ((unsigned long));
#endif
#ifndef HAVE_MEMMOVE
void *memmove ();
#endif
/* SunOS brain damage -- for some reason, SunOS header files fail to
declare the functions below, which causes all kinds of problems,
most notably compilation errors when using pointer arithmetic on
their return values.
This used to be only within `#ifdef STDC_HEADERS', but it got
tripped on other systems (AIX), thus causing havoc. Fortunately,
SunOS appears to be the only system braindamaged that badly, so I
added an extra `#ifdef sun' guard. */
#ifndef STDC_HEADERS
#ifdef sun
#ifndef __SVR4 /* exclude Solaris */
char *strstr ();
char *strchr ();
char *strrchr ();
char *strtok ();
char *strdup ();
void *memcpy ();
#endif /* not __SVR4 */
#endif /* sun */
#endif /* not STDC_HEADERS */
/* Some systems (Linux libc5, "NCR MP-RAS 3.0", and others) don't
provide MAP_FAILED, a symbolic constant for the value returned by
mmap() when it doesn't work. Usually, this constant should be -1.
This only makes sense for files that use mmap() and include
sys/mman.h *before* sysdep.h, but doesn't hurt others. */
#ifndef MAP_FAILED
# define MAP_FAILED ((void *) -1)
#endif
/* Enable system fnmatch only on systems where fnmatch.h is usable and
   which are known to have a non-broken fnmatch implementation.
   Currently those include glibc-based systems and Solaris.  One could
   add more, but fnmatch is not that large, so it might be better to
   play it safe.

   SYSTEM_FNMATCH is defined only when HAVE_WORKING_FNMATCH_H is set
   and either __GLIBC__ is at least 2 or the `solaris' macro is
   defined.  */
#ifdef HAVE_WORKING_FNMATCH_H
# if defined __GLIBC__ && __GLIBC__ >= 2
# define SYSTEM_FNMATCH
# endif
# ifdef solaris
# define SYSTEM_FNMATCH
# endif
#endif /* HAVE_WORKING_FNMATCH_H */
#ifdef SYSTEM_FNMATCH
# include <fnmatch.h>
#else /* not SYSTEM_FNMATCH */
/* Define fnmatch flags. Undef them first to avoid warnings in case
an evil library include chose to include system fnmatch.h. */
# undef FNM_PATHNAME
# undef FNM_NOESCAPE
# undef FNM_PERIOD
# undef FNM_NOMATCH
# define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
# define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
# define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
# define FNM_NOMATCH 1
/* Declare the function minimally. */
int fnmatch ();
#endif
/* Provide uint32_t on the platforms that don't define it. Although
most code should be agnostic about integer sizes, some code really
does need a 32-bit integral type. Such code should use uint32_t.
(The exception is gnu-md5.[ch], which uses its own detection for
portability across platforms.) */
#ifndef HAVE_UINT32_T
# if SIZEOF_INT == 4
typedef unsigned int uint32_t;
# else
# if SIZEOF_LONG == 4
typedef unsigned long uint32_t;
# else
# if SIZEOF_SHORT == 4
typedef unsigned short uint32_t;
# else
#error "Cannot determine a 32-bit unsigned integer type"
# endif
# endif
# endif
#endif
#endif /* SYSDEP_H */
--- NEW FILE: ftp.h ---
/* Declarations for FTP support.
Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef FTP_H
#define FTP_H
/* Need it for struct rbuf. */
#include "rbuf.h"
#include "host.h"
/* System types. */
enum stype
{
ST_UNIX,
ST_VMS,
ST_WINNT,
ST_MACOS,
ST_OS400,
ST_OTHER
};
uerr_t ftp_response PARAMS ((struct rbuf *, char **));
uerr_t ftp_login PARAMS ((struct rbuf *, const char *, const char *));
uerr_t ftp_port PARAMS ((struct rbuf *));
uerr_t ftp_pasv PARAMS ((struct rbuf *, ip_address *, unsigned short *));
#ifdef ENABLE_IPV6
uerr_t ftp_epsv PARAMS ((struct rbuf *, ip_address *, unsigned short *,
char *));
#endif
uerr_t ftp_type PARAMS ((struct rbuf *, int));
uerr_t ftp_cwd PARAMS ((struct rbuf *, const char *));
uerr_t ftp_retr PARAMS ((struct rbuf *, const char *));
uerr_t ftp_rest PARAMS ((struct rbuf *, long));
uerr_t ftp_list PARAMS ((struct rbuf *, const char *));
uerr_t ftp_syst PARAMS ((struct rbuf *, enum stype *));
uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
struct url;
/* File types. */
enum ftype
{
FT_PLAINFILE,
FT_DIRECTORY,
FT_SYMLINK,
FT_UNKNOWN
};
/* Globbing (used by ftp_retrieve_glob). */
enum
{
GLOBALL, GETALL, GETONE
};
/* Information about one filename in a linked list.  The nodes are
   chained in both directions through PREV and NEXT.  */
struct fileinfo
{
  enum ftype type;		/* file type */
  char *name;			/* file name */
  long size;			/* file size */
  long tstamp;			/* time-stamp */
  int perms;			/* file permissions */
  char *linkto;			/* link to which file points; may be
				   NULL (freefileinfo in ftp.c checks
				   for that before freeing it) */
  struct fileinfo *prev;	/* previous... */
  struct fileinfo *next;	/* ...and next structure. */
};
/* Commands for FTP functions. */
enum wget_ftp_command
{
DO_LOGIN = 0x0001, /* Connect and login to the server. */
DO_CWD = 0x0002, /* Change current directory. */
DO_RETR = 0x0004, /* Retrieve the file. */
DO_LIST = 0x0008, /* Retrieve the directory list. */
LEAVE_PENDING = 0x0010, /* Do not close the socket. */
NO_TRUNCATE = 0x0020 /* Don't truncate the file if REST
malfunctions. */
};
enum wget_ftp_fstatus
{
NOTHING = 0x0000, /* Nothing done yet. */
ON_YOUR_OWN = 0x0001, /* The ftp_loop_internal sets the
defaults. */
DONE_CWD = 0x0002 /* The current working directory is
correct. */
};
struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
uerr_t ftp_loop PARAMS ((struct url *, int *, struct url *));
uerr_t ftp_index PARAMS ((const char *, struct url *, struct fileinfo *));
char ftp_process_type PARAMS ((const char *));
#endif /* FTP_H */
--- NEW FILE: cookies.c ---
/* Support for cookies.
Copyright (C) 2001, 2002 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1533 lines suppressed...]
ind / 2 + 1, data, expected[ind], test_results[ind]);
if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
}
if (ind < test_count || expected[ind])
printf ("Unmatched number of results: %s\n", data);
}
for (i = 0; i < countof (tests_fail); i++)
{
struct cookie *c;
char *data = tests_fail[i];
test_count = 0;
c = parse_set_cookies (data, test_parse_cookies_callback, 1);
if (c)
printf ("Failed to report error on invalid data: %s\n", data);
}
}
#endif /* TEST_COOKIES */
--- NEW FILE: recur.h ---
/* Declarations for recur.c.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef RECUR_H
#define RECUR_H
struct urlpos;
void recursive_cleanup PARAMS ((void));
uerr_t retrieve_tree PARAMS ((const char *));
/* These are really in html-url.c. */
struct urlpos *get_urls_file PARAMS ((const char *));
struct urlpos *get_urls_html PARAMS ((const char *, const char *, int *));
void free_urlpos PARAMS ((struct urlpos *));
#endif /* RECUR_H */
--- NEW FILE: mswindows.h ---
/* Declarations for windows
Copyright (C) 1995, 1997, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef MSWINDOWS_H
#define MSWINDOWS_H
#ifndef WGET_H
#error Include mswindows.h inside or after "wget.h"
#endif
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN /* Prevent inclusion of <winsock*.h> in <windows.h> */
#endif
#include <windows.h>
/* Use the correct winsock header; <ws2tcpip.h> includes <winsock2.h> only on
* Watcom/MingW. We cannot use <winsock.h> for IPv6. Using getaddrinfo() requires
* <ws2tcpip.h>
*/
#if defined(ENABLE_IPV6) || defined(HAVE_GETADDRINFO)
# include <winsock2.h>
# include <ws2tcpip.h>
#else
# include <winsock.h>
#endif
#ifndef EAI_SYSTEM
# define EAI_SYSTEM -1 /* value doesn't matter */
#endif
/* Must include <sys/stat.h> because of 'stat' define below. */
#include <sys/stat.h>
/* Missing in several .c files. Include here. */
#include <io.h>
/* Apparently needed for alloca(). */
#include <malloc.h>
#ifndef S_ISDIR
# define S_ISDIR(m) (((m) & (_S_IFMT)) == (_S_IFDIR))
#endif
#ifndef S_ISLNK
# define S_ISLNK(a) 0
#endif
/* We have strcasecmp and strncasecmp, just under a different name. */
#define strcasecmp stricmp
#define strncasecmp strnicmp
/* The same for snprintf() and vsnprintf(). */
#define snprintf _snprintf
#define vsnprintf _vsnprintf
/* No lstat on Windows; use stat instead. */
#define lstat stat
#define PATH_SEPARATOR '\\'
/* Microsoft says stat is _stat, Borland doesn't */
#ifdef _MSC_VER
# define stat _stat
#endif
#ifdef HAVE_ISATTY
/* Microsoft VC supports _isatty; Borland ? */
#ifdef _MSC_VER
# define isatty _isatty
#endif
#endif
#define REALCLOSE(x) closesocket (x)
/* read & write don't work with sockets on Windows 95. */
#define READ(fd, buf, cnt) recv ((fd), (buf), (cnt), 0)
#define WRITE(fd, buf, cnt) send ((fd), (buf), (cnt), 0)
/* #### Do we need this? */
#include <direct.h>
/* Windows compilers accept only one arg to mkdir. */
#ifndef __BORLANDC__
# define mkdir(a, b) _mkdir(a)
#else /* __BORLANDC__ */
# define mkdir(a, b) mkdir(a)
#endif /* __BORLANDC__ */
/* Declarations of various socket errors: */
#define EWOULDBLOCK WSAEWOULDBLOCK
#define EINPROGRESS WSAEINPROGRESS
#define EALREADY WSAEALREADY
#define ENOTSOCK WSAENOTSOCK
#define EDESTADDRREQ WSAEDESTADDRREQ
#define EMSGSIZE WSAEMSGSIZE
#define EPROTOTYPE WSAEPROTOTYPE
#define ENOPROTOOPT WSAENOPROTOOPT
#define EPROTONOSUPPORT WSAEPROTONOSUPPORT
#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT
#define EOPNOTSUPP WSAEOPNOTSUPP
#define EPFNOSUPPORT WSAEPFNOSUPPORT
#define EAFNOSUPPORT WSAEAFNOSUPPORT
#define EADDRINUSE WSAEADDRINUSE
#define EADDRNOTAVAIL WSAEADDRNOTAVAIL
#define ENETDOWN WSAENETDOWN
#define ENETUNREACH WSAENETUNREACH
#define ENETRESET WSAENETRESET
#define ECONNABORTED WSAECONNABORTED
#define ECONNRESET WSAECONNRESET
#define ENOBUFS WSAENOBUFS
#define EISCONN WSAEISCONN
#define ENOTCONN WSAENOTCONN
#define ESHUTDOWN WSAESHUTDOWN
#define ETOOMANYREFS WSAETOOMANYREFS
#define ETIMEDOUT WSAETIMEDOUT
#define ECONNREFUSED WSAECONNREFUSED
#define ELOOP WSAELOOP
#define EHOSTDOWN WSAEHOSTDOWN
#define EHOSTUNREACH WSAEHOSTUNREACH
#define EPROCLIM WSAEPROCLIM
#define EUSERS WSAEUSERS
#define EDQUOT WSAEDQUOT
#define ESTALE WSAESTALE
#define EREMOTE WSAEREMOTE
/* Public functions. */
#ifndef HAVE_SLEEP
unsigned int sleep (unsigned);
#endif
#ifndef HAVE_USLEEP
int usleep (unsigned long);
#endif
void ws_startup (void);
void ws_changetitle (const char*, int);
void ws_percenttitle (double);
char *ws_mypath (void);
void ws_help (const char *);
void windows_main_junk (int *, char **, char **);
/* Things needed for IPv6; missing in <ws2tcpip.h>. */
#ifdef ENABLE_IPV6
# ifndef HAVE_NTOP
extern const char *inet_ntop (int af, const void *src, char *dst, size_t size);
# endif
# ifndef HAVE_PTON
extern int inet_pton (int af, const char *src, void *dst);
# endif
#endif /* ENABLE_IPV6 */
#endif /* MSWINDOWS_H */
--- NEW FILE: ftp.c ---
/* File Transfer Protocol support.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1800 lines suppressed...]
prev->next = next;
else
*start = next;
return next;
}
/* Release every node of the fileinfo list that starts at F.  For each
   node the NAME string is freed, the LINKTO string is freed when it
   is set, and then the node itself is freed.  */
static void
freefileinfo (struct fileinfo *f)
{
  struct fileinfo *node, *following;

  for (node = f; node; node = following)
    {
      /* Remember the successor before the node is released.  */
      following = node->next;
      xfree (node->name);
      if (node->linkto)
	xfree (node->linkto);
      xfree (node);
    }
}
--- NEW FILE: ftp-basic.c ---
/* Basic FTP routines.
Copyright (C) 1995, 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <sys/types.h>
/* For inet_ntop. */
#ifndef WINDOWS
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#endif
#ifdef WINDOWS
# include <winsock.h>
#endif
#include "wget.h"
#include "utils.h"
#include "rbuf.h"
#include "connect.h"
#include "host.h"
#include "ftp.h"
char ftp_last_respline[128];
/* Read the FTP server's reply from RBUF into a freshly allocated
   buffer stored in *LINE.  Each line has its trailing <LF> (and a
   <CR> directly before it) replaced by a terminating zero.  Lines are
   read repeatedly into the same buffer until one begins with three
   digits followed by a space; that final line is what *LINE holds on
   return, and a truncated copy of it is kept in ftp_last_respline.

   Returns FTPOK on success, or FTPRERR when a character cannot be
   read.  *LINE is allocated in either case, so the caller frees it.  */
uerr_t
ftp_response (struct rbuf *rbuf, char **line)
{
  int pos;
  int capacity = 40;

  *line = (char *)xmalloc (capacity);
  for (;;)
    {
      char *text;

      /* Collect one line, doubling the buffer whenever it is full.  */
      pos = 0;
      for (;;)
	{
	  if (pos >= capacity)
	    {
	      capacity <<= 1;
	      *line = (char *)xrealloc (*line, capacity);
	    }
	  /* Anything other than exactly one byte read is an error.  */
	  if (RBUF_READCHAR (rbuf, *line + pos) != 1)
	    return FTPRERR;
	  if ((*line)[pos] == '\n')
	    break;
	  ++pos;
	}

      /* Terminate at the newline and drop a preceding \r.  */
      text = *line;
      text[pos] = '\0';
      if (pos > 0 && text[pos - 1] == '\r')
	text[pos - 1] = '\0';

      if (opt.server_response)
	logprintf (LOG_ALWAYS, "%s\n", text);
      else
	DEBUGP (("%s\n", text));

      /* "NNN " marks the last line of the reply.  */
      if (pos >= 3 && ISDIGIT (text[0]) && ISDIGIT (text[1])
	  && ISDIGIT (text[2]) && text[3] == ' ')
	break;
    }

  strncpy (ftp_last_respline, *line, sizeof (ftp_last_respline));
  ftp_last_respline[sizeof (ftp_last_respline) - 1] = '\0';
  return FTPOK;
}
/* Build an FTP request of the form "COMMAND VALUE<CR><LF>" in newly
   allocated memory and return it; the caller frees the result.  When
   VALUE is NULL the request is just "COMMAND<CR><LF>".  The request
   is also logged: through logprintf/logputs when opt.server_response
   is set (with the argument of PASS replaced by a fixed text), and
   through DEBUGP otherwise.  */
static char *
ftp_request (const char *command, const char *value)
{
  const char *separator = "";
  const char *argument = "";
  /* Command, <CR><LF>, and the terminating zero.  */
  size_t size = strlen (command) + 2 + 1;
  char *res;

  if (value)
    {
      /* One more byte for the space, plus the argument itself.  */
      size += 1 + strlen (value);
      separator = " ";
      argument = value;
    }

  res = (char *)xmalloc (size);
  sprintf (res, "%s%s%s\r\n", command, separator, argument);

  if (!opt.server_response)
    {
      DEBUGP (("\n--> %s\n", res));
    }
  else if (strncmp (res, "PASS", 4) == 0)
    {
      /* Hack: don't print out password. */
      logputs (LOG_ALWAYS, "--> PASS Turtle Power!\n");
    }
  else
    {
      logprintf (LOG_ALWAYS, "--> %s\n", res);
    }
  return res;
}
#ifdef USE_OPIE
const char *calculate_skey_response PARAMS ((int, const char *, const char *));
#endif
/* Log in over the control connection RBUF: read the server greeting,
   send USER ACC and, if the server asks for it, PASS PASS.

   Returns FTPOK on success.  Other results visible here:
     - the error returned by ftp_response when reading a reply fails;
     - FTPSRVERR when the greeting does not start with '2';
     - WRITEFAILED when iwrite reports an error;
     - FTPLOGREFUSED when the reply to USER starts with neither '2'
       nor '3', or (with USE_OPIE) when the challenge cannot be
       parsed or answered;
     - FTPLOGINC when the reply to PASS does not start with '2'.

   ftp_response allocates the reply buffer before it can fail, which
   is why RESPLINE is freed on its error paths as well.  */
uerr_t
ftp_login (struct rbuf *rbuf, const char *acc, const char *pass)
{
  uerr_t err;
  char *request, *respline;
  int nwritten;
  /* Get greeting.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline != '2')
    {
      xfree (respline);
      return FTPSRVERR;
    }
  xfree (respline);
  /* Send USER username.  */
  request = ftp_request ("USER", acc);
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      return WRITEFAILED;
    }
  xfree (request);
  /* Get appropriate response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  /* The unlikely case of being logged in without a password.  */
  if (*respline == '2')
    {
      xfree (respline);
      return FTPOK;
    }
  /* Else, only response 3 is appropriate.  */
  if (*respline != '3')
    {
      xfree (respline);
      return FTPLOGREFUSED;
    }
#ifdef USE_OPIE
  {
    /* Reply prefixes that announce a one-time-password challenge.  */
    static const char *skey_head[] = {
      "331 s/key ",
      "331 opiekey "
    };
    int i;
    /* Find which prefix, if any, the reply starts with.  */
    for (i = 0; i < countof (skey_head); i++)
      {
	if (strncasecmp (skey_head[i], respline, strlen (skey_head[i])) == 0)
	  break;
      }
    if (i < countof (skey_head))
      {
	const char *cp;
	int skey_sequence = 0;
	/* Parse the decimal sequence number that follows the prefix.  */
	for (cp = respline + strlen (skey_head[i]);
	     '0' <= *cp && *cp <= '9';
	     cp++)
	  {
	    skey_sequence = skey_sequence * 10 + *cp - '0';
	  }
	/* The number must be followed by a space.  */
	if (*cp == ' ')
	  cp++;
	else
	  {
	    /* Shared failure exit; also reached by the goto below.  */
	  bad:
	    xfree (respline);
	    return FTPLOGREFUSED;
	  }
	/* Replace the password with the computed response; a zero
	   result from calculate_skey_response is treated as failure.  */
	if ((cp = calculate_skey_response (skey_sequence, cp, pass)) == 0)
	  goto bad;
	pass = cp;
      }
  }
#endif /* USE_OPIE */
  xfree (respline);
  /* Send PASS password.  */
  request = ftp_request ("PASS", pass);
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      return WRITEFAILED;
    }
  xfree (request);
  /* Get appropriate response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline != '2')
    {
      xfree (respline);
      return FTPLOGINC;
    }
  xfree (respline);
  /* All OK.  */
  return FTPOK;
}
#ifdef ENABLE_IPV6
/* Bind a local port with bindport and announce it to the server with
   an EPRT command whose argument has the form
   |2|IPv6.ascii|PORT.ascii|.

   Returns FTPOK when the reply starts with '2'.  Otherwise returns
   the bindport error, BINDERR when conaddr fails, WRITEFAILED when
   the request cannot be written, the ftp_response error, or
   FTPPORTERR for any other reply.

   NOTE(review): closeport is handed the port number here -- confirm
   that this is the argument it expects.  The conaddr failure path
   returns without calling closeport, unlike the later failure paths.
   The result of inet_ntop is not checked.  */
uerr_t
ftp_eprt (struct rbuf *rbuf)
{
  uerr_t status;
  char *cmd, *reply;
  ip_address local_addr;
  /* Setting port to 0 lets the system choose a free port.  */
  unsigned short port = 0;
  char addr_text [8 * (4 * 3 + 3) + 8];
  char *arg;

  /* Bind the port.  */
  status = bindport (&port, ip_default_family);
  if (status != BINDOK)
    return status;

  /* Get the address of this side of the connection.  */
  if (!conaddr (RBUF_FD (rbuf), &local_addr))
    /* Huh?  This is not BINDERR! */
    return BINDERR;
  inet_ntop (AF_INET6, &local_addr, addr_text, sizeof (addr_text));

  /* "|2|" + address + "|" + port digits + "|" + terminating zero.  */
  arg = alloca (3 + strlen (addr_text) + 1 + numdigit (port) + 1 + 1);
  sprintf (arg, "|2|%s|%u|", addr_text, port);

  /* Send the EPRT request.  */
  cmd = ftp_request ("EPRT", arg);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      closeport (port);
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Get appropriate response.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK && *reply != '2')
    status = FTPPORTERR;
  if (status != FTPOK)
    closeport (port);
  xfree (reply);
  return status;
}
#endif
/* Bind a port and send the appropriate PORT command to the FTP
   server.  Use acceptport after RETR, to get the socket of data
   connection.

   With ENABLE_IPV6 and an AF_INET6 default family, ftp_eprt is tried
   first and its success is returned directly; if it fails, the plain
   PORT path below is used.

   Returns FTPOK when the reply starts with '2'.  Otherwise returns
   the bindport error, BINDERR when the local address cannot be
   obtained or mapped to IPv4, WRITEFAILED when the request cannot be
   written, the ftp_response error, or FTPPORTERR.  */
uerr_t
ftp_port (struct rbuf *rbuf)
{
  uerr_t status;
  char *cmd, *reply;
  char arg[6 * 4 +1];
  ip_address local_addr;
  ip4_address local_addr4;
  unsigned char *octet = (unsigned char *)&local_addr4;
  unsigned short port;

#ifdef ENABLE_IPV6
  /* Only try the extended version if we actually use IPv6.  */
  if (ip_default_family == AF_INET6)
    {
      status = ftp_eprt (rbuf);
      if (status == FTPOK)
	return status;
    }
#endif

  /* Setting port to 0 lets the system choose a free port.  */
  port = 0;
  status = bindport (&port, AF_INET);
  if (status != BINDOK)
    return status;

  /* Get the address of this side of the connection and convert it
     (back) to IPv4.  */
  if (!conaddr (RBUF_FD (rbuf), &local_addr))
    /* Huh?  This is not BINDERR! */
    return BINDERR;
  if (!map_ip_to_ipv4 (&local_addr, &local_addr4))
    return BINDERR;

  /* Construct the argument of PORT (of the form a,b,c,d,e,f): the
     four address bytes followed by the high and low byte of the
     port.  PORT is an unsigned short, so `port >> 8' already yields
     just the high byte.  */
  sprintf (arg, "%d,%d,%d,%d,%d,%d",
	   octet[0], octet[1], octet[2], octet[3],
	   port >> 8, port & 0xff);

  /* Send PORT request.  */
  cmd = ftp_request ("PORT", arg);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Get appropriate response.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK && *reply != '2')
    status = FTPPORTERR;
  xfree (reply);
  return status;
}
#ifdef ENABLE_IPV6
/* Send `EPSV TYP' to the server on RBUF and parse the reply, which is
   expected to look like
     229 Entering Extended Passive Mode (|||6446|)
   The port number from the reply is stored in *PORT.  The reply
   carries no address, so *ADDR is filled in from the peer address of
   the control connection (mapped with map_ipv4_to_ip when the peer is
   AF_INET).

   Returns FTPOK on success; WRITEFAILED when the request cannot be
   written; the ftp_response error; FTPNOPASV when the reply does not
   start with '2'; FTPINVPASV when no port number is found in the
   reply or the peer address cannot be obtained.  An unexpected
   address family aborts the program, as before.

   The request and reply buffers are released on every path.  */
uerr_t
ftp_epsv (struct rbuf *rbuf, ip_address *addr, unsigned short *port,
	  char *typ)
{
  uerr_t err;
  char *s, *respline;
  char *request = ftp_request ("EPSV", typ);
  int nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));

  /* The request is no longer needed whether or not the write
     succeeded.  */
  xfree (request);
  if (nwritten < 0)
    return WRITEFAILED;

  /* Get the server response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline != '2')
    {
      xfree (respline);
      return FTPNOPASV;
    }

  /* Parse the response: skip the "229 " prefix, then everything up
     to the first digit, which starts the port number.  */
  s = respline;
  for (s += 4; *s && !ISDIGIT (*s); s++)
    ;
  if (!*s)
    {
      xfree (respline);
      return FTPINVPASV;
    }
  *port = 0;
  for (; ISDIGIT (*s); s++)
    *port = (*s - '0') + 10 * (*port);
  xfree (respline);

  /* Now we have the port but we need the IPv6 :-( */
  {
    wget_sockaddr remote;
    socklen_t addrlen = sizeof (remote);
    struct sockaddr_in *ipv4_sock = (struct sockaddr_in *)&remote;

    /* Without a valid peer address REMOTE would be read
       uninitialized below.  */
    if (getpeername (RBUF_FD (rbuf), (struct sockaddr *)&remote,
		     &addrlen) < 0)
      return FTPINVPASV;

    switch (remote.sa.sa_family)
      {
      case AF_INET6:
	memcpy (addr, &remote.sin6.sin6_addr, 16);
	break;
      case AF_INET:
	map_ipv4_to_ip ((ip4_address *)&ipv4_sock->sin_addr, addr);
	break;
      default:
	/* Really bad: an address family we cannot handle.  */
	abort ();
	return FTPINVPASV;
      }
  }
  return FTPOK;
}
#endif
/* Similar to ftp_port, but uses `PASV' to initiate the passive FTP
   transfer.  Reads the response from server and parses it.  The
   expected reply is
     227 Entering Passive Mode (h1,h2,h3,h4,p1,p2).
   The host address is stored in *ADDR (through map_ipv4_to_ip) and
   the port, p1 * 256 + p2, in *PORT.

   With ENABLE_IPV6 and an AF_INET6 default family, EPSV is tried
   first (protocol "2", then "1"); PASV is used only if both fail.

   Returns FTPOK on success; WRITEFAILED when the request cannot be
   written; the ftp_response error; FTPNOPASV when the reply does not
   start with '2'; FTPINVPASV when the reply cannot be parsed.  The
   reply buffer is released on every path.  */
uerr_t
ftp_pasv (struct rbuf *rbuf, ip_address *addr, unsigned short *port)
{
  char *request, *respline, *s;
  int nwritten, i;
  uerr_t err;
  unsigned char addr4[4];

#ifdef ENABLE_IPV6
  if (ip_default_family == AF_INET6)
    {
      err = ftp_epsv (rbuf, addr, port, "2");	/* try IPv6 with EPSV */
      if (FTPOK == err)
	return FTPOK;
      err = ftp_epsv (rbuf, addr, port, "1");	/* try IPv4 with EPSV */
      if (FTPOK == err)
	return FTPOK;
    }
#endif
  /* Form the request.  */
  request = ftp_request ("PASV", NULL);
  /* And send it.  */
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      return WRITEFAILED;
    }
  xfree (request);
  /* Get the server response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline != '2')
    {
      xfree (respline);
      return FTPNOPASV;
    }
  /* Parse the response: skip the "227 " prefix, then everything up
     to the first digit, which starts h1.  */
  s = respline;
  for (s += 4; *s && !ISDIGIT (*s); s++)
    ;
  if (!*s)
    {
      /* No digits at all; release the reply before bailing out.  */
      xfree (respline);
      return FTPINVPASV;
    }
  /* Four address bytes, each followed by a comma.  */
  for (i = 0; i < 4; i++)
    {
      addr4[i] = 0;
      for (; ISDIGIT (*s); s++)
	addr4[i] = (*s - '0') + 10 * addr4[i];
      if (*s == ',')
	s++;
      else
	{
	  xfree (respline);
	  return FTPINVPASV;
	}
    }
  /* Make an IPv4-in-IPv6 address if needed.  */
  map_ipv4_to_ip ((ip4_address *)addr4, addr);
  /* High byte of the port, which must be followed by a comma.  */
  *port = 0;
  for (; ISDIGIT (*s); s++)
    *port = (*s - '0') + 10 * (*port);
  if (*s == ',')
    s++;
  else
    {
      xfree (respline);
      return FTPINVPASV;
    }
  /* Low byte of the port.  */
  {
    unsigned short port2 = 0;
    for (; ISDIGIT (*s); s++)
      port2 = (*s - '0') + 10 * port2;
    *port = (*port) * 256 + port2;
  }
  xfree (respline);
  return FTPOK;
}
/* Send a TYPE request whose argument is the single character TYPE.

   Returns FTPOK when the reply starts with '2', WRITEFAILED when the
   request cannot be written, the ftp_response error when the reply
   cannot be read, and FTPUNKNOWNTYPE for any other reply.  */
uerr_t
ftp_type (struct rbuf *rbuf, int type)
{
  uerr_t status;
  char *cmd, *reply;
  char arg[2];

  /* Construct the one-character argument string.  */
  arg[0] = type;
  arg[1] = 0;

  /* Send TYPE request.  */
  cmd = ftp_request ("TYPE", arg);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Read the reply and map it onto a status code.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK && *reply != '2')
    status = FTPUNKNOWNTYPE;
  xfree (reply);
  return status;
}
/* Change the working directory to DIR by issuing a CWD command.

   Returns FTPOK when the reply starts with '2', FTPNSFOD when it
   starts with '5', FTPRERR for any other reply, WRITEFAILED when the
   request cannot be written, and the ftp_response error when the
   reply cannot be read.  */
uerr_t
ftp_cwd (struct rbuf *rbuf, const char *dir)
{
  uerr_t status;
  char *cmd, *reply;

  /* Send CWD request.  */
  cmd = ftp_request ("CWD", dir);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Read the reply and map it onto a status code.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK)
    {
      if (*reply == '5')
	status = FTPNSFOD;
      else if (*reply != '2')
	status = FTPRERR;
    }
  xfree (reply);
  return status;
}
/* Send a REST command with OFFSET, written out in decimal by
   number_to_string, as its argument.

   Returns FTPOK when the reply starts with '3', WRITEFAILED when the
   request cannot be written, the ftp_response error when the reply
   cannot be read, and FTPRESTFAIL for any other reply.  */
uerr_t
ftp_rest (struct rbuf *rbuf, long offset)
{
  uerr_t status;
  char *cmd, *reply;
  static char offset_text[24];	/* Buffer for the number */

  /* Send REST request.  */
  number_to_string (offset_text, offset);
  cmd = ftp_request ("REST", offset_text);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Read the reply and map it onto a status code.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK && *reply != '3')
    status = FTPRESTFAIL;
  xfree (reply);
  return status;
}
/* Request retrieval of FILE by sending a RETR command over RBUF.

   Returns WRITEFAILED if the request cannot be written, the error
   reported by ftp_response if the reply cannot be read, FTPNSFOD on
   a 5xx reply, FTPRERR on any other non-1xx reply, and FTPOK
   otherwise.  */
uerr_t
ftp_retr (struct rbuf *rbuf, const char *file)
{
  char *cmd, *reply;
  uerr_t status;

  cmd = ftp_request ("RETR", file);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Success is signalled by a preliminary (1xx) reply.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK)
    {
      if (*reply == '5')
	status = FTPNSFOD;
      else if (*reply != '1')
	status = FTPRERR;
    }
  xfree (reply);
  return status;
}
/* Send a LIST command for FILE over RBUF.  If FILE is NULL, send just
   `LIST' (no space).

   Returns WRITEFAILED if the request cannot be written, the error
   reported by ftp_response if the reply cannot be read, FTPNSFOD on
   a 5xx reply, FTPRERR on any other non-1xx reply, and FTPOK
   otherwise.  */
uerr_t
ftp_list (struct rbuf *rbuf, const char *file)
{
  char *cmd, *reply;
  uerr_t status;

  cmd = ftp_request ("LIST", file);
  if (iwrite (RBUF_FD (rbuf), cmd, strlen (cmd)) < 0)
    {
      xfree (cmd);
      return WRITEFAILED;
    }
  xfree (cmd);

  /* Success is signalled by a preliminary (1xx) reply.  */
  status = ftp_response (rbuf, &reply);
  if (status == FTPOK)
    {
      if (*reply == '5')
	status = FTPNSFOD;
      else if (*reply != '1')
	status = FTPRERR;
    }
  xfree (reply);
  return status;
}
/* Sends the SYST command to the server and classifies the reply.

   On success *SERVER_TYPE is set from the first word that follows the
   reply code (compared case-insensitively): ST_VMS, ST_UNIX,
   ST_WINNT, ST_MACOS or ST_OS400, and ST_OTHER for anything else --
   including a reply that carries no system name at all.

   Returns WRITEFAILED if the request cannot be written, the error
   reported by ftp_response if the reply cannot be read, FTPSRVERR on
   a 5xx reply, and FTPOK otherwise.  */
uerr_t
ftp_syst (struct rbuf *rbuf, enum stype *server_type)
{
  char *request, *respline;
  int nwritten;
  uerr_t err;

  /* Send SYST request.  */
  request = ftp_request ("SYST", NULL);
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      return WRITEFAILED;
    }
  xfree (request);

  /* Get appropriate response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline == '5')
    {
      xfree (respline);
      return FTPSRVERR;
    }

  /* Skip the number (215, but 200 (!!!) in case of VMS) */
  strtok (respline, " ");

  /* Which system type has been reported (we are interested just in the
     first word of the server response)?  */
  request = strtok (NULL, " ");

  /* strtok returns NULL when nothing follows the reply code; such a
     reply must not be handed to strcasecmp.  */
  if (request == NULL)
    *server_type = ST_OTHER;
  else if (!strcasecmp (request, "VMS"))
    *server_type = ST_VMS;
  else if (!strcasecmp (request, "UNIX"))
    *server_type = ST_UNIX;
  else if (!strcasecmp (request, "WINDOWS_NT")
	   || !strcasecmp (request, "WINDOWS2000"))
    *server_type = ST_WINNT;
  else if (!strcasecmp (request, "MACOS"))
    *server_type = ST_MACOS;
  else if (!strcasecmp (request, "OS/400"))
    *server_type = ST_OS400;
  else
    *server_type = ST_OTHER;

  xfree (respline);
  /* All OK.  */
  return FTPOK;
}
/* Sends the PWD command to the server.

   On success the directory quoted in the reply is copied into freshly
   allocated storage and stored in *PWD; a previous non-NULL *PWD is
   freed first.

   Returns WRITEFAILED if the request cannot be written, the error
   reported by ftp_response if the reply cannot be read, FTPSRVERR on
   a 5xx reply or on a reply that contains no quoted directory (in
   both cases *PWD is left untouched), and FTPOK otherwise.  */
uerr_t
ftp_pwd (struct rbuf *rbuf, char **pwd)
{
  char *request, *respline;
  int nwritten;
  uerr_t err;

  /* Send PWD request.  */
  request = ftp_request ("PWD", NULL);
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      return WRITEFAILED;
    }
  xfree (request);

  /* Get appropriate response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      return err;
    }
  if (*respline == '5')
    {
      xfree (respline);
      return FTPSRVERR;
    }

  /* Skip the number (257), leading citation mark, trailing citation mark
     and everything following it.  */
  strtok (respline, "\"");
  request = strtok (NULL, "\"");
  if (request == NULL)
    {
      /* No quoted directory in the reply: treat the malformed
	 response as a server error rather than passing NULL to
	 xstrdup.  */
      xfree (respline);
      return FTPSRVERR;
    }

  /* Has the `pwd' been already allocated?  Free!  */
  FREE_MAYBE (*pwd);
  *pwd = xstrdup (request);

  xfree (respline);
  /* All OK.  */
  return FTPOK;
}
/* Sends the SIZE command for FILE to the server, and returns the
   value in *SIZE.  *SIZE is set to zero whenever the size could not
   be obtained.

   Returns WRITEFAILED if the request cannot be written and the error
   reported by ftp_response if the reply cannot be read.  Every other
   outcome returns FTPOK: a 5xx reply (SIZE probably unsupported) and
   an unparseable reply are non-fatal and merely leave *SIZE at 0.  */
uerr_t
ftp_size (struct rbuf *rbuf, const char *file, long int *size)
{
  char *request, *respline;
  int nwritten;
  uerr_t err;

  /* Send SIZE request.  */
  request = ftp_request ("SIZE", file);
  nwritten = iwrite (RBUF_FD (rbuf), request, strlen (request));
  if (nwritten < 0)
    {
      xfree (request);
      *size = 0;
      return WRITEFAILED;
    }
  xfree (request);

  /* Get appropriate response.  */
  err = ftp_response (rbuf, &respline);
  if (err != FTPOK)
    {
      xfree (respline);
      *size = 0;
      return err;
    }
  if (*respline == '5')
    {
      /*
       * Probably means SIZE isn't supported on this server.
       * Error is nonfatal since SIZE isn't in RFC 959
       */
      xfree (respline);
      *size = 0;
      return FTPOK;
    }

  /* The value starts after the three-digit code and its separator.
     Defensively refuse to look past the end of a shorter reply.  */
  if (strlen (respline) < 4)
    {
      xfree (respline);
      *size = 0;
      return FTPOK;
    }

  /* The size is a decimal number; parse it as base 10 so that a value
     with leading zeros is not misread as octal.  */
  errno = 0;
  *size = strtol (respline + 4, NULL, 10);
  if (errno)
    {
      /*
       * Couldn't parse the response for some reason.  On the (few)
       * tests I've done, the response is 213 <SIZE> with nothing else -
       * maybe something a bit more resilient is necessary.  It's not a
       * fatal error, however.
       */
      xfree (respline);
      *size = 0;
      return FTPOK;
    }

  xfree (respline);
  /* All OK.  */
  return FTPOK;
}
/* Extract the FTP transfer type from the URL parameters PARAMS.

   When PARAMS has the form "type=X" (the "type=" prefix is matched
   case-insensitively and X must not be the terminating NUL), return
   X converted to upper case.  In every other case, including a NULL
   PARAMS, return the default type 'I'.  */
char
ftp_process_type (const char *params)
{
  if (!params)
    return 'I';
  if (strncasecmp (params, "type=", 5) != 0)
    return 'I';
  if (params[5] == '\0')
    return 'I';
  return TOUPPER (params[5]);
}
--- NEW FILE: init.c ---
/* Reading/parsing the initialization file.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1318 lines suppressed...]
xfree (opt.ftp_acc);
FREE_MAYBE (opt.ftp_pass);
FREE_MAYBE (opt.ftp_proxy);
FREE_MAYBE (opt.https_proxy);
FREE_MAYBE (opt.http_proxy);
free_vec (opt.no_proxy);
FREE_MAYBE (opt.useragent);
FREE_MAYBE (opt.referer);
FREE_MAYBE (opt.http_user);
FREE_MAYBE (opt.http_passwd);
FREE_MAYBE (opt.user_header);
#ifdef HAVE_SSL
FREE_MAYBE (opt.sslcertkey);
FREE_MAYBE (opt.sslcertfile);
#endif /* HAVE_SSL */
FREE_MAYBE (opt.bind_address);
FREE_MAYBE (opt.cookies_input);
FREE_MAYBE (opt.cookies_output);
#endif
}
--- NEW FILE: wget.h ---
/* Miscellaneous declarations.
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* This file contains some declarations that don't fit anywhere else.
It also contains some useful includes, like the obnoxious TIME_H
inclusion. */
#ifndef WGET_H
#define WGET_H
/* Disable assertions when debug support is not compiled in. */
#ifndef ENABLE_DEBUG
# define NDEBUG
#endif
/* Define this if you want primitive but extensive malloc debugging.
It will make Wget extremely slow, so only do it in development
builds. */
#undef DEBUG_MALLOC
/* PARAMS wraps the parameter list of a function declaration.  When
   PROTOTYPES is non-zero it expands to ARGS unchanged; otherwise it
   expands to an empty `()', giving an unprototyped declaration
   (presumably for compilers without prototype support).  Call it with
   doubled parentheses: `void f PARAMS ((int, char *));'.  */
#ifndef PARAMS
# if PROTOTYPES
# define PARAMS(args) args
# else
# define PARAMS(args) ()
# endif
#endif
/* `gettext (FOO)' is long to write, so we use `_(FOO)'. If NLS is
unavailable, _(STRING) simply returns STRING. */
#ifdef HAVE_NLS
# define _(string) gettext (string)
# ifdef HAVE_LIBINTL_H
# include <libintl.h>
# endif /* HAVE_LIBINTL_H */
#else /* not HAVE_NLS */
# define _(string) string
#endif /* not HAVE_NLS */
/* No-op version of gettext, used for constant strings. */
#define N_(string) (string)
/* I18N NOTE: You will notice that none of the DEBUGP messages are
marked as translatable. This is intentional, for a few reasons:
1) The debug messages are not meant for the users to look at, but
for the developers; as such, they should be considered more like
source comments than real program output.
2) The messages are numerous, and yet they are random and frivolous
("double yuck!" and such). There would be a lot of work with no
gain.
3) Finally, the debug messages are meant to be a clue for me to
debug problems with Wget. If I get them in a language I don't
understand, debugging will become a new challenge of its own! */
/* Include these, so random files need not include them. */
#include "sysdep.h"
#include "options.h"
/* locale independent replacement for ctype.h */
#include "safe-ctype.h"
/* An empty statement that still consumes the trailing semicolon, so
   it can stand in for a statement-like macro.  */
#define DO_NOTHING do {} while (0)
/* Print X if debugging is enabled; a no-op otherwise.  X is a
   complete, parenthesized debug_logprintf argument list, so the macro
   is invoked with doubled parentheses: DEBUGP (("n = %d\n", n)).
   When ENABLE_DEBUG is defined the call is made only if opt.debug is
   set; without ENABLE_DEBUG the arguments are discarded entirely.  */
#ifdef ENABLE_DEBUG
# define DEBUGP(x) do { if (opt.debug) { debug_logprintf x; } } while (0)
#else /* not ENABLE_DEBUG */
# define DEBUGP(x) DO_NOTHING
#endif /* not ENABLE_DEBUG */
/* Make gcc check the format strings passed to logprintf() and
   debug_logprintf().  A is the 1-based index of the format-string
   parameter and B the index of the first variadic argument.  With
   other compilers the macro expands to nothing.  */
#ifdef __GNUC__
# define GCC_FORMAT_ATTR(a, b) __attribute__ ((format (printf, a, b)))
#else /* not __GNUC__ */
# define GCC_FORMAT_ATTR(a, b)
#endif /* not __GNUC__ */
/* These are from log.c, but they are used everywhere, so we declare
them here. */
/* Message class passed as the first argument to logprintf and
   logputs.  */
enum log_options { LOG_VERBOSE, LOG_NOTQUIET, LOG_NONVERBOSE, LOG_ALWAYS };
/* With <stdarg.h> the printf-like functions get real variadic
   prototypes (format-checked under gcc); without it they are declared
   unprototyped.  */
#ifdef HAVE_STDARG_H
void logprintf PARAMS ((enum log_options, const char *, ...))
GCC_FORMAT_ATTR (2, 3);
void debug_logprintf PARAMS ((const char *, ...)) GCC_FORMAT_ATTR (1, 2);
#else /* not HAVE_STDARG_H */
void logprintf ();
void debug_logprintf ();
#endif /* not HAVE_STDARG_H */
void logputs PARAMS ((enum log_options, const char *));
void logflush PARAMS ((void));
void log_set_flush PARAMS ((int));
int log_set_save_context PARAMS ((int));
/* Defined in `utils.c', but used literally everywhere. */
/* Allocation wrappers.  In a normal build xmalloc, xrealloc and
   xstrdup are plain aliases for the *_real functions and xfree is
   free itself.  When DEBUG_MALLOC is defined they become
   function-like macros that forward to the *_debug variants, adding
   the caller's __FILE__ and __LINE__.  */
#ifndef DEBUG_MALLOC
#define xmalloc xmalloc_real
#define xrealloc xrealloc_real
#define xstrdup xstrdup_real
#define xfree free
void *xmalloc_real PARAMS ((size_t));
void *xrealloc_real PARAMS ((void *, size_t));
char *xstrdup_real PARAMS ((const char *));
#else /* DEBUG_MALLOC */
#define xmalloc(s) xmalloc_debug (s, __FILE__, __LINE__)
#define xfree(p) xfree_debug (p, __FILE__, __LINE__)
#define xrealloc(p, s) xrealloc_debug (p, s, __FILE__, __LINE__)
#define xstrdup(p) xstrdup_debug (p, __FILE__, __LINE__)
void *xmalloc_debug PARAMS ((size_t, const char *, int));
void xfree_debug PARAMS ((void *, const char *, int));
void *xrealloc_debug PARAMS ((void *, size_t, const char *, int));
char *xstrdup_debug PARAMS ((const char *, const char *, int));
#endif /* DEBUG_MALLOC */
/* #### Find a better place for this. */
/* The log file to which Wget writes to after HUP. */
#define DEFAULT_LOGFILE "wget-log"
#define MD5_HASHLEN 16
/* Useful macros used across the code: */
/* Non-zero if X is exactly the one-character string "-".  X is
   evaluated more than once.  */
#define HYPHENP(x) ((x)[0] == '-' && (x)[1] == '\0')
/* Yields the smaller of X and Y (Y when they compare equal).  Each
   argument may be evaluated twice, so avoid side effects.  */
#define MINVAL(x, y) ((y) > (x) ? (x) : (y))
/* Map the ASCII hex digit X to its value in the range 0..15.
   Characters below 'A' are taken to be decimal digits; anything else
   is upper-cased and measured from 'A'.  X must satisfy isxdigit --
   the result for any other character is undefined.  X is evaluated
   more than once.  */
#define XDIGIT_TO_NUM(x) ((x) >= 'A' ? TOUPPER (x) - 'A' + 10 : (x) - '0')
/* Combine two ASCII hex digits, H1 (high nibble) and H2 (low nibble),
   into a number between 0 and 255.  Each digit is converted with
   XDIGIT_TO_NUM, so both must be valid hexadecimal digits.  */
#define X2DIGITS_TO_NUM(h1, h2) (16 * XDIGIT_TO_NUM (h1) + XDIGIT_TO_NUM (h2))
/* Convert a number X in the [0, 16) range to its ASCII hexadecimal
   digit.  The digits A-F are produced in upper case.  */
#define XNUM_TO_DIGIT(x) ("0123456789ABCDEF"[(x)])
/* Convert a number X in the [0, 16) range to its ASCII hexadecimal
   digit, like XNUM_TO_DIGIT, but with a-f in lower case.  */
#define XNUM_TO_digit(x) ("0123456789abcdef"[(x)])
/* Number of elements in ARRAY, which must be a true array (not a
   pointer) whose size is known at compile time.  For example:

     static char a[] = "foo";     -- countof(a) == 4 (for terminating \0)
     int a[5] = {1, 2};           -- countof(a) == 5
     char *a[] = {                -- countof(a) == 3
       "foo", "bar", "baz"
     }; */
#define countof(array) (sizeof (array) / sizeof ((array)[0]))
/* Allocate room for SIZE objects of type TYPE with alloca and yield
   the result as a TYPE pointer.  */
#define alloca_array(type, size) ((type *) alloca (sizeof (type) * (size)))
/* Copy the data delimited with BEG and END to alloca-allocated
storage, and zero-terminate it. Arguments are evaluated only once,
in the order BEG, END, PLACE.
PLACE must be an lvalue of type `char *'; it receives the address of
the copy. The length END - BEG is held in an int. */
#define BOUNDED_TO_ALLOCA(beg, end, place) do { \
const char *BTA_beg = (beg); \
int BTA_len = (end) - BTA_beg; \
char **BTA_dest = &(place); \
*BTA_dest = alloca (BTA_len + 1); \
memcpy (*BTA_dest, BTA_beg, BTA_len); \
(*BTA_dest)[BTA_len] = '\0'; \
} while (0)
/* Non-zero if the END - BEG characters starting at BEG are exactly
   the contents of STRING_LITERAL (without its terminating NUL).  The
   comparison is case-sensitive.  STRING_LITERAL must be an actual
   literal or array, because its length is taken with sizeof.  */
#define BOUNDED_EQUAL(beg, end, string_literal)				\
  ((end) - (beg) == sizeof (string_literal) - 1				\
   && memcmp ((beg), (string_literal), sizeof (string_literal) - 1) == 0)
/* Non-zero if the END - BEG characters starting at BEG match the
   contents of STRING_LITERAL (without its terminating NUL), ignoring
   case.  STRING_LITERAL must be an actual literal or array, because
   its length is taken with sizeof.  */
#define BOUNDED_EQUAL_NO_CASE(beg, end, string_literal)			\
  ((end) - (beg) == sizeof (string_literal) - 1				\
   && strncasecmp ((beg), (string_literal),				\
		   sizeof (string_literal) - 1) == 0)
/* Copy the string STR into alloca-allocated storage and store the
address of the copy in PTR. Both PTR and STR are evaluated twice, so
neither should have side effects.
Note that this much more elegant definition cannot be used:
#define STRDUP_ALLOCA(str) (strcpy ((char *)alloca (strlen (str) + 1), str))
This is because some compilers don't handle alloca() as argument to
function correctly. Gcc under Intel has been reported to offend in
this case. */
#define STRDUP_ALLOCA(ptr, str) do { \
(ptr) = (char *)alloca (strlen (str) + 1); \
strcpy ((ptr), (str)); \
} while (0)
/* Generally useful if you want to avoid arbitrary size limits but
don't need a full dynamic array. Assumes that BASEVAR points to a
malloced array of TYPE objects (or possibly a NULL pointer, if
SIZEVAR is 0), with the total size stored in SIZEVAR. This macro
will realloc BASEVAR as necessary so that it can hold at least
NEEDED_SIZE objects. The reallocing is done by doubling, which
ensures constant amortized time per element.
NEEDED_SIZE is evaluated once. BASEVAR and SIZEVAR are evaluated
several times and are assigned to, so they must be plain lvalues.
The new capacity is never less than 32 elements, and SIZEVAR is
updated to it. When SIZEVAR is already large enough nothing is
reallocated. */
#define DO_REALLOC(basevar, sizevar, needed_size, type) do \
{ \
/* Avoid side-effectualness. */ \
long do_realloc_needed_size = (needed_size); \
long do_realloc_newsize = 0; \
while ((sizevar) < (do_realloc_needed_size)) { \
do_realloc_newsize = 2*(sizevar); \
if (do_realloc_newsize < 32) \
do_realloc_newsize = 32; \
(sizevar) = do_realloc_newsize; \
} \
if (do_realloc_newsize) \
basevar = (type *)xrealloc (basevar, do_realloc_newsize * sizeof (type)); \
} while (0)
/* Free FOO if it is non-NULL. FOO is evaluated twice. The pointer
itself is not reset afterwards.
NOTE(review): the NULL guard presumably matters because xfree is not
always plain free (see DEBUG_MALLOC) -- confirm before removing. */
#define FREE_MAYBE(foo) do { if (foo) xfree (foo); } while (0)
extern const char *exec_name;
/* Document type ("dt") flags */
enum
{
TEXTHTML = 0x0001, /* document is of type text/html
or application/xhtml+xml */
RETROKF = 0x0002, /* retrieval was OK */
HEAD_ONLY = 0x0004, /* only send the HEAD request */
SEND_NOCACHE = 0x0008, /* send Pragma: no-cache directive */
ACCEPTRANGES = 0x0010, /* Accept-ranges header was found */
ADDED_HTML_EXTENSION = 0x0020 /* added ".html" extension due to -E */
};
/* Universal error type -- used almost everywhere. Error reporting of
this detail is not generally used or needed and should be
simplified.
The enumerators have no explicit values, so their numeric values
follow declaration order starting at 0 (NOCONERROR); inserting a name
in the middle renumbers everything after it. */
typedef enum
{
NOCONERROR, HOSTERR, CONSOCKERR, CONERROR, CONSSLERR,
CONREFUSED, NEWLOCATION, NOTENOUGHMEM, CONPORTERR,
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
FTPINVPASV, FTPNOPASV,
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,
URLBADPATTERN, FILEBADFILE, RANGEERR, RETRBADPATTERN,
RETNOTSUP, ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED,
QUOTEXC, WRITEFAILED,
SSLERRCERTFILE,SSLERRCERTKEY,SSLERRCTXCREATE
} uerr_t;
/* Truth-value type and its two constants. FALSE and TRUE are defined
only if no earlier header has defined them already. */
typedef unsigned char boolean;
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
/* So we can say strcmp(a, b) == EQ rather than strcmp(a, b) == 0 or
the really awful !strcmp(a, b). */
#define EQ 0
/* For most options, 0 means no limits, but with -p in the picture, that causes
   a problem on the maximum recursion depth variable.  To retain backwards
   compatibility we allow users to consider "0" to be synonymous with "inf" for
   -l, but internally infinite recursion is specified by -1 and 0 means to only
   retrieve the requisites of a single document.

   The value is parenthesized so that the negative constant expands as
   a single operand wherever the macro is used.  */
#define INFINITE_RECURSION (-1)
/* Translate the error code X into a uerr_t: CONREFUSED when X is
ECONNREFUSED and opt.retry_connrefused is not set, CONERROR in every
other case. */
#define CONNECT_ERROR(x) ((x) == ECONNREFUSED && !opt.retry_connrefused \
? CONREFUSED : CONERROR)
#endif /* WGET_H */
--- NEW FILE: init.h ---
/* Declarations for init.c.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef INIT_H
#define INIT_H
/* Entry points exported by init.c. run_command takes one string and
setoptval two; home_dir returns a string.
NOTE(review): the exact contract of each function (ownership of the
home_dir result, what cleanup releases) is not visible from this
header -- see init.c. */
void initialize PARAMS ((void));
void run_command PARAMS ((const char *));
void setoptval PARAMS ((const char *, const char *));
char *home_dir PARAMS ((void));
void cleanup PARAMS ((void));
#endif /* INIT_H */
--- NEW FILE: gen-md5.h ---
/* General MD5 header file.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef GEN_MD5_H
#define GEN_MD5_H
/* Opaque MD5 context. Its layout is not visible to users of this
header, so its size has to be obtained from gen_md5_context_size. */
typedef struct gen_md5_context gen_md5_context;
/* Use a forward declaration so we don't have to include any of the
includes. */
struct gen_md5_context;
/* Declare a local variable VAR_NAME pointing to alloca-allocated
storage of gen_md5_context_size () bytes. Being a declaration, it
must appear where declarations are allowed. */
#define ALLOCA_MD5_CONTEXT(var_name) \
gen_md5_context *var_name = \
(gen_md5_context *) alloca (gen_md5_context_size ())
int gen_md5_context_size PARAMS ((void));
void gen_md5_init PARAMS ((gen_md5_context *));
void gen_md5_update PARAMS ((const unsigned char *, int, gen_md5_context *));
void gen_md5_finish PARAMS ((gen_md5_context *, unsigned char *));
#endif /* GEN_MD5_H */
--- NEW FILE: html-url.c ---
/* Collect URLs from HTML source.
Copyright (C) 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <stdlib.h>
#include <errno.h>
#include <assert.h>
#include "wget.h"
#include "html-parse.h"
#include "url.h"
#include "utils.h"
#include "hash.h"
#include "convert.h"
#ifndef errno
extern int errno;
#endif
/* Forward declaration; the structure is defined further down. */
struct map_context;
/* Signature shared by all tag handlers: the tag id (one of the TAG_*
constants), the parsed tag, and the context of the document being
processed. */
typedef void (*tag_handler_t) PARAMS ((int, struct taginfo *,
struct map_context *));
/* Declare FUN as a static function with the tag_handler_t
signature. */
#define DECLARE_TAG_HANDLER(fun) \
static void fun PARAMS ((int, struct taginfo *, struct map_context *))
DECLARE_TAG_HANDLER (tag_find_urls);
DECLARE_TAG_HANDLER (tag_handle_base);
DECLARE_TAG_HANDLER (tag_handle_form);
DECLARE_TAG_HANDLER (tag_handle_link);
DECLARE_TAG_HANDLER (tag_handle_meta);
/* Numeric ids of the HTML tags this file knows how to handle. They
are used as the `tagid' field of the known_tags and
tag_url_attributes tables and are passed to the tag handlers. */
enum {
TAG_A,
TAG_APPLET,
TAG_AREA,
TAG_BASE,
TAG_BGSOUND,
TAG_BODY,
TAG_EMBED,
TAG_FIG,
TAG_FORM,
TAG_FRAME,
TAG_IFRAME,
TAG_IMG,
TAG_INPUT,
TAG_LAYER,
TAG_LINK,
TAG_META,
TAG_OVERLAY,
TAG_SCRIPT,
TAG_TABLE,
TAG_TD,
TAG_TH
};
/* The list of known tags and functions used for handling them. Most
tags are simply harvested for URLs.
Each entry gives the tag id, the tag name as it appears in HTML, and
the handler to call for it. init_interesting enters every entry into
the interesting_tags table under its name. */
static struct known_tag {
int tagid;
const char *name;
tag_handler_t handler;
} known_tags[] = {
{ TAG_A, "a", tag_find_urls },
{ TAG_APPLET, "applet", tag_find_urls },
{ TAG_AREA, "area", tag_find_urls },
{ TAG_BASE, "base", tag_handle_base },
{ TAG_BGSOUND, "bgsound", tag_find_urls },
{ TAG_BODY, "body", tag_find_urls },
{ TAG_EMBED, "embed", tag_find_urls },
{ TAG_FIG, "fig", tag_find_urls },
{ TAG_FORM, "form", tag_handle_form },
{ TAG_FRAME, "frame", tag_find_urls },
{ TAG_IFRAME, "iframe", tag_find_urls },
{ TAG_IMG, "img", tag_find_urls },
{ TAG_INPUT, "input", tag_find_urls },
{ TAG_LAYER, "layer", tag_find_urls },
{ TAG_LINK, "link", tag_handle_link },
{ TAG_META, "meta", tag_handle_meta },
{ TAG_OVERLAY, "overlay", tag_find_urls },
{ TAG_SCRIPT, "script", tag_find_urls },
{ TAG_TABLE, "table", tag_find_urls },
{ TAG_TD, "td", tag_find_urls },
{ TAG_TH, "th", tag_find_urls }
};
/* tag_url_attributes documents which attributes of which tags contain
URLs to harvest. It is used by tag_find_urls. */
/* Defines for the FLAGS. */
/* The link is "inline", i.e. needs to be retrieved for this document
to be correctly rendered. Inline links include inlined images,
stylesheets, children frames, etc. */
#define ATTR_INLINE 1
/* The link is expected to yield HTML contents. It's important not to
try to follow HTML obtained by following e.g. <img src="...">
regardless of content-type. Doing this causes infinite loops for
"images" that return non-404 error pages with links to the same
image. */
#define ATTR_HTML 2
/* For tags handled by tag_find_urls: attributes that contain URLs to
download.
Each entry names a tag id, an attribute of that tag, and a
combination of the ATTR_INLINE and ATTR_HTML flags. All entries for
one tag must be adjacent: tag_find_urls finds the first entry with a
matching tagid and scans forward only while the tagid stays the
same. */
static struct {
int tagid;
const char *attr_name;
int flags;
} tag_url_attributes[] = {
{ TAG_A, "href", ATTR_HTML },
{ TAG_APPLET, "code", ATTR_INLINE },
{ TAG_AREA, "href", ATTR_HTML },
{ TAG_BGSOUND, "src", ATTR_INLINE },
{ TAG_BODY, "background", ATTR_INLINE },
{ TAG_EMBED, "href", ATTR_HTML },
{ TAG_EMBED, "src", ATTR_INLINE | ATTR_HTML },
{ TAG_FIG, "src", ATTR_INLINE },
{ TAG_FRAME, "src", ATTR_INLINE | ATTR_HTML },
{ TAG_IFRAME, "src", ATTR_INLINE | ATTR_HTML },
{ TAG_IMG, "href", ATTR_INLINE },
{ TAG_IMG, "lowsrc", ATTR_INLINE },
{ TAG_IMG, "src", ATTR_INLINE },
{ TAG_INPUT, "src", ATTR_INLINE },
{ TAG_LAYER, "src", ATTR_INLINE | ATTR_HTML },
{ TAG_OVERLAY, "src", ATTR_INLINE | ATTR_HTML },
{ TAG_SCRIPT, "src", ATTR_INLINE },
{ TAG_TABLE, "background", ATTR_INLINE },
{ TAG_TD, "background", ATTR_INLINE },
{ TAG_TH, "background", ATTR_INLINE }
};
/* The lists of interesting tags and attributes are built dynamically,
from the information above. However, some places in the code refer
to the attributes not mentioned here. We add them manually. */
static const char *additional_attributes[] = {
"rel", /* used by tag_handle_link */
"http-equiv", /* used by tag_handle_meta */
"name", /* used by tag_handle_meta */
"content", /* used by tag_handle_meta */
"action" /* used by tag_handle_form */
};
/* Case-insensitive tables built by init_interesting.
interesting_tags maps a tag name to its known_tags entry;
interesting_attributes is a set of attribute names. */
struct hash_table *interesting_tags;
struct hash_table *interesting_attributes;
/* Build the global tables interesting_tags and interesting_attributes,
   which the HTML parser uses to know which tags and attributes we care
   about.  This is meant to be done only once, for performance
   reasons.

   The tag table starts out with every entry of known_tags and is then
   narrowed according to the user's --ignore-tags and --follow-tags
   preferences.  */
static void
init_interesting (void)
{
  size_t idx;
  char **tagp;

  /* Begin with all the tags we know how to handle, each mapped to its
     entry in known_tags.  */
  interesting_tags = make_nocase_string_hash_table (countof (known_tags));
  for (idx = 0; idx < countof (known_tags); idx++)
    hash_table_put (interesting_tags, known_tags[idx].name, &known_tags[idx]);

  /* Drop whatever was excluded through --ignore-tags.  */
  if (opt.ignore_tags)
    for (tagp = opt.ignore_tags; *tagp; tagp++)
      hash_table_remove (interesting_tags, *tagp);

  /* With --follow-tags, keep only the listed tags: build the
     intersection of that list with what is still in the table and use
     it as the new interesting_tags.  Unknown names are skipped.  */
  if (opt.follow_tags)
    {
      struct hash_table *kept = make_nocase_string_hash_table (0);
      for (tagp = opt.follow_tags; *tagp; tagp++)
	{
	  struct known_tag *entry = hash_table_get (interesting_tags, *tagp);
	  if (entry)
	    hash_table_put (kept, *tagp, entry);
	}
      hash_table_destroy (interesting_tags);
      interesting_tags = kept;
    }

  /* Collect the attribute names: first the manually listed ones, then
     those that carry URLs.  */
  interesting_attributes = make_nocase_string_hash_table (10);
  for (idx = 0; idx < countof (additional_attributes); idx++)
    string_set_add (interesting_attributes, additional_attributes[idx]);
  for (idx = 0; idx < countof (tag_url_attributes); idx++)
    string_set_add (interesting_attributes, tag_url_attributes[idx].attr_name);
}
/* Look up the attribute called NAME (compared case-insensitively) in
   the taginfo TAG and return its value, or NULL if TAG has no such
   attribute.  The first match wins.  If ATTRIND is non-NULL, the
   index of the matching attribute is stored there; it is left alone
   when nothing matches.  */
static char *
find_attr (struct taginfo *tag, const char *name, int *attrind)
{
  int idx = 0;

  while (idx < tag->nattrs)
    {
      if (strcasecmp (tag->attrs[idx].name, name) == 0)
	{
	  if (attrind != NULL)
	    *attrind = idx;
	  return tag->attrs[idx].value;
	}
      ++idx;
    }
  return NULL;
}
/* State shared by the tag handlers while the links of one document
are being collected. */
struct map_context {
char *text; /* HTML text. */
char *base; /* Base URI of the document, possibly
changed through <base href=...>. */
const char *parent_base; /* Base of the current document. */
const char *document_file; /* File name of this document. */
int nofollow; /* whether NOFOLLOW was specified in a
<meta name=robots> tag. */
struct urlpos *head, *tail; /* List of URLs that is being
built. */
};
/* Resolve LINK_URI against the current document base and append the
   result to the urlpos list kept in CTX.  TAG and ATTRIND identify
   the attribute the link came from, so that its position and size in
   the document text can be recorded.

   The base is CTX->base if set, CTX->parent_base otherwise.  Without
   any base, LINK_URI is used as is, which only works if it carries a
   scheme.

   Returns the new list element, or NULL if the link cannot be
   resolved or does not parse (in which case nothing is appended).  */
static struct urlpos *
append_url (const char *link_uri,
	    struct taginfo *tag, int attrind, struct map_context *ctx)
{
  int has_scheme = url_has_scheme (link_uri);
  const char *base = ctx->base ? ctx->base : ctx->parent_base;
  struct url *parsed;
  struct urlpos *entry;

  if (base)
    {
      /* Merge BASE with LINK_URI, and parse the result so that it is
	 also canonicalized, i.e. "../" gets resolved.  */
      char *merged = uri_merge (base, link_uri);
      DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
	       ctx->document_file, base, link_uri, merged));
      parsed = url_parse (merged, NULL);
      if (!parsed)
	DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
		 ctx->document_file, merged));
      xfree (merged);
      if (!parsed)
	return NULL;
    }
  else
    {
      DEBUGP (("%s: no base, merge will use \"%s\".\n",
	       ctx->document_file, link_uri));
      if (!has_scheme)
	{
	  /* No base URL and no location in the link itself -- we have
	     to give up.  Since this can only happen when using
	     `--force-html -i', print a warning.  */
	  logprintf (LOG_NOTQUIET,
		     _("%s: Cannot resolve incomplete link %s.\n"),
		     ctx->document_file, link_uri);
	  return NULL;
	}
      parsed = url_parse (link_uri, NULL);
      if (!parsed)
	{
	  DEBUGP (("%s: link \"%s\" doesn't parse.\n",
		   ctx->document_file, link_uri));
	  return NULL;
	}
    }

  DEBUGP (("appending \"%s\" to urlpos.\n", parsed->url));

  entry = (struct urlpos *) xmalloc (sizeof (*entry));
  memset (entry, 0, sizeof (*entry));
  entry->next = NULL;
  entry->url = parsed;
  entry->pos = tag->attrs[attrind].value_raw_beginning - ctx->text;
  entry->size = tag->attrs[attrind].value_raw_size;

  /* A link with a scheme is complete.  One without a scheme is
     relative unless its name starts with `/'.  */
  if (has_scheme)
    entry->link_complete_p = 1;
  else if (*link_uri != '/')
    entry->link_relative_p = 1;

  /* Hook the element onto the end of the list.  */
  if (!ctx->tail)
    ctx->head = entry;
  else
    ctx->tail->next = entry;
  ctx->tail = entry;

  return entry;
}
/* All the tag_* functions are called from collect_tags_mapper, as
specified by KNOWN_TAGS. */
/* Default tag handler: append a URL for every attribute of TAG that
   tag_url_attributes lists for TAGID, and mark each appended URL as
   inline and/or expected-HTML according to the flags of the matching
   table entry.  TAGID must have at least one entry in the table.  */
static void
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  const int table_len = countof (tag_url_attributes);
  int first = -1;
  int pos, attrind;

  /* Find where in tag_url_attributes the entries for our tag
     begin.  */
  for (pos = 0; first == -1 && pos < table_len; pos++)
    if (tag_url_attributes[pos].tagid == tagid)
      first = pos;
  assert (first != -1);

  /* Walk the attributes actually present in the tag, e.g. "src" and
     "lowsrc" in

       <img src="foo.png" lowsrc="bar.png">

     The tag's attributes drive the outer loop so that URLs are
     appended in the order in which they appear in the page, which is
     required when converting links.  */
  for (attrind = 0; attrind < tag->nattrs; attrind++)
    {
      char *link = tag->attrs[attrind].value;

      /* The nested scan is cheap: both loops run over a handful of
	 items, the worst case being IMG with its three attributes.  */
      for (pos = first;
	   pos < table_len && tag_url_attributes[pos].tagid == tagid;
	   pos++)
	{
	  struct urlpos *up;

	  if (strcasecmp (tag->attrs[attrind].name,
			  tag_url_attributes[pos].attr_name) != 0)
	    continue;

	  up = append_url (link, tag, attrind, ctx);
	  if (!up)
	    continue;

	  if (tag_url_attributes[pos].flags & ATTR_INLINE)
	    up->link_inline_p = 1;
	  if (tag_url_attributes[pos].flags & ATTR_HTML)
	    up->link_expect_html = 1;
	}
    }
}
/* Handler for <base href=...>.  The href is recorded as a URL entry
   flagged as the document base (and excluded from downloading), and
   CTX->base is replaced by the new base, merged with CTX->parent_base
   when one is available.  Tags without an href, or whose href cannot
   be appended, are ignored.  */
static void
tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int href_index;
  char *href = find_attr (tag, "href", &href_index);
  struct urlpos *entry;

  if (href == NULL)
    return;

  /* Note that the entry is appended while the previous base is still
     in effect; CTX->base is only switched afterwards.  */
  entry = append_url (href, tag, href_index, ctx);
  if (entry == NULL)
    return;

  entry->ignore_when_downloading = 1;
  entry->link_base_p = 1;

  /* Drop the old base, then install a freshly allocated new one.  */
  if (ctx->base)
    xfree (ctx->base);
  ctx->base = (ctx->parent_base
               ? uri_merge (ctx->parent_base, href)
               : xstrdup (href));
}
/* Handler for <form action=...>.  The action URL is recorded so that
   it can be converted, but it is flagged so that it is never
   downloaded.  */
static void
tag_handle_form (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int action_index;
  char *action = find_attr (tag, "action", &action_index);
  struct urlpos *entry;

  if (!action)
    return;

  entry = append_url (action, tag, action_index, ctx);
  if (entry)
    entry->ignore_when_downloading = 1;
}
/* Handler for the LINK tag.  Its href is always recorded; whether the
   link counts as an inline page requisite depends on the REL
   attribute.  Links are treated as external unless REL is one of the
   values known to name a requisite:
     <link rel="stylesheet" href="...">
     <link rel="shortcut icon" href="...">  */
static void
tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  int href_index;
  char *href = find_attr (tag, "href", &href_index);
  struct urlpos *entry;
  char *rel;

  if (!href)
    return;

  entry = append_url (href, tag, href_index, ctx);
  if (!entry)
    return;

  rel = find_attr (tag, "rel", NULL);
  if (!rel)
    return;

  if (strcasecmp (rel, "stylesheet") == 0
      || strcasecmp (rel, "shortcut icon") == 0)
    entry->link_inline_p = 1;
}
/* Handle the META tag.  Two forms are of interest:

   - <meta http-equiv=Refresh content="NUMBER; URL=...">: the URL is
     appended as a link, marked as a refresh with the parsed timeout
     and expected to be HTML.

   - <meta name="robots" content="...">: CTX->nofollow is set when the
     content is exactly "none", or when one of its comma-separated
     directives is "nofollow" (compared case-insensitively, ignoring
     whitespace around the directive).

   Any other META tag, or one with a missing or malformed content
   attribute, is ignored.  */
static void
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
{
  char *name = find_attr (tag, "name", NULL);
  char *http_equiv = find_attr (tag, "http-equiv", NULL);

  if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
    {
      /* Some pages use a META tag to specify that the page be
         refreshed by a new page after a given number of seconds.  The
         general format for this is:
           <meta http-equiv=Refresh content="NUMBER; URL=index2.html">
         So we just need to skip past the "NUMBER; URL=" garbage to
         get to the URL.  */
      struct urlpos *entry;
      int attrind;
      int timeout = 0;
      char *p;
      char *refresh = find_attr (tag, "content", &attrind);
      if (!refresh)
        return;

      for (p = refresh; ISDIGIT (*p); p++)
        timeout = 10 * timeout + *p - '0';
      if (*p++ != ';')
        return;

      while (ISSPACE (*p))
        ++p;
      /* The && chain stops at the first mismatch, so the terminating
         NUL is never read past.  */
      if (!(   TOUPPER (*p)       == 'U'
            && TOUPPER (*(p + 1)) == 'R'
            && TOUPPER (*(p + 2)) == 'L'
            &&          *(p + 3)  == '='))
        return;
      p += 4;
      while (ISSPACE (*p))
        ++p;

      entry = append_url (p, tag, attrind, ctx);
      if (entry)
        {
          entry->link_refresh_p = 1;
          entry->refresh_timeout = timeout;
          entry->link_expect_html = 1;
        }
    }
  else if (name && 0 == strcasecmp (name, "robots"))
    {
      /* Handle stuff like:
           <meta name="robots" content="index,nofollow">  */
      char *content = find_attr (tag, "content", NULL);
      if (!content)
        return;
      if (!strcasecmp (content, "none"))
        ctx->nofollow = 1;
      else
        {
          while (*content)
            {
              char *token_end;  /* one past the last char of the token */
              char *next;       /* where the following token starts */

              /* Ignore whitespace in front of the directive, as in
                 "index, nofollow".  */
              while (ISSPACE (*content))
                ++content;

              /* The directive runs up to the next ',' or the end of
                 the string.  The comma itself is NOT part of it.  */
              next = strchr (content, ',');
              if (next)
                {
                  token_end = next;
                  ++next;
                }
              else
                {
                  token_end = content + strlen (content);
                  next = token_end;
                }

              /* Ignore whitespace behind the directive.  */
              while (token_end > content && ISSPACE (*(token_end - 1)))
                --token_end;

              /* Require a full-length match: comparing only
                 TOKEN_END - CONTENT characters would let a prefix
                 such as "no" pass for "nofollow".  */
              if (token_end - content == 8
                  && !strncasecmp (content, "nofollow", 8))
                ctx->nofollow = 1;

              content = next;
            }
        }
    }
}
/* Callback handed to map_html_tags: look TAG up by name in
   interesting_tags and invoke the handler registered for it, passing
   along the map context received through ARG.  See known_tags[] for
   the handler definitions.  */
static void
collect_tags_mapper (struct taginfo *tag, void *arg)
{
  struct map_context *context = (struct map_context *)arg;
  struct known_tag *t;

  /* The lookup must succeed: map_html_tags only reports tags that are
     present in interesting_tags.  */
  t = hash_table_get (interesting_tags, tag->name);
  assert (t != NULL);

  (*t->handler) (t->tagid, tag, context);
}
/* Parse the HTML document stored in FILE and return the list of URLs
   it references.  Relative links are merged with URL or, when URL is
   NULL, with opt.base_href; a <base href=...> found in the document
   is honoured.  If META_DISALLOW_FOLLOW is non-NULL, it receives the
   "nofollow" state collected from the document's robots META tag.
   Returns NULL (after logging the error) if FILE cannot be read.  */
struct urlpos *
get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
{
  struct map_context ctx;
  int parse_flags;
  struct file_memory *fm = read_file (file);

  if (fm == NULL)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %ld).\n", file, fm->length));

  /* Start with an empty URL list and no document-supplied base.  */
  ctx.text = fm->content;
  ctx.head = NULL;
  ctx.tail = NULL;
  ctx.base = NULL;
  if (url)
    ctx.parent_base = url;
  else
    ctx.parent_base = opt.base_href;
  ctx.document_file = file;
  ctx.nofollow = 0;

  /* The tag tables are built on first use.  */
  if (!interesting_tags)
    init_interesting ();

  /* MHT_TRIM_VALUES copes with buggy HTML generators that emit
     <a href=" foo"> instead of <a href="foo"> (Netscape ignores the
     spaces as well).  Whoever really means a space should write
     &#32; or %20.  */
  parse_flags = MHT_TRIM_VALUES;
  if (opt.strict_comments)
    parse_flags |= MHT_STRICT_COMMENTS;

  map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx,
                 parse_flags, interesting_tags, interesting_attributes);

  DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
  if (meta_disallow_follow != NULL)
    *meta_disallow_follow = ctx.nofollow;

  FREE_MAYBE (ctx.base);
  read_file_free (fm);
  return ctx.head;
}
/* Read FILE as a plain list of URLs, one per line, and return them as
   a urlpos list in file order.  Blank lines are skipped, surrounding
   whitespace is stripped, and each URL is merged with opt.base_href
   when that is set.  Lines that do not parse as URLs are reported and
   skipped.  Returns NULL (after logging the error) if FILE cannot be
   read.  This is not HTML-specific, but it is a close relative of
   get_urls_html, so it lives here.  */
struct urlpos *
get_urls_file (const char *file)
{
  struct urlpos *head = NULL;
  struct urlpos *tail = NULL;
  const char *cursor, *limit;
  struct file_memory *fm = read_file (file);

  if (!fm)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  DEBUGP (("Loaded %s (size %ld).\n", file, fm->length));

  limit = fm->content + fm->length;
  for (cursor = fm->content; cursor < limit; )
    {
      int up_error_code;
      char *url_text;
      struct url *url;
      struct urlpos *entry;
      const char *beg = cursor;
      const char *end = memchr (cursor, '\n', limit - cursor);

      /* The line extends through its newline, or to the end of the
         data when the last line is unterminated.  */
      end = end ? end + 1 : limit;
      cursor = end;

      /* Trim whitespace on both sides; [beg, end) is then the URL.  */
      for (; beg < end && ISSPACE (*beg); ++beg)
        ;
      for (; end > beg && ISSPACE (*(end - 1)); --end)
        ;
      if (beg == end)
        continue;

      /* A zero-terminated copy is needed; it is heap-allocated because
         alloca is not usable inside a loop.  */
      url_text = strdupdelim (beg, end);
      if (opt.base_href)
        {
          char *merged = uri_merge (opt.base_href, url_text);
          xfree (url_text);
          url_text = merged;
        }

      url = url_parse (url_text, &up_error_code);
      if (!url)
        logprintf (LOG_NOTQUIET, "%s: Invalid URL %s: %s\n",
                   file, url_text, url_error (up_error_code));
      xfree (url_text);
      if (!url)
        continue;

      entry = (struct urlpos *)xmalloc (sizeof (struct urlpos));
      memset (entry, 0, sizeof (*entry));
      entry->next = NULL;
      entry->url = url;

      /* Append to the tail of the list.  */
      if (tail)
        tail->next = entry;
      else
        head = entry;
      tail = entry;
    }

  read_file_free (fm);
  return head;
}
/* Release the file-level tables interesting_tags and
   interesting_attributes through FREE_MAYBE (a macro defined outside
   this file; presumably it frees its argument only when non-NULL --
   TODO confirm).

   NOTE(review): interesting_tags is queried with hash_table_get
   elsewhere in this file, so it appears to be a hash table; confirm
   that FREE_MAYBE releases everything the table owns rather than just
   its top-level object.  */
void
cleanup_html_url (void)
{
FREE_MAYBE (interesting_tags);
FREE_MAYBE (interesting_attributes);
}
--- NEW FILE: getopt.c ---
/* Getopt for GNU.
NOTE: The canonical source of this file is maintained with the GNU
C Library. Bugs can be reported to bug-glibc@gnu.org.
Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
[...1037 lines suppressed...]
case '?':
break;
default:
printf ("?? getopt returned character code 0%o ??\n", c);
}
}
if (optind < argc)
{
printf ("non-option ARGV-elements: ");
while (optind < argc)
printf ("%s ", argv[optind++]);
printf ("\n");
}
exit (0);
}
#endif /* TEST */
--- NEW FILE: html-parse.c ---
/* HTML parser for Wget.
Copyright (C) 1998, 2000, 2003 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1008 lines suppressed...]
int size = 256;
char *x = (char *)xmalloc (size);
int length = 0;
int read_count;
int tag_counter = 0;
while ((read_count = fread (x + length, 1, size - length, stdin)))
{
length += read_count;
size <<= 1;
x = (char *)xrealloc (x, size);
}
map_html_tags (x, length, test_mapper, &tag_counter, 0, NULL, NULL);
printf ("TAGS: %d\n", tag_counter);
printf ("Tag backouts: %d\n", tag_backout_count);
printf ("Comment backouts: %d\n", comment_backout_count);
return 0;
}
#endif /* STANDALONE */
--- NEW FILE: progress.h ---
/* Download progress.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
\(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef PROGRESS_H
#define PROGRESS_H
/* Public interface of the download progress module.  Every parameter
   list is wrapped in PARAMS, a macro defined outside this header.
   The notes below are inferred from the declarations alone; confirm
   them against the implementation before relying on them.  */

/* Presumably reports whether the given name selects a known progress
   implementation -- TODO confirm.  */
int valid_progress_implementation_p PARAMS ((const char *));
/* Presumably selects the progress implementation by name -- TODO
   confirm.  */
void set_progress_implementation PARAMS ((const char *));
/* NOTE(review): takes no arguments; exact effect not visible here.  */
void progress_schedule_redirect PARAMS ((void));
/* Returns an opaque handle (void *) that progress_update and
   progress_finish take as their first argument.  The meaning of the
   two long arguments is not visible here -- TODO confirm.  */
void *progress_create PARAMS ((long, long));
/* Takes the opaque handle plus a long and a double -- meaning of the
   latter two to be confirmed.  */
void progress_update PARAMS ((void *, long, double));
/* Takes the opaque handle plus a double -- meaning to be confirmed.  */
void progress_finish PARAMS ((void *, double));
/* Signal-handler-shaped function (RETSIGTYPE return, int argument);
   presumably installed for SIGWINCH -- TODO confirm.  */
RETSIGTYPE progress_handle_sigwinch PARAMS ((int));
#endif /* PROGRESS_H */
--- NEW FILE: getopt.h ---
/* Declarations for getopt.
Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef _GETOPT_H
#define _GETOPT_H 1
#ifdef __cplusplus
extern "C" {
#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
the argument value is returned here.
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
extern char *optarg;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
and for communication between successive calls to `getopt'.
On entry to `getopt', zero means this is the first call; initialize.
When `getopt' returns EOF, this is the index of the first of the
non-option elements that the caller should itself scan.
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
extern int optind;
/* Callers store zero here to inhibit the error message `getopt' prints
for unrecognized options. */
extern int opterr;
/* Set to an option character which was unrecognized. */
extern int optopt;
/* Describe the long-named options requested by the application.
The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
of `struct option' terminated by an element containing a name which is
zero.
The field `has_arg' is:
no_argument (or 0) if the option does not take an argument,
required_argument (or 1) if the option requires an argument,
optional_argument (or 2) if the option takes an optional argument.
If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
To have a long-named option do something other than set an `int' to
a compiled-in constant, such as set a value from `optarg', set the
option's `flag' field to zero and its `val' field to a nonzero
value (the equivalent single-letter option character, if there is
one). For long options that have a zero `flag' field, `getopt'
returns the contents of the `val' field. */
/* One entry of the long-option table passed to getopt_long and
   getopt_long_only.  The table is terminated by an entry whose name
   is zero.  */
struct option
{
/* The long option's name.  It is const-qualified only when compiling
   as standard C.  */
#if __STDC__
const char *name;
#else
char *name;
#endif
/* has_arg can't be an enum because some compilers complain about
type mismatches in all the code that assumes it is an int. */
/* One of no_argument (0), required_argument (1) or
   optional_argument (2).  */
int has_arg;
/* If not NULL, points to a variable that is set to `val' when the
   option is found and left unchanged otherwise.  */
int *flag;
/* Value stored through `flag', or, when `flag' is zero, the value
   `getopt' returns for this option.  */
int val;
};
/* Names for the values of the `has_arg' field of `struct option'. */
#define no_argument 0
#define required_argument 1
#define optional_argument 2
/* Entry points.  Full prototypes are given only when compiling as
   standard C; otherwise the functions are declared without parameter
   information.  */
#if __STDC__
#if defined(__GNU_LIBRARY__)
/* Many other libraries have conflicting prototypes for getopt, with
differences in the consts, in stdlib.h. To avoid compilation
errors, only prototype getopt for the GNU C library. */
extern int getopt (int argc, char *const *argv, const char *shortopts);
#else /* not __GNU_LIBRARY__ */
extern int getopt ();
#endif /* not __GNU_LIBRARY__ */
/* Long-option variants: LONGOPTS is the `struct option' table and
   LONGIND an int pointer (presumably receiving the index of the
   matched table entry -- TODO confirm).  */
extern int getopt_long (int argc, char *const *argv, const char *shortopts,
const struct option *longopts, int *longind);
extern int getopt_long_only (int argc, char *const *argv,
const char *shortopts,
const struct option *longopts, int *longind);
/* Internal only. Users should not call this directly. */
extern int _getopt_internal (int argc, char *const *argv,
const char *shortopts,
const struct option *longopts, int *longind,
int long_only);
#else /* not __STDC__ */
extern int getopt ();
extern int getopt_long ();
extern int getopt_long_only ();
extern int _getopt_internal ();
#endif /* not __STDC__ */
#ifdef __cplusplus
}
#endif
#endif /* _GETOPT_H */
--- NEW FILE: headers.c ---
/* Generic support for headers.
Copyright (C) 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif

#include "wget.h"
#include "connect.h"
#include "rbuf.h"
#include "headers.h"
/* This file contains the generic routines for work with headers.
Currently they are used only by HTTP in http.c, but they can be
used by anything that cares about RFC822-style headers.
Header is defined in RFC2068, as quoted below. Note that this
definition is not HTTP-specific -- it is virtually
indistinguishable from the one given in RFC822 or RFC1036.
message-header = field-name ":" [ field-value ] CRLF
field-name = token
field-value = *( field-content | LWS )
field-content = <the OCTETs making up the field-value
and consisting of either *TEXT or combinations
of token, tspecials, and quoted-string>
The public functions are header_get() and header_process(), which
see. */
/* Read one header from the read-buffer RBUF into a freshly allocated
   string stored in *HDR.

   A header may be folded over several lines: a line that starts with
   a space or a horizontal TAB continues the previous one.  Folding is
   not looked for when FLAGS contains HG_NO_CONTINUATIONS, nor after an
   empty line ("\n" or "\r\n").  A header terminated by a bare LF
   instead of CRLF is accepted.

   The buffer grows as needed, so the header may be arbitrarily long,
   and it need not contain a `:' -- an HTTP status line can be read
   the same way.  On success the header is zero-terminated with all
   trailing whitespace removed, and HG_OK is returned.  HG_EOF or
   HG_ERROR is returned when reading (or peeking for a continuation)
   hits end of input or fails; in that case *HDR still points to the
   allocated buffer, which is not zero-terminated.  */
int
header_get (struct rbuf *rbuf, char **hdr, enum header_get_flags flags)
{
  int pos = 0;
  int capacity = 80;

  *hdr = (char *)xmalloc (capacity);
  for (;;)
    {
      int status;

      /* Double the buffer once POS would fall outside of it.  */
      if (pos > capacity - 1)
        {
          capacity <<= 1;
          *hdr = (char *)xrealloc (*hdr, capacity);
        }

      status = RBUF_READCHAR (rbuf, *hdr + pos);
      if (status == 0)
        return HG_EOF;
      if (status != 1)
        return HG_ERROR;

      /* Anything but a newline is simply kept.  */
      if ((*hdr)[pos] != '\n')
        {
          ++pos;
          continue;
        }

      /* A newline ends the header unless the header is non-empty and
         the next line is a continuation.  */
      if (!(flags & HG_NO_CONTINUATIONS)
          && pos != 0
          && !(pos == 1 && (*hdr)[0] == '\r'))
        {
          char next;

          /* Peek at the character that follows, without consuming
             it.  */
          status = rbuf_peek (rbuf, &next);
          if (status == 0)
            return HG_EOF;
          if (status == -1)
            return HG_ERROR;
          /* HT or SP: the header goes on; keep the newline and read
             further.  */
          if (next == '\t' || next == ' ')
            {
              ++pos;
              continue;
            }
        }

      /* (*hdr)[pos] is the final newline.  Walk back over trailing
         whitespace and terminate the string there.  */
      while (pos > 0 && ISSPACE ((*hdr)[pos - 1]))
        --pos;
      (*hdr)[pos] = '\0';
      break;
    }

  DEBUGP (("%s\n", *hdr));
  return HG_OK;
}
/* If HEADER starts with NAME (compared case-insensitively)
   immediately followed by `:', skip the colon and any linear white
   space after it, call PROCFUN with the remaining header contents and
   ARG, and return PROCFUN's result.  Return 0 when HEADER does not
   match.  */
int
header_process (const char *header, const char *name,
                int (*procfun) (const char *, void *),
                void *arg)
{
  const char *value;

  /* Consume the common case-insensitive prefix of NAME and HEADER.  */
  for (; *name; ++name, ++header)
    if (TOLOWER (*name) != TOLOWER (*header))
      break;

  /* NAME must be exhausted, and the next header character must be
     the colon.  */
  if (*name != '\0')
    return 0;
  if (*header != ':')
    return 0;

  value = header + 1;
  value += skip_lws (value);
  return procfun (value, arg);
}
/* Helper functions for use with header_process(). */
/* Extract a non-negative decimal long integer from HEADER and store
   it in the long that CLOSURE points to.  The number may be followed
   by linear white space only.

   Returns 1 on success.  Returns 0, leaving *CLOSURE untouched, when
   HEADER does not start with a digit, when anything other than white
   space follows the digits, or when the value does not fit in a
   long.  */
int
header_extract_number (const char *header, void *closure)
{
  const char *p = header;
  long result = 0;

  for (; ISDIGIT (*p); p++)
    {
      int digit = *p - '0';
      /* The header comes from the network, so an arbitrarily long
         digit string is possible.  Reject it before the arithmetic
         below could overflow, which is undefined for signed types.  */
      if (result > (LONG_MAX - digit) / 10)
        return 0;
      result = 10 * result + digit;
    }

  /* Failure if no number present. */
  if (p == header)
    return 0;

  /* Skip trailing whitespace. */
  p += skip_lws (p);

  /* Indicate failure if trailing garbage is present. */
  if (*p)
    return 0;

  *(long *)closure = result;
  return 1;
}
/* header_process() helper: store a newly allocated copy of HEADER in
   the char pointer that CLOSURE points to.  Always returns 1.  */
int
header_strdup (const char *header, void *closure)
{
  char **destination = (char **)closure;

  *destination = xstrdup (header);
  return 1;
}
/* header_process() helper: record that the header was seen by storing
   1 in the int that CLOSURE points to.  HEADER itself is not
   examined.  Always returns 1.  */
int
header_exists (const char *header, void *closure)
{
  int *seen = (int *)closure;

  *seen = 1;
  return 1;
}
/* Count the linear white space (SP, HT, CR, LF) at the start of
   STRING.  Returns the number of characters the caller should skip,
   which is 0 when STRING does not begin with white space.  */
int
skip_lws (const char *string)
{
  int count = 0;

  for (;;)
    {
      switch (string[count])
        {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
          ++count;
          continue;
        default:
          return count;
        }
    }
}
--- NEW FILE: main.c ---
/* Command line parsing.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <sys/types.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_SIGNAL_H
# include <signal.h>
#endif
#ifdef HAVE_NLS
#ifdef HAVE_LOCALE_H
# include <locale.h>
#endif /* HAVE_LOCALE_H */
#endif /* HAVE_NLS */
#include <errno.h>
#ifndef errno
extern int errno;
#endif
#include "wget.h"
#include "utils.h"
#include "init.h"
#include "retr.h"
#include "recur.h"
#include "host.h"
#include "cookies.h"
#include "url.h"
#include "progress.h" /* for progress_handle_sigwinch */
#include "convert.h"
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
#endif
/* On GNU system this will include system-wide getopt.h. */
#include "getopt.h"
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '/'
#endif
/* The option settings object, defined here.  (Presumably filled in
   from the command line and startup files and read program-wide --
   TODO confirm.)  */
struct options opt;
/* Defined in other translation units.  version_string is printed by
   print_help below.  */
extern LARGE_INT total_downloaded_bytes;
extern char *version_string;
extern struct cookie_jar *wget_cookie_jar;
/* From log.c. */
void log_init PARAMS ((const char *, int));
void log_close PARAMS ((void));
void log_request_redirect_output PARAMS ((const char *));
/* File-local, signal-handler-shaped function (RETSIGTYPE return, int
   argument); its definition is not part of this section.  */
static RETSIGTYPE redirect_output_signal PARAMS ((int));
/* Name shown in the usage line printed by print_usage; presumably the
   name the program was invoked under -- TODO confirm.  */
const char *exec_name;
/* Set up internationalization: choose the locale with setlocale()
   and bind the "wget" message domain with bindtextdomain() and
   textdomain().  The function body is empty when HAVE_NLS is not
   defined.  */
static void
i18n_initialize (void)
{
#ifdef HAVE_NLS
  /* HAVE_NLS is taken to imply that all three functions used below
     exist.  */

  /* Locale selection.  LC_ALL is avoided whenever the finer-grained
     LC_MESSAGES category is available, for two reasons.  First,
     message catalogs are all the I18N Wget uses anyway.  Second,
     LC_ALL can upset unrelated library behavior: in a foreign locale,
     Solaris strptime() fails to handle international dates correctly,
     which makes http_atotm() malfunction.  */
# ifndef LC_MESSAGES
  setlocale (LC_ALL, "");
# else
  setlocale (LC_MESSAGES, "");
  setlocale (LC_CTYPE, "");
# endif

  /* Message domain.  */
  bindtextdomain ("wget", LOCALEDIR);
  textdomain ("wget");
#endif /* HAVE_NLS */
}
/* Print the one-line usage synopsis to standard output.  The format
   string is passed through _() for translation, and exec_name is
   substituted as the program name.  */
static void
print_usage (void)
{
printf (_("Usage: %s [OPTION]... [URL]...\n"), exec_name);
}
/* Print the help message, describing all the available options. If
you add an option, be sure to update this list.

The output goes to standard output: first a banner with
version_string, then the usage line (print_usage), then one fputs call
per option group.  Every group is a separate string passed through _()
for translation, so the literals must stay exactly as they are for
existing translations to keep matching.  The HTTPS (SSL) group is
compiled in only when HAVE_SSL is defined. */
static void
print_help (void)
{
printf (_("GNU Wget %s, a non-interactive network retriever.\n"),
version_string);
print_usage ();
/* Had to split this in parts, so the #@@#%# Ultrix compiler and cpp
don't bitch. Also, it makes translation much easier. */
fputs (_("\
\n\
Mandatory arguments to long options are mandatory for short options too.\n\
\n"), stdout);
fputs (_("\
Startup:\n\
-V, --version display the version of Wget and exit.\n\
-h, --help print this help.\n\
-b, --background go to background after startup.\n\
-e, --execute=COMMAND execute a `.wgetrc\'-style command.\n\
\n"), stdout);
fputs (_("\
Logging and input file:\n\
-o, --output-file=FILE log messages to FILE.\n\
-a, --append-output=FILE append messages to FILE.\n\
-d, --debug print debug output.\n\
-q, --quiet quiet (no output).\n\
-v, --verbose be verbose (this is the default).\n\
-nv, --non-verbose turn off verboseness, without being quiet.\n\
-i, --input-file=FILE download URLs found in FILE.\n\
-F, --force-html treat input file as HTML.\n\
-B, --base=URL prepends URL to relative links in -F -i file.\n\
\n"),stdout);
fputs (_("\
Download:\n\
-t, --tries=NUMBER set number of retries to NUMBER (0 unlimits).\n\
--retry-connrefused retry even if connection is refused.\n\
-O --output-document=FILE write documents to FILE.\n\
-nc, --no-clobber don\'t clobber existing files or use .# suffixes.\n\
-c, --continue resume getting a partially-downloaded file.\n\
--progress=TYPE select progress gauge type.\n\
-N, --timestamping don\'t re-retrieve files unless newer than local.\n\
-S, --server-response print server response.\n\
--spider don\'t download anything.\n\
-T, --timeout=SECONDS set all timeout values to SECONDS.\n\
--dns-timeout=SECS set the DNS lookup timeout to SECS.\n\
--connect-timeout=SECS set the connect timeout to SECS.\n\
--read-timeout=SECS set the read timeout to SECS.\n\
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
--random-wait wait from 0...2*WAIT secs between retrievals.\n\
-Y, --proxy=on/off turn proxy on or off.\n\
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
--bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
--limit-rate=RATE limit download rate to RATE.\n\
--dns-cache=off disable caching DNS lookups.\n\
--restrict-file-names=OS restrict chars in file names to ones OS allows.\n\
\n"), stdout);
fputs (_("\
Directories:\n\
-nd, --no-directories don\'t create directories.\n\
-x, --force-directories force creation of directories.\n\
-nH, --no-host-directories don\'t create host directories.\n\
-P, --directory-prefix=PREFIX save files to PREFIX/...\n\
--cut-dirs=NUMBER ignore NUMBER remote directory components.\n\
\n"), stdout);
fputs (_("\
HTTP options:\n\
--http-user=USER set http user to USER.\n\
--http-passwd=PASS set http password to PASS.\n\
-C, --cache=on/off (dis)allow server-cached data (normally allowed).\n\
-E, --html-extension save all text/html documents with .html extension.\n\
--ignore-length ignore `Content-Length\' header field.\n\
--header=STRING insert STRING among the headers.\n\
--proxy-user=USER set USER as proxy username.\n\
--proxy-passwd=PASS set PASS as proxy password.\n\
--referer=URL include `Referer: URL\' header in HTTP request.\n\
-s, --save-headers save the HTTP headers to file.\n\
-U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
--no-http-keep-alive disable HTTP keep-alive (persistent connections).\n\
--cookies=off don't use cookies.\n\
--load-cookies=FILE load cookies from FILE before session.\n\
--save-cookies=FILE save cookies to FILE after session.\n\
--post-data=STRING use the POST method; send STRING as the data.\n\
--post-file=FILE use the POST method; send contents of FILE.\n\
\n"), stdout);
#ifdef HAVE_SSL
fputs (_("\
HTTPS (SSL) options:\n\
--sslcertfile=FILE optional client certificate.\n\
--sslcertkey=KEYFILE optional keyfile for this certificate.\n\
--egd-file=FILE file name of the EGD socket.\n\
--sslcadir=DIR dir where hash list of CA's are stored.\n\
--sslcafile=FILE file with bundle of CA's\n\
--sslcerttype=0/1 Client-Cert type 0=PEM (default) / 1=ASN1 (DER)\n\
--sslcheckcert=0/1 Check the server cert agenst given CA\n\
--sslprotocol=0-3 choose SSL protocol; 0=automatic,\n\
1=SSLv2 2=SSLv3 3=TLSv1\n\
\n"), stdout);
#endif
fputs (_("\
FTP options:\n\
-nr, --dont-remove-listing don\'t remove `.listing\' files.\n\
-g, --glob=on/off turn file name globbing on or off.\n\
--passive-ftp use the \"passive\" transfer mode.\n\
--retr-symlinks when recursing, get linked-to files (not dirs).\n\
\n"), stdout);
fputs (_("\
Recursive retrieval:\n\
-r, --recursive recursive download.\n\
-l, --level=NUMBER maximum recursion depth (inf or 0 for infinite).\n\
--delete-after delete files locally after downloading them.\n\
-k, --convert-links convert non-relative links to relative.\n\
-K, --backup-converted before converting file X, back up as X.orig.\n\
-m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\
-p, --page-requisites get all images, etc. needed to display HTML page.\n\
--strict-comments turn on strict (SGML) handling of HTML comments.\n\
\n"), stdout);
fputs (_("\
Recursive accept/reject:\n\
-A, --accept=LIST comma-separated list of accepted extensions.\n\
-R, --reject=LIST comma-separated list of rejected extensions.\n\
-D, --domains=LIST comma-separated list of accepted domains.\n\
--exclude-domains=LIST comma-separated list of rejected domains.\n\
--follow-ftp follow FTP links from HTML documents.\n\
--follow-tags=LIST comma-separated list of followed HTML tags.\n\
-G, --ignore-tags=LIST comma-separated list of ignored HTML tags.\n\
-H, --span-hosts go to foreign hosts when recursive.\n\
-L, --relative follow relative links only.\n\
-I, --include-directories=LIST list of allowed directories.\n\
-X, --exclude-directories=LIST list of excluded directories.\n\
-np, --no-parent don\'t ascend to the parent directory.\n\
\n"), stdout);
/* NOTE(review): the address below reads "bug-wget at gnu.org"; the
"at" looks like mail-archive mangling of '@' -- confirm against the
upstream source before changing the string. */
fputs (_("Mail bug reports and suggestions to <bug-wget at gnu.org>.\n"),
stdout);
}
/* Program entry point.

   Sets up i18n and the option defaults, translates the command line
   into setoptval()/run_command() calls, checks option combinations,
   prepares logging, the output document and signal handlers, and then
   retrieves every URL named on the command line followed by the URLs
   from the input file (if one was given).

   ARGC/ARGV are the usual command-line arguments.  Returns 0 when the
   status of the last retrieval is RETROK and 1 otherwise.  Several
   error paths (and --help/--version) terminate through exit().  */
int
main (int argc, char *const *argv)
{
  char **url, **t;
  int i, c, nurl, status, append_to_log;
  int erase_on_fail = 0;
  /* Long options.  Values above 128 are private codes for options
     that have no single-letter equivalent; they are matched by the
     `case' labels in the switch below.  */
  static struct option long_options[] =
  {
    /* Options without arguments: */
    { "background", no_argument, NULL, 'b' },
    { "backup-converted", no_argument, NULL, 'K' },
    { "continue", no_argument, NULL, 'c' },
    { "convert-links", no_argument, NULL, 'k' },
    { "debug", no_argument, NULL, 'd' },
    { "delete-after", no_argument, NULL, 136 },
    { "dont-remove-listing", no_argument, NULL, 149 },
    { "follow-ftp", no_argument, NULL, 142 },
    { "force-directories", no_argument, NULL, 'x' },
    { "force-hier", no_argument, NULL, 'x' }, /* obsolete */
    { "force-html", no_argument, NULL, 'F'},
    { "help", no_argument, NULL, 'h' },
    { "html-extension", no_argument, NULL, 'E' },
    { "ignore-length", no_argument, NULL, 138 },
    { "mirror", no_argument, NULL, 'm' },
    { "no-clobber", no_argument, NULL, 141 },
    { "no-directories", no_argument, NULL, 147 },
    { "no-host-directories", no_argument, NULL, 148 },
    { "no-host-lookup", no_argument, NULL, 150 },
    { "no-http-keep-alive", no_argument, NULL, 156 },
    { "no-parent", no_argument, NULL, 133 },
    { "non-verbose", no_argument, NULL, 146 },
    { "passive-ftp", no_argument, NULL, 139 },
    { "page-requisites", no_argument, NULL, 'p' },
    { "quiet", no_argument, NULL, 'q' },
    { "random-wait", no_argument, NULL, 165 },
    { "recursive", no_argument, NULL, 'r' },
    { "relative", no_argument, NULL, 'L' },
    { "retr-symlinks", no_argument, NULL, 137 },
    { "retry-connrefused", no_argument, NULL, 174 },
    { "save-headers", no_argument, NULL, 's' },
    { "server-response", no_argument, NULL, 'S' },
    { "span-hosts", no_argument, NULL, 'H' },
    { "spider", no_argument, NULL, 132 },
    { "strict-comments", no_argument, NULL, 177 },
    { "timestamping", no_argument, NULL, 'N' },
    { "verbose", no_argument, NULL, 'v' },
    { "version", no_argument, NULL, 'V' },
    /* Options accepting an argument: */
    { "accept", required_argument, NULL, 'A' },
    { "append-output", required_argument, NULL, 'a' },
    { "backups", required_argument, NULL, 151 }, /* undocumented */
    { "base", required_argument, NULL, 'B' },
    { "bind-address", required_argument, NULL, 155 },
    { "cache", required_argument, NULL, 'C' },
    { "connect-timeout", required_argument, NULL, 180 },
    { "cookies", required_argument, NULL, 160 },
    { "cut-dirs", required_argument, NULL, 145 },
    { "dns-timeout", required_argument, NULL, 178 },
    { "directory-prefix", required_argument, NULL, 'P' },
    { "dns-cache", required_argument, NULL, 175 },
    { "domains", required_argument, NULL, 'D' },
    { "dot-style", required_argument, NULL, 134 },
    { "execute", required_argument, NULL, 'e' },
    { "exclude-directories", required_argument, NULL, 'X' },
    { "exclude-domains", required_argument, NULL, 140 },
    { "follow-tags", required_argument, NULL, 153 },
    { "glob", required_argument, NULL, 'g' },
    { "header", required_argument, NULL, 131 },
    { "htmlify", required_argument, NULL, 135 },
    { "http-passwd", required_argument, NULL, 130 },
    { "http-user", required_argument, NULL, 129 },
    { "ignore-tags", required_argument, NULL, 'G' },
    { "include-directories", required_argument, NULL, 'I' },
    { "input-file", required_argument, NULL, 'i' },
    { "level", required_argument, NULL, 'l' },
    { "limit-rate", required_argument, NULL, 164 },
    { "load-cookies", required_argument, NULL, 161 },
    { "no", required_argument, NULL, 'n' },
    { "output-document", required_argument, NULL, 'O' },
    { "output-file", required_argument, NULL, 'o' },
    { "post-data", required_argument, NULL, 167 },
    { "post-file", required_argument, NULL, 168 },
    { "progress", required_argument, NULL, 163 },
    { "proxy", required_argument, NULL, 'Y' },
    { "proxy-passwd", required_argument, NULL, 144 },
    { "proxy-user", required_argument, NULL, 143 },
    { "quota", required_argument, NULL, 'Q' },
    { "read-timeout", required_argument, NULL, 179 },
    { "reject", required_argument, NULL, 'R' },
    { "restrict-file-names", required_argument, NULL, 176 },
    { "save-cookies", required_argument, NULL, 162 },
    { "timeout", required_argument, NULL, 'T' },
    { "tries", required_argument, NULL, 't' },
    { "user-agent", required_argument, NULL, 'U' },
    { "referer", required_argument, NULL, 157 },
    { "use-proxy", required_argument, NULL, 'Y' },
#ifdef HAVE_SSL
    { "sslcertfile", required_argument, NULL, 158 },
    { "sslcertkey", required_argument, NULL, 159 },
    { "egd-file", required_argument, NULL, 166 },
    { "sslcadir", required_argument, NULL, 169},
    { "sslcafile", required_argument, NULL, 170},
    { "sslcerttype", required_argument, NULL, 171},
    { "sslcheckcert", required_argument, NULL, 172},
    { "sslprotocol", required_argument, NULL, 173},
#endif /* HAVE_SSL */
    { "wait", required_argument, NULL, 'w' },
    { "waitretry", required_argument, NULL, 152 },
    /* Local option */
    { "erase-on-fail", no_argument, NULL, 'Z' },
    { 0, 0, 0, 0 }
  };

  i18n_initialize ();
  append_to_log = 0;

  /* Construct the name of the executable, without the directory part. */
  exec_name = strrchr (argv[0], PATH_SEPARATOR);
  if (!exec_name)
    exec_name = argv[0];
  else
    ++exec_name;

#ifdef WINDOWS
  windows_main_junk (&argc, (char **) argv, (char **) &exec_name);
#endif

  initialize (); /* sets option defaults; reads the system wgetrc and .wgetrc */

  /* [Is the order of the option letters significant? If not, they should be
     alphabetized, like the long_options. The only thing I know for sure is
     that the options with required arguments must be followed by a ':'.
     -- Dan Harkless <wget at harkless.org>] */
  while ((c = getopt_long (argc, argv, "\
hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:Z",
                           long_options, (int *)0)) != EOF)
    {
      switch (c)
        {
          /* Options without arguments: */
        case 132:
          setoptval ("spider", "on");
          break;
        case 133:
          setoptval ("noparent", "on");
          break;
        case 136:
          setoptval ("deleteafter", "on");
          break;
        case 137:
          setoptval ("retrsymlinks", "on");
          break;
        case 138:
          setoptval ("ignorelength", "on");
          break;
        case 139:
          setoptval ("passiveftp", "on");
          break;
        case 141:
          setoptval ("noclobber", "on");
          break;
        case 142:
          setoptval ("followftp", "on");
          break;
        case 145:
          setoptval ("cutdirs", optarg);
          break;
        case 146:
          setoptval ("verbose", "off");
          break;
        case 147:
          setoptval ("dirstruct", "off");
          break;
        case 148:
          setoptval ("addhostdir", "off");
          break;
        case 149:
          setoptval ("removelisting", "off");
          break;
        case 155:
          setoptval ("bindaddress", optarg);
          break;
        case 156:
          setoptval ("httpkeepalive", "off");
          break;
        case 165:
          setoptval ("randomwait", "on");
          break;
        case 'b':
          setoptval ("background", "on");
          break;
        case 'c':
          setoptval ("continue", "on");
          break;
        case 'd':
#ifdef ENABLE_DEBUG
          setoptval ("debug", "on");
#else
          fprintf (stderr, _("%s: debug support not compiled in.\n"),
                   exec_name);
#endif
          break;
        case 'E':
          setoptval ("htmlextension", "on");
          break;
        case 'F':
          setoptval ("forcehtml", "on");
          break;
        case 'H':
          setoptval ("spanhosts", "on");
          break;
        case 'h':
          print_help ();
#ifdef WINDOWS
          ws_help (exec_name);
#endif
          exit (0);
          break;
        case 'K':
          setoptval ("backupconverted", "on");
          break;
        case 'k':
          setoptval ("convertlinks", "on");
          break;
        case 'L':
          setoptval ("relativeonly", "on");
          break;
        case 'm':
          setoptval ("mirror", "on");
          break;
        case 'N':
          setoptval ("timestamping", "on");
          break;
        case 'p':
          setoptval ("pagerequisites", "on");
          break;
        case 'S':
          setoptval ("serverresponse", "on");
          break;
        case 's':
          setoptval ("saveheaders", "on");
          break;
        case 'q':
          setoptval ("quiet", "on");
          break;
        case 'r':
          setoptval ("recursive", "on");
          break;
        case 'V':
          printf ("GNU Wget %s\n\n", version_string);
          printf ("%s", _("\
Copyright (C) 2003 Free Software Foundation, Inc.\n"));
          printf ("%s", _("\
This program is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
GNU General Public License for more details.\n"));
          printf (_("\nOriginally written by Hrvoje Niksic <hniksic at xemacs.org>.\n"));
          exit (0);
          break;
        case 'v':
          setoptval ("verbose", "on");
          break;
        case 'x':
          setoptval ("dirstruct", "on");
          break;
        case 174:
          setoptval ("retryconnrefused", "on");
          break;
        case 177:
          setoptval ("strictcomments", "on");
          break;

          /* Options accepting an argument: */
        case 129:
          setoptval ("httpuser", optarg);
          break;
        case 130:
          setoptval ("httppasswd", optarg);
          break;
        case 131:
          setoptval ("header", optarg);
          break;
        case 134:
          setoptval ("dotstyle", optarg);
          break;
        case 135:
          setoptval ("htmlify", optarg);
          break;
        case 140:
          setoptval ("excludedomains", optarg);
          break;
        case 143:
          setoptval ("proxyuser", optarg);
          break;
        case 144:
          setoptval ("proxypasswd", optarg);
          break;
        case 151:
          setoptval ("backups", optarg);
          break;
        case 152:
          setoptval ("waitretry", optarg);
          break;
        case 153:
          setoptval ("followtags", optarg);
          break;
        case 160:
          setoptval ("cookies", optarg);
          break;
        case 161:
          setoptval ("loadcookies", optarg);
          break;
        case 162:
          setoptval ("savecookies", optarg);
          break;
        case 163:
          setoptval ("progress", optarg);
          break;
        case 164:
          setoptval ("limitrate", optarg);
          break;
        case 157:
          setoptval ("referer", optarg);
          break;
#ifdef HAVE_SSL
        case 158:
          setoptval ("sslcertfile", optarg);
          break;
        case 159:
          setoptval ("sslcertkey", optarg);
          break;
        case 166:
          setoptval ("egdfile", optarg);
          break;
        case 169:
          setoptval ("sslcadir", optarg);
          break;
        case 170:
          setoptval ("sslcafile", optarg);
          break;
        case 171:
          setoptval ("sslcerttype", optarg);
          break;
        case 172:
          setoptval ("sslcheckcert", optarg);
          break;
        case 173:
          setoptval ("sslprotocol", optarg);
          break;
#endif /* HAVE_SSL */
        case 167:
          setoptval ("postdata", optarg);
          break;
        case 168:
          setoptval ("postfile", optarg);
          break;
        case 175:
          setoptval ("dnscache", optarg);
          break;
        case 176:
          setoptval ("restrictfilenames", optarg);
          break;
        case 178:
          setoptval ("dnstimeout", optarg);
          break;
        case 179:
          setoptval ("readtimeout", optarg);
          break;
        case 180:
          setoptval ("connecttimeout", optarg);
          break;
        case 'A':
          setoptval ("accept", optarg);
          break;
        case 'a':
          /* Same as -o, but remember that the log must be appended to.  */
          setoptval ("logfile", optarg);
          append_to_log = 1;
          break;
        case 'B':
          setoptval ("base", optarg);
          break;
        case 'C':
          setoptval ("cache", optarg);
          break;
        case 'D':
          setoptval ("domains", optarg);
          break;
        case 'e':
          run_command (optarg);
          break;
        case 'G':
          setoptval ("ignoretags", optarg);
          break;
        case 'g':
          setoptval ("glob", optarg);
          break;
        case 'I':
          setoptval ("includedirectories", optarg);
          break;
        case 'i':
          setoptval ("input", optarg);
          break;
        case 'l':
          setoptval ("reclevel", optarg);
          break;
        case 'n':
          {
            /* #### What we really want here is --no-foo. */
            /* Each letter following -n switches off one feature, so
               e.g. "-nHd" is processed as -nH followed by -nd.  */
            char *p;
            for (p = optarg; *p; p++)
              switch (*p)
                {
                case 'v':
                  setoptval ("verbose", "off");
                  break;
                case 'H':
                  setoptval ("addhostdir", "off");
                  break;
                case 'd':
                  setoptval ("dirstruct", "off");
                  break;
                case 'c':
                  setoptval ("noclobber", "on");
                  break;
                case 'r':
                  setoptval ("removelisting", "off");
                  break;
                case 'p':
                  setoptval ("noparent", "on");
                  break;
                case 'k':
                  setoptval ("httpkeepalive", "off");
                  break;
                default:
                  printf (_("%s: illegal option -- `-n%c'\n"), exec_name, *p);
                  print_usage ();
                  printf ("\n");
                  printf (_("Try `%s --help\' for more options.\n"), exec_name);
                  exit (1);
                }
            break;
          }
        case 'O':
          setoptval ("outputdocument", optarg);
          break;
        case 'o':
          setoptval ("logfile", optarg);
          break;
        case 'P':
          setoptval ("dirprefix", optarg);
          break;
        case 'Q':
          setoptval ("quota", optarg);
          break;
        case 'R':
          setoptval ("reject", optarg);
          break;
        case 'T':
          setoptval ("timeout", optarg);
          break;
        case 't':
          setoptval ("tries", optarg);
          break;
        case 'U':
          setoptval ("useragent", optarg);
          break;
        case 'w':
          setoptval ("wait", optarg);
          break;
        case 'X':
          setoptval ("excludedirectories", optarg);
          break;
        case 'Y':
          setoptval ("useproxy", optarg);
          break;
        case 'Z':
          /* Local option: delete the output file of a failed retrieval.  */
          erase_on_fail = 1;
          break;
        case '?':
          /* NOTE(review): an unrecognized option exits with status 0,
             unlike the other usage errors in this function which exit
             with 1.  Left unchanged here because scripts may rely on
             it -- confirm before altering.  */
          print_usage ();
          printf ("\n");
          printf (_("Try `%s --help' for more options.\n"), exec_name);
          exit (0);
          break;
        }
    }

  /* All user options have now been processed, so it's now safe to do
     interoption dependency checks. */
  if (opt.reclevel == 0)
    opt.reclevel = INFINITE_RECURSION; /* see wget.h for commentary on this */

  if (opt.page_requisites && !opt.recursive)
    {
      opt.recursive = TRUE;
      opt.reclevel = 0;
      if (!opt.no_dirstruct)
        opt.dirstruct = TRUE; /* usually handled by cmd_spec_recursive() */
    }

  /* -1 means verbosity was never set explicitly; derive it from quiet.  */
  if (opt.verbose == -1)
    opt.verbose = !opt.quiet;

  /* Sanity checks. */
  if (opt.verbose && opt.quiet)
    {
      printf (_("Can't be verbose and quiet at the same time.\n"));
      print_usage ();
      exit (1);
    }
  if (opt.timestamping && opt.noclobber)
    {
      printf (_("\
Can't timestamp and not clobber old files at the same time.\n"));
      print_usage ();
      exit (1);
    }

  /* Everything after the parsed options is treated as a URL.  */
  nurl = argc - optind;
  if (!nurl && !opt.input_filename)
    {
      /* No URL specified. */
      printf (_("%s: missing URL\n"), exec_name);
      print_usage ();
      printf ("\n");
      /* #### Something nicer should be printed here -- similar to the
         pre-1.5 `--help' page. */
      printf (_("Try `%s --help' for more options.\n"), exec_name);
      exit (1);
    }

  if (opt.background)
    fork_to_background ();

  /* Initialize progress. Have to do this after the options are
     processed so we know where the log file is. */
  if (opt.verbose)
    set_progress_implementation (opt.progress_type);

  /* Fill in the arguments.  Every entry of URL is heap-allocated
     (either by rewrite_shorthand_url or by xstrdup) and is released
     with xfree at the end of this function; the array itself is
     NULL-terminated.  */
  url = alloca_array (char *, nurl + 1);
  for (i = 0; i < nurl; i++, optind++)
    {
      char *rewritten = rewrite_shorthand_url (argv[optind]);
      if (rewritten)
        url[i] = rewritten;
      else
        url[i] = xstrdup (argv[optind]);
    }
  url[i] = NULL;

  /* Change the title of console window on Windows. #### I think this
     statement should belong to retrieve_url(). --hniksic. */
#ifdef WINDOWS
  ws_changetitle (*url, nurl);
#endif

  /* Initialize logging. */
  log_init (opt.lfilename, append_to_log);

  DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
           OS_TYPE));

  /* Open the output filename if necessary. */
  if (opt.output_document)
    {
      if (HYPHENP (opt.output_document))
        opt.dfp = stdout;
      else
        {
          struct stat st;
          opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
          if (opt.dfp == NULL)
            {
              perror (opt.output_document);
              exit (1);
            }
          if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
            opt.od_known_regular = 1;
        }
    }

#ifdef WINDOWS
  ws_startup ();
#endif

  /* Setup the signal handler to redirect output when hangup is
     received. */
#ifdef HAVE_SIGNAL
  /* Only install our handler if SIGHUP was not already being ignored
     (the probe call leaves it ignored in that case).  */
  if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
    signal(SIGHUP, redirect_output_signal);
  /* ...and do the same for SIGUSR1. */
  signal (SIGUSR1, redirect_output_signal);
  /* Writing to a closed socket normally signals SIGPIPE, and the
     process exits. What we want is to ignore SIGPIPE and just check
     for the return value of write(). */
  signal (SIGPIPE, SIG_IGN);
#ifdef SIGWINCH
  signal (SIGWINCH, progress_handle_sigwinch);
#endif
#endif /* HAVE_SIGNAL */

#ifdef HAVE_SSL
  /* Must call this before resolving any URLs because it has the power
     to disable `https'. */
  ssl_init_prng ();
#endif

  status = RETROK; /* initialize it, just-in-case */
  /* Retrieve the URLs from argument list. */
  for (t = url; *t; t++)
    {
      char *filename = NULL, *redirected_URL = NULL;
      int dt;

      if (opt.recursive && url_scheme (*t) != SCHEME_FTP)
        status = retrieve_tree (*t);
      else
        status = retrieve_url (*t, &filename, &redirected_URL, NULL, &dt);

      /* FILENAME is still NULL after retrieve_tree(), and nothing
         visible here guarantees retrieve_url() sets it on failure,
         so it must be checked before being logged or unlinked.  */
      if (status != RETROK && erase_on_fail && filename != NULL)
        {
          DEBUGP (("Removing file due to --erase-on-fail in main():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
        }

      if (opt.delete_after && filename != NULL && file_exists_p (filename))
        {
          DEBUGP (("Removing file due to --delete-after in main():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
        }

      FREE_MAYBE (redirected_URL);
      FREE_MAYBE (filename);
    }

  /* And then from the input file, if any. */
  if (opt.input_filename)
    {
      int count;
      status = retrieve_from_file (opt.input_filename, opt.force_html, &count);
      if (!count)
        logprintf (LOG_NOTQUIET, _("No URLs found in %s.\n"),
                   opt.input_filename);
    }

  /* Print the downloaded sum. */
  if (opt.recursive
      || nurl > 1
      || (opt.input_filename && total_downloaded_bytes != 0))
    {
      logprintf (LOG_NOTQUIET,
                 _("\nFINISHED --%s--\nDownloaded: %s bytes in %d files\n"),
                 time_str (NULL), legible_large_int (total_downloaded_bytes),
                 opt.numurls);
      /* Print quota warning, if exceeded. */
      if (opt.quota && total_downloaded_bytes > opt.quota)
        logprintf (LOG_NOTQUIET,
                   _("Download quota (%s bytes) EXCEEDED!\n"),
                   legible (opt.quota));
    }

  if (opt.cookies_output && wget_cookie_jar)
    cookie_jar_save (wget_cookie_jar, opt.cookies_output);

  if (opt.convert_links && !opt.delete_after)
    convert_all_links ();

  log_close ();
  for (i = 0; i < nurl; i++)
    xfree (url[i]);
  cleanup ();

#ifdef DEBUG_MALLOC
  print_malloc_debug_stats ();
#endif

  if (status == RETROK)
    return 0;
  else
    return 1;
}
#ifdef HAVE_SIGNAL
/* Handler installed by main() for SIGHUP and SIGUSR1.  It maps SIG
   to a printable signal name, hands that name to
   log_request_redirect_output(), calls progress_schedule_redirect(),
   and finally installs itself again as the handler for SIG.

   #### It is unsafe to call libc functions from a signal handler.
   What we should do is set a global variable, and have the code in
   log.c pick it up. */
static RETSIGTYPE
redirect_output_signal (int sig)
{
  char *signal_name;

  if (sig == SIGHUP)
    signal_name = "SIGHUP";
  else if (sig == SIGUSR1)
    signal_name = "SIGUSR1";
  else
    signal_name = "WTF?!";      /* not a signal main() registers us for */

  log_request_redirect_output (signal_name);
  progress_schedule_redirect ();

  /* Re-arm the handler for the signal that was just delivered.  */
  signal (sig, redirect_output_signal);
}
#endif /* HAVE_SIGNAL */
--- NEW FILE: alloca.c ---
/* alloca.c -- allocate automatically reclaimed memory
(Mostly) portable public-domain implementation -- D A Gwyn
This implementation of the PWB library alloca function,
which is used to allocate space off the run-time stack so
that it is automatically reclaimed upon procedure exit,
was inspired by discussions with J. Q. Johnson of Cornell.
J.Otto Tennant <jot at cray.com> contributed the Cray support.
There are some preprocessor constants that can
be defined when compiling for your specific system, for
improved efficiency; however, the defaults should be okay.
The general concept of this implementation is to keep
track of all alloca-allocated blocks, and reclaim any
that are found to be deeper in the stack than the current
invocation. This heuristic does not reclaim storage as
soon as it becomes invalid, but it will do so eventually.
As a special case, alloca(0) reclaims storage without
allocating any. It is a good idea to use alloca(0) in
your main control loop, etc. to force garbage collection. */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef emacs
#include "blockinput.h"
#endif
/* If compiling with GCC 2, this file's not needed. */
#if !defined (__GNUC__) || __GNUC__ < 2
/* If someone has defined alloca as a macro,
there must be some other way alloca is supposed to work. */
#ifndef alloca
#ifdef emacs
#ifdef static
/* actually, only want this if static is defined as ""
-- this is for usg, in which emacs must undefine static
in order to make unexec workable
*/
#ifndef STACK_DIRECTION
you
lose
-- must know STACK_DIRECTION at compile-time
#endif /* STACK_DIRECTION undefined */
#endif /* static */
#endif /* emacs */
/* If your stack is a linked list of frames, you have to
provide an "address metric" ADDRESS_FUNCTION macro.
ADDRESS_FUNCTION(arg) yields a `char *' style value that alloca and
find_stack_direction compare to decide which of two stack objects
is deeper.  On Cray systems with CRAY_STACKSEG_END it goes through
i00afunc(); everywhere else it is simply the address of ARG.
NOTE(review): i00afunc is declared here without `static', while both
definitions later in this file are `static' -- confirm this is
accepted by the Cray compilers this code targets. */
#if defined (CRAY) && defined (CRAY_STACKSEG_END)
long i00afunc ();
#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
#else
#define ADDRESS_FUNCTION(arg) &(arg)
#endif
/* Generic pointer type returned by alloca: `void *' under an ISO C
compiler, `char *' otherwise. */
#if __STDC__
typedef void *pointer;
#else
typedef char *pointer;
#endif
/* Fallback definition in case no included header supplied NULL. */
#ifndef NULL
#define NULL 0
#endif
/* Different portions of Emacs need to call different versions of
malloc. The Emacs executable needs alloca to call xmalloc, because
ordinary malloc isn't protected from input signals. On the other
hand, the utilities in lib-src need alloca to call malloc; some of
them are very simple, and don't have an xmalloc routine.
Non-Emacs programs expect this to call xmalloc.
Callers below should use malloc.
Note that when `emacs' is not defined, the macro below renames every
later use of `malloc' in this file to `xmalloc', including the
`extern' declaration that follows it. */
#ifndef emacs
#define malloc xmalloc
#endif
extern pointer malloc ();
/* Define STACK_DIRECTION if you know the direction of stack
growth for your system; otherwise it will be automatically
deduced at run-time.
STACK_DIRECTION > 0 => grows toward higher addresses
STACK_DIRECTION < 0 => grows toward lower addresses
STACK_DIRECTION = 0 => direction of growth unknown */
#ifndef STACK_DIRECTION
#define STACK_DIRECTION 0 /* Direction unknown. */
#endif
/* STACK_DIR is what the rest of the file tests: either the
compile-time constant, or the run-time variable stack_dir, which
starts out as 0 (static storage) until find_stack_direction sets it. */
#if STACK_DIRECTION != 0
#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
#else /* STACK_DIRECTION == 0; need run-time code. */
static int stack_dir; /* 1 or -1 once known. */
#define STACK_DIR stack_dir
/* Work out at run time which way the stack grows and record the
   answer in stack_dir (1 = upward, -1 = downward).  The first call
   remembers the address of its own local and calls itself once; the
   nested call compares the address of its local with the remembered
   one.  */
static void
find_stack_direction ()
{
  static char *first_local = NULL;      /* Address of the outer call's local. */
  auto char marker;                     /* Lives in this call's frame. */

  if (first_local != NULL)
    {
      /* Nested (second) entry: compare against the outer frame.  */
      stack_dir = (ADDRESS_FUNCTION (marker) > first_local) ? 1 : -1;
      return;
    }

  /* Initial entry: remember where we are, then recurse once.  */
  first_local = ADDRESS_FUNCTION (marker);
  find_stack_direction ();
}
#endif /* STACK_DIRECTION == 0 */
/* An "alloca header" is used to:
(a) chain together all alloca'ed blocks;
(b) keep track of stack depth.
It is very important that sizeof(header) agree with malloc
alignment chunk size. The following default should work okay.
alloca places one header at the start of every block it obtains from
malloc and hands the caller the address just past the header. */
#ifndef ALIGN_SIZE
#define ALIGN_SIZE sizeof(double)
#endif
typedef union hdr
{
char align[ALIGN_SIZE]; /* To force sizeof(header). */
struct
{
union hdr *next; /* For chaining headers: the block allocated before this one. */
char *deep; /* For stack depth measure: ADDRESS_FUNCTION value seen by the
alloca call that created this block. */
} h;
} header;
/* Head of the chain; the most recently allocated block comes first. */
static header *last_alloca_header = NULL; /* -> last alloca header. */
/* Return a pointer to at least SIZE bytes of storage,
   which will be automatically reclaimed upon exit from
   the procedure that called alloca.  Originally, this space
   was supposed to be taken from the current stack frame of the
   caller, but that method cannot be made to work for some
   implementations of C, for example under Gould's UTX/32.

   Every call first frees all previously allocated blocks whose
   recorded stack depth is deeper than the current one, i.e. blocks
   whose owning frames have already returned.  alloca (0) performs
   only this garbage collection and returns NULL.

   The process is aborted if the request cannot be satisfied: either
   malloc returned a null pointer, or SIZE is so large that adding
   the header size to it would wrap around.  */
pointer
alloca (size)
     unsigned size;
{
  auto char probe;              /* Probes stack depth: */
  register char *depth = ADDRESS_FUNCTION (probe);

#if STACK_DIRECTION == 0
  if (STACK_DIR == 0)           /* Unknown growth direction. */
    find_stack_direction ();
#endif

  /* Reclaim garbage, defined as all alloca'd storage that
     was allocated from deeper in the stack than currently. */
  {
    register header *hp;        /* Traverses linked list. */

#ifdef emacs
    BLOCK_INPUT;
#endif

    /* The list is ordered newest-first, so scanning can stop at the
       first block that is not deeper than the current frame.  */
    for (hp = last_alloca_header; hp != NULL;)
      if ((STACK_DIR > 0 && hp->h.deep > depth)
          || (STACK_DIR < 0 && hp->h.deep < depth))
        {
          register header *np = hp->h.next;

          free ((pointer) hp);  /* Collect garbage. */
          hp = np;              /* -> next header. */
        }
      else
        break;                  /* Rest are not deeper. */

    last_alloca_header = hp;    /* -> last valid storage. */

#ifdef emacs
    UNBLOCK_INPUT;
#endif
  }

  if (size == 0)
    return NULL;                /* No allocation required. */

  /* Refuse requests for which header + user data would wrap around;
     otherwise malloc would be asked for a tiny block and the caller
     would write far beyond it.  */
  if (sizeof (header) + size < sizeof (header))
    abort ();

  /* Allocate combined header + user data storage. */
  {
    register pointer new = malloc (sizeof (header) + size);
    /* Address of header. */

    if (new == 0)
      abort();

    ((header *) new)->h.next = last_alloca_header;
    ((header *) new)->h.deep = depth;

    last_alloca_header = (header *) new;

    /* User storage begins just after header. */
    return (pointer) ((char *) new + sizeof (header));
  }
}
#if defined (CRAY) && defined (CRAY_STACKSEG_END)
#ifdef DEBUG_I00AFUNC
#include <stdio.h>
#endif
#ifndef CRAY_STACK
#define CRAY_STACK
#ifndef CRAY2
/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
/* Overall stack bookkeeping record.  It is declared for completeness;
no code in this file's alloca implementation reads it. */
struct stack_control_header
{
long shgrow:32; /* Number of times stack has grown. */
long shaseg:32; /* Size of increments to stack. */
long shhwm:32; /* High water mark of stack. */
long shsize:32; /* Current size of stack (all segments). */
};
/* The stack segment linkage control information occurs at
the high-address end of a stack segment. (The stack
grows from low addresses to high addresses.) The initial
part of the stack segment linkage control information is
0200 (octal) words. This provides for register storage
for the routine which overflows the stack.
The CRAY-1/X-MP/Y-MP version of i00afunc only reads the `sssize'
and `sspseg' members of this structure. */
struct stack_segment_linkage
{
long ss[0200]; /* 0200 overflow words. */
long sssize:32; /* Number of words in this segment. */
long ssbase:32; /* Offset to stack base. */
long:32; /* Unnamed padding bit-field. */
long sspseg:32; /* Offset to linkage control of previous
segment of stack. */
long:32; /* Unnamed padding bit-field. */
long sstcpt:32; /* Pointer to task common address block. */
long sscsnm; /* Private control structure number for
microtasking. */
long ssusr1; /* Reserved for user. */
long ssusr2; /* Reserved for user. */
long sstpid; /* Process ID for pid based multi-tasking. */
long ssgvup; /* Pointer to multitasking thread giveup. */
long sscray[7]; /* Reserved for Cray Research. */
/* ssa0..ssa7 and sss0..sss7: presumably save slots for the A and S
registers -- TODO confirm against Cray documentation. */
long ssa0;
long ssa1;
long ssa2;
long ssa3;
long ssa4;
long ssa5;
long ssa6;
long ssa7;
long sss0;
long sss1;
long sss2;
long sss3;
long sss4;
long sss5;
long sss6;
long sss7;
};
#else /* CRAY2 */
/* The following structure defines the vector of words
returned by the STKSTAT library routine.
The CRAY2 version of i00afunc fills one of these in with STKSTAT and
uses only `current_address' and `current_size' to locate the trailer
of the current stack segment. */
struct stk_stat
{
long now; /* Current total stack size. */
long maxc; /* Amount of contiguous space which would
be required to satisfy the maximum
stack demand to date. */
long high_water; /* Stack high-water mark. */
long overflows; /* Number of stack overflow ($STKOFEN) calls. */
long hits; /* Number of internal buffer hits. */
long extends; /* Number of block extensions. */
long stko_mallocs; /* Block allocations by $STKOFEN. */
long underflows; /* Number of stack underflow calls ($STKRETN). */
long stko_free; /* Number of deallocations by $STKRETN. */
long stkm_free; /* Number of deallocations by $STKMRET. */
long segments; /* Current number of stack segments. */
long maxs; /* Maximum number of stack segments so far. */
long pad_size; /* Stack pad size. */
long current_address; /* Current stack segment address. */
long current_size; /* Current stack segment size. This
number is actually corrupted by STKSTAT to
include the fifteen word trailer area. */
long initial_address; /* Address of initial segment. */
long initial_size; /* Size of initial segment. */
};
/* The following structure describes the data structure which trails
any stack segment. I think that the description in 'asdef' is
out of date. I only describe the parts that I am sure about.
The structure is fifteen words long, matching the `- 15' used by the
CRAY2 version of i00afunc when it computes the trailer address.  That
function follows `link' from segment to segment and reads
`this_address' and `this_size' of each trailer it visits. */
struct stk_trailer
{
long this_address; /* Address of this block. */
long this_size; /* Size of this block (does not include
this trailer). */
long unknown2;
long unknown3;
long link; /* Address of trailer block of previous
segment. */
/* The remaining words are not interpreted by this file. */
long unknown5;
long unknown6;
long unknown7;
long unknown8;
long unknown9;
long unknown10;
long unknown11;
long unknown12;
long unknown13;
long unknown14;
};
#endif /* CRAY2 */
#endif /* not CRAY_STACK */
#ifdef CRAY2
/* Determine a "stack measure" for an arbitrary ADDRESS.
I doubt that "lint" will like this much. */
static long
i00afunc (long *address)
{
struct stk_stat status;
struct stk_trailer *trailer;
long *block, size;
long result = 0;
/* We want to iterate through all of the segments. The first
step is to get the stack status structure. We could do this
more quickly and more directly, perhaps, by referencing the
$LM00 common block, but I know that this works. */
STKSTAT (&status);
/* Set up the iteration. */
trailer = (struct stk_trailer *) (status.current_address
+ status.current_size
- 15);
/* There must be at least one stack segment. Therefore it is
a fatal error if "trailer" is null. */
if (trailer == 0)
abort ();
/* Discard segments that do not contain our argument address. */
while (trailer != 0)
{
block = (long *) trailer->this_address;
size = trailer->this_size;
if (block == 0 || size == 0)
abort ();
trailer = (struct stk_trailer *) trailer->link;
if ((block <= address) && (address < (block + size)))
break;
}
/* Set the result to the offset in this segment and add the sizes
of all predecessor segments. */
result = address - block;
if (trailer == 0)
{
return result;
}
do
{
if (trailer->this_size <= 0)
abort ();
result += trailer->this_size;
trailer = (struct stk_trailer *) trailer->link;
}
while (trailer != 0);
/* We are done. Note that if you present a bogus address (one
not in any segment), you will get a different number back, formed
from subtracting the address of the first block. This is probably
not what you want. */
return (result);
}
#else /* not CRAY2 */
/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
   Determine the number of the cell within the stack,
   given the address of the cell.  The purpose of this
   routine is to linearize, in some sense, stack addresses
   for alloca.

   ADDRESS is the stack address to measure.  The result is the offset
   of ADDRESS from the start of the segment selected by the first
   loop below, plus the `sssize' of every earlier segment reached
   through the `sspseg' links.  */
static long
i00afunc (long address)
{
  long stkl = 0;
  long size, pseg, this_segment;
  long result = 0;
  struct stack_segment_linkage *ssptr;

  /* Register B67 contains the address of the end of the
     current stack segment.  If you (as a subprogram) store
     your registers on the stack and find that you are past
     the contents of B67, you have overflowed the segment.
     B67 also points to the stack segment linkage control
     area, which is what we are really interested in.  */
  stkl = CRAY_STACKSEG_END ();
  ssptr = (struct stack_segment_linkage *) stkl;

  /* If one subtracts 'size' from the end of the segment,
     one has the address of the first word of the segment.
     If this is not the first segment, 'pseg' will be
     nonzero.  */
  pseg = ssptr->sspseg;
  size = ssptr->sssize;
  this_segment = stkl - size;

  /* It is possible that calling this routine itself caused
     a stack overflow.  Discard stack segments which do not
     contain the target address.  */
  while (!(this_segment <= address && address <= stkl))
    {
#ifdef DEBUG_I00AFUNC
      /* The values are `long', so the `l' length modifier is needed;
         `%o' alone expects an `unsigned int'.  */
      fprintf (stderr, "%011lo %011lo %011lo\n",
               (unsigned long) this_segment, (unsigned long) address,
               (unsigned long) stkl);
#endif
      if (pseg == 0)
        break;
      stkl = stkl - pseg;
      ssptr = (struct stack_segment_linkage *) stkl;
      size = ssptr->sssize;
      pseg = ssptr->sspseg;
      this_segment = stkl - size;
    }

  result = address - this_segment;

  /* If you subtract pseg from the current end of the stack,
     you get the address of the previous stack segment's end.
     This seems a little convoluted to me, but I'll bet you save
     a cycle somewhere.  */
  while (pseg != 0)
    {
#ifdef DEBUG_I00AFUNC
      fprintf (stderr, "%011lo %011lo\n",
               (unsigned long) pseg, (unsigned long) size);
#endif
      stkl = stkl - pseg;
      ssptr = (struct stack_segment_linkage *) stkl;
      size = ssptr->sssize;
      pseg = ssptr->sspseg;
      result += size;
    }
  return (result);
}
#endif /* not CRAY2 */
#endif /* CRAY */
#endif /* no alloca */
#endif /* not GCC version 2 */
--- NEW FILE: progress.c ---
/* Download progress.
Copyright (C) 2001, 2002 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <assert.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#ifdef HAVE_SIGNAL_H
# include <signal.h>
#endif
#include "wget.h"
#include "progress.h"
#include "utils.h"
#include "retr.h"
/* Description of one progress indicator: its name plus the four
callbacks that implement it.  The `implementations' table holds one
such record per available indicator.
NOTE(review): the meaning of the numeric callback arguments is not
visible in this part of the file -- see the dot_* / bar_* definitions. */
struct progress_implementation {
char *name; /* Name the indicator is selected by, e.g. "dot" or "bar". */
void *(*create) PARAMS ((long, long)); /* Returns an opaque `void *' state. */
void (*update) PARAMS ((void *, long, double)); /* First argument presumably the state from create -- confirm. */
void (*finish) PARAMS ((void *, double)); /* First argument presumably the state from create -- confirm. */
void (*set_params) PARAMS ((const char *)); /* Receives a parameter string. */
};
/* Necessary forward declarations: the callbacks of the "dot" and "bar"
   backends are defined further down but are referenced by the
   implementations table. */
static void *dot_create PARAMS ((long, long));
static void dot_update PARAMS ((void *, long, double));
static void dot_finish PARAMS ((void *, double));
static void dot_set_params PARAMS ((const char *));
static void *bar_create PARAMS ((long, long));
static void bar_update PARAMS ((void *, long, double));
static void bar_finish PARAMS ((void *, double));
static void bar_set_params PARAMS ((const char *));
/* Table of the available progress backends, searched by name. */
static struct progress_implementation implementations[] = {
{ "dot", dot_create, dot_update, dot_finish, dot_set_params },
{ "bar", bar_create, bar_update, bar_finish, bar_set_params }
};
/* Backend currently in use; assigned by set_progress_implementation. */
static struct progress_implementation *current_impl;
/* Set to 1 by bar_set_params when its parameter is "force" and reset
   to 0 by set_progress_implementation.  While non-zero, progress_create
   and bar_set_params do not switch to the fallback backend. */
static int current_impl_locked;
/* Progress implementation used by default. Can be overridden in
wgetrc or by the fallback one. */
#define DEFAULT_PROGRESS_IMPLEMENTATION "bar"
/* Fallback progress implementation should be something that works
under all display types. If you put something other than "dot"
here, remember that bar_set_params tries to switch to this if we're
not running on a TTY. So changing this to "bar" could cause
an infinite loop. */
#define FALLBACK_PROGRESS_IMPLEMENTATION "dot"
/* Tell whether NAME selects one of the known progress implementations.
   Only the part of NAME before the first ':' (all of NAME when there is
   no ':') takes part in the comparison, and it is compared as a prefix
   of each implementation's name.  Returns 1 on a match, 0 otherwise.  */
int
valid_progress_implementation_p (const char *name)
{
  const char *sep = strchr (name, ':');
  int prefix_len = sep ? sep - name : strlen (name);
  int idx;

  for (idx = 0; idx < countof (implementations); idx++)
    {
      if (strncmp (implementations[idx].name, name, prefix_len) == 0)
        return 1;
    }

  return 0;
}
/* Make the implementation selected by NAME the current one.  A NULL
   NAME selects DEFAULT_PROGRESS_IMPLEMENTATION.  Text after the first
   ':' in NAME is passed to the implementation's set_params callback.
   Aborts if no implementation matches.  */
void
set_progress_implementation (const char *name)
{
  const char *sep;
  const char *params;
  int idx, prefix_len;

  if (name == NULL)
    name = DEFAULT_PROGRESS_IMPLEMENTATION;

  sep = strchr (name, ':');
  prefix_len = sep ? sep - name : strlen (name);
  /* Parameters start right after the colon; NULL when there is none.  */
  params = sep ? sep + 1 : NULL;

  for (idx = 0; idx < countof (implementations); idx++)
    {
      struct progress_implementation *impl = &implementations[idx];

      if (strncmp (impl->name, name, prefix_len) != 0)
        continue;

      current_impl = impl;
      current_impl_locked = 0;

      /* set_params is called even when PARAMS is NULL so that the
         implementation gets a chance to set up whatever it needs to
         run.  */
      if (impl->set_params)
        impl->set_params (params);
      return;
    }

  abort ();
}
/* Non-zero after progress_schedule_redirect has been called and until
   the next progress_create consumes and clears it.  */
static int output_redirected;

/* Record that the output has been redirected, so that the next
   progress_create re-evaluates which implementation to use.  */
void
progress_schedule_redirect (void)
{
  output_redirected = 1;
}
/* Create a progress gauge and return it as an opaque pointer.

   INITIAL is the number of bytes the download starts from (zero when
   starting from scratch).  TOTAL is the expected total number of bytes
   in this download; zero means the size is not known in advance.

   If a redirect was scheduled with progress_schedule_redirect, the
   fallback implementation is selected first, unless the current
   implementation is locked.  */
void *
progress_create (long initial, long total)
{
  if (output_redirected)
    {
      /* The log status has changed under our feet.  */
      if (current_impl_locked == 0)
        set_progress_implementation (FALLBACK_PROGRESS_IMPLEMENTATION);
      output_redirected = 0;
    }

  return current_impl->create (initial, total);
}
/* Inform the progress gauge of newly received bytes. DLTIME is the
time in milliseconds since the beginning of the download. */
void
progress_update (void *progress, long howmuch, double dltime)
{
current_impl->update (progress, howmuch, dltime);
}
/* Tell the progress gauge to clean up. Calling this will free the
PROGRESS object, the further use of which is not allowed. */
void
progress_finish (void *progress, double dltime)
{
current_impl->finish (progress, dltime);
}
/* Dot-printing. */
/* State of one "dot" progress gauge; allocated by dot_create and freed
   by dot_finish. */
struct dot_progress {
long initial_length; /* how many bytes have been downloaded
previously. */
long total_length; /* expected total byte count when the
download finishes */
int accumulated; /* bytes received that have not yet been
turned into a dot */
int rows; /* number of rows printed so far */
int dots; /* number of dots printed in this row */
double last_timer_value; /* DLTIME at which the download speed
was last printed */
};
/* Dot-progress backend for progress_create.  Allocates a zeroed
   dot_progress.  When INITIAL is non-zero (a continued download), the
   complete rows already downloaded are reported with a "[ skipping ]"
   line and the dots of the partially downloaded row are printed as
   commas.  */
static void *
dot_create (long initial, long total)
{
  struct dot_progress *gauge = xmalloc (sizeof (*gauge));
  int dot_bytes;
  long row_bytes;
  int leftover;
  long skipped;

  memset (gauge, 0, sizeof (struct dot_progress));
  gauge->initial_length = initial;
  gauge->total_length = total;

  if (!gauge->initial_length)
    return gauge;

  dot_bytes = opt.dot_bytes;
  row_bytes = opt.dot_bytes * opt.dots_in_line;
  /* LEFTOVER is the part of the initial length that lies in the last,
     incomplete row; SKIPPED is the part made of complete rows.  */
  leftover = (int) (gauge->initial_length % row_bytes);
  skipped = gauge->initial_length - leftover;

  if (skipped)
    {
      int skipped_k = (int) (skipped / 1024); /* skipped amount in K */
      int skipped_k_len = numdigit (skipped_k);

      if (skipped_k_len < 5)
        skipped_k_len = 5;

      /* Align the [ skipping ... ] line with the dots by inserting as
         many spaces as there are digits in the skipped amount in K.  */
      logprintf (LOG_VERBOSE, _("\n%*s[ skipping %dK ]"),
                 2 + skipped_k_len, "", skipped_k);
    }

  logprintf (LOG_VERBOSE, "\n%5ldK", skipped / 1024);

  while (leftover >= dot_bytes)
    {
      if (gauge->dots % opt.dot_spacing == 0)
        logputs (LOG_VERBOSE, " ");
      logputs (LOG_VERBOSE, ",");
      ++gauge->dots;
      leftover -= dot_bytes;
    }
  assert (gauge->dots < opt.dots_in_line);

  gauge->accumulated = leftover;
  gauge->rows = skipped / row_bytes;

  return gauge;
}
/* Log BYTES as a whole-number percentage of EXPECTED, formatted as
   "%3d%%".  The callers in this file only call this when the expected
   total is non-zero.  */
static void
print_percentage (long bytes, long expected)
{
  logprintf (LOG_VERBOSE, "%3d%%", (int)(100.0 * bytes / expected));
}
/* Log the rate at which BYTES were downloaded during the time between
   the previous call (dp->last_timer_value) and DLTIME, then remember
   DLTIME as the new reference point.  */
static void
print_download_speed (struct dot_progress *dp, long bytes, double dltime)
{
  double elapsed = dltime - dp->last_timer_value;

  logprintf (LOG_VERBOSE, " %s", retr_rate (bytes, elapsed, 1));
  dp->last_timer_value = dltime;
}
/* Dot-progress backend for progress_update.  Adds HOWMUCH to the
   accumulated byte count and prints one dot per opt.dot_bytes
   accumulated.  Each time a row fills up, the percentage (when the
   total is known) and the download speed for that row are printed.  */
static void
dot_update (void *progress, long howmuch, double dltime)
{
  struct dot_progress *gauge = progress;
  int dot_bytes = opt.dot_bytes;
  long row_bytes = opt.dot_bytes * opt.dots_in_line;

  log_set_flush (0);

  gauge->accumulated += howmuch;
  while (gauge->accumulated >= dot_bytes)
    {
      /* Starting a new row: print its offset in K.  */
      if (gauge->dots == 0)
        logprintf (LOG_VERBOSE, "\n%5ldK", gauge->rows * row_bytes / 1024);

      if (gauge->dots % opt.dot_spacing == 0)
        logputs (LOG_VERBOSE, " ");
      logputs (LOG_VERBOSE, ".");

      ++gauge->dots;
      if (gauge->dots >= opt.dots_in_line)
        {
          long row_qty = row_bytes;

          /* The row in which a continued download started holds fewer
             newly downloaded bytes than a full row.  */
          if (gauge->rows == gauge->initial_length / row_bytes)
            row_qty -= gauge->initial_length % row_bytes;

          ++gauge->rows;
          gauge->dots = 0;

          if (gauge->total_length)
            print_percentage (gauge->rows * row_bytes, gauge->total_length);
          print_download_speed (gauge, row_qty, dltime);
        }

      gauge->accumulated -= dot_bytes;
    }

  log_set_flush (1);
}
/* Dot-progress backend for progress_finish.  Pads the current row with
   spaces, prints the final percentage (when the total is known) and the
   download speed for the last, partial row, then frees PROGRESS.
   DLTIME is passed on to print_download_speed.  */
static void
dot_finish (void *progress, double dltime)
{
  struct dot_progress *dp = progress;
  int dot_bytes = opt.dot_bytes;
  long row_bytes = opt.dot_bytes * opt.dots_in_line;
  int i;

  log_set_flush (0);

  /* Nothing printed in this row yet: start it with its offset in K.  */
  if (dp->dots == 0)
    logprintf (LOG_VERBOSE, "\n%5ldK", dp->rows * row_bytes / 1024);

  /* Fill the rest of the row so the trailing figures line up with
     those of the complete rows.  */
  for (i = dp->dots; i < opt.dots_in_line; i++)
    {
      if (i % opt.dot_spacing == 0)
        logputs (LOG_VERBOSE, " ");
      logputs (LOG_VERBOSE, " ");
    }

  if (dp->total_length)
    {
      print_percentage (dp->rows * row_bytes
                        + dp->dots * dot_bytes
                        + dp->accumulated,
                        dp->total_length);
    }

  {
    long row_qty = dp->dots * dot_bytes + dp->accumulated;

    /* The row in which a continued download started holds fewer newly
       downloaded bytes.  */
    if (dp->rows == dp->initial_length / row_bytes)
      row_qty -= dp->initial_length % row_bytes;
    print_download_speed (dp, row_qty, dltime);
  }

  logputs (LOG_VERBOSE, "\n\n");

  /* Re-enable flushing, mirroring the log_set_flush (0) ... (1) bracket
     used by dot_update.  The original passed 0 here, which left
     flushing disabled once the gauge was finished.
     NOTE(review): presumably log_set_flush (1) also flushes what was
     printed meanwhile -- confirm in log.c.  */
  log_set_flush (1);

  xfree (dp);
}
/* Interpret the "parameters" of the dot progress.  For example,
   --progress=dot:mega sets the dot style to "mega".  When PARAMS is
   NULL or empty, opt.dot_style is used instead; when that is NULL too,
   nothing is changed.  Valid styles (compared case-insensitively) are
   default, binary, mega and giga; any other value is reported on
   stderr and leaves the settings unchanged.  */
static void
dot_set_params (const char *params)
{
  /* Bytes per dot, dots per cluster and dots per line of each style.  */
  static const struct {
    const char *style;
    long dot_bytes;
    int dot_spacing;
    int dots_in_line;
  } styles[] = {
    /* Default style: 1K dots, 10 dots in a cluster, 50 dots in a
       line.  */
    { "default", 1024, 10, 50 },
    /* "Binary" retrieval: 8K dots, 16 dots in a cluster, 48 dots
       (384K) in a line.  */
    { "binary", 8192, 16, 48 },
    /* "Mega" retrieval, for very long files: 64K dots, 8 dots in a
       cluster, 6 clusters (3M) in a line.  */
    { "mega", 65536L, 8, 48 },
    /* "Giga" retrieval, for very very *very* long files: 1M dots, 8
       dots in a cluster, 4 clusters (32M) in a line.  */
    { "giga", (1L << 20), 8, 32 }
  };
  size_t k;

  if (!params || !*params)
    params = opt.dot_style;
  if (!params)
    return;

  /* We use this to set the retrieval style.  */
  for (k = 0; k < sizeof (styles) / sizeof (styles[0]); k++)
    {
      if (strcasecmp (params, styles[k].style) != 0)
        continue;
      opt.dot_bytes = styles[k].dot_bytes;
      opt.dot_spacing = styles[k].dot_spacing;
      opt.dots_in_line = styles[k].dots_in_line;
      return;
    }

  fprintf (stderr,
           _("Invalid dot style specification `%s'; leaving unchanged.\n"),
           params);
}
/* "Thermometer" (bar) progress. */
/* Assumed screen width if we can't find the real value. */
#define DEFAULT_SCREEN_WIDTH 80
/* Minimum screen width we'll try to work with. If this is too small,
create_image will overflow the buffer. */
#define MINIMUM_SCREEN_WIDTH 45
/* Screen width currently in effect.  Replaced by bar_set_params and
   progress_handle_sigwinch whenever determine_screen_width reports a
   value of at least MINIMUM_SCREEN_WIDTH. */
static int screen_width = DEFAULT_SCREEN_WIDTH;
/* Size of the download speed history ring. */
#define DLSPEED_HISTORY_SIZE 20
/* The minimum time length of a history sample. By default, each
sample is at least 150ms long, which means that, over the course of
20 samples, "current" download speed spans at least 3s into the
past. */
#define DLSPEED_SAMPLE_MIN 150
/* State of one "bar" progress gauge; allocated by bar_create and freed
   by bar_finish. */
struct bar_progress {
long initial_length; /* how many bytes have been downloaded
previously. */
long total_length; /* expected total byte count when the
download finishes */
long count; /* bytes downloaded so far */
double last_screen_update; /* time of the last screen update,
measured since the beginning of
download. */
int width; /* screen width we're using at the
time the progress gauge was
created. this is different from
the screen_width global variable in
that the latter can be changed by a
signal. */
char *buffer; /* buffer where the bar "image" is
stored; holds width + 1 bytes. */
int tick; /* counter used for drawing the
progress bar where the total size
is not known. */
/* The following variables (kept in a struct for namespace reasons)
keep track of recent download speeds. See update_speed_ring() for
details. */
struct bar_progress_hist {
int pos; /* ring slot the next sample is written to */
long times[DLSPEED_HISTORY_SIZE];
long bytes[DLSPEED_HISTORY_SIZE];
/* The sum of times and bytes respectively, maintained for
efficiency. */
long total_time;
long total_bytes;
} hist;
double recent_start; /* timestamp of beginning of the current
"recent" period, i.e. the sample that
has not been stored in the ring yet. */
long recent_bytes; /* bytes downloaded during the current
"recent" period. */
/* create_image() uses these to make sure that ETA information
doesn't flash. */
double last_eta_time; /* time of the last update to download
speed and ETA, measured since the
beginning of download. */
long last_eta_value; /* ETA computed at last_eta_time */
};
static void create_image PARAMS ((struct bar_progress *, double));
static void display_image PARAMS ((char *));

/* Bar-progress backend for progress_create.  Allocates a zeroed
   bar_progress together with its image buffer, sized for the current
   screen width, then draws and displays the initial image.  */
static void *
bar_create (long initial, long total)
{
  struct bar_progress *gauge = xmalloc (sizeof (*gauge));

  memset (gauge, 0, sizeof (struct bar_progress));

  gauge->initial_length = initial;
  /* In theory our callers should take care of the pathological case of
     INITIAL exceeding TOTAL, but it can sometimes happen.  */
  gauge->total_length = initial > total ? initial : total;

  /* - 1 because we don't want to use the last screen column.  */
  gauge->width = screen_width - 1;
  /* + 1 for the terminating zero.  */
  gauge->buffer = xmalloc (gauge->width + 1);

  logputs (LOG_VERBOSE, "\n");

  create_image (gauge, 0);
  display_image (gauge->buffer);

  return gauge;
}
static void update_speed_ring PARAMS ((struct bar_progress *, long, double));

/* Bar-progress backend for progress_update.  Accounts for HOWMUCH new
   bytes, feeds the speed history, follows screen width changes, and
   redraws the bar unless it was redrawn less than 200 time units (ms)
   ago.  */
static void
bar_update (void *progress, long howmuch, double dltime)
{
  struct bar_progress *gauge = progress;
  int resized = 0;

  gauge->count += howmuch;

  /* We could be downloading more than total_length, e.g. when the
     server sends an incorrect Content-Length header.  In that case,
     adjust total_length to the new reality, so that the code in
     create_image() that depends on total size being smaller or equal
     to the expected size doesn't abort.  */
  if (gauge->total_length > 0
      && gauge->count + gauge->initial_length > gauge->total_length)
    gauge->total_length = gauge->initial_length + gauge->count;

  update_speed_ring (gauge, howmuch, dltime);

  if (screen_width - 1 != gauge->width)
    {
      gauge->width = screen_width - 1;
      gauge->buffer = xrealloc (gauge->buffer, gauge->width + 1);
      resized = 1;
    }

  /* Don't update more often than five times per second, unless the
     width has just changed.  */
  if (!resized && dltime - gauge->last_screen_update < 200)
    return;

  create_image (gauge, dltime);
  display_image (gauge->buffer);
  gauge->last_screen_update = dltime;
}
/* Bar-progress backend for progress_finish.  Draws the final image,
   terminates the line, and frees the gauge and its buffer.  */
static void
bar_finish (void *progress, double dltime)
{
  struct bar_progress *gauge = progress;
  long received = gauge->count + gauge->initial_length;

  /* See bar_update() for explanation.  */
  if (gauge->total_length > 0 && received > gauge->total_length)
    gauge->total_length = gauge->initial_length + gauge->count;

  create_image (gauge, dltime);
  display_image (gauge->buffer);

  logputs (LOG_VERBOSE, "\n\n");

  xfree (gauge->buffer);
  xfree (gauge);
}
/* Maintain the notion of a "current" download speed, measured over no
   less than 3s.  (Shorter intervals produce very erratic results.)

   The speed is sampled in intervals of at least DLSPEED_SAMPLE_MIN
   (150ms) and the samples are stored in a FIFO history ring of
   DLSPEED_HISTORY_SIZE (20) entries.  The history therefore covers at
   least three seconds and at most 20 reads into the past.

   For fast downloads this yields the speed over the last three
   seconds.  For slow downloads (where one network read takes longer
   than the minimum sample time) it yields the speed over a longer
   period, as long as it takes to complete 20 reads.  That is good
   because slow downloads tend to fluctuate more and a 3-second
   average would be too erratic.

   HOWMUCH is the number of bytes just received and DLTIME the time
   since the beginning of the download.  */
static void
update_speed_ring (struct bar_progress *bp, long howmuch, double dltime)
{
  struct bar_progress_hist *ring = &bp->hist;
  double age = dltime - bp->recent_start;
  int slot;

  /* Update the download count.  */
  bp->recent_bytes += howmuch;

  /* For very small time intervals only the "recent" count is updated.
     Once its age reaches the minimum sample time it gets recorded in
     the history ring.  */
  if (age < DLSPEED_SAMPLE_MIN)
    return;

  slot = ring->pos;

  /* To keep the totals correct, first take out the data currently
     stored in this slot (the least recent sample).  */
  ring->total_time -= ring->times[slot];
  ring->total_bytes -= ring->bytes[slot];

  /* Now store the new sample and add it to the totals.  */
  ring->times[slot] = age;
  ring->bytes[slot] = bp->recent_bytes;
  ring->total_time += age;
  ring->total_bytes += bp->recent_bytes;

  /* Start a new "recent" period.  */
  bp->recent_start = dltime;
  bp->recent_bytes = 0;

  /* Advance the ring position, wrapping around at the end.  */
  ring->pos = (slot + 1 == DLSPEED_HISTORY_SIZE) ? 0 : slot + 1;

#if 0
  /* Sledgehammer check to verify that the totals are accurate. */
  {
    int i;
    double sumt = 0, sumb = 0;
    for (i = 0; i < DLSPEED_HISTORY_SIZE; i++)
      {
        sumt += ring->times[i];
        sumb += ring->bytes[i];
      }
    assert (sumt == ring->total_time);
    assert (sumb == ring->total_bytes);
  }
#endif
}
/* Copy the string literal S (without its terminating zero) to the
   location pointed to by the variable `p' of the enclosing scope and
   advance `p' past it.  S must be a literal or an array, since its
   length is taken with sizeof. */
#define APPEND_LITERAL(s) do { \
memcpy (p, s, sizeof (s) - 1); \
p += sizeof (s) - 1; \
} while (0)
/* Larger of A and B.  Each argument may be evaluated twice. */
#ifndef MAX
# define MAX(a, b) ((a) >= (b) ? (a) : (b))
#endif
/* Render the one-line progress bar "image" for BP into bp->buffer,
   padded with spaces to exactly bp->width characters and
   zero-terminated.  DL_TOTAL_TIME is the time since the beginning of
   the download; the ETA code treats it as milliseconds.

   Fixes relative to the previous version:
   - the ETA is only computed once some bytes have been downloaded
     (bp->count > 0), because the estimate divides by bp->count;
   - the padding literals have the widths the layout below calls for
     (4, 11 and 13 characters), so that fields do not shift depending
     on which branch produced them.  */
static void
create_image (struct bar_progress *bp, double dl_total_time)
{
  char *p = bp->buffer;
  long size = bp->initial_length + bp->count;

  char *size_legible = legible (size);
  int size_legible_len = strlen (size_legible);

  struct bar_progress_hist *hist = &bp->hist;

  /* The progress bar should look like this:
     xx% [=======>             ] nn,nnn 12.34K/s ETA 00:00

     Calculate the geometry.  The idea is to assign as much room as
     possible to the progress bar.  The other idea is to never let
     things "jitter", i.e. pad elements that vary in size so that
     their variance does not affect the placement of other elements.
     It would be especially bad for the progress bar to be resized
     randomly.

     "xx% " or "100%"  - percentage               - 4 chars
     "[]"              - progress bar decorations - 2 chars
     " nnn,nnn,nnn"    - downloaded bytes         - 12 chars or very rarely more
     " 1012.56K/s"     - dl rate                  - 11 chars
     "  ETA xx:xx:xx"  - ETA                      - 13 chars

     "=====>..."       - progress bar             - the rest
  */
  int dlbytes_size = 1 + MAX (size_legible_len, 11);
  int progress_size = bp->width - (4 + 2 + dlbytes_size + 11 + 13);

  /* Too little room for a meaningful bar: leave it out.  */
  if (progress_size < 5)
    progress_size = 0;

  /* "xx% " */
  if (bp->total_length > 0)
    {
      int percentage = (int)(100.0 * size / bp->total_length);

      assert (percentage <= 100);

      if (percentage < 100)
        sprintf (p, "%2d%% ", percentage);
      else
        strcpy (p, "100%");
      p += 4;
    }
  else
    /* Unknown total: keep the 4-character percentage field blank.  */
    APPEND_LITERAL ("    ");

  /* The progress bar: "[====>      ]" or "[++==>      ]". */
  if (progress_size && bp->total_length > 0)
    {
      /* Size of the initial portion. */
      int insz = (double)bp->initial_length / bp->total_length * progress_size;

      /* Size of the downloaded portion. */
      int dlsz = (double)size / bp->total_length * progress_size;

      char *begin;
      int i;

      assert (dlsz <= progress_size);
      assert (insz <= dlsz);

      *p++ = '[';
      begin = p;

      /* Print the initial portion of the download with '+' chars, the
         rest with '=' and one '>'.  */
      for (i = 0; i < insz; i++)
        *p++ = '+';
      dlsz -= insz;
      if (dlsz > 0)
        {
          for (i = 0; i < dlsz - 1; i++)
            *p++ = '=';
          *p++ = '>';
        }

      while (p - begin < progress_size)
        *p++ = ' ';
      *p++ = ']';
    }
  else if (progress_size)
    {
      /* If we can't draw a real progress bar, then at least show
         *something* to the user.  */
      int ind = bp->tick % (progress_size * 2 - 6);
      int i, pos;

      /* Make the star move in two directions. */
      if (ind < progress_size - 2)
        pos = ind + 1;
      else
        pos = progress_size - (ind - progress_size + 5);

      *p++ = '[';
      for (i = 0; i < progress_size; i++)
        {
          if (i == pos - 1)
            *p++ = '<';
          else if (i == pos)
            *p++ = '=';
          else if (i == pos + 1)
            *p++ = '>';
          else
            *p++ = ' ';
        }
      *p++ = ']';

      ++bp->tick;
    }

  /* " 234,567,890" */
  sprintf (p, " %-11s", legible (size));
  p += strlen (p);

  /* " 1012.45K/s" */
  if (hist->total_time && hist->total_bytes)
    {
      static char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
      int units = 0;
      /* Calculate the download speed using the history ring and
         recent data that hasn't made it to the ring yet.  */
      long dlquant = hist->total_bytes + bp->recent_bytes;
      double dltime = hist->total_time + (dl_total_time - bp->recent_start);
      double dlspeed = calc_rate (dlquant, dltime, &units);
      sprintf (p, " %7.2f%s", dlspeed, short_units[units]);
      p += strlen (p);
    }
  else
    /* No speed data yet: placeholder of the same 11-character width.  */
    APPEND_LITERAL ("   --.--K/s");

  /* "  ETA xx:xx:xx"; wait for three seconds before displaying the ETA.
     That's because the ETA value needs a while to become reliable.
     Also require that something has been downloaded, since the
     estimate below divides by bp->count.  */
  if (bp->total_length > 0 && bp->count > 0 && dl_total_time > 3000)
    {
      long eta;
      int eta_hrs, eta_min, eta_sec;

      /* Don't change the value of ETA more than approximately once
         per second; doing so would cause flashing without providing
         any value to the user. */
      if (bp->total_length != size
          && bp->last_eta_value != 0
          && dl_total_time - bp->last_eta_time < 900)
        eta = bp->last_eta_value;
      else
        {
          /* Calculate ETA using the average download speed to predict
             the future speed.  If you want to use a speed averaged
             over a more recent period, replace dl_total_time with
             hist->total_time and bp->count with hist->total_bytes.
             I found that doing that results in a very jerky and
             ultimately unreliable ETA.  */
          double time_sofar = (double)dl_total_time / 1000;
          long bytes_remaining = bp->total_length - size;
          eta = (long) (time_sofar * bytes_remaining / bp->count);
          bp->last_eta_value = eta;
          bp->last_eta_time = dl_total_time;
        }

      eta_hrs = eta / 3600, eta %= 3600;
      eta_min = eta / 60, eta %= 60;
      eta_sec = eta;

      /* More than two digits of hours do not fit the field.  */
      if (eta_hrs > 99)
        goto no_eta;

      if (eta_hrs == 0)
        {
          /* Hours not printed: pad with three spaces. */
          APPEND_LITERAL ("   ");
          sprintf (p, " ETA %02d:%02d", eta_min, eta_sec);
        }
      else
        {
          if (eta_hrs < 10)
            /* Hours printed with one digit: pad with one space. */
            *p++ = ' ';
          sprintf (p, " ETA %d:%02d:%02d", eta_hrs, eta_min, eta_sec);
        }
      p += strlen (p);
    }
  else if (bp->total_length > 0)
    {
    no_eta:
      /* Blank ETA field, 13 characters wide.  */
      APPEND_LITERAL ("             ");
    }

  assert (p - bp->buffer <= bp->width);

  /* Pad the rest of the line so that it overwrites a previous, longer
     image.  */
  while (p < bp->buffer + bp->width)
    *p++ = ' ';
  *p = '\0';
}
/* Print BUF as a one-line ASCII "image", preceded by a carriage return
   so that the next image overwrites it.  The log "save context"
   setting is switched off for the duration of the output and restored
   afterwards.  */
static void
display_image (char *buf)
{
  int saved_context = log_set_save_context (0);

  logputs (LOG_VERBOSE, "\r");
  logputs (LOG_VERBOSE, buf);

  log_set_save_context (saved_context);
}
/* Set up the bar progress.  PARAMS is the text after "bar:" in the
   requested implementation name, or NULL.  The parameter "force" locks
   the bar implementation in place.

   Unless locked, the fallback implementation is selected instead when
   output goes to a log file, when stderr is not a TTY, or when TERM is
   "emacs".  Otherwise the screen width is taken from
   determine_screen_width, provided it is at least
   MINIMUM_SCREEN_WIDTH.  */
static void
bar_set_params (const char *params)
{
  int sw;
  char *term = getenv ("TERM");

  if (params
      && 0 == strcmp (params, "force"))
    current_impl_locked = 1;

  if ((opt.lfilename
#ifdef HAVE_ISATTY
       /* The progress bar doesn't make sense if the output is not a
          TTY -- when logging to file, it is better to review the
          dots.  */
       || !isatty (fileno (stderr))
#else
       /* Without isatty() we cannot tell, so assume the output is not
          a TTY.  (The original had a bare `1' here, which does not
          form a valid expression after `opt.lfilename'.)  */
       || 1
#endif
       /* Normally we don't depend on terminal type because the
          progress bar only uses ^M to move the cursor to the
          beginning of line, which works even on dumb terminals.  But
          Jamie Zawinski reports that ^M and ^H tricks don't work in
          Emacs shell buffers, and only make a mess.  */
       || (term && 0 == strcmp (term, "emacs"))
       )
      && !current_impl_locked)
    {
      /* We're not printing to a TTY, so revert to the fallback
         display.  #### We're recursively calling
         set_progress_implementation here, which is slightly kludgy.
         It would be nicer if we provided that function a return value
         indicating a failure of some sort.  */
      set_progress_implementation (FALLBACK_PROGRESS_IMPLEMENTATION);
      return;
    }

  sw = determine_screen_width ();
  if (sw && sw >= MINIMUM_SCREEN_WIDTH)
    screen_width = sw;
}
#ifdef SIGWINCH
/* SIGWINCH handler: pick up the new screen width, provided it is at
   least MINIMUM_SCREEN_WIDTH, and install the handler again.  */
RETSIGTYPE
progress_handle_sigwinch (int sig)
{
  int new_width = determine_screen_width ();

  if (new_width != 0 && new_width >= MINIMUM_SCREEN_WIDTH)
    screen_width = new_width;

  signal (SIGWINCH, progress_handle_sigwinch);
}
#endif
--- NEW FILE: html-parse.h ---
/* Declarations for html-parse.c.
Copyright (C) 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef HTML_PARSE_H
#define HTML_PARSE_H
/* One attribute (name/value pair) of an HTML tag. */
struct attr_pair {
char *name; /* attribute name */
char *value; /* attribute value */
/* Needed for URL conversion; the places where the value begins and
ends, including the quotes and everything. */
const char *value_raw_beginning;
int value_raw_size;
/* Used internally by map_html_tags. */
int name_pool_index, value_pool_index;
};
/* Description of one HTML tag; the type of the first argument of the
   callback that map_html_tags takes. */
struct taginfo {
char *name; /* tag name */
int end_tag_p; /* whether this is an end-tag */
int nattrs; /* number of attributes */
struct attr_pair *attrs; /* attributes */
const char *start_position; /* start position of tag */
const char *end_position; /* end position of tag */
};
struct hash_table; /* forward declaration */
/* Flags for map_html_tags: */
#define MHT_STRICT_COMMENTS 1 /* use strict comment interpretation */
#define MHT_TRIM_VALUES 2 /* trim attribute values, e.g. interpret
<a href="  foo  "> as "foo" */
/* Declared here, defined in html-parse.c.  The callback receives a
   struct taginfo and a void pointer.
   NOTE(review): the arguments are presumably (text, size, callback,
   callback argument, MHT_* flags, and two hash tables restricting the
   tags and attributes of interest) -- confirm against html-parse.c. */
void map_html_tags PARAMS ((const char *, int,
void (*) (struct taginfo *, void *), void *, int,
const struct hash_table *,
const struct hash_table *));
#endif /* HTML_PARSE_H */
--- NEW FILE: host.c ---
/* Host name resolution and matching.
Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#ifndef WINDOWS
#include <netdb.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <assert.h>
#include <sys/types.h>
#ifdef WINDOWS
# include <winsock.h>
# define SET_H_ERRNO(err) WSASetLastError(err)
#else
# include <sys/socket.h>
# include <netinet/in.h>
# ifndef __BEOS__
# include <arpa/inet.h>
# endif
# include <netdb.h>
# define SET_H_ERRNO(err) ((void)(h_errno = (err)))
#endif /* WINDOWS */
#ifndef NO_ADDRESS
#define NO_ADDRESS NO_DATA
#endif
#ifdef HAVE_SYS_UTSNAME_H
# include <sys/utsname.h>
#endif
#include <errno.h>
#include "wget.h"
#include "utils.h"
#include "host.h"
#include "url.h"
#include "hash.h"
#ifndef errno
extern int errno;
#endif
#ifndef h_errno
# ifndef __CYGWIN__
extern int h_errno;
# endif
#endif
/* Address family used by default: AF_INET6 when built with
   ENABLE_IPV6, AF_INET otherwise.  sockaddr_len() derives the socket
   address size from it. */
#ifdef ENABLE_IPV6
int ip_default_family = AF_INET6;
#else
int ip_default_family = AF_INET;
#endif
/* Mapping between known hosts and the lists of their addresses. */
static struct hash_table *host_name_addresses_map;
/* Lists of addresses. This should eventually be extended to handle
IPv6. */
struct address_list {
int count; /* number of addresses */
ip_address *addresses; /* pointer to the array of addresses */
int faulty; /* number of addresses known not to work;
these are the first FAULTY entries. */
int refcount; /* so we know whether to free it or not. */
};
/* Store the bounds of the usable part of AL: *START receives the index
   of the first address not marked faulty, *END the number of
   addresses.  */
void
address_list_get_bounds (struct address_list *al, int *start, int *end)
{
  int first_usable = al->faulty;
  int past_last = al->count;

  *start = first_usable;
  *end = past_last;
}
/* Copy the address at position INDEX of AL to IP_STORE.  INDEX must
   lie within the bounds reported by address_list_get_bounds.  */
void
address_list_copy_one (struct address_list *al, int index, ip_address *ip_store)
{
  assert (al->faulty <= index && index < al->count);
  memcpy (ip_store, &al->addresses[index], sizeof (*ip_store));
}
/* Return non-zero when AL1 and AL2 hold the same addresses: either
   they are the same list, or they have the same length and their
   address arrays compare equal byte for byte.  */
int
address_list_match_all (struct address_list *al1, struct address_list *al2)
{
  if (al1 != al2)
    {
      size_t nbytes;

      if (al1->count != al2->count)
        return 0;

      nbytes = al1->count * sizeof (ip_address);
      return memcmp (al1->addresses, al2->addresses, nbytes) == 0;
    }

  return 1;
}
/* Mark element INDEX of AL as faulty, so that it is skipped the next
   time the list is used.  INDEX must be the first element not yet
   marked faulty.  */
void
address_list_set_faulty (struct address_list *al, int index)
{
  /* The address list is assumed to be traversed in order, so a faulty
     attempt is always preceded only by faulty addresses; this is how
     Wget uses it.  */
  assert (index == al->faulty);

  al->faulty += 1;

  /* Once every address has been proven faulty, make them all clean
     again so they can be retried anew; handing out an empty address
     list the next time would not be of much use.  */
  if (al->faulty >= al->count)
    al->faulty = 0;
}
#ifdef HAVE_GETADDRINFO
/**
 * address_list_from_addrinfo
 *
 * Transform a linked list of addrinfo into an address_list.  Only the
 * AF_INET and AF_INET6 entries are taken over.
 *
 * Input:
 * addrinfo*		Linked list of addrinfo
 *
 * Output:
 * address_list*	Newly allocated address_list, or NULL when the
 *			list contains no AF_INET/AF_INET6 entry
 */
static struct address_list *
address_list_from_addrinfo (struct addrinfo *ai)
{
  struct address_list *list;
  struct addrinfo *cur;
  int total = 0;
  int filled = 0;

  /* First pass: count the entries we can use.  */
  for (cur = ai; cur != NULL; cur = cur->ai_next)
    {
      switch (cur->ai_family)
        {
        case AF_INET:
        case AF_INET6:
          ++total;
          break;
        default:
          break;
        }
    }

  if (total == 0)
    return NULL;

  list = xmalloc (sizeof (struct address_list));
  list->addresses = xmalloc (total * sizeof (ip_address));
  list->count = total;
  list->faulty = 0;
  list->refcount = 1;

  /* Second pass: copy the addresses.  */
  for (cur = ai; cur != NULL; cur = cur->ai_next)
    {
      switch (cur->ai_family)
        {
        case AF_INET6:
          {
            struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)cur->ai_addr;
            memcpy (list->addresses + filled, &sin6->sin6_addr, 16);
            ++filled;
          }
          break;
        case AF_INET:
          {
            struct sockaddr_in *sin = (struct sockaddr_in *)cur->ai_addr;
            map_ipv4_to_ip ((ip4_address *)&sin->sin_addr,
                            list->addresses + filled);
            ++filled;
          }
          break;
        default:
          break;
        }
    }
  assert (filled == total);

  return list;
}
#else
/* Build an address_list from H_ADDR_LIST, a NULL-terminated vector of
   addresses as returned by gethostbyname.  The vector must contain at
   least one address; each entry is converted with map_ipv4_to_ip.  */
static struct address_list *
address_list_from_vector (char **h_addr_list)
{
  struct address_list *list = xmalloc (sizeof (*list));
  int total, k;

  for (total = 0; h_addr_list[total] != NULL; total++)
    ;
  assert (total > 0);

  list->count = total;
  list->faulty = 0;
  list->addresses = xmalloc (total * sizeof (ip_address));
  list->refcount = 1;

  for (k = 0; k < total; k++)
    map_ipv4_to_ip ((ip4_address *)h_addr_list[k], &list->addresses[k]);

  return list;
}
#endif
/* Build an address_list that holds a copy of the single address
   ADDR.  */
static struct address_list *
address_list_from_single (ip_address *addr)
{
  struct address_list *list = xmalloc (sizeof (*list));

  list->count = 1;
  list->faulty = 0;
  list->addresses = xmalloc (sizeof (ip_address));
  list->refcount = 1;
  memcpy (&list->addresses[0], addr, sizeof (ip_address));

  return list;
}
static void
address_list_delete (struct address_list *al)
{
xfree (al->addresses);
xfree (al);
}
/* Drop one reference to AL and delete the list once no reference is
   left (refcount <= 0).  AL must not be used by the caller afterwards.

   The pointers printed with %p are cast to void *, which is the
   argument type that conversion requires.  */
void
address_list_release (struct address_list *al)
{
  --al->refcount;
  DEBUGP (("Releasing %p (new refcount %d).\n", (void *) al, al->refcount));
  if (al->refcount <= 0)
    {
      DEBUGP (("Deleting unused %p.\n", (void *) al));
      address_list_delete (al);
    }
}
/**
 * wget_sockaddr_set_address
 *
 * Fill in the address family, the port number and the address of a
 * wget_sockaddr.  A NULL address means the wildcard (all-zero)
 * address.  An unsupported address family aborts the whole program,
 * and so does an address that cannot be mapped to IPv4 when AF_INET
 * is requested.
 *
 * Input:
 * wget_sockaddr*	The space to be filled
 * int			The wished address family
 * unsigned short	The port, in host byte order
 * ip_address*		The binary IP address, or NULL
 *
 * Return:
 * -			Only the first parameter is modified
 */
void
wget_sockaddr_set_address (wget_sockaddr *sa,
                           int ip_family, unsigned short port, ip_address *addr)
{
  switch (ip_family)
    {
    case AF_INET:
      sa->sin.sin_family = ip_family;
      sa->sin.sin_port = htons (port);
      if (addr != NULL)
        {
          ip4_address addr4;
          if (!map_ip_to_ipv4 (addr, &addr4))
            /* should the callers have prevented this? */
            abort ();
          memcpy (&sa->sin.sin_addr, &addr4, sizeof(ip4_address));
        }
      else
        memset (&sa->sin.sin_addr, 0, sizeof(ip4_address));
      return;
#ifdef ENABLE_IPV6
    case AF_INET6:
      sa->sin6.sin6_family = ip_family;
      sa->sin6.sin6_port = htons (port);
      if (addr != NULL)
        memcpy (&sa->sin6.sin6_addr, addr, 16);
      else
        memset (&sa->sin6.sin6_addr, 0 , 16);
      return;
#endif
    default:
      abort();
    }
}
/**
 * wget_sockaddr_set_port
 *
 * Fill in only the port of the socket address.
 * An unsupported address family aborts the whole program.
 *
 * Require:
 * that the address family of SA is already set.
 *
 * Input:
 * wget_sockaddr*	The space where the port should be entered
 * unsigned short	The port to enter, in host byte order
 *
 * Return:
 * -			Only the first parameter is modified
 */
void
wget_sockaddr_set_port (wget_sockaddr *sa, unsigned short port)
{
  switch (sa->sa.sa_family)
    {
    case AF_INET:
      sa->sin.sin_port = htons (port);
      return;
#ifdef ENABLE_IPV6
    case AF_INET6:
      sa->sin6.sin6_port = htons (port);
      return;
#endif
    default:
      abort();
    }
}
/**
 * wget_sockaddr_get_addr
 *
 * Return a pointer to the address field inside SA that corresponds
 * to SA's address family.  The pointer refers to storage inside SA
 * itself; nothing is copied.  An address family that is not handled
 * here aborts the program.
 *
 * Require:
 * that the address family of SA is already set.
 *
 * Input:
 * wget_sockaddr*	Socket information
 *
 * Output:
 * void *		Pointer to the binary IP address within SA
 */
void *
wget_sockaddr_get_addr (wget_sockaddr *sa)
{
  switch (sa->sa.sa_family)
    {
    case AF_INET:
      return &sa->sin.sin_addr;
#ifdef ENABLE_IPV6
    case AF_INET6:
      return &sa->sin6.sin6_addr;
#endif
    default:
      break;
    }

  abort();
  /* unreached */
  return NULL;
}
/**
 * wget_sockaddr_get_port
 *
 * Return the port stored in SA, converted from network byte order to
 * host byte order.  An address family that is not handled here
 * aborts the program.
 *
 * Require:
 * that the address family of SA is already set.
 *
 * Input:
 * wget_sockaddr*	Structure to read the port from
 *
 * Output:
 * unsigned short	Port number in host order
 */
unsigned short
wget_sockaddr_get_port (const wget_sockaddr *sa)
{
  /* The stored value is in network order, so the conversion to host
     order is ntohs(), not htons().  */
  if (sa->sa.sa_family == AF_INET)
    return ntohs (sa->sin.sin_port);
#ifdef ENABLE_IPV6
  if (sa->sa.sa_family == AF_INET6)
    return ntohs (sa->sin6.sin6_port);
#endif
  abort();
  /* do not complain about return nothing */
  return -1;
}
/**
 * sockaddr_len
 *
 * Return the size of the socket address structure that matches the
 * global ip_default_family, for use with bind, connect and similar
 * calls.  An address family that is not handled here aborts the
 * program.
 *
 * Input:
 * -			Reads the global ip_default_family
 *
 * Output:
 * socklen_t		sizeof (struct sockaddr_in), or
 *			sizeof (struct sockaddr_in6) for AF_INET6 when
 *			compiled with ENABLE_IPV6
 */
socklen_t
sockaddr_len ()
{
  switch (ip_default_family)
    {
    case AF_INET:
      return sizeof (struct sockaddr_in);
#ifdef ENABLE_IPV6
    case AF_INET6:
      return sizeof (struct sockaddr_in6);
#endif
    default:
      break;
    }

  abort();
  /* do not complain about return nothing */
  return 0;
}
/**
 * Map an IPv4 address to the internal address format.
 *
 * With ENABLE_IPV6 the result is 16 bytes: ten zero bytes, two 0xff
 * bytes, then the four bytes of IPV4.  Without it, the four bytes
 * are copied unchanged, and nothing is done when IP and IPV4 are the
 * same pointer.
 */
void
map_ipv4_to_ip (ip4_address *ipv4, ip_address *ip)
{
#ifdef ENABLE_IPV6
  static unsigned char mapped_prefix[12] =
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};
  char *dst = (char *)ip;

  /* The four address bytes go in first, the prefix second; keep this
     order.  */
  memcpy (dst + 12, ipv4, 4);
  memcpy (dst + 0, mapped_prefix, 12);
#else
  if ((char *)ip == (char *)ipv4)
    return;
  memcpy (ip, ipv4, 4);
#endif
}
/* Check whether IP, in the internal address format, holds an IPv4
   address and, if it does, copy the four address bytes to IPV4.

   Returns 1 on success and 0 on failure.  With ENABLE_IPV6 the first
   12 bytes of IP must equal the prefix 00..00 ff ff, otherwise 0 is
   returned.  Without ENABLE_IPV6 the function always returns 1.

   If IPV4 is NULL nothing is copied; only the check is performed.  */
int
map_ip_to_ipv4 (ip_address *ip, ip4_address *ipv4)
{
#ifdef ENABLE_IPV6
  static unsigned char mapped_prefix[12] =
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff};

  if (memcmp (ip, mapped_prefix, 12) != 0)
    return 0;
  if (ipv4 != NULL)
    memcpy (ipv4, (char *)ip + 12, 4);
  return 1;
#else
  if (ipv4 != NULL)
    memcpy (ipv4, (char *)ip, 4);
  return 1;
#endif
}
/* Versions of gethostbyname and getaddrinfo that support timeout. */
#ifndef ENABLE_IPV6
/* Argument block handed, via run_with_timeout, to
   gethostbyname_with_timeout_callback.  */
struct ghbnwt_context {
const char *host_name;	/* in: the name passed to gethostbyname */
struct hostent *hptr;	/* out: the value gethostbyname returned */
};
/* Callback for run_with_timeout: ARG points to a struct
   ghbnwt_context.  Looks up its host_name with gethostbyname and
   stores the result in its hptr member.  */
static void
gethostbyname_with_timeout_callback (void *arg)
{
  struct ghbnwt_context *context = (struct ghbnwt_context *)arg;
  struct hostent *found = gethostbyname (context->host_name);

  context->hptr = found;
}
/* gethostbyname with a time limit, enforced through run_with_timeout.

   If run_with_timeout reports a timeout (non-zero return), the host
   error is set to HOST_NOT_FOUND, errno is set to ETIMEDOUT and NULL
   is returned.  If the lookup completed but produced NULL, errno is
   cleared to 0 so the caller can tell that case from a timeout.  */
static struct hostent *
gethostbyname_with_timeout (const char *host_name, double timeout)
{
  struct ghbnwt_context ctx;

  ctx.host_name = host_name;

  if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
    {
      /* Timed out.  */
      SET_H_ERRNO (HOST_NOT_FOUND);
      errno = ETIMEDOUT;
      return NULL;
    }

  if (ctx.hptr == NULL)
    errno = 0;

  return ctx.hptr;
}
#else /* ENABLE_IPV6 */
/* Argument block handed, via run_with_timeout, to
   getaddrinfo_with_timeout_callback.  The first four members are
   passed to getaddrinfo unchanged, in this order.  */
struct gaiwt_context {
const char *node;
const char *service;
const struct addrinfo *hints;
struct addrinfo **res;
int exit_code;		/* out: the value getaddrinfo returned */
};
/* Callback for run_with_timeout: ARG points to a struct
   gaiwt_context.  Calls getaddrinfo with the stored arguments and
   records its return value in exit_code.  */
static void
getaddrinfo_with_timeout_callback (void *arg)
{
  struct gaiwt_context *context = (struct gaiwt_context *)arg;
  int rc = getaddrinfo (context->node, context->service,
                        context->hints, context->res);

  context->exit_code = rc;
}
/* getaddrinfo with a time limit, enforced through run_with_timeout.

   If run_with_timeout reports a timeout (non-zero return), errno is
   set to ETIMEDOUT and EAI_SYSTEM is returned.  Otherwise the value
   getaddrinfo itself returned is passed back.  */
static int
getaddrinfo_with_timeout (const char *node, const char *service,
                          const struct addrinfo *hints, struct addrinfo **res,
                          double timeout)
{
  struct gaiwt_context ctx;

  ctx.res = res;
  ctx.hints = hints;
  ctx.service = service;
  ctx.node = node;

  if (!run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
    return ctx.exit_code;

  /* Timed out.  */
  errno = ETIMEDOUT;
  return EAI_SYSTEM;
}
#endif /* ENABLE_IPV6 */
/* Return a printable form of ADDR.

   Without ENABLE_IPV6 this is simply inet_ntoa.  With ENABLE_IPV6,
   an address that map_ip_to_ipv4 accepts is printed with inet_ntoa;
   anything else is printed with inet_ntop (AF_INET6) into a static
   buffer, or as "<unknown>" if inet_ntop fails.

   The returned string is not owned by the caller: it is either the
   inet_ntoa result, this function's static buffer, or a literal.  */
char *
pretty_print_address (ip_address *addr)
{
#ifdef ENABLE_IPV6
  static char buf[128];
  ip4_address addr4;

  if (map_ip_to_ipv4 (addr, &addr4))
    return inet_ntoa (*(struct in_addr *)&addr4);

  if (inet_ntop (AF_INET6, addr, buf, sizeof (buf)))
    return buf;
  return "<unknown>";
#else
  return inet_ntoa (*(struct in_addr *)addr);
#endif
}
/* Record in the host cache that HOST resolves to the address list
   AL.

   The cache table is created on first use.  AL's reference count is
   incremented for the reference held by the table, and the key
   stored is a copy of HOST produced by xstrdup_lower.  When
   debugging is enabled, the cached addresses are logged.  */
static void
cache_host_lookup (const char *host, struct address_list *al)
{
  if (host_name_addresses_map == NULL)
    host_name_addresses_map = make_nocase_string_hash_table (0);

  al->refcount++;
  hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);

#ifdef ENABLE_DEBUG
  if (opt.debug)
    {
      int idx = 0;

      debug_logprintf ("Caching %s =>", host);
      while (idx < al->count)
        {
          debug_logprintf (" %s", pretty_print_address (&al->addresses[idx]));
          idx++;
        }
      debug_logprintf ("\n");
    }
#endif
}
/* Resolve HOST to a list of addresses.

   The steps, in order:
   1. If HOST is a numeric address (checked with inet_pton for IPv6
      when ENABLE_IPV6 is defined, then with inet_addr), return a
      single-element list built by address_list_from_single.
   2. If HOST is found in the host cache, bump the cached list's
      reference count and return it.
   3. Otherwise perform a name lookup bounded by opt.dns_timeout,
      using getaddrinfo when HAVE_GETADDRINFO is defined and
      gethostbyname otherwise.

   Unless SILENT is non-zero, progress and failure messages are
   logged at LOG_VERBOSE, including at most three of the resolved
   addresses.  When opt.dns_cache is set, a freshly resolved list is
   stored in the cache.

   Returns NULL if the lookup fails.  */
struct address_list *
lookup_host (const char *host, int silent)
{
  struct address_list *list = NULL;
  uint32_t numeric_v4;
  ip_address ip;

  /* Step 1: HOST may already be a numeric address.  */
#ifdef ENABLE_IPV6
  if (inet_pton (AF_INET6, host, &ip) > 0)
    return address_list_from_single (&ip);
#endif

  numeric_v4 = (uint32_t)inet_addr (host);
  if (numeric_v4 != (uint32_t)-1)
    {
      /* inet_addr yields network byte order, which is what the
         internal format stores, so the bytes are copied as they
         are.  */
      map_ipv4_to_ip ((ip4_address *)&numeric_v4, &ip);
      return address_list_from_single (&ip);
    }

  /* Step 2: consult the cache.  */
  if (host_name_addresses_map)
    {
      list = hash_table_get (host_name_addresses_map, host);
      if (list)
        {
          DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, list));
          list->refcount++;
          return list;
        }
    }

  if (!silent)
    logprintf (LOG_VERBOSE, _("Resolving %s... "), host);

  /* Step 3: the actual name lookup.  */
#ifdef HAVE_GETADDRINFO
  {
    struct addrinfo hints, *found;
    int gai_rc;

    memset (&hints, 0, sizeof (hints));
    hints.ai_family = (ip_default_family == AF_INET) ? AF_INET : PF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    gai_rc = getaddrinfo_with_timeout (host, NULL, &hints, &found,
                                       opt.dns_timeout);
    if (gai_rc != 0 || found == NULL)
      {
        if (!silent)
          {
            const char *reason;

            if (gai_rc == EAI_SYSTEM)
              reason = strerror (errno);
            else
              reason = gai_strerror (gai_rc);
            logprintf (LOG_VERBOSE, _("failed: %s.\n"), reason);
          }
        return NULL;
      }

    list = address_list_from_addrinfo (found);
    freeaddrinfo (found);
  }
#else
  {
    struct hostent *he = gethostbyname_with_timeout (host, opt.dns_timeout);

    if (he == NULL)
      {
        if (!silent)
          {
            if (errno == ETIMEDOUT)
              logputs (LOG_VERBOSE, _("failed: timed out.\n"));
            else
              logprintf (LOG_VERBOSE, _("failed: %s.\n"), herrmsg (h_errno));
          }
        return NULL;
      }

    /* Do all systems have h_addr_list, or is it a newer thing?  If
       the latter, use address_list_from_single.  */
    list = address_list_from_vector (he->h_addr_list);
  }
#endif

  /* Show the addresses found, but never more than three.  */
  if (!silent)
    {
      int shown;
      int limit = list->count;

      if (limit > 3)
        limit = 3;

      for (shown = 0; shown < limit; shown++)
        {
          if (shown > 0)
            logputs (LOG_VERBOSE, ", ");
          logprintf (LOG_VERBOSE, "%s",
                     pretty_print_address (&list->addresses[shown]));
        }
      if (limit != list->count)
        logputs (LOG_VERBOSE, ", ...");
      logputs (LOG_VERBOSE, "\n");
    }

  /* Remember the result for later lookups.  */
  if (opt.dns_cache)
    cache_host_lookup (host, list);

  return list;
}
/* Decide whether the URL U may be followed, judging by its host.

   Returns 0 when opt.domains is set and U's host matches none of its
   entries, or when opt.exclude_domains is set and U's host matches
   one of its entries.  Returns 1 otherwise.  Matching is done by
   sufmatch.  */
int
accept_domain (struct url *u)
{
  assert (u->host != NULL);

  if (opt.domains && !sufmatch ((const char **)opt.domains, u->host))
    return 0;

  if (opt.exclude_domains
      && sufmatch ((const char **)opt.exclude_domains, u->host))
    return 0;

  return 1;
}
/* Check whether WHAT ends with one of the strings in LIST, a
   NULL-terminated array.  Characters are compared after folding
   with TOLOWER.  An empty list entry matches everything.

   Returns 1 if some element of LIST is a suffix of WHAT, 0
   otherwise.  */
int
sufmatch (const char **list, const char *what)
{
  int idx, pos, plen, lw;

  lw = strlen (what);
  for (idx = 0; list[idx]; idx++)
    {
      const char *tail;

      plen = strlen (list[idx]);
      /* A pattern longer than WHAT can never be its suffix.  */
      if (plen > lw)
        continue;

      tail = what + (lw - plen);
      for (pos = 0; pos < plen; pos++)
        if (TOLOWER (list[idx][pos]) != TOLOWER (tail[pos]))
          break;

      /* Every pattern character matched.  */
      if (pos == plen)
        return 1;
    }
  return 0;
}
/* Return a translated message for the host lookup error code ERROR.
   All the recognized codes map to the same "Host not found" text;
   any other value yields "Unknown error".  */
char *
herrmsg (int error)
{
  /* A switch is not usable here: some of these constants have equal
     values on some systems, which makes the compiler complain about
     duplicate case values.  */
  int recognized = (error == HOST_NOT_FOUND
                    || error == NO_RECOVERY
                    || error == NO_DATA
                    || error == NO_ADDRESS
                    || error == TRY_AGAIN);

  if (!recognized)
    return _("Unknown error");
  return _("Host not found");
}
/* hash_table_map callback used by host_cleanup.  KEY is the host
   name string and VALUE the cached address list; both are freed.
   The list is asserted to have exactly one remaining reference.
   Always returns 0.  */
static int
host_cleanup_mapper (void *key, void *value, void *arg_ignored)
{
  struct address_list *cached = (struct address_list *)value;

  xfree (key);			/* host */

  assert (cached->refcount == 1);
  address_list_delete (cached);

  return 0;
}
/* Release the host cache: free every entry through
   host_cleanup_mapper, destroy the table and reset the global
   pointer to NULL.  Does nothing if the cache was never created.  */
void
host_cleanup (void)
{
  if (host_name_addresses_map == NULL)
    return;

  hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
  hash_table_destroy (host_name_addresses_map);
  host_name_addresses_map = NULL;
}
--- NEW FILE: convert.h ---
/* Declarations for convert.c
Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef CONVERT_H
#define CONVERT_H
/* The kind of rewriting requested for a link; stored in the
   `convert' member of struct urlpos.  */
enum convert_options {
CO_NOCONVERT = 0, /* don't convert this URL */
CO_CONVERT_TO_RELATIVE, /* convert to relative, e.g. to
"../../otherdir/foo.gif" */
CO_CONVERT_TO_COMPLETE, /* convert to absolute, e.g. to
"http://orighost/somedir/bar.jpg". */
CO_NULLIFY_BASE /* change to empty string. */
};
/* Forward declaration: only pointers to struct url are used in this
   header.  */
struct url;
/* A structure that defines the whereabouts of a URL, i.e. its
   position in an HTML document, etc.  Instances are chained into a
   singly linked list through `next'.  */
struct urlpos {
struct url *url; /* the URL of the link, after it has
been merged with the base */
char *local_name; /* local file to which it was saved
(used by convert_links) */
/* reserved for special links such as <base href="..."> which are
used when converting links, but ignored when downloading. */
unsigned int ignore_when_downloading :1;
/* Information about the original link: */
unsigned int link_relative_p :1; /* the link was relative */
unsigned int link_complete_p :1; /* the link was complete (had host name) */
unsigned int link_base_p :1; /* the url came from <base href=...> */
unsigned int link_inline_p :1; /* needed to render the page */
unsigned int link_expect_html :1; /* expected to contain HTML */
unsigned int link_refresh_p :1; /* link was received from
<meta http-equiv=refresh content=...> */
int refresh_timeout; /* for reconstructing the refresh. */
/* Conversion requirements: */
enum convert_options convert; /* is conversion required? */
/* URL's position in the buffer. */
/* NOTE(review): presumably `pos' is an offset and `size' a length
   within the document buffer; the units are not visible in this
   header -- confirm against the code that fills them in. */
int pos, size;
struct urlpos *next; /* next list element */
};
/* downloaded_file() takes a parameter of this type and returns this type.
   The comments inside the enum say which enumerators are meant as
   return values, which as parameters, and which as both. */
typedef enum
{
/* Return enumerators: */
FILE_NOT_ALREADY_DOWNLOADED = 0,
/* Return / parameter enumerators: */
FILE_DOWNLOADED_NORMALLY,
FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED,
/* Parameter enumerators: */
CHECK_FOR_FILE
} downloaded_file_t;
/* Functions declared for convert.c.  PARAMS wraps each argument list
   (it is defined elsewhere; presumably so that pre-ANSI compilers
   can be given empty parameter lists -- confirm in its definition).
   Parameter names are not given here; see convert.c for what each
   argument means.  */
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
void register_download PARAMS ((const char *, const char *));
void register_redirection PARAMS ((const char *, const char *));
void register_html PARAMS ((const char *, const char *));
void register_delete_file PARAMS ((const char *));
void convert_all_links PARAMS ((void));
#endif /* CONVERT_H */
--- NEW FILE: snprintf.c ---
/* This file is NOT part of Wget, but is used by Wget on the systems
where vsnprintf() is not defined. It has been written by Patrick
Powell and modified by other people. All the copyright and other
notices have been left intact.
My changes are documented at the bottom, along with other changes.
I hereby place my modifications to this file under the public
domain. */
/*
* Copyright Patrick Powell 1995
* This code is based on code written by Patrick Powell (papowell at astart.com)
* It may be used for any purpose as long as this notice remains intact
* on all source code distributions
*/
/**************************************************************
* Original:
* Patrick Powell Tue Apr 11 09:48:21 PDT 1995
* A bombproof version of doprnt (dopr) included.
* Sigh. This sort of thing is always nasty to deal with. Note that
* the version here does not include floating point...
*
* snprintf() is used instead of sprintf() as it does limit checks
* for string length. This covers a nasty loophole.
*
* The other functions are there to prevent NULL pointers from
* causing nasty effects.
*
* More Recently:
* Brandon Long <blong at fiction.net> 9/15/96 for mutt 0.43
* This was ugly. It is still ugly. I opted out of floating point
* numbers, but the formatter understands just about everything
* from the normal C string format, at least as far as I can tell from
* the Solaris 2.5 printf(3S) man page.
*
* Brandon Long <blong at fiction.net> 10/22/97 for mutt 0.87.1
* Ok, added some minimal floating point support, which means this
* probably requires libm on most operating systems. Don't yet
* support the exponent (e,E) and sigfig (g,G). Also, fmtint()
* was pretty badly broken, it just wasn't being exercised in ways
* which showed it, so that's been fixed. Also, formatted the code
* to mutt conventions, and removed dead code left over from the
* original. Also, there is now a builtin-test, just compile with:
* gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm
* and run snprintf for results.
*
* Thomas Roessler <roessler at guug.de> 01/27/98 for mutt 0.89i
* The PGP code was using unsigned hexadecimal formats.
* Unfortunately, unsigned formats simply didn't work.
*
* Michael Elkins <me at cs.hmc.edu> 03/05/98 for mutt 0.90.8
* The original code assumed that both snprintf() and vsnprintf() were
* missing. Some systems only have snprintf() but not vsnprintf(), so
* the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
*
* Andrew Tridgell (tridge at samba.org) Oct 1998
* fixed handling of %.0f
* added test for HAVE_LONG_DOUBLE
*
* Russ Allbery <rra at stanford.edu> 2000-08-26
* fixed return value to comply with C99
* fixed handling of snprintf(NULL, ...)
*
* Hrvoje Niksic <hniksic at xemacs.org> 2000-11-04
* include <config.h> instead of "config.h".
* moved TEST_SNPRINTF stuff out of HAVE_SNPRINTF ifdef.
* include <stdio.h> for NULL.
* added support and test cases for long long.
* don't declare argument types to (v)snprintf if stdarg is not used.
* use int instead of short int as 2nd arg to va_arg.
*
**************************************************************/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF)
#include <string.h>
#include <sys/types.h>
#include <stdio.h> /* for NULL */
#include <safe-ctype.h>
/* varargs declarations:
   The VA_* macros hide the difference between <stdarg.h> and the
   older <varargs.h>.  With <stdarg.h>, HAVE_STDARGS is defined and
   VA_SHIFT expands to an empty statement, because the named
   parameters already hold their values.  With <varargs.h>, VA_SHIFT
   fetches each named parameter from the argument list by hand.  All
   of them operate on a va_list named `ap', which VA_LOCAL_DECL
   declares.  */
#if defined(HAVE_STDARG_H)
# include <stdarg.h>
# define HAVE_STDARGS /* let's hope that works everywhere (mj) */
# define VA_LOCAL_DECL va_list ap
# define VA_START(f) va_start(ap, f)
# define VA_SHIFT(v,t) ; /* no-op for ANSI */
# define VA_END va_end(ap)
#else
# include <varargs.h>
# undef HAVE_STDARGS
# define VA_LOCAL_DECL va_list ap
# define VA_START(f) va_start(ap) /* f is ignored! */
# define VA_SHIFT(v,t) v = va_arg(ap,t)
# define VA_END va_end(ap)
#endif
/* LDOUBLE: the floating-point type used by the formatter; long
   double when the configuration says it is available.  */
#ifdef HAVE_LONG_DOUBLE
#define LDOUBLE long double
#else
#define LDOUBLE double
#endif
/* LLONG: the integer type used by the formatter; long long when
   SIZEOF_LONG_LONG is non-zero, plain long otherwise.  */
#if SIZEOF_LONG_LONG != 0
# define LLONG long long
#else
# define LLONG long
#endif
/* Public entry points.  Without HAVE_STDARGS the argument types are
   deliberately left undeclared.  */
#ifdef HAVE_STDARGS
int snprintf (char *str, size_t count, const char *fmt, ...);
int vsnprintf (char *str, size_t count, const char *fmt, va_list arg);
#else
int snprintf ();
int vsnprintf ();
#endif
/* Internal helpers.  Each returns a character count as an int and
   writes through dopr_outch, which takes the buffer, a pointer to
   the current length, and the buffer size.  */
static int dopr (char *buffer, size_t maxlen, const char *format,
va_list args);
static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
char *value, int flags, int min, int max);
static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
LLONG value, int base, int min, int max, int flags);
static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
LDOUBLE fvalue, int min, int max, int flags);
static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c );
/*
 * dopr(): poor man's version of doprintf
 *
 * The constants below drive dopr's format parser.
 */
/* format read states: the parser walks a conversion specification
   through these states in order (flags, minimum width, dot,
   precision, length modifier, conversion character).  */
#define DP_S_DEFAULT 0
#define DP_S_FLAGS 1
#define DP_S_MIN 2
#define DP_S_DOT 3
#define DP_S_MAX 4
#define DP_S_MOD 5
#define DP_S_MOD_L 6
#define DP_S_CONV 7
#define DP_S_DONE 8
/* format flags - Bits (OR-ed together in dopr's `flags').  The
   first five are set by the flag characters '-', '+', ' ', '#' and
   '0'; DP_F_UP and DP_F_UNSIGNED are set by the conversion
   character.  */
#define DP_F_MINUS (1 << 0)
#define DP_F_PLUS (1 << 1)
#define DP_F_SPACE (1 << 2)
#define DP_F_NUM (1 << 3)
#define DP_F_ZERO (1 << 4)
#define DP_F_UP (1 << 5)
#define DP_F_UNSIGNED (1 << 6)
/* Conversion Flags: the length modifier seen ('h', 'l', 'll', 'L');
   stored in dopr's `cflags', one value at a time.  */
#define DP_C_SHORT 1
#define DP_C_LONG 2
#define DP_C_LLONG 3
#define DP_C_LDOUBLE 4
/* Small helper macros.  Every argument is parenthesized so that an
   argument containing a lower-precedence operator expands
   correctly.  MAX and MIN still evaluate their arguments more than
   once; do not pass expressions with side effects.  */
#define char_to_int(p) ((p) - '0')
#define MAX(p,q) (((p) >= (q)) ? (p) : (q))
#define MIN(p,q) (((p) <= (q)) ? (p) : (q))
/* Format FORMAT with the arguments in ARGS into BUFFER, which holds
   MAXLEN bytes.

   The format is parsed by a small state machine (the DP_S_* states).
   Output goes through dopr_outch and the fmt* helpers, which store a
   character only while there is room but always count it.

   Returns the number of characters the complete output consists of,
   whether or not they all fit.  When BUFFER is non-NULL and MAXLEN
   is non-zero, the result is NUL-terminated.  With MAXLEN == 0
   nothing at all is written to BUFFER.

   Limitations visible in the code: %e, %E, %g and %G consume their
   argument but print nothing; 'w' is skipped together with the
   character after it; unknown conversion characters are skipped.  */
static int dopr (char *buffer, size_t maxlen, const char *format, va_list args)
{
  char ch;
  LLONG value;
  LDOUBLE fvalue;
  char *strvalue;
  int min;
  int max;
  int state;
  int flags;
  int cflags;
  int total;
  size_t currlen;

  state = DP_S_DEFAULT;
  currlen = flags = cflags = min = 0;
  max = -1;
  ch = *format++;
  total = 0;

  while (state != DP_S_DONE)
    {
      /* The end of the format string ends parsing in any state.  */
      if (ch == '\0')
        state = DP_S_DONE;

      switch(state)
        {
        case DP_S_DEFAULT:
          if (ch == '%')
            state = DP_S_FLAGS;
          else
            total += dopr_outch (buffer, &currlen, maxlen, ch);
          ch = *format++;
          break;
        case DP_S_FLAGS:
          switch (ch)
            {
            case '-':
              flags |= DP_F_MINUS;
              ch = *format++;
              break;
            case '+':
              flags |= DP_F_PLUS;
              ch = *format++;
              break;
            case ' ':
              flags |= DP_F_SPACE;
              ch = *format++;
              break;
            case '#':
              flags |= DP_F_NUM;
              ch = *format++;
              break;
            case '0':
              flags |= DP_F_ZERO;
              ch = *format++;
              break;
            default:
              state = DP_S_MIN;
              break;
            }
          break;
        case DP_S_MIN:
          /* Minimum field width: digits, or '*' to take it from the
             argument list.  */
          if ('0' <= ch && ch <= '9')
            {
              min = 10*min + char_to_int (ch);
              ch = *format++;
            }
          else if (ch == '*')
            {
              min = va_arg (args, int);
              ch = *format++;
              state = DP_S_DOT;
            }
          else
            state = DP_S_DOT;
          break;
        case DP_S_DOT:
          if (ch == '.')
            {
              state = DP_S_MAX;
              ch = *format++;
            }
          else
            state = DP_S_MOD;
          break;
        case DP_S_MAX:
          /* Precision: digits, or '*' to take it from the argument
             list.  max stays -1 when no precision digits follow.  */
          if ('0' <= ch && ch <= '9')
            {
              if (max < 0)
                max = 0;
              max = 10*max + char_to_int (ch);
              ch = *format++;
            }
          else if (ch == '*')
            {
              max = va_arg (args, int);
              ch = *format++;
              state = DP_S_MOD;
            }
          else
            state = DP_S_MOD;
          break;
        case DP_S_MOD:
          switch (ch)
            {
            case 'h':
              cflags = DP_C_SHORT;
              ch = *format++;
              break;
            case 'l':
              cflags = DP_C_LONG;
              ch = *format++;
              break;
            case 'L':
              cflags = DP_C_LDOUBLE;
              ch = *format++;
              break;
            default:
              break;
            }
          /* After an 'l', look for a second one ("ll").  */
          if (cflags != DP_C_LONG)
            state = DP_S_CONV;
          else
            state = DP_S_MOD_L;
          break;
        case DP_S_MOD_L:
          switch (ch)
            {
            case 'l':
              cflags = DP_C_LLONG;
              ch = *format++;
              break;
            default:
              break;
            }
          state = DP_S_CONV;
          break;
        case DP_S_CONV:
          switch (ch)
            {
            case 'd':
            case 'i':
              if (cflags == DP_C_SHORT)
                value = (short int)va_arg (args, int);
              else if (cflags == DP_C_LONG)
                value = va_arg (args, long int);
              else if (cflags == DP_C_LLONG)
                value = va_arg (args, LLONG);
              else
                value = va_arg (args, int);
              total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
              break;
            case 'o':
              flags |= DP_F_UNSIGNED;
              if (cflags == DP_C_SHORT)
                value = (unsigned short int)va_arg (args, unsigned int);
              else if (cflags == DP_C_LONG)
                value = va_arg (args, unsigned long int);
              else if (cflags == DP_C_LLONG)
                value = va_arg (args, unsigned LLONG);
              else
                value = va_arg (args, unsigned int);
              total += fmtint (buffer, &currlen, maxlen, value, 8, min, max, flags);
              break;
            case 'u':
              flags |= DP_F_UNSIGNED;
              if (cflags == DP_C_SHORT)
                value = (unsigned short int)va_arg (args, unsigned int);
              else if (cflags == DP_C_LONG)
                value = va_arg (args, unsigned long int);
              else if (cflags == DP_C_LLONG)
                value = va_arg (args, unsigned LLONG);
              else
                value = va_arg (args, unsigned int);
              total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
              break;
            case 'X':
              flags |= DP_F_UP;
              /* fall through */
            case 'x':
              flags |= DP_F_UNSIGNED;
              if (cflags == DP_C_SHORT)
                value = (unsigned short int)va_arg (args, unsigned int);
              else if (cflags == DP_C_LONG)
                value = va_arg (args, unsigned long int);
              else if (cflags == DP_C_LLONG)
                value = va_arg (args, unsigned LLONG);
              else
                value = va_arg (args, unsigned int);
              total += fmtint (buffer, &currlen, maxlen, value, 16, min, max, flags);
              break;
            case 'f':
              if (cflags == DP_C_LDOUBLE)
                fvalue = va_arg (args, LDOUBLE);
              else
                fvalue = va_arg (args, double);
              /* um, floating point? */
              total += fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
              break;
            case 'E':
              flags |= DP_F_UP;
              /* fall through */
            case 'e':
              /* Not implemented: the argument is consumed so that
                 later conversions stay aligned, but nothing is
                 printed.  */
              if (cflags == DP_C_LDOUBLE)
                fvalue = va_arg (args, LDOUBLE);
              else
                fvalue = va_arg (args, double);
              break;
            case 'G':
              flags |= DP_F_UP;
              /* fall through */
            case 'g':
              /* Not implemented; see 'e' above.  */
              if (cflags == DP_C_LDOUBLE)
                fvalue = va_arg (args, LDOUBLE);
              else
                fvalue = va_arg (args, double);
              break;
            case 'c':
              total += dopr_outch (buffer, &currlen, maxlen, va_arg (args, int));
              break;
            case 's':
              strvalue = va_arg (args, char *);
              total += fmtstr (buffer, &currlen, maxlen, strvalue, flags, min, max);
              break;
            case 'p':
              strvalue = va_arg (args, void *);
              total += fmtint (buffer, &currlen, maxlen, (long) strvalue, 16, min,
                               max, flags);
              break;
            case 'n':
              /* Store the number of characters stored so far.  */
              if (cflags == DP_C_SHORT)
                {
                  short int *num;
                  num = va_arg (args, short int *);
                  *num = currlen;
                }
              else if (cflags == DP_C_LONG)
                {
                  long int *num;
                  num = va_arg (args, long int *);
                  *num = currlen;
                }
              else if (cflags == DP_C_LLONG)
                {
                  LLONG *num;
                  num = va_arg (args, LLONG *);
                  *num = currlen;
                }
              else
                {
                  int *num;
                  num = va_arg (args, int *);
                  *num = currlen;
                }
              break;
            case '%':
              total += dopr_outch (buffer, &currlen, maxlen, ch);
              break;
            case 'w':
              /* not supported yet, treat as next char */
              ch = *format++;
              break;
            default:
              /* Unknown, skip */
              break;
            }
          /* The conversion is finished: fetch the next character
             and reset the per-conversion state.  */
          ch = *format++;
          state = DP_S_DEFAULT;
          flags = cflags = min = 0;
          max = -1;
          break;
        case DP_S_DONE:
          break;
        default:
          /* hmm? */
          break; /* some picky compilers need this */
        }
    }

  /* Terminate the output.  A zero-size buffer has no room even for
     the terminator; without the maxlen check, `maxlen - 1' would
     wrap around and buffer[0] would be written.  */
  if (buffer != NULL && maxlen > 0)
    {
      if (currlen < maxlen - 1)
        buffer[currlen] = '\0';
      else
        buffer[maxlen - 1] = '\0';
    }

  return total;
}
/* Output the string VALUE for a %s conversion.

   MIN is the minimum field width and MAX the precision (negative
   when none was given).  At most MAX characters of VALUE are
   examined and printed, so with a precision VALUE does not have to
   be NUL-terminated.  The field is padded with spaces to MIN, on the
   left by default and on the right when DP_F_MINUS is set in FLAGS.
   A null VALUE is printed as "<NULL>".

   Returns the number of characters the output consists of.  */
static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
                   char *value, int flags, int min, int max)
{
  int padlen, strln;     /* amount to pad */
  int cnt;
  int total = 0;

  if (value == 0)
    {
      value = "<NULL>";
    }

  /* Bounded strlen: stop at the precision, so that no byte beyond
     VALUE[MAX - 1] is ever read.  */
  for (strln = 0; (max < 0 || strln < max) && value[strln]; ++strln)
    ;

  padlen = min - strln;
  if (padlen < 0)
    padlen = 0;
  if (flags & DP_F_MINUS)
    padlen = -padlen; /* Left Justify */

  while (padlen > 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      --padlen;
    }
  for (cnt = 0; cnt < strln; ++cnt)
    total += dopr_outch (buffer, currlen, maxlen, value[cnt]);
  while (padlen < 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      ++padlen;
    }
  return total;
}
/* Output the integer VALUE in base BASE.

   MIN is the minimum field width and MAX the precision (minimum
   number of digits; a negative MAX counts as 0).  FLAGS is a
   combination of DP_F_* bits: DP_F_UNSIGNED makes VALUE be treated
   as unsigned, DP_F_PLUS and DP_F_SPACE select the sign shown for
   non-negative numbers, DP_F_ZERO turns the space padding into zero
   padding, DP_F_MINUS left-justifies, and DP_F_UP selects upper-case
   hexadecimal digits.

   DP_F_NUM (the '#' flag, i.e. the 0x and 0 alternate forms) is not
   handled yet.

   Returns the number of characters the output consists of.  */
static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
                   LLONG value, int base, int min, int max, int flags)
{
  int signvalue = 0;
  unsigned LLONG uvalue;
  char convert[24];
  int place = 0;
  int spadlen = 0; /* amount to space pad */
  int zpadlen = 0; /* amount to zero pad */
  const char *digits;
  int total = 0;

  if (max < 0)
    max = 0;

  uvalue = value;

  if(!(flags & DP_F_UNSIGNED))
    {
      if( value < 0 )
        {
          signvalue = '-';
          /* Negate in unsigned arithmetic: `-value' overflows (and
             is undefined) for the most negative value, whereas
             unsigned negation is always defined and yields the
             magnitude.  */
          uvalue = 0 - uvalue;
        }
      else if (flags & DP_F_PLUS)  /* Do a sign (+/i) */
        signvalue = '+';
      else if (flags & DP_F_SPACE)
        signvalue = ' ';
    }

  if (flags & DP_F_UP)
    /* Should characters be upper case? */
    digits = "0123456789ABCDEF";
  else
    digits = "0123456789abcdef";

  /* Produce the digits in reverse order.  */
  do {
    convert[place++] = digits[uvalue % (unsigned)base];
    uvalue = (uvalue / (unsigned)base );
  } while(uvalue && (place < (int) sizeof (convert)));
  if (place == (int) sizeof (convert)) place--;
  convert[place] = 0;

  zpadlen = max - place;
  spadlen = min - MAX (max, place) - (signvalue ? 1 : 0);
  if (zpadlen < 0) zpadlen = 0;
  if (spadlen < 0) spadlen = 0;
  if (flags & DP_F_ZERO)
    {
      zpadlen = MAX(zpadlen, spadlen);
      spadlen = 0;
    }
  if (flags & DP_F_MINUS)
    spadlen = -spadlen; /* Left Justify */

#ifdef DEBUG_SNPRINTF
  dprint (1, (debugfile, "zpad: %d, spad: %d, min: %d, max: %d, place: %d\n",
      zpadlen, spadlen, min, max, place));
#endif

  /* Spaces */
  while (spadlen > 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      --spadlen;
    }

  /* Sign */
  if (signvalue)
    total += dopr_outch (buffer, currlen, maxlen, signvalue);

  /* Zeros */
  while (zpadlen > 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, '0');
      --zpadlen;
    }

  /* Digits */
  while (place > 0)
    total += dopr_outch (buffer, currlen, maxlen, convert[--place]);

  /* Left Justified spaces */
  while (spadlen < 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      ++spadlen;
    }

  return total;
}
/* Return the absolute value of VALUE.  */
static LDOUBLE abs_val (LDOUBLE value)
{
  if (value < 0)
    return -value;
  return value;
}
/* Return 10 raised to the power EXP, computed by repeated
   multiplication.  The loop counts EXP down to zero, so EXP is
   expected to be non-negative.  */
static LDOUBLE pow10 (int exp)
{
  LDOUBLE result = 1;

  for (; exp != 0; exp--)
    result *= 10;

  return result;
}
/* Convert VALUE to long, rounding up when the part dropped by the
   conversion is at least 0.5.  */
static long round (LDOUBLE value)
{
  long whole = value;
  LDOUBLE rest = value - whole;

  if (rest >= 0.5)
    whole++;

  return whole;
}
/* Output the floating-point number FVALUE for a %f conversion.

   MIN is the minimum field width and MAX the precision; a negative
   MAX selects the default of 6, and values above 9 are reduced to 9
   because the fraction is converted through a `long'.  FLAGS is a
   combination of DP_F_* bits: DP_F_PLUS and DP_F_SPACE select the
   sign shown for non-negative numbers, DP_F_ZERO pads with zeros
   instead of spaces, and DP_F_MINUS left-justifies.

   The number is split into an integer part and a fraction scaled by
   10^MAX; both are converted as integers.  The scaled fraction has
   lost its leading zeros (0.05 at precision 2 becomes 5), so they
   are put back between the decimal point and the fraction digits.

   Returns the number of characters the output consists of.  */
static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
                  LDOUBLE fvalue, int min, int max, int flags)
{
  static const char digits[] = "0123456789";
  int signvalue = 0;
  LDOUBLE ufvalue;
  char iconvert[20];
  char fconvert[20];
  int iplace = 0;
  int fplace = 0;
  int padlen = 0; /* amount to pad */
  int zpadlen = 0;
  int total = 0;
  long intpart;
  long fracpart;

  /*
   * AIX manpage says the default is 0, but Solaris says the default
   * is 6, and sprintf on AIX defaults to 6
   */
  if (max < 0)
    max = 6;

  ufvalue = abs_val (fvalue);

  if (fvalue < 0)
    signvalue = '-';
  else if (flags & DP_F_PLUS)  /* Do a sign (+/i) */
    signvalue = '+';
  else if (flags & DP_F_SPACE)
    signvalue = ' ';

  intpart = ufvalue;

  /*
   * Sorry, we only support 9 digits past the decimal because of our
   * conversion method
   */
  if (max > 9)
    max = 9;

  /* We "cheat" by converting the fractional part to integer by
   * multiplying by a factor of 10
   */
  fracpart = round ((pow10 (max)) * (ufvalue - intpart));

  /* Rounding may carry into the integer part.  */
  if (fracpart >= pow10 (max))
    {
      intpart++;
      fracpart -= pow10 (max);
    }

#ifdef DEBUG_SNPRINTF
  dprint (1, (debugfile, "fmtfp: %f =? %d.%d\n", fvalue, intpart, fracpart));
#endif

  /* Convert integer part (digits are produced in reverse order) */
  do {
    iconvert[iplace++] = digits[intpart % 10];
    intpart = (intpart / 10);
  } while(intpart && (iplace < 20));
  if (iplace == 20) iplace--;
  iconvert[iplace] = 0;

  /* Convert fractional part */
  do {
    fconvert[fplace++] = digits[fracpart % 10];
    fracpart = (fracpart / 10);
  } while(fracpart && (fplace < 20));
  if (fplace == 20) fplace--;
  fconvert[fplace] = 0;

  /* -1 for decimal point, another -1 if we are printing a sign */
  padlen = min - iplace - max - 1 - ((signvalue) ? 1 : 0);
  /* Number of leading zeros the fraction is missing.  */
  zpadlen = max - fplace;
  if (zpadlen < 0)
    zpadlen = 0;
  if (padlen < 0)
    padlen = 0;
  if (flags & DP_F_MINUS)
    padlen = -padlen; /* Left Justify */

  if ((flags & DP_F_ZERO) && (padlen > 0))
    {
      /* With zero padding the sign comes before the zeros.  */
      if (signvalue)
        {
          total += dopr_outch (buffer, currlen, maxlen, signvalue);
          --padlen;
          signvalue = 0;
        }
      while (padlen > 0)
        {
          total += dopr_outch (buffer, currlen, maxlen, '0');
          --padlen;
        }
    }
  while (padlen > 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      --padlen;
    }
  if (signvalue)
    total += dopr_outch (buffer, currlen, maxlen, signvalue);

  while (iplace > 0)
    total += dopr_outch (buffer, currlen, maxlen, iconvert[--iplace]);

  /*
   * Decimal point.  This should probably use locale to find the correct
   * char to print out.
   */
  if (max > 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, '.');

      /* The missing zeros belong in front of the fraction digits:
         1.05 at precision 2 must print as "1.05", not "1.50".  */
      while (zpadlen > 0)
        {
          total += dopr_outch (buffer, currlen, maxlen, '0');
          --zpadlen;
        }

      while (fplace > 0)
        total += dopr_outch (buffer, currlen, maxlen, fconvert[--fplace]);
    }

  while (padlen < 0)
    {
      total += dopr_outch (buffer, currlen, maxlen, ' ');
      ++padlen;
    }

  return total;
}
/* Output the single character C.

   C is stored at BUFFER[*CURRLEN], and *CURRLEN advanced, only when
   BUFFER is non-NULL and at least one byte would remain free after
   it for the terminating NUL.  Otherwise the character is dropped.

   Always returns 1, so callers can add up the length the complete
   output would have had.  */
static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c)
{
  /* A NULL buffer means "count only"; never write through it.  */
  if (buffer != NULL && *currlen + 1 < maxlen)
    buffer[(*currlen)++] = c;
  return 1;
}
#ifndef HAVE_VSNPRINTF
/* Replacement vsnprintf: format FMT with ARGS into STR, which holds
   COUNT bytes, by way of dopr.  Returns what dopr returns.  STR is
   set to the empty string first, but only when it has room for at
   least one byte: with COUNT == 0 nothing may be written to it.  */
int vsnprintf (char *str, size_t count, const char *fmt, va_list args)
{
  if (str != NULL && count > 0)
    str[0] = 0;
  return dopr(str, count, fmt, args);
}
#endif /* !HAVE_VSNPRINTF */
#ifndef HAVE_SNPRINTF
/* Replacement snprintf: collects the variable arguments and passes
   them to vsnprintf, returning its result.  Two function headers are
   provided: an ANSI prototype when HAVE_STDARGS is defined, and a
   <varargs.h>-style header otherwise.  */
/* VARARGS3 */
#ifdef HAVE_STDARGS
int snprintf (char *str,size_t count,const char *fmt,...)
#else
int snprintf (va_alist) va_dcl
#endif
{
#ifndef HAVE_STDARGS
/* In the varargs variant the "named" parameters are ordinary locals
   that VA_SHIFT fills in from the argument list below. */
char *str;
size_t count;
char *fmt;
#endif
VA_LOCAL_DECL;
int total;
VA_START (fmt);
/* With HAVE_STDARGS each VA_SHIFT expands to an empty statement. */
VA_SHIFT (str, char *);
VA_SHIFT (count, size_t );
VA_SHIFT (fmt, char *);
total = vsnprintf(str, count, fmt, ap);
VA_END;
return total;
}
#endif /* !HAVE_SNPRINTF */
#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF */
#ifdef TEST_SNPRINTF
#include <stdio.h>
#include <string.h>		/* strcmp */
#ifndef LONG_STRING
#define LONG_STRING 1024
#endif
/* Self-test: format a table of numbers with a table of formats, once
   with snprintf and once with the system sprintf, and report every
   pair of results that differs.  Each number table ends with a 0
   sentinel, which is therefore never tested itself.  The exit status
   is 0 when all comparisons matched and 1 otherwise.  */
int main (void)
{
  char buf1[LONG_STRING];
  char buf2[LONG_STRING];
  char *fp_fmt[] = {
    "%-1.5f",
    "%1.5f",
    "%123.9f",
    "%10.5f",
    "% 10.5f",
    "%+22.9f",
    "%+4.9f",
    "%01.3f",
    "%4f",
    "%3.1f",
    "%3.2f",
    "%.0f",
    "%.1f",
    NULL
  };
  double fp_nums[] = { -1.5, 134.21, 91340.2, 341.1234, 0203.9, 0.96, 0.996,
                       0.9996, 1.996, 4.136, 0};
  char *int_fmt[] = {
    "%-1.5d",
    "%1.5d",
    "%123.9d",
    "%5.5d",
    "%10.5d",
    "% 10.5d",
    "%+22.33d",
    "%01.3d",
    "%4d",
    NULL
  };
  /* These are printed with %d formats, so they must be `int': a
     `long' argument for %d is undefined where the sizes differ.  */
  int int_nums[] = { -1, 134, 91340, 341, 0203, 0};
#if SIZEOF_LONG_LONG != 0
  char *llong_fmt[] = {
    "%lld", "%llu",
    "%-1.5lld", "%-1.5llu",
    "%1.5lld", "%1.5llu",
    "%123.9lld", "%123.9llu",
    "%5.5lld", "%5.5llu",
    "%10.5lld", "%10.5llu",
    "% 10.5lld", "% 10.5llu",
    "%+22.33lld", "%+22.33llu",
    "%01.3lld", "%01.3llu",
    "%4lld", "%4llu",
    NULL
  };
  long long llong_nums[] = {
    ~(long long)0,			/* all-1 bit pattern */
    (~(unsigned long long)0) >> 1,	/* largest signed long long */
    /* random... */
    -150, 134, 91340, 341,
    0
  };
#endif
  int x, y;
  int fail = 0;
  int num = 0;

  printf ("Testing snprintf format codes against system sprintf...\n");

  for (x = 0; fp_fmt[x] != NULL ; x++)
    for (y = 0; fp_nums[y] != 0 ; y++)
      {
        snprintf (buf1, sizeof (buf1), fp_fmt[x], fp_nums[y]);
        sprintf (buf2, fp_fmt[x], fp_nums[y]);
        if (strcmp (buf1, buf2))
          {
            printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
                   fp_fmt[x], buf1, buf2);
            fail++;
          }
        num++;
      }

  for (x = 0; int_fmt[x] != NULL ; x++)
    for (y = 0; int_nums[y] != 0 ; y++)
      {
        snprintf (buf1, sizeof (buf1), int_fmt[x], int_nums[y]);
        sprintf (buf2, int_fmt[x], int_nums[y]);
        if (strcmp (buf1, buf2))
          {
            printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
                   int_fmt[x], buf1, buf2);
            fail++;
          }
        num++;
      }

#if SIZEOF_LONG_LONG != 0
  for (x = 0; llong_fmt[x] != NULL ; x++)
    for (y = 0; llong_nums[y] != 0 ; y++)
      {
        snprintf (buf1, sizeof (buf1), llong_fmt[x], llong_nums[y]);
        sprintf (buf2, llong_fmt[x], llong_nums[y]);
        if (strcmp (buf1, buf2))
          {
            printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
                   llong_fmt[x], buf1, buf2);
            fail++;
          }
        num++;
      }
#endif

  printf ("%d tests failed out of %d.\n", fail, num);
  return fail != 0;
}
#endif /* TEST_SNPRINTF */
--- NEW FILE: utils.h ---
/* Declarations for utils.c.
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef UTILS_H
#define UTILS_H
/* NOTE(review): this header includes nothing itself.  It relies on the
   including file to have already provided PARAMS, time_t, FILE and
   LARGE_INT.  The definitions of the functions declared here are not
   visible from this header; the group headings below are inferred
   from the function names and should be confirmed against utils.c. */

/* Type of the second argument of accdir(). */
enum accd {
ALLABS = 1
};
/* A linked list of strings. The list is ordered alphabetically. */
typedef struct _slist
{
char *string;
struct _slist *next;
} slist;
/* Opaque here; only pointers to it are used in this header. */
struct hash_table;
/* Result of read_file(); release it with read_file_free(). */
struct file_memory {
char *content; /* start of the file data */
long length; /* number of bytes at CONTENT */
int mmap_p; /* presumably non-zero when CONTENT is mmap'ed -- TODO confirm */
};
/* Opaque here; handled through the wtimer_* functions below. */
struct wget_timer;
/* Time formatting (inferred from names). */
char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *));
#ifdef DEBUG_MALLOC
void print_malloc_debug_stats ();
#endif
/* String helpers (inferred from names). */
char *xstrdup_lower PARAMS ((const char *));
int count_char PARAMS ((const char *, char));
char *strdupdelim PARAMS ((const char *, const char *));
char **sepstring PARAMS ((const char *));
int frontcmp PARAMS ((const char *, const char *));
char *pwd_cuserid PARAMS ((char *));
void fork_to_background PARAMS ((void));
/* File and file-name helpers (inferred from names). */
void touch PARAMS ((const char *, time_t));
int remove_link PARAMS ((const char *));
int file_exists_p PARAMS ((const char *));
int file_non_directory_p PARAMS ((const char *));
long file_size PARAMS ((const char *));
int make_directory PARAMS ((const char *));
char *unique_name PARAMS ((const char *, int));
char *file_merge PARAMS ((const char *, const char *));
int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd));
char *suffix PARAMS ((const char *s));
int match_tail PARAMS ((const char *, const char *, int));
int has_wildcards_p PARAMS ((const char *));
int has_html_suffix_p PARAMS ((const char *));
/* Reading files into memory. */
char *read_whole_line PARAMS ((FILE *));
struct file_memory *read_file PARAMS ((const char *));
void read_file_free PARAMS ((struct file_memory *));
/* String vectors (inferred from names). */
void free_vec PARAMS ((char **));
char **merge_vecs PARAMS ((char **, char **));
/* Operations on the slist type declared above. */
slist *slist_append PARAMS ((slist *, const char *));
slist *slist_prepend PARAMS ((slist *, const char *));
slist *slist_nreverse PARAMS ((slist *));
int slist_contains PARAMS ((slist *, const char *));
void slist_free PARAMS ((slist *));
/* Hash tables used as sets of strings (inferred from names). */
void string_set_add PARAMS ((struct hash_table *, const char *));
int string_set_contains PARAMS ((struct hash_table *, const char *));
void string_set_free PARAMS ((struct hash_table *));
void free_keys_and_values PARAMS ((struct hash_table *));
/* Number formatting (inferred from names). */
char *legible PARAMS ((long));
char *legible_large_int PARAMS ((LARGE_INT));
int numdigit PARAMS ((long));
char *number_to_string PARAMS ((char *, long));
/* Timers operating on struct wget_timer. */
struct wget_timer *wtimer_allocate PARAMS ((void));
struct wget_timer *wtimer_new PARAMS ((void));
void wtimer_delete PARAMS ((struct wget_timer *));
void wtimer_reset PARAMS ((struct wget_timer *));
double wtimer_elapsed PARAMS ((struct wget_timer *));
double wtimer_granularity PARAMS ((void));
/* Miscellaneous. */
char *html_quote_string PARAMS ((const char *));
int determine_screen_width PARAMS ((void));
int random_number PARAMS ((int));
double random_float PARAMS ((void));
int run_with_timeout PARAMS ((double, void (*) (void *), void *));
#endif /* UTILS_H */
--- NEW FILE: ChangeLog ---
2003-10-21 Hrvoje Niksic <hniksic at xemacs.org>
* version.c: Wget 1.9.1 is released.
2003-11-08 Gisle Vanem <giva at bgnett.no>
* ftp-basic.c: Support Windows-2000 ftp servers. Win-2000 *is*
Win-NT 5.0 so calling it ST_WINNT is okay I guess.
2003-11-04 Hrvoje Niksic <hniksic at xemacs.org>
* sysdep.h: Define SYSTEM_FNMATCH only if HAVE_FNMATCH_H is true.
2003-11-01 Hrvoje Niksic <hniksic at xemacs.org>
* connect.c (acceptport): Don't call select_fd when timeout is not
requested.
2003-10-30 Hrvoje Niksic <hniksic at xemacs.org>
[...7374 lines suppressed...]
* connect.c (iread): Added EINTR check loop to select-ing
too. EINTR is now correctly handled with select().
* TODO: new file
1996-05-07 Hrvoje Niksic <hniksic at srce.hr>
* host.c (same_host): Made the function a little bit more
intelligent regarding diversified URL syntaxes.
* url.c (skip_url): Spaces are now skipped after URL:
* Released 1.3.1 with the patch to prevent crashing when sending
NULL to robot* functions and the patch to compile "out of the box"
on AIX.
* recr.c (recursive_retrieve): Added checking whether this_url is
NULL when calling the robot functions.
* ChangeLog: New file.
--- NEW FILE: convert.c ---
/* Conversion of links to local files.
Copyright (C) 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#include <assert.h>
#include <sys/types.h>
#include "wget.h"
#include "convert.h"
#include "url.h"
#include "recur.h"
#include "utils.h"
#include "hash.h"
/* Maps a local file name to the URL it was downloaded from.  Entries
   are added by register_download. */
static struct hash_table *dl_file_url_map;
/* Maps a URL to the local file name it was saved under.  Entries are
   added by register_download and register_redirection. */
struct hash_table *dl_url_file_map;
/* List of HTML files downloaded in this Wget run, used for link
conversion after Wget is done. The list and the set contain the
same information, except the list maintains the order. Perhaps I
should get rid of the list, it's there for historical reasons. */
static slist *downloaded_html_list;
struct hash_table *downloaded_html_set;
/* Forward declaration; the definition follows convert_all_links. */
static void convert_links PARAMS ((const char *, struct urlpos *));
/* Convert the links of every downloaded HTML file.

   This must run once the retrieval is over, because only then is it
   known for certain which URLs were downloaded and which were not,
   and that is what decides the direction of each conversion:

   - a link to a URL that was downloaded becomes a relative link to
     the local file;
   - a link to a URL that was not downloaded becomes a complete
     (absolute) URL pointing back at the server.

   The HTML files to process come from downloaded_html_list; the
   URL/file associations come from dl_file_url_map and
   dl_url_file_map.  */
void
convert_all_links (void)
{
  struct wget_timer *timer = wtimer_new ();
  int file_count = 0;
  long msecs;
  slist *node;

  /* The list was built by prepending, so reverse it (destructively)
     to process the files in download order.  */
  downloaded_html_list = slist_nreverse (downloaded_html_list);

  for (node = downloaded_html_list; node != NULL; node = node->next)
    {
      char *file = node->string;
      /* get_urls_html needs the URL the file came from.  */
      char *url = hash_table_get (dl_file_url_map, file);
      struct urlpos *urls, *pos;

      if (url == NULL)
	{
	  DEBUGP (("Apparently %s has been removed.\n", file));
	  continue;
	}

      DEBUGP (("Scanning %s (from %s)\n", file, url));

      /* Parse the HTML file.  meta_disallow_follow is deliberately
	 not requested: even if this file was not followed, links to
	 documents reached from other files should still be
	 converted.  */
      urls = get_urls_html (file, url, NULL);

      for (pos = urls; pos != NULL; pos = pos->next)
	{
	  struct url *u = pos->url;
	  char *local_name;

	  if (pos->link_base_p)
	    {
	      /* The parser has already resolved base references, so
		 the base URL is simply blanked out.  */
	      pos->convert = CO_NULLIFY_BASE;
	      continue;
	    }

	  /* Whether the URL was downloaded decides the direction.  */
	  local_name = hash_table_get (dl_url_file_map, u->url);

	  if (local_name == NULL)
	    {
	      /* Not downloaded: unless the link is already complete
		 (full host name included), make it complete so it
		 stays reachable when browsing the local copy.  */
	      if (!pos->link_complete_p)
		pos->convert = CO_CONVERT_TO_COMPLETE;
	      pos->local_name = NULL;
	      DEBUGP (("will convert url %s to complete\n", u->url));
	      continue;
	    }

	  /* Downloaded: always make the link relative, even if it
	     already was, since the local directory layout may differ
	     from the server's (think `-nd', `--cut-dirs', etc.)  */
	  pos->convert = CO_CONVERT_TO_RELATIVE;
	  pos->local_name = xstrdup (local_name);
	  DEBUGP (("will convert url %s to local %s\n", u->url, local_name));
	}

      /* Rewrite the file, then release the parsed link list.  */
      convert_links (file, urls);
      ++file_count;
      free_urlpos (urls);
    }

  msecs = wtimer_elapsed (timer);
  wtimer_delete (timer);
  logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
	     file_count, (double)msecs / 1000);
}
/* Forward declarations of the static helpers used by convert_links;
   they are all defined further down in this file. */
static void write_backup_file PARAMS ((const char *, downloaded_file_t));
static const char *replace_attr PARAMS ((const char *, int, FILE *,
const char *));
static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
const char *, int));
static char *local_quote_string PARAMS ((const char *));
static char *construct_relative PARAMS ((const char *, const char *));
/* Change the links in one HTML file.  LINKS is a list of links in the
   document, along with their positions and the desired direction of
   the conversion.

   FILE is read into memory, removed, and written again from scratch:
   the text between links is copied verbatim and each link marked for
   conversion is replaced through replace_attr or
   replace_attr_refresh_hack.  If no link needs converting, the file
   is left untouched.  Failures to read, unlink or reopen FILE are
   logged and the function returns without converting anything.  */
static void
convert_links (const char *file, struct urlpos *links)
{
  struct file_memory *fm;
  FILE *fp;
  const char *p;
  downloaded_file_t downloaded_file_return;
  struct urlpos *link;
  int to_url_count = 0, to_file_count = 0;

  logprintf (LOG_VERBOSE, _("Converting %s... "), file);

  {
    /* First we do a "dry run": go through the list L and see whether
       any URL needs to be converted in the first place.  If not, just
       leave the file alone.  */
    int dry_count = 0;
    struct urlpos *dry;
    for (dry = links; dry; dry = dry->next)
      if (dry->convert != CO_NOCONVERT)
	++dry_count;
    if (!dry_count)
      {
	logputs (LOG_VERBOSE, _("nothing to do.\n"));
	return;
      }
  }

  fm = read_file (file);
  if (!fm)
    {
      logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
		 file, strerror (errno));
      return;
    }

  downloaded_file_return = downloaded_file (CHECK_FOR_FILE, file);
  if (opt.backup_converted && downloaded_file_return)
    write_backup_file (file, downloaded_file_return);

  /* Before opening the file for writing, unlink the file.  This is
     important if the data in FM is mmaped.  In such case, nulling the
     file, which is what fopen() below does, would make us read all
     zeroes from the mmaped region.  */
  if (unlink (file) < 0 && errno != ENOENT)
    {
      logprintf (LOG_NOTQUIET, _("Unable to delete `%s': %s\n"),
		 file, strerror (errno));
      read_file_free (fm);
      return;
    }
  /* Now open the file for writing.  */
  fp = fopen (file, "wb");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
		 file, strerror (errno));
      read_file_free (fm);
      return;
    }

  /* Here we loop through all the URLs in file, replacing those of
     them that are downloaded with relative references.  */
  p = fm->content;
  for (link = links; link; link = link->next)
    {
      char *url_start;

      /* Validate the position before forming a pointer from it:
	 computing an address outside the buffer is undefined
	 behavior even if the pointer is never dereferenced.  */
      if (link->pos < 0 || link->pos >= fm->length)
	{
	  DEBUGP (("Something strange is going on. Please investigate.\n"));
	  break;
	}
      url_start = fm->content + link->pos;

      /* If the URL is not to be converted, skip it.  */
      if (link->convert == CO_NOCONVERT)
	{
	  DEBUGP (("Skipping %s at position %d.\n", link->url->url, link->pos));
	  continue;
	}

      /* Echo the file contents, up to the offending URL's opening
         quote, to the outfile.  */
      fwrite (p, 1, url_start - p, fp);
      p = url_start;

      switch (link->convert)
	{
	case CO_CONVERT_TO_RELATIVE:
	  /* Convert absolute URL to relative. */
	  {
	    char *newname = construct_relative (file, link->local_name);
	    char *quoted_newname = local_quote_string (newname);

	    if (!link->link_refresh_p)
	      p = replace_attr (p, link->size, fp, quoted_newname);
	    else
	      p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
					     link->refresh_timeout);

	    DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
		     link->url->url, newname, link->pos, file));
	    xfree (newname);
	    xfree (quoted_newname);
	    ++to_file_count;
	    break;
	  }
	case CO_CONVERT_TO_COMPLETE:
	  /* Convert the link to absolute URL. */
	  {
	    char *newlink = link->url->url;
	    char *quoted_newlink = html_quote_string (newlink);

	    if (!link->link_refresh_p)
	      p = replace_attr (p, link->size, fp, quoted_newlink);
	    else
	      p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
					     link->refresh_timeout);

	    DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
		     newlink, link->pos, file));
	    xfree (quoted_newlink);
	    ++to_url_count;
	    break;
	  }
	case CO_NULLIFY_BASE:
	  /* Change the base href to "". */
	  p = replace_attr (p, link->size, fp, "");
	  break;
	case CO_NOCONVERT:
	  /* Filtered out above; reaching this is a logic error.  */
	  abort ();
	  break;
	}
    }

  /* Output the rest of the file. */
  if (p - fm->content < fm->length)
    fwrite (p, 1, fm->length - (p - fm->content), fp);
  fclose (fp);
  read_file_free (fm);

  logprintf (LOG_VERBOSE, "%d-%d\n", to_file_count, to_url_count);
}
/* Build the relative link that leads from the file named S1 (the
   referring document) to the file named S2 (the referred one), and
   return it as a freshly allocated string.

   Examples:

     S1 = "jagor.srce.hr/index.html"
     S2 = "jagor.srce.hr/images/news.gif"
       => "images/news.gif"

     S1 = "fly.cc.fer.hr/ioccc/index.html"
     S2 = "fly.cc.fer.hr/images/fly.gif"
       => "../images/fly.gif"

   If S2 is absolute (begins with `/'), a copy of S2 is returned.

   Caveats: S1 must not begin with `/' unless S2 does too, and S1 must
   not contain ".." components -- construct_relative
   ("fly/ioccc/../index.html", "fly/images/fly.gif") will fail.  (A
   workaround is to run something like path_simplify() on S1 first.)  */
static char *
construct_relative (const char *s1, const char *s2)
{
  int pos = 0;			/* scan position, shared by S1 and S2 */
  int common = 0;		/* offset just past the last shared `/' */
  int updirs = 0;		/* number of "../" prefixes needed */
  int k;
  char *rel, *out;

  if (*s2 == '/')
    return xstrdup (s2);
  /* S1 should *not* be absolute, if S2 wasn't. */
  assert (*s1 != '/');

  /* Walk over the leading directories both names have in common.  */
  for (;;)
    {
      while (s1[pos] && s2[pos]
	     && s1[pos] == s2[pos]
	     && s1[pos] != '/'
	     && s2[pos] != '/')
	++pos;
      if (s1[pos] != '/' || s2[pos] != '/')
	break;
      common = ++pos;
    }

  /* Every remaining `/' in S1 is a directory level to climb out of.  */
  for (; s1[pos]; pos++)
    if (s1[pos] == '/')
      ++updirs;

  /* The result is "../" repeated UPDIRS times, followed by the part
     of S2 that is not shared with S1.  */
  rel = (char *)xmalloc (3 * updirs + strlen (s2 + common) + 1);
  out = rel;
  for (k = 0; k < updirs; k++, out += 3)
    memcpy (out, "../", 3);
  strcpy (out, s2 + common);
  return rel;
}
/* Preserve the unconverted version of FILE by renaming it to a
   ".orig" name before convert_links writes the converted text.

   DOWNLOADED_FILE_RETURN is what downloaded_file reported for FILE
   and selects how the backup name is formed.  Only files downloaded
   in this run are backed up, so .orig files left over from earlier
   invocations are not clobbered.  A file is backed up at most once
   per run; names already handled are remembered in a static list.  */
static void
write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
{
  /* Files backed up so far.  Never freed: it is needed until the
     convert_all_links() call, one of the last things the program does
     before terminating.  (A flag in struct urlpos cannot replace this
     list because convert_all_links works on a freshly built urlpos
     list.)  */
  static slist *converted_files = NULL;

  size_t file_len = strlen (file);
  char *backup_name;
  slist *seen;

  if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
    {
      /* Overwrite the trailing "html" with "orig".  When checking
	 whether a file was downloaded before, it is not yet known
	 that -E will append ".html", so the name to compare against
	 is the original URL plus ".orig", not plus ".html.orig".  */
      backup_name = alloca (file_len + 1);
      strcpy (backup_name, file);
      strcpy ((backup_name + file_len) - 4, "orig");
    }
  else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */
    {
      /* Plain case: tack ".orig" onto the name.  */
      backup_name = alloca (file_len + sizeof (".orig"));
      strcpy (backup_name, file);
      strcpy (backup_name + file_len, ".orig");
    }

  /* convert_all_links() may reach the same file twice.  Backing it up
     again would save the first-pass conversion instead of the
     original, so stop if FILE has been handled already.  */
  for (seen = converted_files; seen != NULL; seen = seen->next)
    if (strcmp (seen->string, file) == 0)
      return;

  /* Rename <file> to <file>.orig before the former gets written over. */
  if (rename (file, backup_name) != 0)
    logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),
	       file, backup_name, strerror (errno));

  /* Remember FILE (as a private copy) at the head of the list.  */
  seen = xmalloc (sizeof (*seen));
  seen->string = xstrdup (file); /* die on out-of-mem. */
  seen->next = converted_files;
  converted_files = seen;
}
/* Forward declaration; find_fragment is defined after its first use
   in replace_attr. */
static int find_fragment PARAMS ((const char *, int, const char **,
const char **));
/* Write NEW_TEXT to FP in place of the attribute value that starts at
   P and is SIZE characters long, and return the position in the input
   just past the old value.

   The old value is either quoted, in which case SIZE includes both
   quote characters:

       "...old-contents..."
       <---    size    --->

   or unquoted:

       ...old-contents...
       <---   size   -->

   The output is always quoted, reusing the original quote character
   when there was one and `"' otherwise.  A fragment identifier found
   in the old value (see find_fragment) is copied after NEW_TEXT.  */
static const char *
replace_attr (const char *p, int size, FILE *fp, const char *new_text)
{
  const char *frag_beg, *frag_end;
  int was_quoted = (*p == '\"' || *p == '\'');
  char quote_char = was_quoted ? *p : '\"';

  if (was_quoted)
    {
      ++p;
      size -= 2;		/* disregard opening and closing quote */
    }

  putc (quote_char, fp);
  fputs (new_text, fp);

  /* Carry over the fragment identifier, if any.  */
  if (find_fragment (p, size, &frag_beg, &frag_end))
    fwrite (frag_beg, 1, frag_end - frag_beg, fp);

  putc (quote_char, fp);

  /* Step over the old contents and, if present, the closing quote.  */
  return p + size + (was_quoted ? 1 : 0);
}
/* Like replace_attr, but for the CONTENT attribute of
   <meta http-equiv=refresh content="...">: the replacement written is
   "TIMEOUT; URL=NEW_TEXT" rather than NEW_TEXT alone.  */
static const char *
replace_attr_refresh_hack (const char *p, int size, FILE *fp,
			   const char *new_text, int timeout)
{
  /* Space for the digits of TIMEOUT, the "; URL=" separator, NEW_TEXT
     and the terminating NUL, e.g. "0; URL=...".  */
  char *content = (char *)alloca (numdigit (timeout)
				  + 6 /* "; URL=" */
				  + strlen (new_text)
				  + 1);

  sprintf (content, "%d; URL=%s", timeout, new_text);
  return replace_attr (p, size, fp, content);
}
/* Look for the first '#' in [BEG, BEG+SIZE) that is not immediately
   preceded by '&'.  If there is one, store its address in *BP and the
   end of the region (BEG+SIZE) in *EP and return 1.  Otherwise return
   0 and leave *BP and *EP untouched.

   This is used for finding the fragment identifiers in URLs; the '&'
   exception skips over numeric character references such as
   "&#38;".  */
static int
find_fragment (const char *beg, int size, const char **bp, const char **ep)
{
  const char *const end = beg + size;
  const char *cur;
  int after_amp = 0;		/* was the previous character '&'? */

  for (cur = beg; cur < end; cur++)
    {
      if (*cur == '#' && !after_amp)
	{
	  *bp = cur;
	  *ep = end;
	  return 1;
	}
      after_amp = (*cur == '&');
    }
  return 0;
}
/* Return FILE quoted for use as a local reference inside an HTML
   file; the result comes from html_quote_string.

   When `--html-extension' is in effect, every `?' is first rewritten
   as "%3F", so that part of the file name is not taken for a query
   string when the converted file is browsed through HTTP.  This is
   only safe with that option: turning "index.html?foo=bar" into
   "index.html%3Ffoo=bar" would break local browsing, because the
   latter is not even recognized as an HTML file, whereas
   "index.html?foo=bar.html" -> "index.html%3Ffoo=bar.html" works for
   both local and HTTP-served browsing.  */
static char *
local_quote_string (const char *file)
{
  const char *src;
  char *escaped, *dst;
  int qm, escaped_len;

  if (!opt.html_extension)
    return html_quote_string (file);

  qm = count_char (file, '?');
  if (!qm)
    return html_quote_string (file);

  /* Each `?' grows from one character to three ("%3F"), i.e. two
     extra characters per question mark.  */
  escaped_len = strlen (file) + qm * 2;
  escaped = (char *)alloca (escaped_len + 1);
  dst = escaped;
  for (src = file; *src; src++)
    {
      if (*src == '?')
	{
	  dst[0] = '%';
	  dst[1] = '3';
	  dst[2] = 'F';
	  dst += 3;
	}
      else
	*dst++ = *src;
    }
  assert (dst - escaped == escaped_len);
  *dst = '\0';

  return html_quote_string (escaped);
}
/* Book-keeping code for dl_file_url_map, dl_url_file_map,
downloaded_html_list, and downloaded_html_set. Other code calls
these functions to let us know that a file has been downloaded. */
/* Create dl_file_url_map and dl_url_file_map on first use.  Each table
   is made with make_string_hash_table only while its pointer is still
   null, so invoking the macro repeatedly is harmless.  The do/while (0)
   wrapper lets it be used as a single statement. */
#define ENSURE_TABLES_EXIST do { \
if (!dl_file_url_map) \
dl_file_url_map = make_string_hash_table (0); \
if (!dl_url_file_map) \
dl_url_file_map = make_string_hash_table (0); \
} while (0)
/* Return 1 if S1 and S2 name the same thing, give or take a trailing
   "/" or "/index.html", and 0 otherwise.  The cases in which it
   returns 1 are (substitute any non-empty string for "foo"):

   m("foo/index.html", "foo/")  ==> 1
   m("foo/", "foo/index.html")  ==> 1
   m("foo", "foo/index.html")   ==> 1
   m("foo", "foo/")             ==> 1
   m("foo", "foo")              ==> 1

   Strings that differ in their very first character, including two
   empty strings, do not match.  */
static int
match_except_index (const char *s1, const char *s2)
{
  const char *a = s1, *b = s2;
  const char *tail;

  /* Skip the prefix the two strings share.  */
  while (*a && *b && *a == *b)
    ++a, ++b;

  if (a == s1)
    /* Nothing in common.  This also guarantees that stepping TAIL
       back by one below stays inside the string.  */
    return 0;

  if (!*a && !*b)
    /* Both ended together -- the strings are equal.  */
    return 1;
  if (*a && *b)
    /* Ordinary mismatch, e.g. "/foo/bar" and "/foo/qux".  */
    return 0;

  /* Exactly one string has characters left; TAIL is that remainder.  */
  tail = *a ? a : b;

  /* foo            */            /* foo/           */
  /* foo/index.html */  /* or */  /* foo/index.html */
  /*    ^           */            /*     ^          */

  if (*tail == '/')
    {
      if (tail[1] == '\0')
	/* "foo" against "foo/".  */
	return 1;
    }
  else
    /* The shared prefix ended with the slash (the right-hand case
       above); step back so that TAIL starts at it.  */
    --tail;

  return 0 == strcmp (tail, "/index.html");
}
/* hash_table_map callback used by dissociate_urls_from_file.  KEY is a
   URL and VALUE the file name it maps to in dl_url_file_map; ARG is
   the file name being dissociated.  An entry whose file name equals
   ARG is removed from dl_url_file_map and its key and value strings
   are freed.  Always returns 0 so that the mapping continues.  */
static int
dissociate_urls_from_file_mapper (void *key, void *value, void *arg)
{
  char *entry_url = (char *)key;
  char *entry_file = (char *)value;
  const char *wanted = (const char *)arg;

  if (strcmp (entry_file, wanted) != 0)
    /* Some other file's entry -- leave it alone.  */
    return 0;

  hash_table_remove (dl_url_file_map, entry_url);
  xfree (entry_url);
  xfree (entry_file);
  return 0;
}
/* Remove all associations from various URLs to FILE from dl_url_file_map.
   The whole table is walked with hash_table_map, and
   dissociate_urls_from_file_mapper drops and frees every entry whose
   value equals FILE.  FILE is cast to (char *) only to pass it as the
   mapper's argument; the mapper just compares against it. */
static void
dissociate_urls_from_file (const char *file)
{
hash_table_map (dl_url_file_map, dissociate_urls_from_file_mapper,
(char *)file);
}
/* Record that URL has been successfully downloaded to FILE.  The link
   conversion code uses this to turn references to URLs into
   references to local files; it is also used to check whether a URL
   has already been downloaded.

   Both dl_file_url_map (FILE -> URL) and dl_url_file_map
   (URL -> FILE) are updated, and the tables keep their own copies of
   the strings.  */
void
register_download (const char *url, const char *file)
{
  char *old_file, *old_url;
  int add_file_mapping = 1;	/* cleared for non-canonical URLs */

  ENSURE_TABLES_EXIST;

  /* With some forms of retrieval two different URLs can end up in the
     same local file; this is neither likely nor particularly
     desirable.  When it happens the second download overrides the
     first, so the old URL has to be dissociated from the file.  */
  if (hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
    {
      if (0 == strcmp (url, old_url))
	/* The very same URL was downloaded twice.  Nothing to do.  */
	return;

      if (match_except_index (url, old_url)
	  && !hash_table_contains (dl_url_file_map, url))
	{
	  /* The two URLs differ only in the "index.html" ending, e.g.
	     "http://www.server.com/" and
	     "http://www.server.com/index.html".  Keep the old
	     FILE -> URL entry and merely add the new URL as a
	     non-canonical URL -> FILE entry below.  */
	  add_file_mapping = 0;
	}
      else
	{
	  hash_table_remove (dl_file_url_map, file);
	  xfree (old_file);
	  xfree (old_url);

	  /* Drop every URL that points to this file.  There can be
	     more than one, because redirections are stored as
	     multiple entries in dl_url_file_map: if URL1 redirects to
	     URL2, which is saved to FILE, both URL1 and URL2 map to
	     FILE there (while dl_file_url_map only points to URL2).
	     When another URL is loaded into FILE, both must go.

	     This is a linear scan of the whole hash table, hence
	     relatively expensive, but it should only be needed when
	     two URLs resolve to the same file name *and* the
	     "<file>.1" extensions are turned off.  In other words,
	     almost never.  */
	  dissociate_urls_from_file (file);
	}
    }

  if (add_file_mapping)
    hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (url));

  /* A URL->FILE mapping is not possible without a FILE->URL mapping.
     If the latter were present, it should have been removed by the
     code above.  So one might expect

         assert (!hash_table_contains (dl_url_file_map, url));

     to hold here.  It does in recursive mode, where the same URL
     always resolves to the same file.  But with something like

         wget URL URL

     the first URL resolves to "FILE" and the second to "FILE.1".  In
     that case FILE.1 is not found in dl_file_url_map, yet URL still
     points to FILE in dl_url_file_map, so any old entry for URL is
     removed before the new one is stored.  */
  if (hash_table_get_pair (dl_url_file_map, url, &old_url, &old_file))
    {
      hash_table_remove (dl_url_file_map, url);
      xfree (old_url);
      xfree (old_file);
    }

  hash_table_put (dl_url_file_map, xstrdup (url), xstrdup (file));
}
/* Record that the URL FROM has been redirected to the URL TO, by
   mapping FROM to the same local file as TO in dl_url_file_map.  TO
   must already have been downloaded and registered with
   register_download(); this is asserted.  An existing entry for FROM
   is left as it is.  */
void
register_redirection (const char *from, const char *to)
{
  char *target_file;

  ENSURE_TABLES_EXIST;

  target_file = hash_table_get (dl_url_file_map, to);
  assert (target_file != NULL);

  if (hash_table_contains (dl_url_file_map, from))
    return;
  hash_table_put (dl_url_file_map, xstrdup (from), xstrdup (target_file));
}
/* Record that FILE has been deleted: drop its FILE -> URL entry from
   dl_file_url_map, free the strings stored there, and remove every
   URL -> FILE entry in dl_url_file_map that refers to it.  Does
   nothing if FILE was never registered.  */
void
register_delete_file (const char *file)
{
  char *stored_url, *stored_file;

  ENSURE_TABLES_EXIST;

  if (hash_table_get_pair (dl_file_url_map, file, &stored_file, &stored_url))
    {
      hash_table_remove (dl_file_url_map, file);
      xfree (stored_file);
      xfree (stored_url);
      dissociate_urls_from_file (file);
    }
}
/* Record that FILE is an HTML file that has been downloaded, adding
   it to downloaded_html_set and to the front of downloaded_html_list
   unless it is already present.  The set is created on first use.
   URL is currently not used.  */
void
register_html (const char *url, const char *file)
{
  if (downloaded_html_set
      && hash_table_contains (downloaded_html_set, file))
    return;
  if (!downloaded_html_set)
    downloaded_html_set = make_string_hash_table (0);

  /* The set and the list ought to share one copy of FILE, but the
     slist interface insists on strduping the string it gets.  Oh
     well.  */
  string_set_add (downloaded_html_set, file);
  downloaded_html_list = slist_prepend (downloaded_html_list, file);
}
/* Cleanup the data structures associated with recursive retrieving
   (the variables above): dl_file_url_map, dl_url_file_map,
   downloaded_html_set and downloaded_html_list.

   Every pointer is reset to NULL after its object is released, so
   that the register_* functions, which create the tables on demand,
   never see a dangling pointer if they are called again later.  */
void
convert_cleanup (void)
{
  if (dl_file_url_map)
    {
      free_keys_and_values (dl_file_url_map);
      hash_table_destroy (dl_file_url_map);
      dl_file_url_map = NULL;
    }
  if (dl_url_file_map)
    {
      free_keys_and_values (dl_url_file_map);
      hash_table_destroy (dl_url_file_map);
      dl_url_file_map = NULL;
    }
  if (downloaded_html_set)
    {
      string_set_free (downloaded_html_set);
      /* register_html tests this pointer to decide whether the set
	 exists, so it must not be left pointing at freed memory.  */
      downloaded_html_set = NULL;
    }
  slist_free (downloaded_html_list);
  downloaded_html_list = NULL;
}
/* Book-keeping code for downloaded files that enables extension
hacks. */
/* This table should really be merged with dl_file_url_map and
downloaded_html_files. This was originally a list, but I changed
it to a hash table because it was actually taking a lot of time to
find things in it.
   The table is created and filled by downloaded_file() and released
   by downloaded_files_free(). */
static struct hash_table *downloaded_files_hash;
/* The hash table stores "modes" of type downloaded_file_t, but our
   hash tables only accept pointers as keys and values.  So each mode
   is represented by the address of a downloaded_file_t object of
   static storage that holds that mode.  Returns that address for
   MODE, or NULL if MODE is not one of the known values.  */
static downloaded_file_t *
downloaded_mode_to_ptr (downloaded_file_t mode)
{
  static downloaded_file_t not_downloaded = FILE_NOT_ALREADY_DOWNLOADED;
  static downloaded_file_t normally = FILE_DOWNLOADED_NORMALLY;
  static downloaded_file_t html_added = FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED;
  static downloaded_file_t check_only = CHECK_FOR_FILE;

  if (mode == FILE_NOT_ALREADY_DOWNLOADED)
    return &not_downloaded;
  if (mode == FILE_DOWNLOADED_NORMALLY)
    return &normally;
  if (mode == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
    return &html_added;
  if (mode == CHECK_FOR_FILE)
    return &check_only;
  return NULL;
}
/* Remembers which files have been downloaded.  In the standard case,
   should be called with mode == FILE_DOWNLOADED_NORMALLY for each
   file we actually download successfully (i.e. not for ones we have
   failures on or that we skip due to -N).

   When we've downloaded a file and tacked on a ".html" extension due
   to -E, call this function with
   FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED rather than
   FILE_DOWNLOADED_NORMALLY.

   If you just want to check if a file has been previously added
   without adding it, call with mode == CHECK_FOR_FILE.  Please be
   sure to call this function with local filenames, not remote
   URLs.

   Returns the mode FILE was previously registered with, or
   FILE_NOT_ALREADY_DOWNLOADED if it was not registered before this
   call.  An existing registration is never overwritten.  */
downloaded_file_t
downloaded_file (downloaded_file_t mode, const char *file)
{
  downloaded_file_t *ptr;

  if (mode == CHECK_FOR_FILE)
    {
      /* Pure lookup: never creates the table or adds an entry.  */
      if (!downloaded_files_hash)
	return FILE_NOT_ALREADY_DOWNLOADED;
      ptr = hash_table_get (downloaded_files_hash, file);
      if (!ptr)
	return FILE_NOT_ALREADY_DOWNLOADED;
      return *ptr;
    }

  if (!downloaded_files_hash)
    downloaded_files_hash = make_string_hash_table (0);

  ptr = hash_table_get (downloaded_files_hash, file);
  if (ptr)
    return *ptr;

  /* Store the pointer to the static mode object itself.  Storing
     `&ptr' would record the address of this local variable, which is
     dead as soon as the function returns, and later lookups would
     read garbage through it.  */
  ptr = downloaded_mode_to_ptr (mode);
  hash_table_put (downloaded_files_hash, xstrdup (file), ptr);

  return FILE_NOT_ALREADY_DOWNLOADED;
}
/* Callback passed to hash_table_map by downloaded_files_free: releases
   the KEY of one entry with xfree.  VALUE and IGNORED are not used.
   Always returns 0.  */
static int
df_free_mapper (void *key, void *value, void *ignored)
{
xfree (key);
return 0;
}
/* Release the downloaded-files table: free every key, destroy the
   table, and reset the global so a later call is a no-op.  */
void
downloaded_files_free (void)
{
  if (!downloaded_files_hash)
    return;

  hash_table_map (downloaded_files_hash, df_free_mapper, NULL);
  hash_table_destroy (downloaded_files_hash);
  downloaded_files_hash = NULL;
}
--- NEW FILE: host.h ---
/* Declarations for host.c
Copyright (C) 1995, 1996, 1997, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef HOST_H
#define HOST_H
#ifdef WINDOWS
# include <winsock.h>
#else
# include <netdb.h>
# include <sys/socket.h>
# include <netinet/in.h>
#ifndef __BEOS__
# include <arpa/inet.h>
#endif
#endif
struct url;
struct address_list;
/* wget_sockaddr is used instead of sockaddr where an IPV6 address
must fit. */
/* Union of the socket address layouts used by Wget; the sin6 member
   is only present when ENABLE_IPV6 is defined.  */
typedef union {
struct sockaddr sa; /* Generic but too small */
struct sockaddr_in sin; /* IPv4 socket address */
#ifdef ENABLE_IPV6
struct sockaddr_in6 sin6; /* IPv6 socket address */
#endif
} wget_sockaddr;
/* Four raw bytes of an IPv4 address.
   NOTE(review): byte order is not established in this header;
   presumably network order -- confirm against host.c.  */
typedef struct {
unsigned char bytes[4];
} ip4_address;
/* If compiled with IPv6 support, we internally represent all IP
addresses as IPv6 addresses. IPv4 addresses are dynamically mapped
to IPv6, i.e. stored in the format ::ffff:<Ipv4>. */
#ifdef ENABLE_IPV6
# define MAX_IP_ADDRESS_SIZE 16
#else
# define MAX_IP_ADDRESS_SIZE 4
#endif
/* Raw bytes of an IP address in Wget's internal representation:
   MAX_IP_ADDRESS_SIZE is 16 with ENABLE_IPV6 and 4 without.  */
typedef struct {
unsigned char bytes[MAX_IP_ADDRESS_SIZE];
} ip_address;
/* Function declarations */
struct address_list *lookup_host PARAMS ((const char *, int));
char *herrmsg PARAMS ((int));
void address_list_get_bounds PARAMS ((struct address_list *, int *, int *));
void address_list_copy_one PARAMS ((struct address_list *, int,
ip_address *));
int address_list_match_all PARAMS ((struct address_list *,
struct address_list *));
void address_list_set_faulty PARAMS ((struct address_list *, int));
void address_list_release PARAMS ((struct address_list *));
char *pretty_print_address PARAMS ((ip_address *));
int accept_domain PARAMS ((struct url *));
int sufmatch PARAMS ((const char **, const char *));
void host_cleanup PARAMS ((void));
/* Declarations of the wget_sockaddr and address-mapping helpers.  */
void wget_sockaddr_set_address PARAMS ((wget_sockaddr *, int,
                                        unsigned short, ip_address *));
void wget_sockaddr_set_port PARAMS ((wget_sockaddr *, unsigned short));
void *wget_sockaddr_get_addr PARAMS ((wget_sockaddr *));
unsigned short wget_sockaddr_get_port PARAMS ((const wget_sockaddr *));
/* Declared with an explicit `void' so that ANSI compilers see a real
   prototype instead of an unchecked `()' declaration.  */
socklen_t sockaddr_len PARAMS ((void));
void map_ipv4_to_ip PARAMS ((ip4_address *, ip_address *));
int map_ip_to_ipv4 PARAMS ((ip_address *, ip4_address *));
extern int ip_default_family; /* defined in host.c */
#endif /* HOST_H */
--- NEW FILE: mswindows.c ---
/* mswindows.c -- Windows-specific support
Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
/* #### Someone please document what these functions do! */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <math.h>
#ifdef HACK_BCC_UTIME_BUG
# include <io.h>
# include <fcntl.h>
# ifdef HAVE_UTIME_H
# include <utime.h>
# endif
# ifdef HAVE_SYS_UTIME_H
# include <sys/utime.h>
# endif
#endif
#include "wget.h"
#include "utils.h"
#include "url.h"
#ifndef errno
extern int errno;
#endif
#ifndef ES_SYSTEM_REQUIRED
#define ES_SYSTEM_REQUIRED 0x00000001
#endif
#ifndef ES_CONTINUOUS
#define ES_CONTINUOUS 0x80000000
#endif
/* Defined in log.c. */
void log_request_redirect_output PARAMS ((const char *));
static DWORD set_sleep_mode (DWORD mode);
static DWORD pwr_mode = 0;
static int windows_nt_p;
#ifndef HAVE_SLEEP
/* Emulation of Unix sleep. */
unsigned int
sleep (unsigned seconds)
{
  /* SleepEx returns zero when the whole interval elapsed and non-zero
     when the alertable wait ended early.  Unix sleep returns 0 after a
     full sleep and the unslept time otherwise.  The time actually
     slept is not available here, so an interrupted sleep reports the
     whole interval (in seconds, not milliseconds) as unslept.  */
  if (SleepEx (1000 * seconds, TRUE) == 0)
    return 0U;
  return seconds;
}
#endif
#ifndef HAVE_USLEEP
/* Emulation of Unix usleep(). This has a granularity of
milliseconds, but that's ok because:
a) Wget is only using it with milliseconds [not anymore, but b)
still applies];
b) You can't rely on usleep's granularity anyway. If a caller
expects usleep to respect every microsecond, he's in for a
surprise. */
int
usleep (unsigned long usec)
{
  /* SleepEx works in milliseconds; sub-millisecond remainders are
     dropped by the integer division.  */
  unsigned long msec = usec / 1000;

  SleepEx (msec, TRUE);
  return 0;
}
#endif /* HAVE_USLEEP */
/* Replace *EXEC_NAME with a freshly allocated copy from which a
   trailing ".exe" (in any letter case) has been removed.  ARGC and
   ARGV are accepted but not used.

   Only a literal ".exe" suffix is stripped.  Cutting at the last `.'
   found anywhere in the string would also truncate names such as
   "C:\dir.v2\wget" or ".\wget" that carry no extension at all.  */
void
windows_main_junk (int *argc, char **argv, char **exec_name)
{
  char *name;
  size_t len;

  (void) argc;
  (void) argv;

  name = xstrdup (*exec_name);
  *exec_name = name;

  len = strlen (name);
  if (len > 4
      && name[len - 4] == '.'
      && (name[len - 3] == 'e' || name[len - 3] == 'E')
      && (name[len - 2] == 'x' || name[len - 2] == 'X')
      && (name[len - 1] == 'e' || name[len - 1] == 'E'))
    name[len - 4] = '\0';
}
/* Winsock stuff. */
/* Shut Winsock down and, if set_sleep_mode gave us a previous power
   state to restore (pwr_mode non-zero), hand it back.  pwr_mode is
   cleared afterwards so the restore happens at most once.  */
static void
ws_cleanup (void)
{
  WSACleanup ();

  if (pwr_mode != 0)
    {
      set_sleep_mode (pwr_mode);
    }
  pwr_mode = 0;
}
/* Ask the logging code (log_request_redirect_output) to redirect
   output, naming "CTRL+Break" as the event that triggered it.  */
static void
ws_hangup (void)
{
log_request_redirect_output ("CTRL+Break");
}
/* Windows stand-in for going to the background: make sure a log file
   name is set, tell the user, request log redirection, and call
   FreeConsole unless windows_nt_p is set.  */
void
fork_to_background (void)
{
  /* Non-zero when no log file was configured and we pick one here.  */
  int chose_logfile = !opt.lfilename;

  if (chose_logfile)
    opt.lfilename = unique_name (DEFAULT_LOGFILE, 0);

  printf (_("Continuing in background.\n"));
  if (chose_logfile)
    printf (_("Output will be written to `%s'.\n"), opt.lfilename);

  ws_hangup ();

  if (windows_nt_p == 0)
    FreeConsole ();
}
/* Console control handler (installed with SetConsoleCtrlHandler in
   ws_startup).  For the events selected at build time by
   CTRLC_BACKGND / CTRLBREAK_BACKGND, switch to background mode and
   return TRUE.  For every other event, run ws_cleanup and return
   FALSE.  */
static BOOL WINAPI
ws_handler (DWORD dwEvent)
{
switch (dwEvent)
{
/* If neither macro below is defined, the fork_to_background call has
   no case label in front of it and is never reached.  */
#ifdef CTRLC_BACKGND
case CTRL_C_EVENT:
#endif
#ifdef CTRLBREAK_BACKGND
case CTRL_BREAK_EVENT:
#endif
fork_to_background ();
break;
case CTRL_SHUTDOWN_EVENT:
case CTRL_CLOSE_EVENT:
case CTRL_LOGOFF_EVENT:
default:
ws_cleanup ();
return FALSE;
}
return TRUE;
}
static char *title_buf = NULL;
static char *curr_url = NULL;
static int num_urls = 0;
/* Set the console title to "Wget URL", followed by " ..." when NURL
   is not 1.  Does nothing when NURL is 0.  URL and NURL are remembered
   in curr_url / num_urls for ws_percenttitle; the title buffer is
   sized strlen (URL) + 20 so that function can reuse it.  */
void
ws_changetitle (const char *url, int nurl)
{
  const char *more;

  if (nurl == 0)
    return;

  num_urls = nurl;

  /* Drop whatever the previous call left behind.  */
  if (title_buf)
    xfree (title_buf);
  if (curr_url)
    xfree (curr_url);

  title_buf = (char *) xmalloc (strlen (url) + 20);
  curr_url = xstrdup (url);

  more = (nurl == 1) ? "" : " ...";
  sprintf (title_buf, "Wget %s%s", url, more);
  SetConsoleTitle (title_buf);
}
/* Show PERCENT in the console title as "Wget [NN%] URL".  Only acts
   when exactly one URL is being retrieved, ws_changetitle has set up
   the buffers, and |PERCENT| does not exceed 100.  */
void
ws_percenttitle (double percent)
{
  if (num_urls != 1 || !title_buf || !curr_url)
    return;
  if (!(fabs (percent) <= 100.0))
    return;

  sprintf (title_buf, "Wget [%.0f%%] %s", percent, curr_url);
  SetConsoleTitle (title_buf);
}
/* Return the directory part (including the trailing PATH_SEPARATOR)
   of the running module's file name, or NULL if it cannot be
   determined.  A successful result is computed once and cached in a
   function-static pointer; the caller must not free it.  */
char *
ws_mypath (void)
{
  static char *cached_dir = NULL;
  char module_path[MAX_PATH];
  char *last_sep;

  if (cached_dir != NULL)
    return cached_dir;

  if (!GetModuleFileName (NULL, module_path, MAX_PATH))
    return NULL;

  last_sep = strrchr (module_path, PATH_SEPARATOR);
  if (last_sep == NULL)
    return NULL;

  /* Cut right after the last separator, keeping the separator.  */
  last_sep[1] = '\0';
  cached_dir = xstrdup (module_path);
  return cached_dir;
}
void
ws_help (const char *name)
{
char *mypath = ws_mypath ();
if (mypath)
{
struct stat sbuf;
char *buf = (char *)alloca (strlen (mypath) + strlen (name) + 4 + 1);
sprintf (buf, "%s%s.HLP", mypath, name);
if (stat (buf, &sbuf) == 0)
{
printf (_("Starting WinHelp %s\n"), buf);
WinHelp (NULL, buf, HELP_INDEX, 0);
}
else
{
printf ("%s: %s\n", buf, strerror (errno));
}
}
}
void
ws_startup (void)
{
WORD requested;
WSADATA data;
int err;
OSVERSIONINFO os;
if (GetVersionEx (&os) == TRUE
&& os.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS)
windows_nt_p = 1;
requested = MAKEWORD (1, 1);
err = WSAStartup (requested, &data);
if (err != 0)
{
fprintf (stderr, _("%s: Couldn't find usable socket driver.\n"),
exec_name);
exit (1);
}
if (data.wVersion < requested)
{
fprintf (stderr, _("%s: Couldn't find usable socket driver.\n"),
exec_name);
WSACleanup ();
exit (1);
}
atexit (ws_cleanup);
pwr_mode = set_sleep_mode (0);
SetConsoleCtrlHandler (ws_handler, TRUE);
}
/* Replacement utime function for buggy Borland C++Builder 5.5 compiler.
(The Borland utime function only works on Windows NT.) */
#ifdef HACK_BCC_UTIME_BUG
int
borland_utime (const char *path, const struct utimbuf *times)
{
int fd;
int res;
struct ftime ft;
struct tm *ptr_tm;
if ((fd = open (path, O_RDWR)) < 0)
return -1;
ptr_tm = localtime (×->modtime);
ft.ft_tsec = ptr_tm->tm_sec >> 1;
ft.ft_min = ptr_tm->tm_min;
ft.ft_hour = ptr_tm->tm_hour;
ft.ft_day = ptr_tm->tm_mday;
ft.ft_month = ptr_tm->tm_mon + 1;
ft.ft_year = ptr_tm->tm_year - 80;
res = setftime (fd, &ft);
close (fd);
return res;
}
#endif
/*
* Prevent Windows entering sleep/hibernation-mode while wget is doing
* a lengthy transfer. Windows does by default not consider network
* activity in console-programs as activity ! Works on Win-98/ME/2K
* and up.
*/
/* Call SetThreadExecutionState (MODE), looking the function up in
   kernel32.dll at run time.  A MODE of 0 means "first call" and is
   replaced by ES_SYSTEM_REQUIRED | ES_CONTINUOUS.  Returns whatever
   SetThreadExecutionState returned, or (DWORD)-1 if the function
   could not be found.  */
static DWORD
set_sleep_mode (DWORD mode)
{
  typedef DWORD (WINAPI *set_exec_state_fn) (DWORD);

  HMODULE mod = LoadLibrary ("kernel32.dll");
  set_exec_state_fn set_exec_state = NULL;
  DWORD rc = (DWORD)-1;

  /* Cast the looked-up address to the function type; assigning through
     a cast on the left-hand side is not valid C.  */
  if (mod)
    set_exec_state
      = (set_exec_state_fn) GetProcAddress ((HINSTANCE)mod,
                                            "SetThreadExecutionState");

  if (set_exec_state)
    {
      if (mode == 0)            /* first time */
        mode = (ES_SYSTEM_REQUIRED | ES_CONTINUOUS);
      rc = (*set_exec_state) (mode);
    }

  if (mod)
    FreeLibrary (mod);

  DEBUGP (("set_sleep_mode(): mode 0x%08lX, rc 0x%08lX\n", mode, rc));
  return rc;
}
/* run_with_timeout Windows implementation. */
/* Stack size 0 uses default thread stack-size (reserve+commit).
* Determined by what's in the PE header.
*/
#define THREAD_STACK_SIZE 0
/* Argument block handed from run_with_timeout to thread_helper.  */
struct thread_data {
void (*fun) (void *); /* function to run in the helper thread */
void *arg; /* argument passed to FUN */
DWORD ws_error; /* Winsock error: caller's value on entry, the helper
thread's value after FUN returns */
};
/* The callback that runs FUN(ARG) in a separate thread. This
function exists for two reasons: a) to not require FUN to be
declared WINAPI/__stdcall[1], and b) to retrieve Winsock errors,
which are per-thread. The latter is useful when FUN calls Winsock
functions, which is how run_with_timeout is used in Wget.
[1] MSVC can use __fastcall globally (cl /Gr) and on Watcom this is
the default (wcc386 -3r). */
static DWORD WINAPI
thread_helper (void *arg)
{
  struct thread_data *ctx = (struct thread_data *) arg;

  /* Start with the Winsock error inherited from the creating thread,
     so that reading it back below yields the same value when FUN does
     not touch the Winsock error state.  */
  WSASetLastError (ctx->ws_error);

  (*ctx->fun) (ctx->arg);

  /* Hand the (per-thread) Winsock error back through the shared
     block.  */
  ctx->ws_error = WSAGetLastError ();
  return 0;
}
/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
seconds. Returns non-zero if the function was interrupted with a
timeout, zero otherwise.
This works by running FUN in a separate thread and terminating the
thread if it doesn't finish in the specified time. */
int
run_with_timeout (double seconds, void (*fun) (void *), void *arg)
{
  static HANDLE thread_hnd = NULL;
  struct thread_data thread_arg;
  DWORD thread_id;
  DWORD wait_ms;
  int timed_out;

  DEBUGP (("seconds %.2f, ", seconds));

  /* A zero timeout means "no timeout": call FUN directly.  */
  if (seconds == 0)
    {
      fun (arg);
      return 0;
    }

  /* Should never happen, but test for recursion anyway.  */
  assert (thread_hnd == NULL);

  thread_arg.fun = fun;
  thread_arg.arg = arg;
  thread_arg.ws_error = WSAGetLastError ();
  thread_hnd = CreateThread (NULL, THREAD_STACK_SIZE, thread_helper,
                             &thread_arg, 0, &thread_id);
  if (!thread_hnd)
    {
      /* No thread available: fall back to a plain blocking call.  */
      DEBUGP (("CreateThread() failed; %s\n", strerror (GetLastError ())));
      fun (arg);
      return 0;
    }

  wait_ms = (DWORD)(1000 * seconds);
  if (WaitForSingleObject (thread_hnd, wait_ms) != WAIT_OBJECT_0)
    {
      /* FUN did not finish in time; kill the helper thread.  */
      TerminateThread (thread_hnd, 1);
      timed_out = 1;
    }
  else
    {
      /* Propagate the helper's per-thread Winsock error to this
         thread so the caller can inspect it.  */
      WSASetLastError (thread_arg.ws_error);
      DEBUGP (("Winsock error: %d\n", WSAGetLastError ()));
      timed_out = 0;
    }

  CloseHandle (thread_hnd);     /* also needed after TerminateThread() */
  thread_hnd = NULL;
  return timed_out;
}
--- NEW FILE: cmpt.c ---
/* Replacements for routines missing on some systems.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...1615 lines suppressed...]
if (not)
return (FNM_NOMATCH);
}
break;
default:
if (c != *n)
return (FNM_NOMATCH);
}
++n;
}
if (*n == '\0')
return (0);
return (FNM_NOMATCH);
}
#endif /* not SYSTEM_FNMATCH */
--- NEW FILE: log.c ---
/* Messages logging.
Copyright (C) 1998, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
/* This allows the architecture-specific .h files to specify the use
of stdargs regardless of __STDC__. */
#ifndef WGET_USE_STDARG
/* Use stdarg only if the compiler supports ANSI C and stdarg.h is
present. We check for both because there are configurations where
stdarg.h exists, but doesn't work. */
# ifdef __STDC__
# ifdef HAVE_STDARG_H
# define WGET_USE_STDARG
# endif
# endif
#endif /* not WGET_USE_STDARG */
#include <stdio.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <stdlib.h>
#ifdef WGET_USE_STDARG
# include <stdarg.h>
#else
# include <varargs.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <assert.h>
#include <errno.h>
#include "wget.h"
#include "utils.h"
#ifndef errno
extern int errno;
#endif
/* This file implements support for "logging". Logging means printing
output, plus several additional features:
- Cataloguing output by importance. You can specify that a log
message is "verbose" or "debug", and it will not be printed unless
in verbose or debug mode, respectively.
- Redirecting the log to the file. When Wget's output goes to the
terminal, and Wget receives SIGHUP, all further output is
redirected to a log file. When this is the case, Wget can also
print the last several lines of "context" to the log file so that
it does not begin in the middle of a line. For this to work, the
logging code stores the last several lines of context. Callers may
request for certain output not to be stored.
- Inhibiting output. When Wget receives SIGHUP, but redirecting
the output fails, logging is inhibited. */
/* The file descriptor used for logging. This is NULL before log_init
is called; logging functions log to stderr then. log_init sets it
either to stderr or to a file pointer obtained from fopen(). If
logging is inhibited, logfp is set back to NULL. */
static FILE *logfp;
/* If non-zero, it means logging is inhibited, i.e. nothing is printed
or stored. */
static int inhibit_logging;
/* Whether the last output lines are stored for use as context. */
static int save_context_p;
/* Whether the log is flushed after each command. */
static int flush_log_p = 1;
/* Whether any output has been received while flush_log_p was 0. */
static int needs_flushing;
/* In the event of a hang-up, and if its output was on a TTY, Wget
redirects its output to `wget-log'.
For the convenience of reading this newly-created log, we store the
last several lines ("screenful", hence the choice of 24) of Wget
output, and dump them as context when the time comes. */
#define SAVED_LOG_LINES 24
/* log_lines is a circular buffer that stores SAVED_LOG_LINES lines of
output. log_line_current always points to the position in the
buffer that will be written to next. When log_line_current reaches
SAVED_LOG_LINES, it is reset to zero.
The problem here is that we'd have to either (re)allocate and free
strings all the time, or limit the lines to an arbitrary number of
characters. Instead of settling for either of these, we do both:
if the line is smaller than a certain "usual" line length (128
chars by default), a preallocated memory is used. The rare lines
that are longer than 128 characters are malloc'ed and freed
separately. This gives good performance with minimum memory
consumption and fragmentation. */
#define STATIC_LENGTH 128
/* One saved line of log output.  */
static struct log_ln {
char static_line[STATIC_LENGTH + 1]; /* statically allocated
line, used for lines of up to
STATIC_LENGTH characters. */
char *malloced_line; /* malloc'ed line, for lines of output
longer than STATIC_LENGTH characters. */
char *content; /* this points either to malloced_line
or to the appropriate static_line.
If this is NULL, it means the line
has not yet been used. */
} log_lines[SAVED_LOG_LINES];
/* The current position in the ring. */
static int log_line_current = -1;
/* Whether the most recently written line was "trailing", i.e. did not
finish with \n. This is an important piece of information because
the code is always careful to append data to trailing lines, rather
than create new ones. */
static int trailing_line;
static void check_redirect_output PARAMS ((void));
/* Advance ring index NUM by one, wrapping to 0 once it reaches
   SAVED_LOG_LINES.  NUM must be a modifiable lvalue; it is evaluated
   more than once.  */
#define ROT_ADVANCE(num) do { \
if (++num >= SAVED_LOG_LINES) \
num = 0; \
} while (0)
/* Free the log line index with NUM. This calls free on
ln->malloced_line if it's non-NULL, and it also resets
ln->malloced_line and ln->content to NULL. */
static void
free_log_line (int num)
{
  struct log_ln *slot = &log_lines[num];

  /* Only heap-allocated lines need releasing; the static buffer is
     simply abandoned.  */
  if (slot->malloced_line != NULL)
    {
      xfree (slot->malloced_line);
      slot->malloced_line = NULL;
    }

  /* Mark the slot as unused.  */
  slot->content = NULL;
}
/* Append bytes in the range [start, end) to one line in the log. The
region is not supposed to contain newlines, except for the last
character (at end[-1]). */
static void
saved_append_1 (const char *start, const char *end)
{
  int len = end - start;
  struct log_ln *ln;

  if (len == 0)
    return;

  if (trailing_line)
    {
      /* The previous write left an unfinished line: extend it.  A
         malloc'ed line is grown with realloc.  A static line is
         extended in place if the result still fits in STATIC_LENGTH
         characters, and is otherwise moved to the heap.  */
      ln = &log_lines[log_line_current];
      if (ln->malloced_line)
        {
          int old_len = strlen (ln->malloced_line);

          ln->malloced_line = xrealloc (ln->malloced_line,
                                        old_len + len + 1);
          memcpy (ln->malloced_line + old_len, start, len);
          ln->malloced_line[old_len + len] = '\0';
          /* realloc may have moved the block */
          ln->content = ln->malloced_line;
        }
      else
        {
          int old_len = strlen (ln->static_line);

          if (old_len + len <= STATIC_LENGTH)
            {
              /* Still fits in the static buffer.  */
              memcpy (ln->static_line + old_len, start, len);
              ln->static_line[old_len + len] = '\0';
              ln->content = ln->static_line;
            }
          else
            {
              /* Too long: build old + new contents on the heap.  */
              ln->malloced_line = xmalloc (old_len + len + 1);
              memcpy (ln->malloced_line, ln->static_line, old_len);
              memcpy (ln->malloced_line + old_len, start, len);
              ln->malloced_line[old_len + len] = '\0';
              ln->content = ln->malloced_line;
            }
        }
    }
  else
    {
      /* Start a fresh line in the current ring slot, releasing
         whatever that slot held before.  On the very first call the
         ring position is initialised instead.  */
      if (log_line_current == -1)
        log_line_current = 0;
      else
        free_log_line (log_line_current);

      ln = &log_lines[log_line_current];
      if (len <= STATIC_LENGTH)
        {
          memcpy (ln->static_line, start, len);
          ln->static_line[len] = '\0';
          ln->content = ln->static_line;
        }
      else
        {
          ln->malloced_line = strdupdelim (start, end);
          ln->content = ln->malloced_line;
        }
    }

  /* A line is finished only when its last byte is a newline; finished
     lines move the ring forward.  */
  trailing_line = (end[-1] != '\n');
  if (!trailing_line)
    ROT_ADVANCE (log_line_current);
}
/* Log the contents of S, as explained above. If S consists of
multiple lines, they are logged separately. If S does not end with
a newline, it will form a "trailing" line, to which things will get
appended the next time this function is called. */
static void
saved_append (const char *s)
{
  const char *cursor = s;

  while (*cursor != '\0')
    {
      /* Each piece runs up to and including the next newline, or to
         the end of the string if there is none.  */
      const char *newline = strchr (cursor, '\n');
      const char *stop = newline ? newline + 1 : cursor + strlen (cursor);

      saved_append_1 (cursor, stop);
      cursor = stop;
    }
}
/* Check X against opt.verbose and opt.quiet. The semantics is as
follows:
* LOG_ALWAYS - print the message unconditionally;
* LOG_NOTQUIET - print the message if opt.quiet is zero;
* LOG_NONVERBOSE - print the message if both opt.verbose and opt.quiet are zero;
* LOG_VERBOSE - print the message if opt.verbose is non-zero. */
/* Note: this macro expands to a switch statement that executes a bare
   `return;' when the message must be suppressed, so it can only be
   used inside functions returning void.  */
#define CHECK_VERBOSE(x) \
switch (x) \
{ \
case LOG_ALWAYS: \
break; \
case LOG_NOTQUIET: \
if (opt.quiet) \
return; \
break; \
case LOG_NONVERBOSE: \
if (opt.verbose || opt.quiet) \
return; \
break; \
case LOG_VERBOSE: \
if (!opt.verbose) \
return; \
}
/* Returns the file descriptor for logging. This is LOGFP, except if
called before log_init, in which case it returns stderr. This is
useful in case someone calls a logging function before log_init.
If logging is inhibited, return NULL. */
static FILE *
get_log_fp (void)
{
  if (inhibit_logging)
    return NULL;

  /* Before log_init has run, logfp is still NULL; use stderr then.  */
  return logfp ? logfp : stderr;
}
/* Log a literal string S. The string is logged as-is, without a
newline appended. */
void
logputs (enum log_options o, const char *s)
{
  FILE *stream;

  /* Act on any pending redirection request first.  */
  check_redirect_output ();

  stream = get_log_fp ();
  if (stream == NULL)
    return;

  /* Returns from this function if verbosity O says "don't print".  */
  CHECK_VERBOSE (o);

  fputs (s, stream);
  if (save_context_p)
    saved_append (s);

  if (!flush_log_p)
    needs_flushing = 1;
  else
    logflush ();
}
struct logvprintf_state {
char *bigmsg;
int expected_size;
int allocated;
};
/* Print a message to the log. A copy of message will be saved to
the log_lines buffer, for later reuse by log_dump_context().
It is not possible to code this function in a "natural" way, using
a loop, because of the braindeadness of the varargs API.
Specifically, each call to vsnprintf() must be preceded by va_start
and followed by va_end. And this is possible only in the function
that contains the `...' declaration. The alternative would be to
use va_copy, but that's not portable. */
/* Make one attempt at formatting FMT/ARGS and writing the result to
   the log.  Returns 1 when the message has been written.  Returns 0
   when the buffer was too small: STATE then holds a larger heap
   buffer and the caller must restart its va_list and call again.  */
static int
logvprintf (struct logvprintf_state *state, const char *fmt, va_list args)
{
  char stackbuf[128];
  char *out;
  int capacity;
  int produced;
  FILE *fp = get_log_fp ();

  if (save_context_p)
    {
      /* Use the heap buffer from a previous attempt if there is one,
         otherwise the small on-stack buffer.  */
      if (state->allocated != 0)
        {
          out = state->bigmsg;
          capacity = state->allocated;
        }
      else
        {
          out = stackbuf;
          capacity = sizeof (stackbuf);
        }

      /* vsnprintf never writes past CAPACITY.  When the output does
         not fit it returns either -1 (size unknown) or the length that
         would have been needed.  Its return value is trusted here; on
         systems lacking vsnprintf the replacement in snprintf.c is
         used.  */
      produced = vsnprintf (out, capacity, fmt, args);

      if (produced == -1 || produced >= capacity)
        {
          /* Unknown size: double the buffer.  Known size: allocate
             exactly what is needed, plus the terminator.  */
          int newsize = (produced == -1) ? (capacity << 1) : (produced + 1);

          state->bigmsg = xrealloc (state->bigmsg, newsize);
          state->allocated = newsize;
          return 0;
        }

      /* Formatting succeeded.  */
      saved_append (out);
      fputs (out, fp);
      if (state->bigmsg)
        xfree (state->bigmsg);
    }
  else
    {
      /* Nothing needs to be remembered, so print directly and skip
         the buffer management above.  */
      vfprintf (fp, fmt, args);
    }

  if (flush_log_p)
    logflush ();
  else
    needs_flushing = 1;

  return 1;
}
/* Flush LOGFP. Useful while flushing is disabled. */
void
logflush (void)
{
  FILE *stream = get_log_fp ();

  if (stream != NULL)
    fflush (stream);

  /* Cleared even when logging is inhibited and nothing was flushed.  */
  needs_flushing = 0;
}
/* Enable or disable log flushing. */
void
log_set_flush (int flush)
{
  /* Nothing to do if the setting does not change.  */
  if (flush == flush_log_p)
    return;

  if (flush != 0)
    {
      /* Turning flushing back on: push out anything that was printed
         while it was off.  */
      if (needs_flushing)
        logflush ();
      flush_log_p = 1;
    }
  else
    flush_log_p = 0;
}
/* (Temporarily) disable storing log to memory. Returns the old
status of storing, with which this function can be called again to
reestablish storing. */
int
log_set_save_context (int savep)
{
/* Remember the previous setting so the caller can restore it.  */
int old = save_context_p;
save_context_p = savep;
return old;
}
/* Handle difference in va_start between pre-ANSI and ANSI C. Note
that we always use `...' in function definitions and let ansi2knr
convert it for us. */
#ifdef WGET_USE_STDARG
# define VA_START(args, arg1) va_start (args, arg1)
#else
# define VA_START(args, ignored) va_start (args)
#endif
/* Print a message to the screen or to the log. The first argument
defines the verbosity of the message, and the rest are as in
printf(3). */
void
logprintf (enum log_options o, const char *fmt, ...)
{
  va_list args;
  struct logvprintf_state lpstate;
  int done;

  check_redirect_output ();
  if (inhibit_logging)
    return;
  /* Returns from this function if verbosity O says "don't print".  */
  CHECK_VERBOSE (o);

  memset (&lpstate, '\0', sizeof (lpstate));

  /* logvprintf may ask for a retry with a larger buffer; the argument
     list has to be restarted for every attempt.  */
  for (;;)
    {
      VA_START (args, fmt);
      done = logvprintf (&lpstate, fmt, args);
      va_end (args);
      if (done)
        break;
    }
}
#ifdef ENABLE_DEBUG
/* The same as logprintf(), but does anything only if opt.debug is
non-zero. */
void
debug_logprintf (const char *fmt, ...)
{
  va_list args;
  struct logvprintf_state lpstate;
  int done;

  if (!opt.debug)
    return;

  check_redirect_output ();
  if (inhibit_logging)
    return;

  memset (&lpstate, '\0', sizeof (lpstate));

  /* Retry until logvprintf reports success, restarting the argument
     list each time.  */
  for (;;)
    {
      VA_START (args, fmt);
      done = logvprintf (&lpstate, fmt, args);
      va_end (args);
      if (done)
        break;
    }
}
#endif /* ENABLE_DEBUG */
/* Open FILE and set up a logging stream. If FILE cannot be opened,
exit with status of 1. */
/* Set up the logging stream.  With a non-NULL FILE, open it for
   appending (APPENDP non-zero) or truncating (APPENDP zero); if that
   fails, print the error and exit with status 1.  With a NULL FILE,
   log to stderr and enable saving of recent lines (only if stderr is
   a terminal, when isatty is available).  */
void
log_init (const char *file, int appendp)
{
  if (file)
    {
      logfp = fopen (file, appendp ? "a" : "w");
      if (!logfp)
        {
          /* Report the name that was actually passed to fopen rather
             than a global that may hold something else.  */
          perror (file);
          exit (1);
        }
    }
  else
    {
      /* The log goes to stderr to avoid collisions with the output if
         the user specifies `-O -'.  #### Francois Pinard suggests
         that it's a better idea to print to stdout by default, and to
         stderr only if the user actually specifies `-O -'.  He says
         this inconsistency is harder to document, but is overall
         easier on the user.  */
      logfp = stderr;

      /* If the output is a TTY, enable storing, which will make Wget
         remember the last several printed messages, to be able to
         dump them to a log file in case SIGHUP or SIGUSR1 is received
         (or Ctrl+Break is pressed under Windows).  */
      if (1
#ifdef HAVE_ISATTY
          && isatty (fileno (logfp))
#endif
          )
        {
          save_context_p = 1;
        }
    }
}
/* Close LOGFP, inhibit further logging and free the memory associated
with it. */
void
log_close (void)
{
  int slot = 0;

  if (logfp)
    fclose (logfp);
  logfp = NULL;

  /* From here on nothing is printed or remembered.  */
  inhibit_logging = 1;
  save_context_p = 0;

  /* Release every saved line and reset the ring to its initial
     state.  */
  while (slot < SAVED_LOG_LINES)
    {
      free_log_line (slot);
      ++slot;
    }
  log_line_current = -1;
  trailing_line = 0;
}
/* Dump saved lines to logfp. */
static void
log_dump_context (void)
{
int num = log_line_current;
FILE *fp = get_log_fp ();
if (!fp)
return;
if (num == -1)
return;
if (trailing_line)
ROT_ADVANCE (num);
do
{
struct log_ln *ln = log_lines + num;
if (ln->content)
fputs (ln->content, fp);
ROT_ADVANCE (num);
}
while (num != log_line_current);
if (trailing_line)
if (log_lines[log_line_current].content)
fputs (log_lines[log_line_current].content, fp);
fflush (fp);
}
/* When SIGHUP or SIGUSR1 are received, the output is redirected
elsewhere. Such redirection is only allowed once. */
/* RR_NONE: no redirection asked for yet.  RR_REQUESTED: set by
   log_request_redirect_output.  RR_DONE: set by check_redirect_output
   just before it calls redirect_output.  */
enum { RR_NONE, RR_REQUESTED, RR_DONE } redirect_request = RR_NONE;
/* Name given to the most recent log_request_redirect_output call;
   printed by redirect_output.  */
static const char *redirect_request_signal_name;
/* Redirect output to `wget-log'. */
static void
redirect_output (void)
{
  char *logfile = unique_name (DEFAULT_LOGFILE, 0);

  fprintf (stderr, _("\n%s received, redirecting output to `%s'.\n"),
           redirect_request_signal_name, logfile);

  logfp = fopen (logfile, "w");
  if (logfp)
    {
      /* Replay the saved lines into the newly opened log.  */
      log_dump_context ();
    }
  else
    {
      /* The alternate log could not be opened; all that is left is to
         turn printing off completely.  */
      fprintf (stderr, _("%s: %s; disabling logging.\n"),
               logfile, strerror (errno));
      inhibit_logging = 1;
    }

  xfree (logfile);
  /* Context is no longer saved once redirection has been attempted.  */
  save_context_p = 0;
}
/* Perform a pending output redirection, if one was requested through
   log_request_redirect_output().  The request is marked RR_DONE
   before it is carried out, so it is honoured only once.  */
static void
check_redirect_output (void)
{
  if (redirect_request != RR_REQUESTED)
    return;

  redirect_request = RR_DONE;
  redirect_output ();
}
/* Ask for the output to be redirected at a convenient time.  This may
   be called from a signal handler, so it only records the request;
   the actual work is done by check_redirect_output(), which is called
   from the entry point log functions.

   A request is registered only if none has been made before and
   context saving is enabled.  SIGNAL_NAME is recorded on every call,
   whether or not a new request was registered.  */
void
log_request_redirect_output (const char *signal_name)
{
  int may_redirect = (redirect_request == RR_NONE) && save_context_p;

  if (may_redirect)
    {
      redirect_request = RR_REQUESTED;
    }

  redirect_request_signal_name = signal_name;
}
--- NEW FILE: ftp-opie.c ---
/* Opie (s/key) support for FTP.
Copyright (C) 1998 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
[...2152 lines suppressed...]
gen_md5_init (ctx);
gen_md5_update ((unsigned char *)feed, strlen (feed), ctx);
gen_md5_finish (ctx, (unsigned char *)results);
results[0] ^= results[2];
results[1] ^= results[3];
memcpy (key, (char *) results, 8);
while (0 < sequence--)
{
gen_md5_init (ctx);
gen_md5_update ((unsigned char *)key, 8, ctx);
gen_md5_finish (ctx, (unsigned char *)results);
results[0] ^= results[2];
results[1] ^= results[3];
memcpy (key, (char *) results, 8);
}
btoe (buf, key);
return buf;
}
--- NEW FILE: retr.c ---
/* File retrieval.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif /* HAVE_UNISTD_H */
#include <errno.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif /* HAVE_STRING_H */
#include <assert.h>
#include "wget.h"
#include "utils.h"
#include "retr.h"
#include "progress.h"
#include "url.h"
#include "recur.h"
#include "ftp.h"
#include "host.h"
#include "connect.h"
#include "hash.h"
#include "convert.h"
#ifdef HAVE_SSL
# include "gen_sslfunc.h" /* for ssl_iread */
#endif
#ifndef errno
extern int errno;
#endif
/* See the comment in gethttp() why this is needed. */
/* Incremented by retrieve_url() each time it runs to its final
return; the early error returns do not touch it. */
int global_download_count;
/* Total size of downloaded files. Used to enforce quota. */
/* retrieve_from_file() compares this against opt.quota before each
URL. It is not modified anywhere in the visible part of this file. */
LARGE_INT total_downloaded_bytes;
/* Bookkeeping for limit_bandwidth().  All times are in milliseconds
   on the scale of the download timer.  */
static struct {
  long chunk_bytes;             /* bytes received since the chunk began */
  double chunk_start;           /* download time at which the chunk began */
  double sleep_adjust;          /* desired minus actual length of the
                                   previous sleep; added to the next one */
} limit_data;

/* Forget all bandwidth-limiting state so that a new download starts
   from scratch.  SLEEP_ADJUST is cleared as well: it describes the
   last sleep of the previous download and must not skew the first
   sleep of the next one.  */
static void
limit_bandwidth_reset (void)
{
  limit_data.chunk_bytes = 0;
  limit_data.chunk_start = 0;
  limit_data.sleep_adjust = 0;
}
/* Limit the bandwidth by pausing the download when it runs ahead of
   opt.limit_rate.

   BYTES is the number of bytes just received from the network.
   *DLTIME is the download time so far, in milliseconds; it is
   refreshed from TIMER if this function sleeps.  TIMER is the timer
   that measures the download.

   Sleeps shorter than 200 ms are not performed; the bytes are carried
   over and accounted for on a later call.  */
static void
limit_bandwidth (long bytes, double *dltime, struct wget_timer *timer)
{
  double spent = *dltime - limit_data.chunk_start;
  double budget;

  limit_data.chunk_bytes += bytes;

  /* How long the chunk should have taken at the requested rate.  If it
     actually took less, the difference has to be slept away.  */
  budget = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;

  if (budget > spent)
    {
      double nap = budget - spent + limit_data.sleep_adjust;
      double before, after;

      if (nap < 200)
        {
          DEBUGP (("deferring a %.2f ms sleep (%ld/%.2f).\n",
                   nap, limit_data.chunk_bytes, spent));
          return;
        }

      DEBUGP (("\nsleeping %.2f ms for %ld bytes, adjust %.2f ms\n",
               nap, limit_data.chunk_bytes, limit_data.sleep_adjust));

      before = *dltime;
      usleep ((unsigned long) (1000 * nap));
      after = wtimer_elapsed (timer);

      /* Scheduling makes the real sleep slightly longer or shorter
         than requested.  Remember the error so that the next sleep
         can compensate for it.  */
      limit_data.sleep_adjust = nap - (after - before);

      /* The timer has been read anyway, so hand the fresh value back
         to the caller.  */
      *dltime = after;
    }

  /* Start a new chunk.  */
  limit_data.chunk_bytes = 0;
  limit_data.chunk_start = *dltime;
}
/* The smaller of I and J.  One of the arguments is evaluated twice.  */
#define MIN(i, j) ((i) > (j) ? (j) : (i))
/* Read the contents of file descriptor FD until it is closed, a read
   error occurs, or (when USE_EXPECTED is set) EXPECTED bytes have
   been received, and store the data to stream FP, which should have
   been opened for writing.  The data is read in chunks of at most
   16K; the chunk size is lowered to opt.limit_rate when that is
   smaller.

   If RBUF is non-NULL and its file descriptor is equal to FD, the
   data still buffered in RBUF is written out first.  Apart from that
   this function does *not* use the rbuf_* functions.  RBUF may be
   NULL, in which case the plain iread() is always used for reading.

   *LEN is set to RESTVAL and then grows by the number of bytes
   stored.  RESTVAL represents the offset from which the download
   starts (and is shown accordingly); if it is non-zero, the stream
   should have been opened for appending.

   EXPECTED is passed to the progress display.  If USE_EXPECTED is
   non-zero it also bounds the amount of data read, and zero then
   means that exactly zero bytes are expected; otherwise zero means
   the size is unknown.

   If opt.verbose is set, the progress is shown.  If ELAPSED is
   non-NULL, it receives the download time in milliseconds.

   Returns -1 if there was a read error and -2 if the data could not
   be written to the output stream.  Otherwise the result of the last
   read is returned: 0 if the connection was closed, or a positive
   byte count if reading stopped because EXPECTED bytes had arrived.

   IMPORTANT: The function flushes the contents of RBUF with
   rbuf_flush() before actually reading from FD.  If you wish to read
   from FD immediately, flush or discard the buffer.  */
int
get_contents (int fd, FILE *fp, long *len, long restval, long expected,
              struct rbuf *rbuf, int use_expected, double *elapsed)
{
  int res = 0;

  static char dlbuf[16384];
  int dlbufsize = sizeof (dlbuf);

  void *progress = NULL;
  struct wget_timer *timer = wtimer_allocate ();
  double dltime = 0;

  *len = restval;

  if (opt.verbose)
    progress = progress_create (restval, expected);

  if (rbuf && RBUF_FD (rbuf) == fd)
    {
      int sz = 0;
      while ((res = rbuf_flush (rbuf, dlbuf, sizeof (dlbuf))) != 0)
        {
          fwrite (dlbuf, 1, res, fp);
          *len += res;
          sz += res;
        }
      if (sz)
        fflush (fp);
      if (ferror (fp))
        {
          res = -2;
          goto out;
        }
      if (progress)
        progress_update (progress, sz, 0);
    }

  if (opt.limit_rate)
    limit_bandwidth_reset ();
  wtimer_reset (timer);

  /* Use a smaller buffer for low requested bandwidths.  For example,
     with --limit-rate=2k, it doesn't make sense to slurp in 16K of
     data and then sleep for 8s.  With buffer size equal to the limit,
     we never have to sleep for more than one second.  */
  if (opt.limit_rate && opt.limit_rate < dlbufsize)
    dlbufsize = opt.limit_rate;

  /* Read from fd while there is available data.

     Normally, if expected is 0, it means that it is not known how
     much data is expected.  However, if use_expected is specified,
     then expected being zero means exactly that.  */
  while (!use_expected || (*len < expected))
    {
      int amount_to_read = (use_expected
                            ? MIN (expected - *len, dlbufsize) : dlbufsize);
#ifdef HAVE_SSL
      /* RBUF is allowed to be NULL (see the check above), so it must
         not be dereferenced unconditionally here.  */
      if (rbuf && rbuf->ssl != NULL)
        res = ssl_iread (rbuf->ssl, dlbuf, amount_to_read);
      else
#endif /* HAVE_SSL */
        res = iread (fd, dlbuf, amount_to_read);

      if (res <= 0)
        break;

      fwrite (dlbuf, 1, res, fp);
      /* Always flush the contents of the network packet.  This should
         not hinder performance: fast downloads will be received in
         16K chunks (which stdio would write out anyway), and slow
         downloads won't be limited with disk performance.  */
      fflush (fp);
      if (ferror (fp))
        {
          res = -2;
          goto out;
        }

      dltime = wtimer_elapsed (timer);
      if (opt.limit_rate)
        limit_bandwidth (res, &dltime, timer);

      *len += res;
      if (progress)
        progress_update (progress, res, dltime);
#ifdef WINDOWS
      if (use_expected && expected > 0)
        ws_percenttitle (100.0 * (double)(*len) / (double)expected);
#endif
    }
  /* Fold every read error into the single code -1.  */
  if (res < -1)
    res = -1;

 out:
  if (progress)
    progress_finish (progress, dltime);
  if (elapsed)
    *elapsed = dltime;
  wtimer_delete (timer);

  return res;
}
/* Return a printed representation of the download rate for BYTES
   bytes received in MSECS milliseconds, using the unit chosen by
   calc_rate().  If PAD is non-zero, the number is padded to the width
   of 7 characters (xxxx.xx).

   The result lives in a static buffer that is overwritten by the next
   call.  The output is bounded by the size of that buffer, so an
   absurdly large rate is truncated instead of overflowing it.  */
char *
retr_rate (long bytes, double msecs, int pad)
{
  static char res[20];
  static const char *const rate_names[] = { "B/s", "KB/s", "MB/s", "GB/s" };
  int units = 0;

  double dlrate = calc_rate (bytes, msecs, &units);

  snprintf (res, sizeof (res), pad ? "%7.2f %s" : "%.2f %s",
            dlrate, rate_names[units]);

  return res;
}
/* Calculate the download rate for BYTES bytes received in MSECS
   milliseconds and scale it to the largest fitting unit.  The scaled
   rate is returned and the unit is stored to *UNITS:

     0 - B/s   (rate below 1024 B/s)
     1 - KB/s  (rate below 1024 KB/s)
     2 - MB/s  (rate below 1024 MB/s)
     3 - GB/s  (anything faster)

   Both BYTES and MSECS must be non-negative.  */
double
calc_rate (long bytes, double msecs, int *units)
{
  double dlrate;

  assert (msecs >= 0);
  assert (bytes >= 0);

  /* An elapsed time of exactly zero means we are below the granularity
     of the timer, which often happens on systems that use time() for
     the timer.  Assume one granularity unit has passed instead.  */
  if (msecs == 0)
    msecs = wtimer_granularity ();

  dlrate = (double)1000 * bytes / msecs;

  if (dlrate < 1024.0)
    {
      *units = 0;
    }
  else if (dlrate < 1024.0 * 1024.0)
    {
      *units = 1;
      dlrate /= 1024.0;
    }
  else if (dlrate < 1024.0 * 1024.0 * 1024.0)
    {
      *units = 2;
      dlrate /= (1024.0 * 1024.0);
    }
  else
    {
      /* Maybe someone will need this, one day.  */
      *units = 3;
      dlrate /= (1024.0 * 1024.0 * 1024.0);
    }

  return dlrate;
}
/* Maximum number of allowed redirections. 20 was chosen as a
"reasonable" value, which is low enough to not cause havoc, yet
high enough to guarantee that normal retrievals will not be hurt by
the check. */
#define MAX_REDIRECTIONS 20
/* Stash opt.post_data and opt.post_file_name away and clear them.
Relies on the locals post_data_suspended, saved_post_data and
saved_post_file_name of the enclosing function (retrieve_url). */
#define SUSPEND_POST_DATA do { \
post_data_suspended = 1; \
saved_post_data = opt.post_data; \
saved_post_file_name = opt.post_file_name; \
opt.post_data = NULL; \
opt.post_file_name = NULL; \
} while (0)
/* Undo SUSPEND_POST_DATA. Does nothing unless the POST data is
currently suspended, so it may be used on every exit path. */
#define RESTORE_POST_DATA do { \
if (post_data_suspended) \
{ \
opt.post_data = saved_post_data; \
opt.post_file_name = saved_post_file_name; \
post_data_suspended = 0; \
} \
} while (0)
/* Forward declaration; getproxy() is defined further down in this
file. */
static char *getproxy PARAMS ((struct url *));
/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
   proxy, etc. -- and follows up to MAX_REDIRECTIONS redirections.

   ORIGURL is the URL to retrieve.  If FILE is non-NULL, *FILE
   receives the local file name reported by the download loop, or
   NULL.  If NEWLOC is non-NULL, *NEWLOC receives the final URL when
   at least one redirection was followed, and NULL otherwise.  REFURL
   is the referer; when NULL, opt.referer is used.  DT points to the
   document-type flags; when NULL, a local variable is used instead.

   Returns URLERROR if ORIGURL cannot be parsed, PROXERR if the proxy
   URL is unusable, WRONGCODE if there are too many redirections, and
   otherwise the result of the download loop.  If the target of a
   redirection cannot be parsed, the loop's result at that point
   (NEWLOCATION) is returned.  */

/* #### This function should be rewritten so it doesn't return from
   multiple points. */

uerr_t
retrieve_url (const char *origurl, char **file, char **newloc,
              const char *refurl, int *dt)
{
  uerr_t result;
  char *url;
  int location_changed, dummy;
  char *mynewloc, *proxy;
  struct url *u, *proxy_url;
  int up_error_code;            /* url parse error code */
  char *local_file;
  int redirection_count = 0;

  /* Used by SUSPEND_POST_DATA and RESTORE_POST_DATA.  */
  int post_data_suspended = 0;
  char *saved_post_data = NULL;
  char *saved_post_file_name = NULL;

  /* If dt is NULL, use local storage.  */
  if (!dt)
    {
      dt = &dummy;
      dummy = 0;
    }
  url = xstrdup (origurl);
  if (newloc)
    *newloc = NULL;
  if (file)
    *file = NULL;

  u = url_parse (url, &up_error_code);
  if (!u)
    {
      logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
      xfree (url);
      return URLERROR;
    }

  if (!refurl)
    refurl = opt.referer;

 redirected:

  result = NOCONERROR;
  mynewloc = NULL;
  local_file = NULL;
  proxy_url = NULL;

  proxy = getproxy (u);
  if (proxy)
    {
      /* Parse the proxy URL.  */
      proxy_url = url_parse (proxy, &up_error_code);
      if (!proxy_url)
        {
          logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
                     proxy, url_error (up_error_code));
          /* U is owned by this function; release it like every other
             exit path does.  */
          url_free (u);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
      if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
        {
          logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
          url_free (proxy_url);
          url_free (u);
          xfree (url);
          RESTORE_POST_DATA;
          return PROXERR;
        }
    }

  if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
      || u->scheme == SCHEME_HTTPS
#endif
      || (proxy_url && proxy_url->scheme == SCHEME_HTTP))
    {
      result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
    }
  else if (u->scheme == SCHEME_FTP)
    {
      /* If this is a redirection, we must not allow recursive FTP
         retrieval, so we save recursion to oldrec, and restore it
         later.  */
      int oldrec = opt.recursive;
      if (redirection_count)
        opt.recursive = 0;
      result = ftp_loop (u, dt, proxy_url);
      opt.recursive = oldrec;

      /* There is a possibility of having HTTP being redirected to
         FTP.  In these cases we must decide whether the text is HTML
         according to the suffix.  The HTML suffixes are `.html',
         `.htm' and a few others, case-insensitive.  */
      if (redirection_count && local_file && u->scheme == SCHEME_FTP)
        {
          if (has_html_suffix_p (local_file))
            *dt |= TEXTHTML;
        }
    }

  if (proxy_url)
    {
      url_free (proxy_url);
      proxy_url = NULL;
    }

  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      char *constructed_newloc;
      struct url *newloc_parsed;

      assert (mynewloc != NULL);

      if (local_file)
        xfree (local_file);

      /* The HTTP specs only allow absolute URLs to appear in
         redirects, but a ton of boneheaded webservers and CGIs out
         there break the rules and use relative URLs, and popular
         browsers are lenient about this, so wget should be too. */
      constructed_newloc = uri_merge (url, mynewloc);
      xfree (mynewloc);
      mynewloc = constructed_newloc;

      /* Now, see if this new location makes sense. */
      newloc_parsed = url_parse (mynewloc, &up_error_code);
      if (!newloc_parsed)
        {
          logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc,
                     url_error (up_error_code));
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return result;
        }

      /* Now mynewloc will become newloc_parsed->url, because if the
         Location contained relative paths like .././something, we
         don't want that propagating as url.  */
      xfree (mynewloc);
      mynewloc = xstrdup (newloc_parsed->url);

      /* Check for max. number of redirections.  */
      if (++redirection_count > MAX_REDIRECTIONS)
        {
          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
                     MAX_REDIRECTIONS);
          url_free (newloc_parsed);
          url_free (u);
          xfree (url);
          xfree (mynewloc);
          RESTORE_POST_DATA;
          return WRONGCODE;
        }

      xfree (url);
      url = mynewloc;
      url_free (u);
      u = newloc_parsed;

      /* If we're being redirected from POST, we don't want to POST
         again.  Many requests answer POST with a redirection to an
         index page; that redirection is clearly a GET.  We "suspend"
         POST data for the duration of the redirections, and restore
         it when we're done. */
      if (!post_data_suspended)
        SUSPEND_POST_DATA;

      goto redirected;
    }

  if (local_file)
    {
      if (*dt & RETROKF)
        {
          register_download (u->url, local_file);
          if (redirection_count && 0 != strcmp (origurl, u->url))
            register_redirection (origurl, u->url);
          if (*dt & TEXTHTML)
            register_html (u->url, local_file);
        }
    }

  /* Hand LOCAL_FILE over to the caller, or dispose of it.  */
  if (file)
    *file = local_file;
  else
    FREE_MAYBE (local_file);

  url_free (u);

  if (redirection_count)
    {
      if (newloc)
        *newloc = url;
      else
        xfree (url);
    }
  else
    {
      if (newloc)
        *newloc = NULL;
      xfree (url);
    }

  ++global_download_count;
  RESTORE_POST_DATA;

  return result;
}
/* Extract the URLs from FILE and retrieve each of them.  If HTML is
   non-zero, FILE is parsed as HTML; otherwise it is read as a plain
   list of URLs.

   Each URL goes through retrieve_url(), except that non-FTP URLs are
   handed to retrieve_tree() when opt.recursive is set.  Entries
   marked ignore_when_downloading are skipped, and processing stops
   with QUOTEXC once the download quota has been exceeded.

   *COUNT is set to the number of list entries walked over, skipped
   ones included.  Returns the status of the last retrieval, or RETROK
   if none was attempted.  */
uerr_t
retrieve_from_file (const char *file, int html, int *count)
{
  uerr_t status = RETROK;       /* Suppose everything is OK.  */
  struct urlpos *url_list;
  struct urlpos *pos;

  if (html)
    url_list = get_urls_html (file, NULL, NULL);
  else
    url_list = get_urls_file (file);

  *count = 0;                   /* Reset the URL count.  */

  for (pos = url_list; pos != NULL; pos = pos->next, ++*count)
    {
      char *filename = NULL;
      char *new_file = NULL;
      int dt;

      if (pos->ignore_when_downloading)
        continue;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        {
          status = QUOTEXC;
          break;
        }

      if (!opt.recursive || pos->url->scheme == SCHEME_FTP)
        status = retrieve_url (pos->url->url, &filename, &new_file, NULL, &dt);
      else
        status = retrieve_tree (pos->url->url);

      if (filename && opt.delete_after && file_exists_p (filename))
        {
          DEBUGP (("Removing file due to --delete-after in"
                   " retrieve_from_file():\n"));
          logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
          if (unlink (filename))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          dt &= ~RETROKF;
        }

      FREE_MAYBE (new_file);
      FREE_MAYBE (filename);
    }

  /* Free the linked list of URL-s.  */
  free_urlpos (url_list);

  return status;
}
/* Announce the impending action at verbose level: `Giving up' when
   the attempt number N1 has reached the attempt limit N2, `Retrying'
   otherwise.  */
void
printwhat (int n1, int n2)
{
  if (n1 == n2)
    logputs (LOG_VERBOSE, _("Giving up.\n\n"));
  else
    logputs (LOG_VERBOSE, _("Retrying.\n\n"));
}
/* Sleep between retrievals as requested by opt.wait, opt.waitretry
   and opt.random_wait.  See the documentation of --wait and
   --waitretry for more information.

   COUNT is the count of the current retrieval, beginning with 1; a
   value above 1 denotes a retry.  The very first call never sleeps.  */
void
sleep_between_retrievals (int count)
{
  static int first_retrieval = 1;

  if (first_retrieval)
    {
      /* Don't sleep before the very first retrieval.  */
      first_retrieval = 0;
      return;
    }

  if (opt.waitretry && count > 1)
    {
      /* A retry with opt.waitretry in effect: wait COUNT-1 seconds,
         but not longer than opt.waitretry seconds.  */
      if (count > opt.waitretry)
        usleep (1000000L * opt.waitretry);
      else
        sleep (count - 1);
      return;
    }

  if (!opt.wait)
    return;

  if (opt.random_wait && count <= 1)
    {
      /* Sleep a random amount of time averaging in opt.wait seconds.
         The amount is opt.wait*2 scaled by random_float() --
         presumably a value between 0 and 1; verify against its
         definition.  */
      double waitsecs = 2 * opt.wait * random_float ();
      DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
               opt.wait, waitsecs));
      usleep (1000000L * waitsecs);
    }
  else
    {
      /* Random wait is not in effect, or we are sleeping between
         retries of the same download: sleep the fixed interval.  */
      usleep (1000000L * opt.wait);
    }
}
/* Release the whole linked list of urlpos structures starting at L:
   for every node, its parsed URL (when present), its local name and
   the node itself.  L may be NULL.  */
void
free_urlpos (struct urlpos *l)
{
  struct urlpos *node = l;

  while (node != NULL)
    {
      /* Fetch the successor before the node goes away.  */
      struct urlpos *following = node->next;

      if (node->url != NULL)
        url_free (node->url);
      FREE_MAYBE (node->local_name);
      xfree (node);

      node = following;
    }
}
/* Rotate the backups of FNAME: FNAME.(N-1) becomes FNAME.N for N
   from opt.backups down to 2, and finally FNAME itself becomes
   FNAME.1.  Nothing is done when FNAME exists but is not a regular
   file.  The results of the individual rename() calls are ignored.  */
void
rotate_backups(const char *fname)
{
  /* Room for FNAME, a dot, the widest backup number and the NUL.  */
  int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
  char *from = (char *)alloca (maxlen);
  char *to = (char *)alloca (maxlen);
  struct stat sb;
  int n = opt.backups;

  if (stat (fname, &sb) == 0 && S_ISREG (sb.st_mode) == 0)
    return;

  while (n > 1)
    {
      sprintf (from, "%s.%d", fname, n - 1);
      sprintf (to, "%s.%d", fname, n);
      rename (from, to);
      n--;
    }

  sprintf (to, "%s.%d", fname, 1);
  rename(fname, to);
}
static int no_proxy_match PARAMS ((const char *, const char **));

/* Return the URL of the proxy appropriate for url U, or NULL if U
   should be retrieved directly: proxies are disabled, the host of U
   is excluded by opt.no_proxy, or no non-empty proxy is configured
   for the scheme of U.  The proxy is taken from the corresponding
   option or, failing that, from the environment variable of the same
   name.

   The returned string must not be freed by the caller.  It points to
   the option value, to the environment, or to static storage that is
   overwritten by the next call.  */
static char *
getproxy (struct url *u)
{
  char *proxy = NULL;
  char *rewritten_url;
  static char rewritten_storage[1024];

  if (!opt.use_proxy)
    return NULL;
  if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
    return NULL;

  switch (u->scheme)
    {
    case SCHEME_HTTP:
      proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
      break;
#ifdef HAVE_SSL
    case SCHEME_HTTPS:
      proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
      break;
#endif
    case SCHEME_FTP:
      proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
      break;
    case SCHEME_INVALID:
      break;
    default:
      /* Any other scheme has no proxy.  */
      break;
    }
  if (!proxy || !*proxy)
    return NULL;

  /* Handle shorthands.  `rewritten_storage' is a kludge to allow
     getproxy() to return static storage. */
  rewritten_url = rewrite_shorthand_url (proxy);
  if (rewritten_url)
    {
      strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
      rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
      proxy = rewritten_storage;

      /* The rewritten URL is a separately allocated string that has
         just been copied; release it so it does not leak on every
         call.  */
      xfree (rewritten_url);
    }

  return proxy;
}
/* Should HOST be accessed through a proxy, as far as NO_PROXY is
   concerned?  Returns non-zero when NO_PROXY is NULL or when
   sufmatch() finds no match for HOST in it, and zero otherwise.  */
int
no_proxy_match (const char *host, const char **no_proxy)
{
  if (no_proxy == NULL)
    return 1;

  return !sufmatch (no_proxy, host);
}
--- NEW FILE: gen-md5.c ---
/* General MD5 support.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#include <config.h>
#include "wget.h"
#include "gen-md5.h"
/* Select the MD5 implementation. Each supported configuration macro
pulls in its header and names the native context type
gen_md5_context_imp. */
#ifdef HAVE_BUILTIN_MD5
# include <gnu-md5.h>
typedef struct md5_ctx gen_md5_context_imp;
#endif
#ifdef HAVE_SOLARIS_MD5
# include <md5.h>
typedef MD5_CTX gen_md5_context_imp;
#endif
#ifdef HAVE_OPENSSL_MD5
# include <openssl/md5.h>
typedef MD5_CTX gen_md5_context_imp;
#endif
/* The context used by the gen_md5_* functions: a plain wrapper around
the native context of the selected implementation. */
struct gen_md5_context {
gen_md5_context_imp imp;
};
/* The functions below were originally meant to be macros, but that
   is very hard to do because some of the MD5 implementations use the
   same names for their types.  For example, it is impossible to
   include both <md5.h> and <openssl/ssl.h> on Solaris, because the
   latter brings its own MD5 implementation, which clashes with
   <md5.h>.  */

/* Return the number of bytes needed to hold a struct gen_md5_context.  */
int
gen_md5_context_size (void)
{
  int size = sizeof (struct gen_md5_context);
  return size;
}
/* Initialize CTX by calling the init function of whichever MD5
   implementation was configured.  */
void
gen_md5_init (gen_md5_context *ctx)
{
  gen_md5_context_imp *native = &ctx->imp;

#if defined (HAVE_BUILTIN_MD5)
  md5_init_ctx (native);
#endif

#if defined (HAVE_SOLARIS_MD5)
  MD5Init (native);
#endif

#if defined (HAVE_OPENSSL_MD5)
  MD5_Init (native);
#endif
}
/* Feed LEN bytes starting at BUFFER into the digest computation
   described by CTX, using the configured MD5 implementation.  */
void
gen_md5_update (unsigned const char *buffer, int len, gen_md5_context *ctx)
{
  gen_md5_context_imp *native = &ctx->imp;

#if defined (HAVE_BUILTIN_MD5)
  md5_process_bytes (buffer, len, native);
#endif

#if defined (HAVE_SOLARIS_MD5)
  MD5Update (native, (unsigned char *)buffer, len);
#endif

#if defined (HAVE_OPENSSL_MD5)
  MD5_Update (native, buffer, len);
#endif
}
/* Finish the digest computation described by CTX and store the
   digest to RESULT, using the configured MD5 implementation.  */
void
gen_md5_finish (gen_md5_context *ctx, unsigned char *result)
{
  gen_md5_context_imp *native = &ctx->imp;

#if defined (HAVE_BUILTIN_MD5)
  md5_finish_ctx (native, result);
#endif

#if defined (HAVE_SOLARIS_MD5)
  MD5Final (result, native);
#endif

#if defined (HAVE_OPENSSL_MD5)
  MD5_Final (result, native);
#endif
}
--- NEW FILE: version.c ---
/* The version of this Wget release. */
char *version_string = "1.9.1";
--- NEW FILE: retr.h ---
/* Declarations for retr.c.
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef RETR_H
#define RETR_H
#include "rbuf.h"
/* Copy the data arriving on a file descriptor to a stream; returns
-1 on a read error and -2 on a write error. */
int get_contents PARAMS ((int, FILE *, long *, long, long, struct rbuf *,
int, double *));
/* Retrieve a single URL, following redirections. */
uerr_t retrieve_url PARAMS ((const char *, char **, char **,
const char *, int *));
/* Retrieve every URL found in a file (HTML if the int is non-zero). */
uerr_t retrieve_from_file PARAMS ((const char *, int, int *));
/* Printable download rate for bytes/milliseconds; the result is in
static storage. */
char *retr_rate PARAMS ((long, double, int));
/* Download rate scaled to B/s, KB/s, MB/s or GB/s; the unit index is
stored through the last argument. */
double calc_rate PARAMS ((long, double, int *));
/* Log `Giving up' if both arguments are equal, `Retrying' otherwise. */
void printwhat PARAMS ((int, int));
/* Sleep as requested by opt.wait / opt.waitretry; the argument is the
1-based count of the current retrieval. */
void sleep_between_retrievals PARAMS ((int));
/* Rename FNAME to FNAME.1, FNAME.1 to FNAME.2 and so on, up to
opt.backups. */
void rotate_backups PARAMS ((const char *));
/* Because there's no http.h. */
struct url;
uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
int *, struct url *));
#endif /* RETR_H */
--- NEW FILE: config.h.in ---
/* Configuration header file.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
Free Software Foundation, Inc.
This file is part of GNU Wget.
GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables. You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL". If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so. If you do not wish to do
so, delete this exception statement from your version. */
#ifndef CONFIG_H
#define CONFIG_H
/* Define if you have the <alloca.h> header file. */
#undef HAVE_ALLOCA_H
/* AIX requires this to be the first thing in the file. */
/* Make alloca available: the compiler builtin under GCC, otherwise
<alloca.h> if present, `#pragma alloca' on AIX, and a plain
declaration as the last resort. */
#ifdef __GNUC__
# define alloca __builtin_alloca
#else
# if HAVE_ALLOCA_H
# include <alloca.h>
# else
# ifdef _AIX
#pragma alloca
# else
# ifndef alloca /* predefined by HP cc +Olibcalls */
char *alloca ();
# endif
# endif
# endif
#endif
/* Define if on AIX 3.
System headers sometimes define this.
We just want to avoid a redefinition error message. */
#ifndef _ALL_SOURCE
#undef _ALL_SOURCE
#endif
/* Define to empty if the keyword does not work. */
#undef const
/* Define to empty or __inline__ or __inline. */
#undef inline
/* Define to `unsigned' if <sys/types.h> doesn't define. */
#undef size_t
/* Define to `int' if <sys/types.h> doesn't define. */
#undef pid_t
/* Define if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Define as the return type of signal handlers (int or void). */
#undef RETSIGTYPE
/* Define if your architecture is big endian (with the most
significant byte first). */
#undef WORDS_BIGENDIAN
/* Define to the length of short. */
#undef SIZEOF_SHORT
/* Define to the length of int. */
#undef SIZEOF_INT
/* Define to the length of long. */
#undef SIZEOF_LONG
/* Define to the length of long long. */
#undef SIZEOF_LONG_LONG
/* Define this if you want the NLS support. */
#undef HAVE_NLS
/* Define if you want the FTP support for Opie compiled in. */
#undef USE_OPIE
/* Define if you want the HTTP Digest Authorization compiled in. */
#undef USE_DIGEST
/* Define if you want the debug output support compiled in. */
#undef ENABLE_DEBUG
/* Define if you have sys/time.h header. */
#undef HAVE_SYS_TIME_H
/* Define if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Define if you have inttypes.h header. */
#undef HAVE_INTTYPES_H
/* Define if you have struct utimbuf. */
#undef HAVE_STRUCT_UTIMBUF
/* Define if you have the uname function. */
#undef HAVE_UNAME
/* Define if you have a working version of mmap. */
#undef HAVE_MMAP
/* Define if you have the gethostname function. */
#undef HAVE_GETHOSTNAME
/* Define if you have the select function. */
#undef HAVE_SELECT
/* Define if you have the gettimeofday function. */
#undef HAVE_GETTIMEOFDAY
/* Define if you have the strdup function. */
#undef HAVE_STRDUP
/* Define if you have the sys/utsname.h header. */
#undef HAVE_SYS_UTSNAME_H
/* Define if you have the strerror function. */
#undef HAVE_STRERROR
/* Define if you have the snprintf function. */
#undef HAVE_SNPRINTF
/* Define if you have the vsnprintf function. */
#undef HAVE_VSNPRINTF
/* Define if you have the strstr function. */
#undef HAVE_STRSTR
/* Define if you have the strcasecmp function. */
#undef HAVE_STRCASECMP
/* Define if you have the strncasecmp function. */
#undef HAVE_STRNCASECMP
/* Define if you have the strpbrk function. */
#undef HAVE_STRPBRK
/* Define if you have the memmove function. */
#undef HAVE_MEMMOVE
/* Define if you have the strptime function. */
#undef HAVE_STRPTIME
/* Define if you have the mktime function. */
#undef HAVE_MKTIME
/* Define if you have the symlink function. */
#undef HAVE_SYMLINK
/* Define if you have the access function. */
#undef HAVE_ACCESS
/* Define if you have the isatty function. */
#undef HAVE_ISATTY
/* Define if you have the signal function. */
#undef HAVE_SIGNAL
/* Define if you have the sigsetjmp function. */
#undef HAVE_SIGSETJMP
/* Define if you have the sigblock function. */
#undef HAVE_SIGBLOCK
/* Define if you have the gettext function. */
#undef HAVE_GETTEXT
/* Define if you have the usleep function. */
#undef HAVE_USLEEP
/* Define if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define if you have the <stdarg.h> header file. */
#undef HAVE_STDARG_H
/* Define if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define if you have the <utime.h> header file. */
#undef HAVE_UTIME_H
/* Define if you have the <sys/utime.h> header file. */
#undef HAVE_SYS_UTIME_H
/* Define if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
/* Define if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H
/* Define if you have the <sys/select.h> header file. */
#undef HAVE_SYS_SELECT_H
/* Define if you have the <pwd.h> header file. */
#undef HAVE_PWD_H
/* Define if you have the <signal.h> header file. */
#undef HAVE_SIGNAL_H
/* Define if you have the <setjmp.h> header file. */
#undef HAVE_SETJMP_H
/* Define if you have the <libintl.h> header file. */
#undef HAVE_LIBINTL_H
/* Define if you have the <locale.h> header file. */
#undef HAVE_LOCALE_H
/* Define if fnmatch.h can be included. */
#undef HAVE_WORKING_FNMATCH_H
/* Define to be the name of the operating system. */
#undef OS_TYPE
/* Define if you wish to compile with socks support. */
#undef HAVE_SOCKS
/* Define to 1 if ANSI function prototypes are usable. */
#undef PROTOTYPES
/* Define if all libraries needed for SSL support are present. */
#undef HAVE_SSL
/* Define if we're compiling in support for MD5. */
#undef HAVE_MD5
/* Define if we're using Solaris libmd5. */
#undef HAVE_SOLARIS_MD5
/* Define if we're using OpenSSL md5. */
#undef HAVE_OPENSSL_MD5
/* Define if we're using builtin (GNU) md5.c. */
#undef HAVE_BUILTIN_MD5
/* Define if you have the getaddrinfo function. */
#undef HAVE_GETADDRINFO
/* Define if the system headers support the AI_ADDRCONFIG flag. */
#undef HAVE_GETADDRINFO_AI_ADDRCONFIG
/* Define if the system headers support the AI_V4MAPPED flag. */
#undef HAVE_GETADDRINFO_AI_V4MAPPED
/* Define if the system headers support the AI_ALL flag. */
#undef HAVE_GETADDRINFO_AI_ALL
/* Define if the system supports struct sockaddr_in6.
NOTE(review): the doubled HAVE_ prefix in the macro name below looks
like a typo; confirm which spelling the configure script defines and
which one the sources test before changing it. */
#undef HAVE_HAVE_STRUCT_SOCKADDR_IN6
/* Define if struct sockaddr_in6 has the sin6_scope_id member. */
#undef HAVE_SOCKADDR_IN6_SCOPE_ID
/* Define if you want to enable the IPv6 support. */
#undef ENABLE_IPV6
/* Defined to int or size_t on systems without socklen_t. */
#undef socklen_t
/* Define if you have uint32_t. */
#undef HAVE_UINT32_T
/* Some autoconf-unrelated preprocessor magic that cannot be in
sysdep.h because it must be done before including the system
headers. */
/* First a gambit to see whether we're on Solaris: both __sun and
__SVR4 have to be predefined. We'll need the result below. */
#ifdef __sun
# ifdef __SVR4
# define solaris
# endif
#endif
/* The following several lines can be very dangerous; they can cripple
the header files and break compilation in _very_ non-obvious ways.
Because of that, we define them only on architectures we know
about (currently Solaris and Linux). */
#undef NAMESPACE_TWEAKS
#ifdef solaris
# define NAMESPACE_TWEAKS
# ifdef __GNUC__
/* Prevent stdio.h from declaring va_list and thus tripping gcc's
stdarg.h. */
# define _VA_LIST
# endif
#endif
#ifdef __linux__
# define NAMESPACE_TWEAKS
#endif
#ifdef NAMESPACE_TWEAKS
/* Request the "Unix 98 compilation environment". */
#define _XOPEN_SOURCE 500
/* For Solaris: request everything else that is available and doesn't
conflict with the above. */
#define __EXTENSIONS__
/* For Linux: request features of 4.3BSD and SVID (System V Interface
Definition). */
#define _SVID_SOURCE
#define _BSD_SOURCE
/* glibc 2.20 and later deprecate _BSD_SOURCE and _SVID_SOURCE and emit
a warning from <features.h> unless _DEFAULT_SOURCE is defined as well.
Older C libraries simply ignore the macro. The guard avoids
redefining a value supplied on the compiler command line. */
#ifndef _DEFAULT_SOURCE
# define _DEFAULT_SOURCE
#endif
#endif /* NAMESPACE_TWEAKS */
#endif /* CONFIG_H */
More information about the dslinux-commit
mailing list