--- w3c-markup-validator-0.6.7.orig/htdocs/config/validator.conf +++ w3c-markup-validator-0.6.7/htdocs/config/validator.conf @@ -17,15 +17,17 @@ # # Email address of the maintainer of this service. -Maintainer www-validator@w3.org +Maintainer webmaster@localhost # # The "Home Page" for the service. Make sure this ends with a slash. -Home Page http://validator.w3.org/ +#Home Page http://validator.w3.org/ +Home Page http://localhost/w3c-markup-validator/ # # Base URL To Error Explanations (doc/errors.html) -Msg FAQ URI http://validator.w3.org/docs/errors.html +#Msg FAQ URI http://validator.w3.org/docs/errors.html +Msg FAQ URI http://localhost/w3c-markup-validator/docs/errors.html # # Base URL for the Element Reference. @@ -33,33 +35,53 @@ # # The SGML Library Path. -SGML Library /usr/local/validator/htdocs/sgml-lib +#SGML Library /usr/local/validator/htdocs/sgml-lib +SGML Library /usr/share/sgml +XML Library /usr/share/xml # +# The directory containing the catalogs +SGML Catalog /usr/share/w3c-markup-validator/catalog +#SGML Maincatalog /etc/sgml/catalog + +# +# # The SGML Parser to use. SGML Parser /usr/bin/onsgmls # # Mapping tables etc... -Element Map file:///usr/local/validator/htdocs/config/eref.cfg -FPI to Text file:///usr/local/validator/htdocs/config/fpis.cfg -Error to URI file:///usr/local/validator/htdocs/config/frag.cfg -File Type file:///usr/local/validator/htdocs/config/type.cfg -Doctypes file:///usr/local/validator/htdocs/config/doctypes.cfg -Charsets file:///usr/local/validator/htdocs/config/charset.cfg +#Element Map file:///usr/local/validator/htdocs/config/eref.cfg +#FPI to Text file:///usr/local/validator/htdocs/config/fpis.cfg +#Error to URI file:///usr/local/validator/htdocs/config/frag.cfg +#File Type file:///usr/local/validator/htdocs/config/type.cfg +#Doctypes file:///usr/local/validator/htdocs/config/doctypes.cfg +#Charsets file:///usr/local/validator/htdocs/config/charset.cfg +Element Map file:///usr/share/w3c-markup-validator/config/eref.cfg +FPI to Text file:///usr/share/w3c-markup-validator/config/fpis.cfg +Error to URI file:///usr/share/w3c-markup-validator/config/frag.cfg +File Type file:///usr/share/w3c-markup-validator/config/type.cfg +Doctypes file:///usr/share/w3c-markup-validator/config/doctypes.cfg +Charsets file:///usr/share/w3c-markup-validator/config/charset.cfg + # # DanC territory... -Tips DB file:///usr/local/validator/htdocs/config/tips.cfg +#Tips DB file:///usr/local/validator/htdocs/config/tips.cfg +Tips DB file:///usr/share/w3c-markup-validator/config/tips.cfg + # # The "templates" for headers, footers... -Header /usr/local/validator/htdocs/header.html -Footer /usr/local/validator/htdocs/footer.html +#Header /usr/local/validator/htdocs/header.html +#Footer /usr/local/validator/htdocs/footer.html +Header /usr/share/w3c-markup-validator/html/header.html +Footer /usr/share/w3c-markup-validator/html/footer.html # # Location of Verbose Error Messages... -Verbose Msg /usr/local/validator/htdocs/config/verbosemsg.cfg +#Verbose Msg /usr/local/validator/htdocs/config/verbosemsg.cfg +Verbose Msg /usr/share/w3c-markup-validator/config/verbosemsg.cfg # # Allowed Protocols --- w3c-markup-validator-0.6.7.orig/htdocs/config/checklink.conf +++ w3c-markup-validator-0.6.7/htdocs/config/checklink.conf @@ -0,0 +1,59 @@ +# +# Configuration file for the W3C Link Checker +# $Id: checklink.conf,v 1.5 2004/06/01 21:55:54 ville Exp $ +# +# See Config::General(3) for the syntax; 'SplitPolicy' is 'equalsign' here. 
+# + +# +# Trusted is a regular expression for matching "trusted" domains. This is +# used to restrict the domains where HTTP basic authentication will be sent. +# This is matched case insensitively against resoures' hostnames. +# +# Not specifying a value here means that the basic authentication will only +# be sent to the same host where the authentication was requested from. +# +# For example, the following would allow sending the authentication to any +# host in the w3.org domain (and *only* there): +# Trusted = \.w3\.org$ + + +# +# Allow_Private_IPs is a boolean flag (1/0) for specifying whether checking of +# links on non-public IP addresses is allowed. +# +# The default, ie. not specifying the value here means that checking links +# on non-public IP addresses is disabled when checklink runs as a CGI script, +# and allowed in command line mode. +# +# For example, the following would disallow private IP addresses regardless +# of the mode: +# Allow_Private_IPs = 0 + + +# +# Markup_Validator_URI and CSS_Validator_URI are formatted URIs to the +# respective validators. The %s in these will be replaced with the full +# "URI encoded" URI to the document being checked, and shown in the link +# checker results view in the online/CGI version. +# +# Defaults: +Markup_Validator_URI = /check?uri=%s +# CSS_Validator_URI = http://jigsaw.w3.org/css-validator/validator?uri=%s + + +# +# Doc_URI is the URI to the Link Checker documentation, shown in the +# results report in CGI mode, and the usage message in command line mode. +# If you have installed the documentation locally somewhere, you may wish to +# change this to point to that version. +# +# Default: +Doc_URI = /w3c-markup-validator/checklink.html + +# +# Style_URI is the URI (relative or absolute) of the style sheet to be used +# by the Link Checker for display of interface and results. +# +# Default: +Style_URI = /w3c-markup-validator/linkchecker.css --- w3c-markup-validator-0.6.7.orig/htdocs/footer.html +++ w3c-markup-validator-0.6.7/htdocs/footer.html @@ -2,7 +2,7 @@
check?uri=referer">Valid XHTML 1.0! Feedback: @@ -31,7 +31,7 @@
  • docs/" title="Documentation for this Service">Docs
  • docs/help.html" title="Help and answers to frequently asked questions">Help & FAQ
  • feedback.html" title="How to provide feedback on this service">Feedback
  • -
  • Link Checker
  • +
  • Link Checker
  • --- w3c-markup-validator-0.6.7.orig/htdocs/images/qa-small.enc +++ w3c-markup-validator-0.6.7/htdocs/images/qa-small.enc @@ -0,0 +1,49 @@ +begin 644 qa-small.png +MB5!.1PT*&@H````-24A$4@```$@````P"`(````]_JN\````!&=!34$``-;8 +MU$]8,@```!ET15AT4V]F='=A7')93P```>E +M241!5'C:8OS__S_#<`0``<3$,$P!0``-6X\!!-"P]1A```U;CP$$T+#U&$`` +ML1"CZ,*%"P\>/+AX\2*$JZ^OKZ"@8&!@,)@]!A!`#/]Q@_OW[Q<4%`#]@%6C +M@(!`0D("4,W_00D``@B[Q]Z_?P_T$I%!`_0>4/U@\QA``&'QV/GSY['&D@,, +M8$H!U0-U#2J/`000`V9<`=,8FG_6KU^/I@PH@N9#H*Y!%6\``83N,33GSI\_ +M'X]FH/>00P&H=_!X#""`4#RV?_]^XGT%3[?(6H`F#!*/`000BL>`Q0!RD4"D +M$!J'`-#!,UH2'6/U2=`IP"YR*TPH!1FPP6>!/:#`=`*H`^!#*`C@1J!7*`/ +M@=T+K"U5@`#"W@A&:RY"8J,!#-!B"9>O\'L,XA2@47`1_![#&LD0=P*-PMI" +M`@@@!EP],:RM>%P=,ZS-7XC'X%)`ST.LEPV,074!#L'H!(("P +M]Z`A90.P)`26>!LV;,#O,6`)"2PJT=J9D&@$NALH!700L`,.K!6`*0%NIYS!L],RB$,8,V``.!0WY9VWEPD(CJ7 +MP134[.XA9*G.*+D/N:@0_3?OJGKYI*R(^"3_""`&2HI4Y+(>&#^#JJ\)$$"( +MMN*R?=>WGK_/R<#!P/"#`49R<##\^`'C@C@,,%DHX\3&25`Q:N0WF2$.*,8`` +M(A!C`CSL_5F.0%\U+CH.$;'7EQD2,0800$S8\RX,`'WUX,5'B*\NW'D%2HK* +M8@Q#`0`$$$92Y$`D16"J`Z;#PFD'H.VFBX\A!0D0(=H^/.Q`97`1(`/(!0JB +ML8$Y$Y*&@0P@`DK!\RI0')Z\@8J!"&(%7"_<%N1<`%&`[!*TI`@00!A)\0$0BP*L5;`F18``PM8(YH!67$`W +M35A[%BER7A4$&X.SF>R"G5=!K5*PXS8>N[MPU]4'+SX![?OP]>?$=>>`7(@K +M@:D7*`X,5Z!1!R\^@<=28-W&!R\_`AF0Y*`@P0]T/:A1JRP*:O(O.@[4"'$Q +M4`O0>QN.W@%J`7(OWGTMP,T.=`8PVP,-!*H!>A@HB^D)@`!BP9(4?S`8:(D! +M*RZ@K<@>`P*(QX#&05KU0#\X&,@"`QOHS\2N'4#UP!@#!CF0O>'+':";(&4I +M)(:!*1G"`"J#9%=@0`#53%Q[#F@@))T#30-*`1$P((!2"W==@X3=0G`X`@'0 +M#_5QEO!B#,A]^/(3UJ0($$!,6.-Q?KX[T'0T7\'+#XBMD(($V!`!*H.$-S`L +M%:-G`]7$NVM#RAA@H`(5U\=90?0"G0),VQ!#@)X$!E/CPN,7[H*\<>#"8X@@ +MQ(>0Z`*R(>X&1BD\LT%$@(D"&+'`D%T`\S.:%P`"B`6S'JM/L@2Z&Z7I!`,/ +M7GZ">"G>30OH/F#VJV>P5!#G!T81,+U!DJ(`#T=B]\X/7WX`_0!.1;>!3@>2 +MT,1Y]Q4TC.Z"8@:H`.@X8'(%&@OQ##"Q@1(YF`V)/7EQ/F`L^5LK`Y4!+0+Z +M!)AB@;D+:`+0"D@2Q8PQ@`!"=%M2>K?-77L>U&;]P3`DP?`P@P`,*BW!F]H4K8`````$E%3D2N0F"" +` +end --- w3c-markup-validator-0.6.7.orig/htdocs/images/w3c_home.enc +++ w3c-markup-validator-0.6.7/htdocs/images/w3c_home.enc @@ -0,0 +1,49 @@ +begin 644 w3c_home.png +MB5!.1PT*&@H````-24A$4@```$@````P"`(````]_JN\````!&=!34$``+&/ +M"_QA!0``!Z-)1$%4>)SM6GUL4U44/QH9,7N+=#.!8@?M*!BPL&X)(F$T:5C9 +M^&JRJ3@,F[J*,4H39Z"X.+844=@S=`8<49<5*&@@!!KVH4.2%4L-$!->M^&( +M4H&YN4*R]4W;)FY#YQ\7;F_O^VA'6!"R7_K'N>^=]][YO7/NN>>>5X!'%(^- +MC8T]:!LF!(\_:`,F"H\LL2<>M`'W!RS+(F%@8,!D,IE,ID?!8RS+FDPF)&_8 +ML('C.([C8.SA1VUM[=C86'Y^?B@4RL_/1T<>!8\A!`*!]>O7VVPV-+PSQ_CP +M<-=O(:RT<$ZZ(FTJ=>6-8/CW6Q$\-.B5\D\Z??Z/JW\,(3GMR13-,XR\ONA# +M9AL75W=YHYX`DF>F +MI_[RS2M29FVIO_"%N_NQI%>3I?.G?U^W!@\YCLO-S175;&]OU^OULV;-BD0B +M[>WM1J-15"V6/,S+9F,Y.C+:$1@4:B_/GH'E_E#T1C`L9>@Y?U":A0BJ7HNC +M83:;I33+RLIJ:FHBD0@`7+]^74HM1LR@5Y*1=OCT5:'V.H(\`#1+1..-8+B[ +ME\?#C2OFA=O*R=_752M(_1*CEIQ=3J>SKZ\/#W4ZW?;MVZU6JTJE`H"^OKY] +M^_8!`)IXB8E!O$-$7[DB;>K2^=/QT-LA[A:2\-@_\,G;BRF%K?LO8#DU90K[ +MSA+R[($#![#.O7OW=G9VEI;&)DA%185"H1`U`"ABI$.Z>WG1 +M2)L](PW+;3_U\N%AH0Y)>)'V:2J)[SQXJ3\4Q_OSDSW@X +M,SVUZO6XV77V[%DDE):6DJPP7"Z73J>3=Q=0Q!1I4Q=DQK2%4Z@C,!@=&26/ +M"'6:?'%'J*7)MO\B>8=/WWF!NISC."38[78IHUTNE[R[0+AMV5@P%\OHQ9-H +M]M$TA%XEJDZOMU_-7;N%AB5&K5L;R$(7[3XRRAG0129)<\>)T +M_'$Z.-+X\/#'+HY\RE%/8&5%JV@9H%*I$D9:0H@0(^.'C#22P.;BY[#/C1CUQ1$&S[+V*?WQ=W@10Q\F6?OW*+#P^3$VRY7DGI(&>2 +M+D7D\="@5U:]GHM^%QN*6G>O(F=I=&2T_L2=57MB/4:^;`!H_K&']$9IP5R( +MW\)%1T:]_B"I0R5Z^OYZY6G':O((+D.JGIDS! 
+M:RYI_=;Z"V1)L4ZP\E+(UF:4&+5X2&Z4)I`8W(TW!*F48(XOFK%,%1Q2H$O3 +MNUEJ8HF5$K65E#74%BYV/#M!GP=A]HRX]HY,YKP'2!*C.AP8Y.H$$G-)?H*1 +MCTA&[=X@UU<4VE>X.)/:7%$\90X*0>WWAH:&/!Y/,A%P)',A@MEL3JKG(83PQ0MI4,T"B"??Y.MY>?N9)9OG +M&G@&O=)H-%975\N8A.%P.'P^G]_OEU*0ZX\AHW%)3JVY&.N6S2;+=DQ^R28W +M3I5-OAZJ4N/#PU2ELG!.>N9315E966JU6G3OC,%Q7$U-#1*D]@$)>O>DBY9+ +M]'U)'9+\(B(WO,5ZJ4+^8Q='EL4H@#4:C=5JM5@LN#$J!,=Q!H,!M=]DZJ]Q +M$#-+Y#JR!"')?UB6@^7HR&C!^]_N/'C)ZP]V!`8WU7IQZP]%I7K#"+B+3/7?=QZ\5'M47EC8R/#B'QY4JE4"5D!0%)?-*>O +MY\W=/QSZ[A>ILS]P_2]5?<\4-.+?\V^>E-%'N';MFM5JU>GN3+^\ +MO+P]>_:$0J%D;)[\G\?#ADEB#QLFB4WB?X+_`';KJ'!"@]W=`````$E%3D2N +#0F"" +` +end --- w3c-markup-validator-0.6.7.orig/htdocs/linkchecker.css +++ w3c-markup-validator-0.6.7/htdocs/linkchecker.css @@ -0,0 +1,273 @@ +/* + Base Style Sheet for the W3C Link Checker. + + Copyright 2000-2004 W3C (MIT, INRIA, Keio). All Rights Reserved. + See http://www.w3.org/Consortium/Legal/ipr-notice.html#Copyright + + $Id: linkchecker.css,v 1.10 2004/06/11 18:55:39 ville Exp $ +*/ + +html, body { + line-height: 120%; + color: black; + background: white; + font-family: "Bitstream Vera Sans", sans-serif; + margin: 0; + padding: 0; + border: 0; +} + +div#main { + padding-top: 1em; + padding-left: 1em; + padding-right: 1em; + margin: 0; +} + +a:link, a:visited { + color: blue; +} +a:hover { + color: blue; + background-color: #eee; +} +a:active { + color: red; + background-color: yellow; +} + +acronym:hover, abbr:hover { + cursor: help; +} +abbr[title], acronym[title], span[title], strong[title] { + border-bottom: thin dotted; + cursor: help; +} + +pre, code, tt { + font-family: "Bitstream Vera Sans Mono", monospace; + line-height: 100%; + white-space: pre; +} +pre { + padding-left: 2em; +} + +fieldset { + background: #fcfcfc; + border: 1px dotted #053188; + padding: 0.8em; +} + +a:link img, a:visited img { + border-style: none; +} +a img { + color: black; /* The only way to hide the border in NS 4.x */ +} + +ul.toc { + list-style: none; +} + +ol li { + padding: .1em; +} + +th { + text-align: left; +} + +/* These are usually targets and not links */ +h1 a, h1 a:hover, h2 a, h2 a:hover, h3 a, h3 a:hover { + color: inherit; + background-color: inherit; +} + +img { + vertical-align: middle; +} + +address img { + float: right; + width: 88px; +} + +address { + padding-top: 0; + padding-right: 1em; + padding-left: 1em; + padding-bottom: 0; + margin-top: 3em; + border-top: 1px solid black; + background-color: #eee; + clear: right; + background-image: url(/w3c-markup-validator/images/footer.jpg); + background-repeat: no-repeat; + background-attachment: scroll; + height: 50px; + background-position: center; +} + +p.copyright { + margin-top: 5em; + padding-top: .5em; + font-size: xx-small; + max-width: 85ex; + text-align: justify; + text-transform: uppercase; + margin-left: auto; + margin-right:auto; + font-family: "Bitstream Vera Sans Mono", monospace; + color: #888; + line-height: 120%; +} + +p.copyright a { + color: #88f; + text-decoration: none; +} + +/* Various header(ish) things. Definitions cribbed from the CORE Styles. 
*/ + +h1, h2, h3, h4, h5, h6, dt { + font-family: "Bitstream Vera Sans", sans-serif; + font-size-adjust: .53; +} + +h1 { + font-size: 2em; + font-weight: bold; + font-style: normal; + text-decoration: none; + color: #053188; +} +#banner h1 { + font-size: 1.3em; + display: inline; +} + +#banner { + background-image: url(/w3c-markup-validator/images/header.jpg); + background-repeat: no-repeat; + background-attachment: scroll; + height: 50px; + background-position: right; + margin: 0; + padding: 0; + border-bottom: 1px solid black; +} + +h1#title { + padding: 0; + margin: 0; + color: #053188; +} + +h2 { + font-size: 1.5em; + text-align: left; + font-weight: bold; + font-style: normal; + text-decoration: none; + margin-top: 1em; + margin-bottom: 1em; + line-height: 120%; +} + +h3 { + font-size: 1.3em; + font-weight: normal; + font-style: normal; + text-decoration: none; + background-color: #eee; + text-indent: 1em; + padding: .2em; + border-top: 1px dotted black; +} + + +/* + Navbar +*/ + +#menu { + /* min-width: 63em; */ + list-style-type: none; + padding: 0; + margin: 0; + height: 1.5em; + background-color: #eee; + border-bottom: solid 1px black; +} + +#menu li { + display: inline; + padding: 0; + margin: 0; +} + +#menu li a:link, #menu li a:visited { + text-decoration: none; + text-align: center; + float: left; + display: block; + width: 8em; + padding: 2px 0px; + margin: 0; + background-color: #eee; + color: #053188; + font-size: smaller; + font-variant: small-caps; + border-right: solid 1px #bbb; + border-bottom: solid 1px #ddd; +} + +#menu li a#selected:link, #menu li a#selected:visited { + text-decoration: underline; + background-color: #fff; + color: #053188; +} + +#menu li a:hover { + text-decoration: underline; + background-color: #fff; + color: #053188; +} + +#menu li a:active { + background-color: #fff; + color: #053188; + border-right: solid 1px #ddd; +} + +/* Results */ +.report { + width: 100%; +} +dt.report { + font-weight: bold; +} +div#settings { + font-size: smaller; + float: right; +} +div#settings ul { + margin: 0; + padding-left: 1.5em; +} +.unauthorized { + background-color: aqua; +} +.redirect { + background-color: yellow; +} +.broken { + background-color: red; +} +.multiple { + background-color: fuchsia; +} +.dubious { + background-color: #ccc; +} --- w3c-markup-validator-0.6.7.orig/htdocs/checklink.html +++ w3c-markup-validator-0.6.7/htdocs/checklink.html @@ -0,0 +1,274 @@ + + + + + W3C Link Checker Documentation + + + + + + + + + + +
    + + +

    About this service

    + +

    + In order to check the validity of the technical reports that W3C + publishes, the Systems Team has developed a link checker. +

    + +

    + A first version was developed in August 1998 by + Renaud Bruyeron. + Since it lacked some functionality, + Hugo Haas + rewrote it more or less from scratch in November 1999. + It has been improved by Ville Skyttä and many other volunteers since. 

    + +

    + The source code is available publicly under the + W3C IPR + software notice from + CPAN (released + versions) and + CVS + (development and archived release versions). +

    + +

    What it does

    + +

    + The link checker reads an HTML or XHTML document and extracts a list + of anchors and links. +

    + +

    + It checks that no anchor is defined twice. +

    + +

    + It then checks that all the links are dereferenceable, including + the fragments. It warns about HTTP redirects, including directory + redirects. +

    + +

    + It can recursively check part of a Web site. 

    + +

    + There is a command line version and a + CGI version. They both + support HTTP basic + authentication. This is achieved in the CGI version + by passing through the authorization information from the user browser + to the site tested. +

    + +

    Use it online

    + +

    + There is an + online version + of the link checker. +

    + +

    + In the online version (and in general, when run as a CGI script), + the number of documents that can be checked recursively is limited. + Both the command line version and the online one sleep at least one + second between requests to each server to avoid abuse and target + server congestion. 

    + +

    Install it locally

    + +

    + The link checker is written in Perl. It is packaged as a standard + CPAN distribution, and depends on + a few other modules which are also available from CPAN. +

    + +

    In order to install it:

    + +
      +
    1. + Install Perl. +
    2. +
    3. + You will need the following CPAN + distributions, as well as the distributions they possibly depend on. + Depending on your Perl version, you might already have some of + these installed. Also, the latest versions of these may require a + recent version of Perl. As long as the minimum version requirement(s) + below are satisfied, everything should be fine. The latest version + should not be needed; just get an older version that works with your + Perl. For an introduction to installing Perl modules, + see The CPAN FAQ. A sample CPAN shell command is shown after these installation steps. 
        +
      • W3C-LinkChecker (the link checker itself)
      • +
      • CGI.pm (required for CGI mode only)
      • +
      • Config-General (optional, version 2.06 or newer; required only for reading the (optional) configuration file)
      • +
      • HTML-Parser (version 3.00 or newer)
      • +
      • libwww-perl (version 5.66 or newer; version 5.70 or newer recommended, except for 5.76, which has a bug that may cause the link checker to follow redirects to file: URLs)
      • +
      • Net-IP
      • +
      • TermReadKey (optional but recommended; required only in command line mode for password input)
      • +
      • Time-HiRes
      • +
      • URI
      • +
      +
    4. +
    5. + Optionally install the link checker configuration file, + etc/checklink.conf, contained in the link checker + distribution package, into /etc/w3c/checklink.conf, + or set the W3C_CHECKLINK_CFG environment variable to the + location where you installed it. 
    6. +
    7. + Optionally, install the checklink script into a location + in your web server that allows execution of CGI scripts (typically a + directory named cgi-bin somewhere below your web server's + root directory). 
    8. +
    9. + See also the README and INSTALL file(s) + included in the above distributions. +
    10. +
    + +
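    As a rough example (assuming a standard CPAN setup; your local setup may
    differ), the distributions listed above can be pulled in through the CPAN
    shell; W3C::LinkChecker is the package name used by the checklink script
    itself:

      perl -MCPAN -e 'install W3C::LinkChecker'

    Installing that module should also bring in its declared prerequisites,
    although the versions fetched may be newer than the minimums listed above.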

    + Running checklink --help shows how to + use the command line version. The distribution package also includes + more extensive POD + documentation; use + perldoc checklink (or man checklink on Unixish systems) + to view it. 
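    As a sketch using only the options documented here (the URI and output
    file name are placeholders), a recursive check of a local site to depth 2
    with an HTML report could be run as:

      checklink --recursive --depth 2 --html http://localhost/ > report.html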

    + +

    + If you want to enable the authentication capabilities with Apache, + have a look at + Steven Drake's hack. +

    + +

    + Some environment variables affect the way the link checker uses + FTP. + In particular, passive mode is the default. See + Net::FTP(3) + for more information. 
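    For example, to check the ftp: links on a page in active (non-passive)
    mode for a single run, the FTP_PASSIVE variable honored by Net::FTP can
    be overridden (the URI is a placeholder):

      FTP_PASSIVE=0 checklink http://localhost/downloads.html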

    + +

    + There are multiple alternatives for configuring the default + NNTP + server for use with news: URIs without explicit hostnames; + see + Net::NNTP(3) + for more information. 
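    One option, assuming a reachable news server, is to export the NNTPSERVER
    variable recognized by Net::NNTP before running the checker
    (news.example.org and the URI are placeholders):

      NNTPSERVER=news.example.org checklink http://localhost/newsgroups.html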

    + +

    Robots exclusion

    + +

    + As of version 4.0, the link checker honors + robots exclusion rules. To place rules specific to the W3C Link Checker in + /robots.txt files, sites can use the + W3C-checklink user agent string. For example, to allow + the link checker to access all documents on a server and to disallow + all other robots, one could use the following: +

    + +
    +User-Agent: *
    +Disallow: /
    +
    +User-Agent: W3C-checklink
    +Disallow:
    +
    + +

    + Robots exclusion support in the link checker is based on the + LWP::RobotUA + Perl module. It currently supports the + "original 1994 version" + of the standard. The robots META tag, i.e. + <meta name="robots" content="...">, is not supported. + Other than that, the link checker's implementation goes all the way + in trying to honor robots exclusion rules; if a + /robots.txt disallows it, not even the first document + submitted as the root for a link checker run is fetched. 

    + +

    + Note that /robots.txt rules affect only user agents + that honor them; they are not a generic method for access control. 

    + +

    Comments, suggestions and bugs

    + +

    + The current version has proven to be stable. It could, however, be + improved; see the list of open enhancement ideas and bugs for details. 

    + +

    + Please send comments, suggestions and bug reports about the link checker + to the www-validator mailing list + (archives), + with 'checklink' in the subject. +

    + +
    +
    + Valid XHTML 1.0! + The W3C Validator Team
    + $Date: 2004/07/11 16:46:42 $ +
    + + + --- w3c-markup-validator-0.6.7.orig/httpd/cgi-bin/check +++ w3c-markup-validator-0.6.7/httpd/cgi-bin/check @@ -611,7 +611,7 @@ # # By default, use SGML catalog file and SGML Declaration. - my $catalog = File::Spec->catfile($CFG->{'SGML Library'}, 'sgml.soc'); + my $catalog = File::Spec->catfile($CFG->{'SGML Catalog'}, 'sgml.soc'); my @spopt = qw( -R -wvalid @@ -622,7 +622,7 @@ # # Switch to XML semantics if file is XML. if (&is_xml($File->{Type})) { - $catalog = File::Spec->catfile($CFG->{'SGML Library'}, 'xml.soc'); + $catalog = File::Spec->catfile($CFG->{'SGML Catalog'}, 'xml.soc'); push(@spopt, '-wxml'); &add_warning($File, 'note', 'Note:', <<".EOF."); The Validator XML support has @@ -647,6 +647,11 @@ # } } + # + # + if (defined $CFG->{'SGML Maincatalog'}) { + $catalog = File::Spec->catfile($CFG->{'SGML Maincatalog'}); + } # # Defaults for SP; turn off fixed charset mode and set encoding to UTF-8. @@ -656,7 +661,9 @@ # # Tell onsgmls about the SGML Library. - $ENV{SGML_SEARCH_PATH} = $CFG->{'SGML Library'}; + $ENV{SGML_SEARCH_PATH} = $CFG->{'SGML Catalog'}.":".$CFG->{'SGML Library'}. + ":".$CFG->{'XML Library'}; + # # Set the command to execute. @@ -2000,60 +2007,60 @@ unless ($File->{Version} eq 'unknown' or defined $File->{Tentative}) { if ($File->{Version} =~ /^HTML 2\.0$/) { - $image_uri = "$CFG->{'Home Page'}images/vh20"; + $image_uri = "$CFG->{'Home Page'}images/vh20.png"; $alttext = "Valid HTML 2.0!"; $gifborder = ""; } elsif ($File->{Version} =~ /HTML 3\.2{'Home Page'}images/vh32.png"; $alttext = "Valid HTML 3.2!"; $gifhw = ' height="31" width="88"'; } elsif ($File->{Version} =~ /HTML 4\.0<\/a> Strict$/) { - $image_uri = "http://www.w3.org/Icons/valid-html40"; + $image_uri = "$CFG->{'Home Page'}images/vh40.png"; $alttext = "Valid HTML 4.0!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; } elsif ($File->{Version} =~ /HTML 4\.0<\/a> /) { - $image_uri = "http://www.w3.org/Icons/valid-html40"; + $image_uri = "$CFG->{'Home Page'}images/vh40.png"; $alttext = "Valid HTML 4.0!"; $gifhw = ' height="31" width="88"'; } elsif ($File->{Version} =~ /HTML 4\.01<\/a> Strict$/) { - $image_uri = "http://www.w3.org/Icons/valid-html401"; + $image_uri = "$CFG->{'Home Page'}images/vh401.png"; $alttext = "Valid HTML 4.01!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; } elsif ($File->{Version} =~ /HTML 4\.01<\/a> /) { - $image_uri = "http://www.w3.org/Icons/valid-html401"; + $image_uri = "$CFG->{'Home Page'}images/vh401.png"; $alttext = "Valid HTML 4.01!"; $gifhw = ' height="31" width="88"'; } elsif ($File->{Version} =~ /XHTML 1\.0<\/a> /) { - $image_uri = "http://www.w3.org/Icons/valid-xhtml10"; + $image_uri = "$CFG->{'Home Page'}images/vxhtml10.png"; $alttext = "Valid XHTML 1.0!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; $xhtmlendtag = " /"; } elsif ($File->{Version} =~ /XHTML Basic 1.0/) { - $image_uri = "$CFG->{'Home Page'}images/vxhtml-basic10"; + $image_uri = "$CFG->{'Home Page'}images/vxhtml-basic10.png"; $alttext = "Valid XHTML Basic 1.0!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; $xhtmlendtag = " /"; } elsif ($File->{Version} =~ /XHTML 1.1/) { - $image_uri = "http://www.w3.org/Icons/valid-xhtml11"; + $image_uri = "$CFG->{'Home Page'}images/vxhtml11.png"; $alttext = "Valid XHTML 1.1!"; $gifborder = ""; $gifhw = ' height="31" width="88"'; $xhtmlendtag = " /"; } elsif ($File->{Version} =~ /HTML 3\.0/) { - $image_uri = "$CFG->{'Home Page'}images/vh30"; + $image_uri = "$CFG->{'Home Page'}images/vh30.png"; $alttext = "Valid HTML 3.0!"; } elsif 
($File->{Version} =~ /Netscape/) { - $image_uri = "$CFG->{'Home Page'}images/vhns"; + $image_uri = "$CFG->{'Home Page'}images/vhns.png"; $alttext = "Valid Netscape-HTML!"; } elsif ($File->{Version} =~ /Hotjava/) { - $image_uri = "$CFG->{'Home Page'}images/vhhj"; + $image_uri = "$CFG->{'Home Page'}images/vhhj.png"; $alttext = "Valid Hotjava-HTML!"; } elsif ($File->{Version} =~ /ISO\/IEC 15445:2000/) { - $image_uri = "$CFG->{'Home Page'}images/v15445"; + $image_uri = "$CFG->{'Home Page'}images/v15445.png"; $alttext = "Valid ISO-HTML!"; $gifborder = ""; } --- w3c-markup-validator-0.6.7.orig/httpd/cgi-bin/checklink.pod +++ w3c-markup-validator-0.6.7/httpd/cgi-bin/checklink.pod @@ -0,0 +1,223 @@ +$Id: checklink.pod,v 1.12 2004/06/08 21:45:58 ville Exp $ + +=head1 NAME + +checklink - check the validity of links in an HTML or XHTML document + +=head1 SYNOPSIS + +B [ I ] I ... + +=head1 DESCRIPTION + +This manual page documents briefly the B command, a.k.a. the +W3C® Link Checker. + +B is a program that reads an HTML or XHTML document, +extracts a list of anchors and lists and checks that no anchor is +defined twice and that all the links are dereferenceable, including +the fragments. It warns about HTTP redirects, including directory +redirects, and can check recursively a part of a web site. + +The program can be used either as a command line tool or as a CGI script. + +=head1 OPTIONS + +This program follow the usual GNU command line syntax, with long options +starting with two dashes (`-'). A summary of options is included below. + +=over 5 + +=item B<-?, -h, --help> + +Show summary of options. + +=item B<-V, --version> + +Output version information. + +=item B<-s, --summary> + +Show result summary only. + +=item B<-b, --broken> + +Show only the broken links, not the redirects. + +=item B<-e, --directory> + +Hide directory redirects - e.g. L -> +L. + +=item B<-r, --recursive> + +Check the documents linked from the first one. + +=item B<-D, --depth> I + +Check the documents linked from the first one to depth I +(implies B<--recursive>). + +=item B<-l, --location> I + +Scope of the documents checked in recursive mode. By default, for +L for example, it would be +L. + +=item B<--exclude-docs> I + +In recursive mode, do not check links in documents whose URIs match +I. + +=item B<-L, --languages> I + +The C HTTP header to send. In command line mode, +this header is not sent by default. The special value C causes +a value to be detected from the C environment variable, and sent +if found. In CGI mode, the default is to send the value received from +the client as is. + +=item B<-q, --quiet> + +No output if no errors are found. + +=item B<-v, --verbose> + +Verbose mode. + +=item B<-i, --indicator> + +Show progress while parsing. + +=item B<-u, --user> I + +Specify a username for authentication. + +=item B<-p, --password> I + +Specify a password for authentication. + +=item B<--hide-same-realm> + +Hide 401's that are in the same realm as the document checked. + +=item B<-S, --sleep> I + +Sleep the specified number of seconds between requests to each server. +Defaults to 1 second, which is also the minimum allowed. + +=item B<-t, --timeout> I + +Timeout for requests, in seconds. + +=item B<-d, --domain> I + +Perl regular expression describing the domain to which the authentication +information (if present) will be sent. The default value can be specified +in the configuration file. See the C entry in the configuration +file description below for more information. 
+ +=item B<--masquerade> I<"local remote"> + +Masquerade local dir as a remote URI. For example, the following results in +/my/local/dir/ being "mapped" to http://some/remote/uri/ + + --masquerade "/my/local/dir http://some/remote/uri/" + +As of revision 3.6.2.19 of checklink, B<--masquerade> takes a single +argument consisting of two URIs, separated by whitespace. One usual way of +providing a value with embedded whitespace is to enclose it in quotes. + +=item B<-y, --proxy> I + +Specify an HTTP proxy server. + +=item B<-H, --html> + +HTML output. + +=back + +=head1 FILES + +=over 5 + +=item F + +The main configuration file. You can use the L environment +variable to override the default location. + +C specifies a regular expression for matching trusted domains +(ie. domains where HTTP basic authentication, if any, will be sent). +The regular expression will be matched case insensitively against host +names. The default behavior (when unset, that is) is to send the +authentication information only to the host which requests it; usually +you don't want to change this. For example, the following configures +I the w3.org domain as trusted: + + Trusted = \.w3\.org$ + +C is a boolean flag indicating whether checking links +on non-public IP addresses is allowed. The default is true in command line +mode and false when run as a CGI script. For example, to disallow checking +non-public IP addresses, regardless of the mode, use: + + Allow_Private_IPs = 0 + +C and C are formatted URIs to the +respective validators. The C<%s> in these will be replaced with the full +"URI encoded" URI to the document being checked, and shown in the link +checker results view in the online/CGI version. The defaults are: + + Markup_Validator_URI = + http://validator.w3.org/check?uri=%s + CSS_Validator_URI = + http://jigsaw.w3.org/css-validator/validator?uri=%s + +C and C are URIs used for linking to the documentation +and style sheet from the dynamically generated content of the link checker. +The defaults are: + + Doc_URI = http://validator.w3.org/docs/checklink.html + Style_URI = http://validator.w3.org/docs/linkchecker.css + +=back + +=head1 ENVIRONMENT + +checklink uses the libwww-perl library which has a number of environment +variables affecting its behaviour. See L for some +pointers. + +=over 5 + +=item B + +If set, overrides the path to the configuration file. + +=back + +=head1 SEE ALSO + +The documentation for this program is available on the web at +L. + +L, L, L, L, L. + +=head1 AUTHOR + +This program was originally written by Hugo Haas Ehugo@w3.orgE, based +on Renaud Bruyeron's F. It has been enhanced by Ville Skyttä +and many other volunteers since. Use the Ewww-validator@w3.orgE +mailing list for feedback, and see +L for more information. + +This manual page was written by Frédéric Schütz +for the Debian GNU/Linux system (but may be used by others). + +=head1 COPYRIGHT + +This program is licensed under the W3C® Software License, +L. 
+ +=cut --- w3c-markup-validator-0.6.7.orig/httpd/cgi-bin/checklink +++ w3c-markup-validator-0.6.7/httpd/cgi-bin/checklink @@ -0,0 +1,2164 @@ +#!/usr/bin/perl -wT +# +# W3C Link Checker +# by Hugo Haas +# (c) 1999-2004 World Wide Web Consortium +# based on Renaud Bruyeron's checklink.pl +# +# $Id: checklink,v 4.4 2004/07/20 14:57:24 ville Exp $ +# +# This program is licensed under the W3C(r) Software License: +# http://www.w3.org/Consortium/Legal/copyright-software +# +# The documentation is at: +# http://validator.w3.org/docs/checklink.html +# +# See the CVSweb interface at: +# http://dev.w3.org/cvsweb/perl/modules/W3C/LinkChecker/ +# +# An online version is available at: +# http://validator.w3.org/checklink +# +# Comments and suggestions should be sent to the www-validator mailing list: +# www-validator@w3.org (with 'checklink' in the subject) +# http://lists.w3.org/Archives/Public/www-validator/ (archives) + +use strict; + +# Get rid of potentially unsafe and unneeded environment variables. +delete(@ENV{qw(IFS CDPATH ENV BASH_ENV)}); +$ENV{PATH} = ''; # undef would output warnings with Perl 5.6.1's Cwd.pm. + +# ----------------------------------------------------------------------------- + +package W3C::UserAgent; + +use LWP::RobotUA qw(); +# @@@ Needs also W3C::LinkChecker but can't use() it here. + +@W3C::UserAgent::ISA = qw(LWP::RobotUA); + +sub new +{ + my $proto = shift; + my $class = ref($proto) || $proto; + my ($name, $from, $rules) = @_; + + # For security/privacy reasons, if $from was not given, do not send it. + # Cheat by defining something for the constructor, and resetting it later. + my $from_ok = $from; + $from ||= 'www-validator@w3.org'; + # WWW::RobotRules <= 5.78 have bugs which cause suboptimal results with + # User-Agent substring matching against robots.txt files; "User-Agent: *" + # should work ok with all though, and "User-Agent: W3C-checklink" for >= 5.77 + my $self = $class->SUPER::new($name, $from, $rules); + $self->from(undef) unless $from_ok; + + $self->env_proxy(); + return $self; +} + +sub simple_request +{ + my $self = shift; + my $response = $self->W3C::UserAgent::SUPER::simple_request(@_); + if (! defined($self->{FirstResponse})) { + $self->{FirstResponse} = $response->code(); + $self->{FirstMessage} = $response->message() || '(no message)'; + } + return $response; +} + +sub redirect_ok +{ + my ($self, $request, $response) = @_; + if ($self->{Checklink_verbose_progress}) { + # @@@ TODO: when an LWP internal robots.txt request gets redirected, + # this will a bit confusingly print out info about it. Would need a + # robust way of determining whether something is a LWP "internal" request. + &W3C::LinkChecker::hprintf("\n%s %s ", $request->method(),$request->uri()); + } + return $self->SUPER::redirect_ok($request, $response); +} + +# ----------------------------------------------------------------------------- + +package W3C::LinkChecker; + +use vars qw($AGENT $PACKAGE $PROGRAM $VERSION $REVISION + $DocType $Head $Accept $ContentTypes %Cfg); + +use HTML::Entities qw(); +use HTML::Parser 3.00 qw(); +use HTTP::Request qw(); +use HTTP::Response qw(); +use Net::hostent qw(gethostbyname); +use Net::IP qw(); +use Socket qw(inet_ntoa); +use Time::HiRes qw(); +use URI qw(); +use URI::Escape qw(); +use URI::file qw(); +# @@@ Needs also W3C::UserAgent but can't use() it here. 
+ +use constant RC_ROBOTS_TXT => -1; +use constant RC_DNS_ERROR => -2; + +@W3C::LinkChecker::ISA = qw(HTML::Parser); + +BEGIN +{ + # Version info + $PACKAGE = 'W3C Link Checker'; + $PROGRAM = 'W3C-checklink'; + $VERSION = '4.0'; + my ($cvsver) = q$Revision: 4.4 $ =~ /(\d+[\d\.]*\.\d+)/; + $REVISION = sprintf('version %s [%s] (c) 1999-2004 W3C', + $VERSION, $cvsver); + $AGENT = sprintf('%s/%s [%s] %s', + $PROGRAM, $VERSION, $cvsver,LWP::RobotUA->_agent()); + + # Pull in mod_perl modules if applicable. + if ($ENV{MOD_PERL}) { + eval "require Apache::compat"; # For mod_perl 2 + require Apache; + } + + my @content_types = qw(application/xhtml+xml text/html); + $Accept = join(', ', @content_types) . ', */*;q=0.5'; + my $re = join('|', map { s/\+/\\+/g; $_ } @content_types); + $ContentTypes = qr{\b(?:$re)\b}io; + + # + # Read configuration. If the W3C_CHECKLINK_CFG environment variable has + # been set or the default contains a non-empty file, read it. Otherwise, + # skip silently. + # + my $defaultconfig = '/etc/w3c/checklink.conf'; + if ($ENV{W3C_CHECKLINK_CFG} || -s $defaultconfig) { + + require Config::General; + Config::General->require_version(2.06); # Need 2.06 for -SplitPolicy + + my $conffile = $ENV{W3C_CHECKLINK_CFG} || $defaultconfig; + eval { + my %config_opts = + ( -ConfigFile => $conffile, + -SplitPolicy => 'equalsign', + -AllowMultiOptions => 'no', + ); + %Cfg = Config::General->new(%config_opts)->getall(); + }; + if ($@) { + die <<".EOF."; +Failed to read configuration from '$conffile': +$@ +.EOF. + } + } + $Cfg{Markup_Validator_URI} ||= + 'http://validator.w3.org/check?uri=%s'; + $Cfg{CSS_Validator_URI} ||= + 'http://jigsaw.w3.org/css-validator/validator?uri=%s'; + $Cfg{Doc_URI} ||= + 'http://validator.w3.org/docs/checklink.html'; + $Cfg{Style_URI} ||= + 'http://validator.w3.org/docs/linkchecker.css'; + + $DocType = ''; + $Head = < + +EOF + + # Trusted environment variables that need laundering in taint mode. + foreach (qw(NNTPSERVER NEWSHOST)) { + ($ENV{$_}) = ($ENV{$_} =~ /^(.*)$/) if $ENV{$_}; + } + + # Use passive FTP by default, see Net::FTP(3). + $ENV{FTP_PASSIVE} = 1 unless exists($ENV{FTP_PASSIVE}); +} + +# Autoflush +$| = 1; + +# Different options specified by the user +my $cmdline = ! ($ENV{GATEWAY_INTERFACE} && $ENV{GATEWAY_INTERFACE} =~ /^CGI/); +my %Opts = + ( Command_Line => $cmdline, + Quiet => 0, + Summary_Only => 0, + Verbose => 0, + Progress => 0, + HTML => 0, + Timeout => 60, + Redirects => 1, + Dir_Redirects => 1, + Accept_Language => $cmdline ? undef : $ENV{HTTP_ACCEPT_LANGUAGE}, + HTTP_Proxy => undef, + Hide_Same_Realm => 0, + Depth => 0, # < 0 means unlimited recursion. + Sleep_Time => 1, + Max_Documents => 150, # For the online version. + User => undef, + Password => undef, + Base_Location => '.', + Exclude_Docs => undef, + Masquerade => 0, + Masquerade_From => '', + Masquerade_To => '', + Trusted => $Cfg{Trusted}, + Allow_Private_IPs => defined($Cfg{Allow_Private_IPs}) ? + $Cfg{Allow_Private_IPs} : $cmdline, + ); +undef $cmdline; + +# Global variables +# What is our query? +my $query; +# What URI's did we process? (used for recursive mode) +my %processed; +# Result of the HTTP query +my %results; +# List of redirects +my %redirects; +# Count of the number of documents checked +my $doc_count = 0; +# Time stamp +my $timestamp = &get_timestamp(); + +&parse_arguments() if $Opts{Command_Line}; + +# Precompile/error-check regular expressions. 
+if (defined($Opts{Exclude_Docs})) { + eval { $Opts{Exclude_Docs} = qr/$Opts{Exclude_Docs}/o; }; + &usage(1, "Error in exclude-docs regexp: $@") if $@; +} +if (defined($Opts{Trusted})) { + eval { $Opts{Trusted} = qr/$Opts{Trusted}/io; }; + &usage(1, "Error in trusted domains regexp: $@") if $@; +} + +if ($Opts{Command_Line}) { + + require Text::Wrap; + Text::Wrap->import('wrap'); + + &usage(1) unless @ARGV; + + $Opts{_Self_URI} = 'http://validator.w3.org/checklink'; # For HTML output + + &ask_password() if ($Opts{User} && !$Opts{Password}); + + my $first = 1; + foreach my $uri (@ARGV) { + if (!$Opts{Summary_Only}) { + printf("%s %s\n", $PACKAGE, $REVISION) unless $Opts{HTML}; + } else { + $Opts{Verbose} = 0; + $Opts{Progress} = 0; + } + # Transform the parameter into a URI + $uri = &urize($uri); + &check_uri($uri, $first, $Opts{Depth}); + $first &&= 0; + } + undef $first; + + if ($Opts{HTML}) { + &html_footer(); + } elsif (($doc_count > 0) && !$Opts{Summary_Only}) { + printf("\n%s\n", &global_stats()); + } + +} else { + + require CGI; + require CGI::Carp; + CGI::Carp->import(qw(fatalsToBrowser)); + $query = new CGI; + # Set a few parameters in CGI mode + $Opts{Verbose} = 0; + $Opts{Progress} = 0; + $Opts{HTML} = 1; + $Opts{_Self_URI} = $query->url(-relative => 1); + + # Backwards compatibility + my $uri = undef; + if ($uri = $query->param('url')) { + $query->param('uri', $uri) unless $query->param('uri'); + $query->delete('url'); + } + $uri = $query->param('uri'); + + if (! $uri) { + &html_header('', 1); # Set cookie only from results page. + &print_form($query); + &html_footer(); + exit; + } + + # Backwards compatibility + if ($query->param('hide_dir_redirects')) { + $query->param('hide_redirects', 'on'); + $query->param('hide_type', 'dir'); + $query->delete('hide_dir_redirects'); + } + + $Opts{Summary_Only} = 1 if $query->param('summary'); + + if ($query->param('hide_redirects')) { + $Opts{Dir_Redirects} = 0; + if (my $type = $query->param('hide_type')) { + $Opts{Redirects} = 0 if ($type ne 'dir'); + } else { + $Opts{Redirects} = 0; + } + } + + $Opts{Accept_Language} = undef if $query->param('no_accept_language'); + + $Opts{Depth} = -1 if ($query->param('recursive') && $Opts{Depth} == 0); + if (my $depth = $query->param('depth')) { + # @@@ Ignore invalid depth silently for now. + $Opts{Depth} = $1 if ($depth =~ /(-?\d+)/); + } + + # Save, clear or leave cookie as is. + my $cookie = ''; + if (my $action = $query->param('cookie')) { + my %cookie = (-name => $PROGRAM); + if ($action eq 'clear') { + # Clear the cookie. + $cookie{-value} = ''; + $cookie{-expires} = '-1M'; + } else { + # Always refresh the expiration time. + $cookie{-expires} = '+1M'; + if ($action eq 'set') { + # Set the options. + my %options = $query->Vars(); + delete($options{$_}) for qw(url uri check cookie); # Non-persistent. + $cookie{-value} = \%options; + } else { + # Use the old values. + $cookie{-value} = { $query->cookie($PROGRAM) }; + } + } + $cookie = $query->cookie(%cookie); + } + + undef $query; # Not needed any more. + + # All Apache configurations don't set HTTP_AUTHORIZATION for CGI scripts. + # If we're under mod_perl, there is a way around it... 
+ if ($ENV{MOD_PERL}) { + my $auth = Apache->request()->header_in('Authorization'); + $ENV{HTTP_AUTHORIZATION} ||= $auth if $auth; + } + + $uri =~ s/^\s+//g; + if ($uri =~ m/^file:/) { + # Only the http scheme is allowed + &file_uri($uri); + } elsif ($uri !~ m/:/) { + if ($uri =~ m|^//|) { + $uri = 'http:'.$uri; + } else { + $uri = 'http://'.$uri; + } + } + + &check_uri($uri, 1, $Opts{Depth}, $cookie); + &html_footer(); +} + +############################################################################### + +################################ +# Command line and usage stuff # +################################ + +sub parse_arguments () +{ + + require Getopt::Long; + Getopt::Long->require_version(2.17); + Getopt::Long->import('GetOptions'); + Getopt::Long::Configure('bundling', 'no_ignore_case'); + my $masq = ''; + + GetOptions('help|h|?' => sub { usage(0) }, + 'q|quiet' => sub { $Opts{Quiet} = 1; + $Opts{Summary_Only} = 1; + }, + 's|summary' => \$Opts{Summary_Only}, + 'b|broken' => sub { $Opts{Redirects} = 0; + $Opts{Dir_Redirects} = 0; + }, + 'e|dir-redirects' => sub { $Opts{Dir_Redirects} = 0; }, + 'v|verbose' => \$Opts{Verbose}, + 'i|indicator' => \$Opts{Progress}, + 'H|html' => \$Opts{HTML}, + 'r|recursive' => sub { $Opts{Depth} = -1 + if $Opts{Depth} == 0; }, + 'l|location=s' => \$Opts{Base_Location}, + 'exclude-docs=s', => \$Opts{Exclude_Docs}, + 'u|user=s' => \$Opts{User}, + 'p|password=s' => \$Opts{Password}, + 't|timeout=i' => \$Opts{Timeout}, + 'S|sleep=i' => \$Opts{Sleep_Time}, + 'L|languages=s' => \$Opts{Accept_Language}, + 'n|noacclanguage' => sub { warn("*** Warning: The " . + "-n/--noacclanguage option is " . + "deprecated and has no effect.\n"); }, + 'D|depth=i' => sub { $Opts{Depth} = $_[1] + unless $_[1] == 0; }, + 'd|domain=s' => \$Opts{Trusted}, + 'y|proxy=s' => \$Opts{HTTP_Proxy}, + 'masquerade=s' => \$masq, + 'hide-same-realm' => \$Opts{Hide_Same_Realm}, + 'V|version' => \&version, + ) + || usage(1); + + if ($masq) { + $Opts{Masquerade} = 1; + my @masq = split(/\s+/, $masq); + if (scalar(@masq) != 2 || + !defined($masq[0]) || $masq[0] !~ /\S/ || + !defined($masq[1]) || $masq[1] !~ /\S/) { + usage(1, "Error: --masquerade takes two whitespace separated URIs."); + } else { + $Opts{Masquerade_From} = $masq[0]; + $Opts{Masquerade_To} = $masq[1]; + } + } + + if ($Opts{Accept_Language} && $Opts{Accept_Language} eq 'auto') { + $Opts{Accept_Language} = &guess_language(); + } + + if (($Opts{Sleep_Time} || 0) < 1) { + warn("*** Warning: minimum allowed sleep time is 1 second, resetting.\n"); + $Opts{Sleep_Time} = 1; + } +} + +sub version () +{ + print "$PACKAGE $REVISION\n"; + exit 0; +} + +sub usage () +{ + my ($exitval, $msg) = @_; + $exitval = 0 unless defined($exitval); + $msg ||= ''; $msg =~ s/[\r\n]*$/\n\n/ if $msg; + + die($msg) unless $Opts{Command_Line}; + + my $trust = defined($Cfg{Trusted}) ? $Cfg{Trusted} : 'same host only'; + + select(STDERR) if $exitval; + print "$msg$PACKAGE $REVISION + +Usage: checklink +Options: + -s, --summary Result summary only. + -b, --broken Show only the broken links, not the redirects. + -e, --directory Hide directory redirects, for example + http://www.w3.org/TR -> http://www.w3.org/TR/ + -r, --recursive Check the documents linked from the first one. + -D, --depth N Check the documents linked from the first one to + depth N (implies --recursive). + -l, --location URI Scope of the documents checked in recursive mode. 
+ By default, for example for + http://www.w3.org/TR/html4/Overview.html + it would be http://www.w3.org/TR/html4/ + --exclude-docs REGEXP In recursive mode, do not check links in documents + whose URIs match REGEXP. + -L, --languages LANGS Accept-Language header to send. The special value + 'auto' causes autodetection from the environment. + -q, --quiet No output if no errors are found (implies -s). + -v, --verbose Verbose mode. + -i, --indicator Show progress while parsing. + -u, --user USERNAME Specify a username for authentication. + -p, --password PASSWORD Specify a password. + --hide-same-realm Hide 401's that are in the same realm as the + document checked. + -S, --sleep SECS Sleep SECS seconds between requests to each server + (default and minimum: 1 second). + -t, --timeout SECS Timeout for requests (in seconds). + -d, --domain DOMAIN Regular expression describing the domain to which + authentication information will be sent + (default: $trust). + --masquerade \"BASE1 BASE2\" Masquerade base URI BASE1 as BASE2. See the + manual page for more information. + -y, --proxy PROXY Specify an HTTP proxy server. + -H, --html HTML output. + -?, -h, --help Show this message and exit. + -V, --version Output version information and exit. + +See \"perldoc Net::FTP\" for information about various environment variables +affecting FTP connections and \"perldoc Net::NNTP\" for setting a default +NNTP server for news: URIs. + +The W3C_CHECKLINK_CFG environment variable can be used to set the +configuration file to use. See details in the full manual page, it can +be displayed with: perldoc checklink + +More documentation at: $Cfg{Doc_URI} +Please send bug reports and comments to the www-validator mailing list: + www-validator\@w3.org (with 'checklink' in the subject) + Archives are at: http://lists.w3.org/Archives/Public/www-validator/ +"; + exit $exitval; +} + +sub ask_password () +{ + eval { + local $SIG{__DIE__}; + require Term::ReadKey; + Term::ReadKey->require_version(2.00); + Term::ReadKey->import(qw(ReadMode)); + }; + if ($@) { + warn('Warning: Term::ReadKey 2.00 or newer not available, ' . + "password input disabled.\n"); + return; + } + printf(STDERR 'Enter the password for user %s: ', $Opts{User}); + ReadMode('noecho', *STDIN); + chomp($Opts{Password} = ); + ReadMode('restore', *STDIN); + print(STDERR "ok.\n"); +} + +############################################################################### + +########################################################################### +# Guess an Accept-Language header based on the $LANG environment variable # +########################################################################### + +sub guess_language () +{ + my $lang = $ENV{LANG} or return undef; + + $lang =~ s/[\.@].*$//; # en_US.UTF-8, fi_FI@euro... 
+ + return 'en' if ($lang eq 'C' || $lang eq 'POSIX'); + + my $res = undef; + eval { + require Locale::Language; + if (my $tmp = Locale::Language::language2code($lang)) { + $lang = $tmp; + } + if (my ($l, $c) = (lc($lang) =~ /^([a-z]+)(?:[-_]([a-z]+))?/)) { + if (Locale::Language::code2language($l)) { + $res = $l; + if ($c) { + require Locale::Country; + $res .= "-$c" if Locale::Country::code2country($c); + } + } + } + }; + return $res; +} + +########################################### +# Transform foo into file://localhost/foo # +########################################### + +sub urize ($) +{ + my $u = URI->new_abs(URI::Escape::uri_unescape($_[0]), URI::file->cwd()); + return $u->as_string(); +} + +######################################## +# Check for broken links in a resource # +######################################## + +sub check_uri ($$$;$) +{ + + my ($uri, $first, $depth, $cookie) = @_; + + my $start = &get_timestamp() unless $Opts{Quiet}; + + # Get and parse the document + my $response = &get_document('GET', $uri, $doc_count, \%redirects); + + # Can we check the resource? If not, we exit here... + return -1 if defined($response->{Stop}); + + if ($first) { + # Use the first URI as the recursion base unless specified otherwise. + $Opts{Base_Location} = ($Opts{Base_Location} eq '.') + ? $response->{absolute_uri}->canonical() : + URI->new($Opts{Base_Location})->canonical(); + } else { + # Before fetching the document, we don't know if we'll be within the + # recursion scope or not (think redirects). + return -1 unless &in_recursion_scope($response->{absolute_uri}); + + print $Opts{HTML} ? '
    ' : '-' x 40, "\n"; + } + + # We are checking a new document + $doc_count++; + + if ($Opts{HTML}) { + &html_header($uri, 0, $cookie) if $first; + print('

    '); + } + + my $absolute_uri = $response->{absolute_uri}->as_string(); + + my $result_anchor = 'results'.$doc_count; + + printf("\nProcessing\t%s\n\n", + $Opts{HTML} ? &show_url($absolute_uri) : $absolute_uri); + + if ($Opts{HTML}) { + print("

    \n"); + if (! $Opts{Summary_Only}) { + my $accept = &encode($Accept); + my $acclang = &encode($Opts{Accept_Language} || '(not sent)'); + my $s = $Opts{Sleep_Time} == 1 ? '' : 's'; + printf(<<'EOF', $accept, $acclang, $Opts{Sleep_Time}, $s); +
    +Settings used: + +
    +EOF + printf("

    Go to the results.

    \n", + $result_anchor); + my $esc_uri = URI::Escape::uri_escape($absolute_uri, "^A-Za-z0-9."); + printf("

    For reliable link checking results, check +HTML validity first. See also +CSS validity.

    +

    Back to the link checker.

    \n", + &encode(sprintf($Cfg{Markup_Validator_URI}, $esc_uri)), + &encode(sprintf($Cfg{CSS_Validator_URI}, $esc_uri)), + &encode($Opts{_Self_URI})); + print("
    \n");
    +    }
    +  } elsif (! $Opts{Summary_Only}) {
    +    my $s = $Opts{Sleep_Time} == 1 ? '' : 's';
    +    my $acclang = $Opts{Accept_Language} || '(not sent)';
    +    printf(<<'EOF', $Accept, $acclang, $Opts{Sleep_Time}, $s);
    +Settings used:
    +- Accept: %s
    +- Accept-Language: %s
    +- Sleeping %d second%s between requests to each server
    +
    +EOF
    +  }
    +
    +  # Record that we have processed this resource
    +  $processed{$absolute_uri} = 1;
    +  # Parse the document
    +  my $p = &parse_document($uri, $absolute_uri,
    +                          $response->content(), 1,
    +                          $depth != 0);
    +  my $base = URI->new($p->{base});
    +
    +  # Check anchors
    +  ###############
    +
    +  print "Checking anchors...\n" unless $Opts{Summary_Only};
    +
    +  my %errors;
    +  foreach my $anchor (keys %{$p->{Anchors}}) {
    +    my $times = 0;
    +    foreach my $l (keys %{$p->{Anchors}{$anchor}}) {
    +      $times += $p->{Anchors}{$anchor}{$l};
    +    }
    +    # They should appear only once
    +    $errors{$anchor} = 1 if ($times > 1);
    +    # Empty IDREF's are not allowed
    +    $errors{$anchor} = 1 if ($anchor eq '');
    +  }
    +  print " done.\n" unless $Opts{Summary_Only};
    +
    +  # Check links
    +  #############
    +
    +  my %links;
    +  # Record all the links found
    +  foreach my $link (keys %{$p->{Links}}) {
    +    my $link_uri = URI->new($link);
    +    my $abs_link_uri = URI->new_abs($link_uri, $base);
    +
    +    # Work around a bug in URI::sip(s) (URI 1.22 - 1.30).
    +    $abs_link_uri = $link_uri
    +      if (!defined($abs_link_uri) && $link_uri->scheme() =~ /^sips?$/);
    +
    +    if ($Opts{Masquerade}) {
    +      if ($abs_link_uri =~ m|^$Opts{Masquerade_From}|) {
    +        printf("processing %s in base %s\n",
    +               $abs_link_uri, $Opts{Masquerade_To});
    +        my $nlink = $abs_link_uri;
    +        $nlink =~
    +          s|^$Opts{Masquerade_From}|$Opts{Masquerade_To}|;
    +        $abs_link_uri = URI->new($nlink);
    +      };
    +    }
    +    foreach my $lines (keys %{$p->{Links}{$link}}) {
    +      my $canonical = URI->new($abs_link_uri->canonical());
    +      my $url = $canonical->scheme().':'.$canonical->opaque();
    +      my $fragment = $canonical->fragment();
    +      if (! $fragment) {
    +        # Document without fragment
    +        $links{$url}{location}{$lines} = 1;
    +      } else {
    +        # Resource with a fragment
    +        $links{$url}{fragments}{$fragment}{$lines} = 1;
    +      }
    +    }
    +  }
    +
    +  # Build the list of broken URI's
    +  my %broken;
    +  foreach my $u (keys %links) {
    +
    +    # Don't check mailto: URI's
    +    next if ($u =~ m/^mailto:/);
    +
    +    &hprintf("Checking link %s\n", $u) unless $Opts{Summary_Only};
    +
    +    # Check that a link is valid
    +    &check_validity($uri, $u,
    +                    ($depth != 0 && &in_recursion_scope($u)),
    +                    \%links, \%redirects);
    +    &hprintf("\tReturn code: %s\n", $results{$u}{location}{code})
    +      if ($Opts{Verbose});
    +    if ($results{$u}{location}{success}) {
    +
    +      # Even though it was not broken, we might want to display it
    +      # on the results page (e.g. because it required authentication)
    +      $broken{$u}{location} = 1 if ($results{$u}{location}{display} >= 400);
    +
    +      # List the broken fragments
    +      foreach my $fragment (keys %{$links{$u}{fragments}}) {
    +        if ($Opts{Verbose}) {
    +          my @frags = sort keys %{$links{$u}{fragments}{$fragment}};
    +          &hprintf("\t\t%s %s - Line%s: %s\n",
    +                   $fragment,
    +                   ($results{$u}{fragments}{$fragment}) ? 'OK' : 'Not found',
    +                   (scalar(@frags) > 1) ? 's' : '',
    +                   join(', ', @frags)
    +                  );
    +        }
    +        # A broken fragment?
    +        if ($results{$u}{fragments}{$fragment} == 0) {
    +          $broken{$u}{fragments}{$fragment} += 2;
    +        }
    +      }
    +    } else {
    +      # Couldn't find the document
    +      $broken{$u}{location} = 1;
    +      # All the fragments associated are hence broken
    +      foreach my $fragment (keys %{$links{$u}{fragments}}) {
    +        $broken{$u}{fragments}{$fragment}++;
    +      }
    +    }
    +  }
    +  &hprintf("Processed in %ss.\n", &time_diff($start, &get_timestamp()))
    +    unless $Opts{Summary_Only};
    +
    +  # Display results
    +  if ($Opts{HTML} && !$Opts{Summary_Only}) {
    +    print("
    \n"); + printf("

    Results

    \n", $result_anchor); + } + print "\n" unless $Opts{Quiet}; + + &anchors_summary($p->{Anchors}, \%errors); + &links_summary(\%links, \%results, \%broken, \%redirects); + + # Do we want to process other documents? + if ($depth != 0) { + + foreach my $u (keys %links) { + + next unless $results{$u}{location}{success}; # Broken link? + + next unless &in_recursion_scope($u); + + # Do we understand its content type? + next unless ($results{$u}{location}{type} =~ $ContentTypes); + + # Have we already processed this URI? + next if &already_processed($u); + + # Do the job + print "\n"; + if ($Opts{HTML}) { + if (!$Opts{Command_Line}) { + if ($doc_count == $Opts{Max_Documents}) { + print("
    \n

    Maximum number of documents reached!

    \n"); + } + if ($doc_count >= $Opts{Max_Documents}) { + $doc_count++; + print("

    Not checking $u

    \n"); + $processed{$u} = 1; + next; + } + } + } + if ($depth < 0) { + &check_uri($u, 0, -1); + } else { + &check_uri($u, 0, $depth-1); + } + } + } +} + +####################################### +# Get and parse a resource to process # +####################################### + +sub get_document ($$$;\%) +{ + my ($method, $uri, $in_recursion, $redirects) = @_; + # $method contains the HTTP method the use (GET or HEAD) + # $uri contains the identifier of the resource + # $in_recursion is > 0 if we are in recursion mode (i.e. it is at least + # the second resource checked) + # $redirects is a pointer to the hash containing the map of the redirects + + # Get the resource + my $response; + if (defined($results{$uri}{response}) + && !(($method eq 'GET') && ($results{$uri}{method} eq 'HEAD'))) { + $response = $results{$uri}{response}; + } else { + $response = &get_uri($method, $uri); + &record_results($uri, $method, $response); + &record_redirects($redirects, $response); + } + if (! $response->is_success()) { + if (! $in_recursion) { + # Is it too late to request authentication? + if ($response->code() == 401) { + &authentication($response); + } else { + if ($Opts{HTML}) { + &html_header($uri); + print "

    "; + } + &hprintf("\nError: %d %s\n", + $response->code(), $response->message() || '(no message)'); + print "

    \n" if $Opts{HTML}; + } + } + $response->{Stop} = 1; + return($response); + } + + # What is the URI of the resource that we are processing by the way? + my $base_uri = URI->new($response->base()); + my $request_uri = URI->new($response->request->url); + $response->{absolute_uri} = $request_uri->abs($base_uri); + + # Can we parse the document? + my $failed_reason; + my $ct = $response->header('Content-Type'); + my $ce = $response->header('Content-Encoding'); + if (!$ct || $ct !~ $ContentTypes) { + $failed_reason = "Content-Type for <$request_uri> is " . + (defined($ct) ? "'$ct'" : 'undefined'); + } elsif (defined($ce) && $ce ne 'identity') { + # @@@ We could maybe handle gzip... + $failed_reason = "Content-Encoding for <$request_uri> is '$ce'"; + } + if ($failed_reason) { + # No, there is a problem... + if (! $in_recursion) { + if ($Opts{HTML}) { + &html_header($uri); + print "

    \n"; + } + &hprintf("Can't check links: %s.\n", $failed_reason); + print "

    \n" if $Opts{HTML}; + } + $response->{Stop} = 1; + } + + # Ok, return the information + return($response); +} + +######################################################### +# Check whether a URI is within the scope of recursion. # +######################################################### + +sub in_recursion_scope ($) +{ + my ($uri) = @_; + return undef unless $uri; + + my $current = URI->new($uri)->canonical(); + my $rel = $current->rel($Opts{Base_Location}); # base -> current! + + return undef if ($current eq $rel); # Relative path not possible? + return undef if ($rel =~ m|^(\.\.)?/|); # Relative path starts with ../ or /? + return undef if (defined($Opts{Exclude_Docs}) && + $current =~ $Opts{Exclude_Docs}); + return 1; +} + +################################################## +# Check whether a URI has already been processed # +################################################## + +sub already_processed ($) +{ + my ($uri) = @_; + # Don't be verbose for that part... + my $summary_value = $Opts{Summary_Only}; + $Opts{Summary_Only} = 1; + # Do a GET: if it fails, we stop, if not, the results are cached + my $response = &get_document('GET', $uri, 1); + # ... but just for that part + $Opts{Summary_Only} = $summary_value; + # Can we process the resource? + return -1 if defined($response->{Stop}); + # Have we already processed it? + return 1 if defined($processed{$response->{absolute_uri}->as_string()}); + # It's not processed yet and it is processable: return 0 + return 0; +} + +############################ +# Get the content of a URI # +############################ + +sub get_uri ($$;$\%$$$$) +{ + # Here we have a lot of extra parameters in order not to lose information + # if the function is called several times (401's) + my ($method, $uri, $start, $redirects, $code, $realm, $message, $auth) = @_; + + # $method contains the method used + # $uri contains the target of the request + # $start is a timestamp (not defined the first time the function is + # called) + # $redirects is a map of redirects + # $code is the first HTTP return code + # $realm is the realm of the request + # $message is the HTTP message received + # $auth equals 1 if we want to send out authentication information + + # For timing purposes + $start = &get_timestamp() unless defined($start); + + # Prepare the query + my $ua = W3C::UserAgent->new($AGENT); # @@@ TODO: admin address + # @@@ make number of keep-alive connections customizable + $ua->conn_cache({ total_capacity => 1}); # 1 keep-alive connection + $ua->delay($Opts{Sleep_Time}/60); + $ua->timeout($Opts{Timeout}); + $ua->proxy('http', 'http://' . $Opts{HTTP_Proxy}) if $Opts{HTTP_Proxy}; + + # Do we want printouts of progress? + my $verbose_progress = + ! ($Opts{Summary_Only} || (!$doc_count && $Opts{HTML})); + + &hprintf("%s %s ", $method, $uri) if $verbose_progress; + + my $request = new HTTP::Request($method, $uri); + $request->header('Accept-Language' => $Opts{Accept_Language}) + if $Opts{Accept_Language}; + $request->header('Accept', $Accept); + # Are we providing authentication info? + if ($auth && $request->url()->host() =~ $Opts{Trusted}) { + if (defined($ENV{HTTP_AUTHORIZATION})) { + $request->headers->header(Authorization => $ENV{HTTP_AUTHORIZATION}); + } elsif (defined($Opts{User}) && defined($Opts{Password})) { + $request->authorization_basic($Opts{User}, $Opts{Password}); + } + } + + # Tell the user agent if we want progress reports (in redirects) or not. + $ua->{Checklink_verbose_progress} = $verbose_progress; + + # Check if the IP address is allowed. 
+ my $response = &ip_allowed($request->uri()); + return $response if $response; + + # Do the query + $response = $ua->request($request); + + # Get the results + # Record the very first response + if (! defined($code)) { + $code = $ua->{FirstResponse}; + $message = $ua->{FirstMessage}; + } + # Authentication requested? + if ($response->code() == 401 && + !defined($auth) && + (defined($ENV{HTTP_AUTHORIZATION}) + || (defined($Opts{User}) && defined($Opts{Password})))) { + + # Set host as trusted domain unless we already have one. + if (!$Opts{Trusted}) { + my $re = sprintf('^%s$', quotemeta($response->base()->host())); + $Opts{Trusted} = qr/$re/io; + } + + # Deal with authentication and avoid loops + if (! defined($realm)) { + $response->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; + $realm = $1; + } + print "\n" if $verbose_progress; + return &get_uri($method, $response->request()->url(), + $start, $redirects, $code, $realm, $message, 1); + } + # @@@ subtract robot delay from the "fetched in" time? + &hprintf(" fetched in %ss\n", + &time_diff($start, &get_timestamp())) if $verbose_progress; + + $response->{Realm} = $realm if defined($realm); + + return $response; +} + +######################################### +# Record the results of an HTTP request # +######################################### + +sub record_results ($$$) +{ + my ($uri, $method, $response) = @_; + $results{$uri}{response} = $response; + $results{$uri}{method} = $method; + $results{$uri}{location}{code} = $response->code(); + $results{$uri}{location}{code} = RC_ROBOTS_TXT() + if ($results{$uri}{location}{code} == 403 && + $response->message() =~ /Forbidden by robots\.txt/); + $results{$uri}{location}{code} = RC_DNS_ERROR() + if ($results{$uri}{location}{code} == 500 && + $response->message() =~ /Bad hostname '[^\']*'/); + $results{$uri}{location}{type} = $response->header('Content-type'); + $results{$uri}{location}{display} = $results{$uri}{location}{code}; + # Rewind, check for the original code and message. + for (my $tmp = $response->previous(); $tmp; $tmp = $tmp->previous()) { + $results{$uri}{location}{orig} = $tmp->code(); + $results{$uri}{location}{orig_message} = $tmp->message() || '(no message)'; + } + $results{$uri}{location}{success} = $response->is_success(); + # Stores the authentication information + if (defined($response->{Realm})) { + $results{$uri}{location}{realm} = $response->{Realm}; + $results{$uri}{location}{display} = 401 unless $Opts{Hide_Same_Realm}; + } + # What type of broken link is it? (stored in {record} - the {display} + # information is just for visual use only) + if (($results{$uri}{location}{display} == 401) + && ($results{$uri}{location}{code} == 404)) { + $results{$uri}{location}{record} = 404; + } else { + $results{$uri}{location}{record} = $results{$uri}{location}{display}; + } + # Did it fail? + $results{$uri}{location}{message} = $response->message() || '(no message)'; + if (! $results{$uri}{location}{success}) { + &hprintf("Error: %d %s\n", + $results{$uri}{location}{code}, + $results{$uri}{location}{message}) + if ($Opts{Verbose}); + return; + } +} + +#################### +# Parse a document # +#################### + +sub parse_document ($$$$$) +{ + my ($uri, $location, $document, $links, $rec_needs_links) = @_; + + my $p; + + if (defined($results{$uri}{parsing})) { + # We have already done the job. Woohoo! 
+ $p->{base} = $results{$uri}{parsing}{base}; + $p->{Anchors} = $results{$uri}{parsing}{Anchors}; + $p->{Links} = $results{$uri}{parsing}{Links}; + return $p; + } + + my $start; + $p = W3C::LinkChecker->new(); + $p->{base} = $location; + if (! $Opts{Summary_Only}) { + $start = &get_timestamp(); + print("Parsing...\n"); + } + if (!$Opts{Summary_Only} || $Opts{Progress}) { + $p->{Total} = ($document =~ tr/\n//); + } + # We only look for anchors if we are not interested in the links + # obviously, or if we are running a recursive checking because we + # might need this information later + $p->{only_anchors} = !($links || $rec_needs_links); + + # Transform into for parsing + # Processing instructions are not parsed by process, but in this case + # it should be. It's expensive, it's horrible, but it's the easiest way + # for right now. + $document =~ s/\<\?(xml:stylesheet.*?)\?\>/\<$1\>/ unless $p->{only_anchors}; + + $p->parse($document); + + if (! $Opts{Summary_Only}) { + my $stop = &get_timestamp(); + print "\r" if $Opts{Progress}; + &hprintf(" done (%d lines in %ss).\n", + $p->{Total}, &time_diff($start, $stop)); + } + + # Save the results before exiting + $results{$uri}{parsing}{base} = $p->{base}; + $results{$uri}{parsing}{Anchors} = $p->{Anchors}; + $results{$uri}{parsing}{Links} = $p->{Links}; + + return $p; +} + +#################################### +# Constructor for W3C::LinkChecker # +#################################### + +sub new +{ + my $p = HTML::Parser::new(@_, api_version => 3); + + # Start tags + $p->handler(start => 'start', 'self, tagname, attr, text, event, tokens'); + # Declarations + $p->handler(declaration => + sub { + my $self = shift; + $self->declaration(substr($_[0], 2, -1)); + }, 'self, text'); + # Other stuff + $p->handler(default => 'text', 'self, text'); + # Line count + $p->{Line} = 1; + # Check ? + $p->{check_name} = 1; + # Check <[..] id="..">? 
+ $p->{check_id} = 1; + # Don't interpret comment loosely + $p->strict_comment(1); + + return $p; +} + +################################################# +# Record or return the doctype of the document # +################################################# + +sub doctype +{ + my ($self, $dc) = @_; + return $self->{doctype} unless $dc; + $_ = $self->{doctype} = $dc; + + # What to look for depending on the doctype + $self->{check_name} = 0 if ($_ eq '-//W3C//DTD XHTML Basic 1.0//EN'); + + # Check for the id tag + if ( + # HTML 2.0 & 3.0 + m%^-//IETF//DTD HTML [23]\.0//% || + # HTML 3.2 + m%^-//W3C//DTD HTML 3\.2//%) { + $self->{check_id} = 0; + } + # Enable XML extensions + $self->xml_mode(1) if (m%^-//W3C//DTD XHTML %); +} + +####################################### +# Count the number of lines in a file # +####################################### + +sub new_line +{ + my ($self, $string) = @_; + my $count = ($string =~ tr/\n//); + $self->{Line} = $self->{Line} + $count; + printf("\r%4d%%", int($self->{Line}/$self->{Total}*100)) if $Opts{Progress}; +} + +############################# +# Extraction of the anchors # +############################# + +sub get_anchor +{ + my ($self, $tag, $attr) = @_; + + my $anchor = $attr->{id} if $self->{check_id}; + if ($self->{check_name} && ($tag eq 'a')) { + # @@@@ In XHTML, is mandatory + # Force an error if it's not the case (or if id's and name's values + # are different) + # If id is defined, name if defined must have the same value + $anchor ||= $attr->{name}; + } + + return $anchor; +} + +############################# +# W3C::LinkChecker handlers # +############################# + +sub add_link +{ + my ($self, $uri) = @_; + $self->{Links}{$uri}{$self->{Line}}++ if defined($uri); +} + +sub start +{ + my ($self, $tag, $attr, $text) = @_; + + # Anchors + my $anchor = $self->get_anchor($tag, $attr); + $self->{Anchors}{$anchor}{$self->{Line}}++ if defined($anchor); + + # Links + if (!$self->{only_anchors}) { + # Here, we are checking too many things + # The right thing to do is to parse the DTD... + if ($tag eq 'base') { + # Treat (without href) or as if it didn't exist. 
+ if (defined($attr->{href}) && $attr->{href} ne '') { + $self->{base} = $attr->{href}; + } + } else { + $self->add_link($attr->{href}); + } + $self->add_link($attr->{src}); + $self->add_link($attr->{data}) if ($tag eq 'object'); + $self->add_link($attr->{cite}) if ($tag eq 'blockquote'); + } + + # Line counting + $self->new_line($text) if ($text =~ m/\n/); +} + +sub text +{ + my ($self, $text) = @_; + if (!$Opts{Progress}) { + # If we are just extracting information about anchors, + # parsing this part is only cosmetic (progress indicator) + return unless !$self->{only_anchors}; + } + $self->new_line($text) if ($text =~ /\n/); +} + +sub declaration +{ + my ($self, $text) = @_; + # Extract the doctype + my @declaration = split(/\s+/, $text, 4); + if (($#declaration >= 3) && + ($declaration[0] eq 'DOCTYPE') && + (lc($declaration[1]) eq 'html')) { + # Parse the doctype declaration + $text =~ m/^DOCTYPE\s+html\s+PUBLIC\s+\"([^\"]*)\"(\s+\"([^\"]*)\")?\s*$/i; + # Store the doctype + $self->doctype($1) if $1; + # If there is a link to the DTD, record it + $self->{Links}{$3}{$self->{Line}}++ if (!$self->{only_anchors} && $3); + } + return unless !$self->{only_anchors}; + $self->text($text); +} + +################################ +# Check the validity of a link # +################################ + +sub check_validity ($$$\%\%) +{ + my ($testing, $uri, $want_links, $links, $redirects) = @_; + # $testing is the URI of the document checked + # $uri is the URI of the target that we are verifying + # $want_links is true if we're interested in links in the target doc + # $links is a hash of the links in the documents checked + # $redirects is a map of the redirects encountered + + # Checking file: URI's is not allowed with a CGI + if ($testing ne $uri) { + if (!$Opts{Command_Line} && $testing !~ m/^file:/ && $uri =~ m/^file:/) { + my $msg = 'Error: \'file:\' URI not allowed'; + # Can't test? Return 400 Bad request. + $results{$uri}{location}{code} = 400; + $results{$uri}{location}{record} = 400; + $results{$uri}{location}{success} = 0; + $results{$uri}{location}{message} = $msg; + &hprintf("Error: %d %s\n", 400, $msg) if $Opts{Verbose}; + return; + } + } + + # Get the document with the appropriate method + # Only use GET if there are fragments. HEAD is enough if it's not the + # case. + my @fragments = keys %{$links->{$uri}{fragments}}; + my $method = scalar(@fragments) ? 'GET' : 'HEAD'; + + my $response; + my $being_processed = 0; + if ((! defined($results{$uri})) + || (($method eq 'GET') && ($results{$uri}{method} eq 'HEAD'))) { + $being_processed = 1; + $response = &get_uri($method, $uri); + # Get the information back from get_uri() + &record_results($uri, $method, $response); + # Record the redirects + &record_redirects($redirects, $response); + } + + # We got the response of the HTTP request. Stop here if it was a HEAD. + return if ($method eq 'HEAD'); + + # There are fragments. Parse the document. + my $p; + if ($being_processed) { + # Can we really parse the document? 
+ return unless defined($results{$uri}{location}{type}); + if ($results{$uri}{location}{type} !~ $ContentTypes) { + &hprintf("Can't check content: Content-Type for '%s' is '%s'.\n", + $uri, $results{$uri}{location}{type}) + if ($Opts{Verbose}); + return; + } + # Do it then + $p = &parse_document($uri, $response->base(), + $response->as_string(), 0, $want_links); + } else { + # We already had the information + $p->{Anchors} = $results{$uri}{parsing}{Anchors}; + } + # Check that the fragments exist + foreach my $fragment (keys %{$links->{$uri}{fragments}}) { + if (defined($p->{Anchors}{$fragment}) + || &escape_match($fragment, $p->{Anchors})) { + $results{$uri}{fragments}{$fragment} = 1; + } else { + $results{$uri}{fragments}{$fragment} = 0; + } + } +} + +sub escape_match ($\%) +{ + my ($a, $hash) = (URI::Escape::uri_unescape($_[0]), $_[1]); + foreach my $b (keys %$hash) { + return 1 if ($a eq URI::Escape::uri_unescape($b)); + } + return 0; +} + +########################## +# Ask for authentication # +########################## + +sub authentication ($) +{ + my $r = $_[0]; + $r->headers->www_authenticate =~ /Basic realm=\"([^\"]+)\"/; + my $realm = $1; + $realm = '' unless defined($realm); + + if ($Opts{Command_Line}) { + printf STDERR <request()->url(), $realm; + +Authentication is required for %s. +The realm is "%s". +Use the -u and -p options to specify a username and password and the -d option +to specify trusted domains. +EOF + } else { + + printf("Status: 401 Authorization Required\nWWW-Authenticate: %s\nConnection: close\nContent-Language: en\nContent-Type: text/html; charset=iso-8859-1\n\n", $r->headers->www_authenticate); + + printf("%s + + +W3C Link Checker: 401 Authorization Required +%s +", $DocType, $Head); + &banner(': 401 Authorization Required'); + printf("

    + You need \"%s\" access to %s to perform link checking.
    +", &encode($realm), (&encode($r->request()->url())) x 2); + + if ($Opts{Trusted}) { + printf <%s +EOF + } + + print "

    \n"; + } +} + +################## +# Get statistics # +################## + +sub get_timestamp () +{ + return pack('LL', Time::HiRes::gettimeofday()); +} + +sub time_diff ($$) +{ + my @start = unpack('LL', $_[0]); + my @stop = unpack('LL', $_[1]); + for ($start[1], $stop[1]) { + $_ /= 1_000_000; + } + return(sprintf("%.1f", ($stop[0]+$stop[1])-($start[0]+$start[1]))); +} + +######################## +# Handle the redirects # +######################## + +# Record the redirects in a hash +sub record_redirects (\%$) +{ + my ($redirects, $response) = @_; + for (my $prev = $response->previous(); $prev; $prev = $prev->previous()) { + $redirects->{$prev->request()->url()} = $response->request()->url(); + } +} + +# Determine if a request is redirected +sub is_redirected ($%) +{ + my ($uri, %redirects) = @_; + return(defined($redirects{$uri})); +} + +# Get a list of redirects for a URI +sub get_redirects ($%) +{ + my ($uri, %redirects) = @_; + my @history = ($uri); + my %seen = ($uri => 1); # for tracking redirect loops + my $loop = 0; + while ($redirects{$uri}) { + $uri = $redirects{$uri}; + push(@history, $uri); + if ($seen{$uri}) { + $loop = 1; + last; + } else { + $seen{$uri}++; + } + } + return ($loop, @history); +} + +#################################################### +# Tool for sorting the unique elements of an array # +#################################################### + +sub sort_unique (@) +{ + my %saw; + @saw{@_} = (); + return (sort { $a <=> $b } keys %saw); +} + +##################### +# Print the results # +##################### + +sub anchors_summary (\%\%) +{ + my ($anchors, $errors) = @_; + + # Number of anchors found. + my $n = scalar(keys(%$anchors)); + if (! $Opts{Quiet}) { + if ($Opts{HTML}) { + print("

    Anchors

    \n

    "); + } else { + print("Anchors\n\n"); + } + &hprintf("Found %d anchor%s.", $n, ($n == 1) ? '' : 's'); + print('

    ') if $Opts{HTML}; + print("\n"); + } + # List of the duplicates, if any. + my @errors = keys %{$errors}; + if (! scalar(@errors)) { + print("

    Valid anchors!

    \n") if (! $Opts{Quiet} && $Opts{HTML} && $n); + return; + } + undef $n; + + print('

    ') if $Opts{HTML}; + print('List of duplicate and empty anchors'); + print < + + + + + + + + +EOF + print("\n"); + + foreach my $anchor (@errors) { + my $format; + my @unique = &sort_unique(keys %{$anchors->{$anchor}}); + if ($Opts{HTML}) { + $format = "\n"; + } else { + my $s = (scalar(@unique) > 1) ? 's' : ''; + $format = "\t%s\tLine$s: %s\n"; + } + printf($format, + &encode($anchor eq '' ? 'Empty anchor' : $anchor), + join(', ', @unique)); + } + + print("\n
    AnchorsLines
    %s%s
    \n") if $Opts{HTML}; +} + +sub show_link_report (\%\%\%\%\@;$\%) +{ + my ($links, $results, $broken, $redirects, $urls, $codes, $todo) = @_; + + print("\n

    ") if $Opts{HTML}; + print("\n"); + + # Process each URL + my ($c, $previous_c); + foreach my $u (@$urls) { + my @fragments = keys %{$broken->{$u}{fragments}}; + # Did we get a redirect? + my $redirected = &is_redirected($u, %$redirects); + # List of lines + my @total_lines; + foreach my $l (keys %{$links->{$u}{location}}) { + push (@total_lines, $l); + } + foreach my $f (keys %{$links->{$u}{fragments}}) { + next if ($f eq $u && defined($links->{$u}{$u}{-1})); + foreach my $l (keys %{$links->{$u}{fragments}{$f}}) { + push (@total_lines, $l); + } + } + + my ($redirect_loop, @redirects_urls) = get_redirects($u, %$redirects); + my $currloc = $results->{$u}{location}; + + # Error type + $c = &code_shown($u, $results); + # What to do + my $whattodo; + my $redirect_too; + if ($todo) { + my $currmsg = $currloc->{message} || ''; + if ($u =~ m/^javascript:/) { + if ($Opts{HTML}) { + $whattodo = +'You must change this link: people using a browser without JavaScript support +will not be able to follow this link. See the +Web +Content Accessibility Guidelines on the use of scripting on the Web and +the +techniques +on how to solve this.'; + } else { + $whattodo = 'Change this link: people using a browser without JavaScript support will not be able to follow this link.'; + } + } elsif ($c == RC_ROBOTS_TXT()) { + $whattodo = 'The link was not checked due to robots exclusion ' . + 'rules. Check the link manually.'; + } elsif ($redirect_loop) { + $whattodo = + 'Retrieving the URI results in a redirect loop, that should be ' . + 'fixed. Examine the redirect sequence to see where the loop ' . + 'occurs.'; + } else { + $whattodo = $todo->{$c}; + } + # @@@ 303 and 307 ??? + if (defined($redirects{$u}) && ($c != 301) && ($c != 302)) { + $redirect_too = 'The original request has been redirected.'; + $whattodo .= ' '.$redirect_too unless $Opts{HTML}; + } + } else { + # Directory redirects + $whattodo = 'Add a trailing slash to the URL.'; + } + + my @unique = &sort_unique(@total_lines); + my $lines_list = join(', ', @unique); + my $s = (scalar(@unique) > 1) ? 's' : ''; + undef @unique; + + my @http_codes = ($currloc->{code}); + unshift(@http_codes, $currloc->{orig}) if $currloc->{orig}; + @http_codes = map { $_ < 0 ? '(N/A)' : $_ } @http_codes; + + if ($Opts{HTML}) { + # Style stuff + my $idref = ''; + if ($codes && (!defined($previous_c) || ($c != $previous_c))) { + $idref = ' id="d'.$doc_count.'code_'.$c.'"'; + $previous_c = $c; + } + # Main info + for (@redirects_urls) { + $_ = &show_url($_); + } + # HTTP message + my $http_message; + if ($currloc->{message}) { + $http_message = &encode($currloc->{message}); + if ($c == 404 || $c == 500) { + $http_message = ''. + $http_message.''; + } + } + my $redirmsg = + $redirect_loop ? ' redirect loop detected' : ''; + printf(" +%s +
    What to do: %s%s
    +
    Response status code: %s
    +Response message: %s%s%s
    +
    Line%s: %s
    \n", + # Anchor for return codes + $idref, + # List of redirects + $redirected ? + join(' redirected to
    ', @redirects_urls) . $redirmsg : + &show_url($u), + # Color + &bgcolor($c), + # What to do + $whattodo, + # Redirect too? + $redirect_too ? + sprintf(' %s', &bgcolor(301), $redirect_too) : '', + # Response code chain + join(' -> ', + map { &encode($_) } @http_codes), + # Realm + defined($currloc->{realm}) + ? sprintf('Realm: %s
    ', &encode($currloc->{realm})) : '', + # HTTP original message + defined($currloc->{orig_message}) + ? &encode($currloc->{orig_message}). + ' -> ' + : '', + # HTTP final message + $http_message, + $s, + # List of lines + $lines_list); + if ($#fragments >= 0) { + my $fragment_direction = ''; + if ($currloc->{code} == 200) { + $fragment_direction = + ' They need to be fixed!'; + } + printf("
    Broken fragments and their line numbers: %s
    \n", + $fragment_direction); + } + } else { + my $redirmsg = $redirect_loop ? ' redirect loop detected' : ''; + printf("\n%s\t%s\n Code: %s %s\n%s\n", + # List of redirects + $redirected ? join("\n-> ", @redirects_urls) . $redirmsg : $u, + # List of lines + $lines_list ? "Line$s: $lines_list" : '', + # Response code chain + join(' -> ', @http_codes), + # HTTP message + $currloc->{message} || '', + # What to do + wrap(' To do: ', ' ', $whattodo)); + if ($#fragments >= 0) { + if ($currloc->{code} == 200) { + print("The following fragments need to be fixed:\n"); + } else { + print("Fragments:\n"); + } + } + } + # Fragments + foreach my $f (@fragments) { + if ($Opts{HTML}) { + printf("
    %s: %s
    \n", + # Broken fragment + &show_url($u, $f), + # List of lines + join(', ', &sort_unique(keys %{$links->{$u}{fragments}{$f}}))); + } else { + my @unq = &sort_unique(keys %{$links->{$u}{fragments}{$f}}); + printf("\t%-30s\tLine%s: %s\n", + # Fragment + $f, + # Multiple? + (scalar(@unq) > 1) ? 's' : '', + # List of lines + join(', ', @unq)); + } + } + + print("
    \n") if ($Opts{HTML} && scalar(@fragments)); + } + + # End of the table + print("
    \n") if $Opts{HTML}; +} + +sub code_shown ($$) +{ + my ($u, $results) = @_; + + if ($results->{$u}{location}{record} == 200) { + return $results->{$u}{location}{orig} || $results->{$u}{location}{record}; + } else { + return $results->{$u}{location}{record}; + } +} + +# +# Checks whether we're allowed to retrieve the document based on it's IP +# address. Takes an URI object and returns a HTTP::Response containing the +# appropriate status and error message if the IP was disallowed, undef +# otherwise. URIs without hostname or IP address are always allowed, +# including schemes where those make no sense (eg. data:, often javascript:). +# +sub ip_allowed ($) +{ + my ($uri) = @_; + my $hostname = undef; + eval { $hostname = $uri->host() }; # Not all URIs implement host()... + return undef unless $hostname; + + my $addr = my $iptype = my $resp = undef; + if (my $host = gethostbyname($hostname)) { + $addr = inet_ntoa($host->addr()) if $host->addr(); + if ($addr && (my $ip = Net::IP->new($addr))) { + $iptype = $ip->iptype(); + } + } + $iptype = 'PUBLIC' + if ($iptype && $iptype eq 'PRIVATE' && $Opts{Allow_Private_IPs}); + if ($iptype && $iptype ne 'PUBLIC') { + $resp = HTTP::Response->new(403, + 'Checking non-public IP address disallowed by link checker configuration'); + } + return $resp; +} + +sub links_summary (\%\%\%\%) +{ + # Advices to fix the problems + + my %todo = ( 200 => 'There are broken fragments which must be fixed.', + 300 => 'It usually means that there is a typo in a link that triggers mod_speling action - this must be fixed!', + 301 => 'You should update the link.', + 302 => 'Usually nothing.', + 303 => 'Usually nothing.', + 307 => 'Usually nothing.', + 400 => 'Usually the sign of a malformed URL that cannot be parsed by the server.', + 401 => "The link is not public. You'd better specify it.", + 403 => 'The link is forbidden! This needs fixing. Usual suspects: a missing index.html or Overview.html, or a missing ACL.', + 404 => 'The link is broken. Fix it NOW!', + 405 => 'The server does not allow HEAD requests. Go ask the guys who run this server why. Check the link manually.', + 406 => "The server isn't capable of responding according to the Accept* headers sent. Check it out.", + 407 => 'The link is a proxy, but requires Authentication.', + 408 => 'The request timed out.', + 410 => 'The resource is gone. You should remove this link.', + 415 => 'The media type is not supported.', + 500 => 'This is a server side problem. Check the URI.', + 501 => 'Could not check this link: method not implemented or scheme not supported.', + 503 => 'The server cannot service the request, for some unknown reason.', + # Non-HTTP codes: + RC_ROBOTS_TXT() => "The link was not checked due to robots exclusion rules. Check the link manually, and see also the link checker documentation on robots exclusion.", + RC_DNS_ERROR() => 'The hostname could not be resolved. This link needs to be fixed.', + ); + my %priority = ( 410 => 1, + 404 => 2, + 403 => 5, + 200 => 10, + 300 => 15, + 401 => 20 + ); + + my ($links, $results, $broken, $redirects) = @_; + + # List of the broken links + my @urls = keys %{$broken}; + my @dir_redirect_urls = (); + if ($Opts{Redirects}) { + # Add the redirected URI's to the report + for my $l (keys %$redirects) { + next unless (defined($results->{$l}) + && defined($links->{$l}) + && !defined($broken->{$l})); + # Check whether we have a "directory redirect" + # e.g. 
http://www.w3.org/TR -> http://www.w3.org/TR/ + my ($redirect_loop, @redirects) = get_redirects($l, %$redirects); + if (($#redirects == 1) + && (($redirects[0].'/') eq $redirects[1])) { + push(@dir_redirect_urls, $l); + next; + } + push(@urls, $l); + } + } + + # Broken links and redirects + if ($#urls < 0) { + if (! $Opts{Quiet}) { + if ($Opts{HTML}) { + print "

    Links

    \n

    Valid links!

    "; + } else { + print "\nValid links."; + } + print "\n"; + } + } else { + print('

    ') if $Opts{HTML}; + print("\nList of broken links"); + print(' and redirects') if $Opts{Redirects}; + + # Sort the URI's by HTTP Code + my %code_summary; + my @idx; + foreach my $u (@urls) { + if (defined($results->{$u}{location}{record})) { + my $c = &code_shown($u, $results); + $code_summary{$c}++; + push(@idx, $c); + } + } + my @sorted = @urls[ + sort { + defined($priority{$idx[$a]}) ? + defined($priority{$idx[$b]}) ? + $priority{$idx[$a]} + <=> $priority{$idx[$b]} : + -1 : + defined($priority{$idx[$b]}) ? + 1 : + $idx[$a] <=> $idx[$b] + } 0 .. $#idx + ]; + @urls = @sorted; + undef(@sorted); undef(@idx); + + if ($Opts{HTML}) { + # Print a summary + print < +

    Fragments listed are broken. See the table below to know what action +to take.

    + + + + + + + + + +EOF + foreach my $code (sort(keys(%code_summary))) { + printf('', &bgcolor($code)); + printf('', + $doc_count, $code, $code < 0 ? '(N/A)' : $code); + printf('', $code_summary{$code}); + printf('', $todo{$code}); + print "\n"; + } + print "\n
    CodeOccurrencesWhat to do
    %s%s%s
    \n"; + } else { + print(':'); + } + &show_link_report($links, $results, $broken, $redirects, + \@urls, 1, \%todo); + } + + # Show directory redirects + if ($Opts{Dir_Redirects} && ($#dir_redirect_urls > -1)) { + print('

    ') if $Opts{HTML}; + print("\nList of directory redirects"); + print("

    \n

    The links below are not broken, but the document does not use the exact URL.

    ") if $Opts{HTML}; + &show_link_report($links, $results, $broken, $redirects, + \@dir_redirect_urls); + } +} + +############################################################################### + +################ +# Global stats # +################ + +sub global_stats () +{ + my $stop = &get_timestamp(); + my $n_docs = + ($doc_count <= $Opts{Max_Documents}) ? $doc_count : $Opts{Max_Documents}; + return sprintf('Checked %d document%s in %s seconds.', + $n_docs, + ($n_docs == 1) ? '' : 's', + &time_diff($timestamp, $stop)); +} + +################## +# HTML interface # +################## + +sub html_header ($;$$) +{ + my ($uri, $doform, $cookie) = @_; + + my $title = defined($uri) ? $uri : ''; + $title = ': ' . $title if ($title =~ /\S/); + + # mod_perl 1.99_05 doesn't seem to like if the "\n\n" isn't in the same + # print() statement as the last header... + + my $headers = ''; + if (! $Opts{Command_Line}) { + $headers .= "Cache-Control: no-cache\nPragma: no-cache\n" if $doform; + $headers .= "Content-Type: text/html; charset=iso-8859-1\n"; + $headers .= "Set-Cookie: $cookie\n" if $cookie; + $headers .= "Content-Language: en\n\n"; + } + + my $script = my $onload = ''; + if ($doform) { + $script = <<'EOF'; + +EOF + $onload = ' onload="document.forms[0].uri.focus()"'; + } + + print $headers, $DocType, " + + +W3C Link Checker", &encode($title), " +", $Head, $script, " +'; + &banner($title); +} + +sub banner ($) +{ + my ($title) = @_; + printf(<<'EOF', &encode($title), $Cfg{Doc_URI}); + + +
    +EOF +} + +sub bgcolor ($) +{ + my ($code) = @_; + my $class; + my $r = HTTP::Response->new($code); + if ($r->is_success()) { + return ''; + } elsif ($code == RC_ROBOTS_TXT()) { + $class = 'dubious'; + } elsif ($code == 300) { + $class = 'multiple'; + } elsif ($code == 401) { + $class = 'unauthorized'; + } elsif ($r->is_redirect()) { + $class = 'redirect'; + } elsif ($r->is_error()) { + $class = 'broken'; + } else { + $class = 'broken'; + } + return(' class="'.$class.'"'); +} + +sub show_url ($;$) +{ + my ($url, $fragment) = @_; + if (defined($fragment)) { + my $u = URI->new($url); + $u->fragment($fragment); + $url = $u->as_string(); + } + $url = &encode($url); + return sprintf('%s', + $url, defined($fragment) ? &encode($fragment) : $url); +} + +sub html_footer () +{ + printf("

    %s

    \n", &global_stats()) if ($doc_count > 0 && !$Opts{Quiet}); + printf(<<'EOF', $PACKAGE, $REVISION); +
    +
    +
    +%s
    %s +
    +
    + + +EOF +} + +sub file_uri ($) +{ + my ($uri) = @_; + &html_header($uri); + printf(<<'EOF', &encode($uri)); +

    Forbidden

    +

    You cannot check such a URI (%s).

    +EOF + &html_footer(); + exit; +} + +sub print_form ($) +{ + my ($q) = @_; + + # Override undefined values from the cookie, if we got one. + my $got_cookie = 0; + if (my %cookie = $q->cookie($PROGRAM)) { + $got_cookie = 1; + while (my ($key, $value) = each %cookie) { + $q->param($key, $value) unless defined($q->param($key)); + } + } + + my $chk = ' checked="checked"'; + $q->param('hide_type', 'all') unless $q->param('hide_type'); + + my $sum = $q->param('summary') ? $chk : ''; + my $red = $q->param('hide_redirects') ? $chk : ''; + my $all = ($q->param('hide_type') ne 'dir') ? $chk : ''; + my $dir = $all ? '' : $chk; + my $acc = $q->param('no_accept_language') ? $chk : ''; + my $rec = $q->param('recursive') ? $chk : ''; + my $dep = &encode($q->param('depth') || ''); + + my $cookie_options = ''; + if ($got_cookie) { + $cookie_options = " + + + "; + } else { + $cookie_options = " + "; + } + + print "
    +

    +

    +
    + Options +

    + +
    + + + +
    + +
    + , + +

    ", $cookie_options, " +

    +
    +

    +
    +"; +} + +sub encode (@) +{ + return $Opts{HTML} ? HTML::Entities::encode(@_) : @_; +} + +sub hprintf (@) +{ + if (! $Opts{HTML}) { + printf(@_); + } else { + print HTML::Entities::encode(sprintf($_[0], @_[1..@_-1])); + } +} + +# Local Variables: +# mode: perl +# indent-tabs-mode: nil +# tab-width: 2 +# perl-indent-level: 2 +# End: +# ex: ts=2 sw=2 et --- w3c-markup-validator-0.6.7.orig/httpd/conf/w3c-markup-validator.conf +++ w3c-markup-validator-0.6.7/httpd/conf/w3c-markup-validator.conf @@ -0,0 +1,8 @@ +ScriptAlias /w3c-markup-validator/check /usr/lib/cgi-bin/check +ScriptAlias /w3c-markup-validator/checklink /usr/lib/cgi-bin/checklink +Alias /w3c-markup-validator /usr/share/w3c-markup-validator/html + + + Options +Includes +MultiViews + AddHandler server-parsed .html + --- w3c-markup-validator-0.6.7.orig/debian/po/templates.pot +++ w3c-markup-validator-0.6.7/debian/po/templates.pot @@ -0,0 +1,54 @@ +# +# Translators, if you are not familiar with the PO format, gettext +# documentation is worth reading, especially sections dedicated to +# this format, e.g. by running: +# info -n '(gettext)PO Files' +# info -n '(gettext)Header Entry' +# +# Some information specific to po-debconf are available at +# /usr/share/doc/po-debconf/README-trans +# or http://www.debian.org/intl/l10n/po-debconf/README-trans +# +# Developers do not need to manually edit POT or PO files. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" --- w3c-markup-validator-0.6.7.orig/debian/po/ca.po +++ w3c-markup-validator-0.6.7/debian/po/ca.po @@ -0,0 +1,50 @@ +# w3c-markup-validator (debconf) translation to Catalan. +# Copyright (C) 2004 Free Software Foundation, Inc. +# Aleix Badia i Bosch , 2004 +# +msgid "" +msgstr "" +"Project-Id-Version: w3c-markup-validator 0.6.1\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2004-06-14 22:35+0200\n" +"Last-Translator: Aleix Badia i Bosch \n" +"Language-Team: Catalan \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, Both, None" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "Quin servidor voldríeu reconfigurar automàticament?" + +#. Type: select +#. 
Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" +"El w3c-markup-validator ha de comprovar si el servidor web està configurat " +"correctament." + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"El w3c-markup-validator suporta qualsevol servidor web que pugui cridar " +"seqüències CGI i permeti Server Side Includes (SSI), però aquest procés de " +"configuració automàtica només suporta l'Apache i l'Apache-SSL. Si utilitzeu " +"un altre servidor, per poder-lo utilitzar l'haureu de configurar manualment." --- w3c-markup-validator-0.6.7.orig/debian/po/de.po +++ w3c-markup-validator-0.6.7/debian/po/de.po @@ -0,0 +1,60 @@ +# +# Translators, if you are not familiar with the PO format, gettext +# documentation is worth reading, especially sections dedicated to +# this format, e.g. by running: +# info -n '(gettext)PO Files' +# info -n '(gettext)Header Entry' +# +# Some information specific to po-debconf are available at +# /usr/share/doc/po-debconf/README-trans +# or http://www.debian.org/intl/l10n/po-debconf/README-trans +# +# Developers do not need to manually edit POT or PO files. +# +msgid "" +msgstr "" +"Project-Id-Version: w3c-markup-validator\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2003-12-06 19:21+0100\n" +"Last-Translator: Patrick Willam \n" +"Language-Team: skolelinux-germany \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=iso-8859-1\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, Beide, Keinen" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "Welchen Web-Server möchten Sie automatisch re-konfigurieren lassen?" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" +"w3c-markup-validator muss überprüfen, ob Ihr Webserver richtig konfiguriert " +"ist." + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"w3c-markup-validator unterstützt jeden Web-Server, der CGI-Skripts aufrufen " +"kann und Server Side Includes (SSI) erlaubt; aber dieser automatische " +"Konfigurationsvorgang wird nur von Apache und Apache-SSL unterstützt. Wenn " +"Sie einen anderen Web-Server verwenden, werden Sie ihn manuell konfigurieren " +"müssen, bevor Sie in der Lage sind, dieses Paket zu nutzen." --- w3c-markup-validator-0.6.7.orig/debian/po/fr.po +++ w3c-markup-validator-0.6.7/debian/po/fr.po @@ -0,0 +1,61 @@ +# +# Translators, if you are not familiar with the PO format, gettext +# documentation is worth reading, especially sections dedicated to +# this format, e.g. 
by running: +# info -n '(gettext)PO Files' +# info -n '(gettext)Header Entry' +# +# Some information specific to po-debconf are available at +# /usr/share/doc/po-debconf/README-trans +# or http://www.debian.org/intl/l10n/po-debconf/README-trans +# +# Developers do not need to manually edit POT or PO files. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2003-03-09 18:37+1100\n" +"Last-Translator: Frédéric Schütz \n" +"Language-Team: French \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=ISO-8859-15\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, Tous, Aucun" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "Quel serveur web voulez-vous reconfigurer automatiquement ?" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" +"w3c-markup-validator doit vérifier si votre serveur web est correctement " +"configuré." + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"w3c-markup-validator supporte tout serveur qui est capable d'appeler des " +"scripts CGI et de faire des inclusions côté serveur (SSI) mais ce système de " +"configuration automatique ne supporte qu'Apache et Apache-SSL. Si vous " +"utilisez un autre serveur, vous devrez le configurer manuellement avant de " +"pouvoir utiliser le paquet." --- w3c-markup-validator-0.6.7.orig/debian/po/ja.po +++ w3c-markup-validator-0.6.7/debian/po/ja.po @@ -0,0 +1,60 @@ +# +# Translators, if you are not familiar with the PO format, gettext +# documentation is worth reading, especially sections dedicated to +# this format, e.g. by running: +# info -n '(gettext)PO Files' +# info -n '(gettext)Header Entry' +# +# Some information specific to po-debconf are available at +# /usr/share/doc/po-debconf/README-trans +# or http://www.debian.org/intl/l10n/po-debconf/README-trans +# +# Developers do not need to manually edit POT or PO files. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2003-11-19 15:26+0900\n" +"Last-Translator: Hideki Yamane \n" +"Language-Team: Japanese \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=EUC-JP\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, ξÊý, ¤É¤ì¤Ç¤â̵¤¤" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "¤É¤Î Web ¥µ¡¼¥Ð¤ò¼«Æ°Åª¤ËºÆÀßÄꤷ¤Þ¤¹¤«?" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" +"w3c-markup-validator ¤Ï Web ¥µ¡¼¥Ð¤¬Àµ¤·¤¯ÀßÄꤵ¤ì¤Æ¤¤¤ë¤«¤É¤¦¤«¤ò¥Á¥§¥Ã¥¯¤¹" +"¤ëɬÍפ¬¤¢¤ê¤Þ¤¹¡£" + +#. Type: select +#. 
Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"w3c-markup-validator ¤Ï CGI ¥¹¥¯¥ê¥×¥È¤È ¥µ¡¼¥Ð¥µ¥¤¥É¥¤¥ó¥¯¥ë¡¼¥É (SSI) ¤Î¼Â" +"¹Ô¤¬²Äǽ¤Ê web ¥µ¡¼¥Ð¤ò¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤Þ¤¹¤¬¡¢¤³¤Î¼«Æ°ÀßÄê¥×¥í¥»¥¹¤Ï Apache " +"¤È Apache-SSL ¤Î¤ß¤ò¥µ¥Ý¡¼¥È¤·¤Æ¤¤¤Þ¤¹¡£Â¾¤Î¥µ¡¼¥Ð¤òÍøÍѤ·¤Æ¤¤¤ë¾ì¹ç¡¢¥Ñ¥Ã" +"¥±¡¼¥¸¤òÍøÍѤǤ­¤ë¤è¤¦¤Ë¤¹¤ëÁ°¤Ë¼êÆ°¤ÇÀßÄê¤ò¹Ô¤¦É¬Íפ¬¤¢¤ê¤Þ¤¹¡£" --- w3c-markup-validator-0.6.7.orig/debian/po/it.po +++ w3c-markup-validator-0.6.7/debian/po/it.po @@ -0,0 +1,47 @@ +msgid "" +msgstr "" +"Project-Id-Version: w3c-markup-validator 0.6.1 (templates)\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2004-05-24 14:14+0200\n" +"Last-Translator: Luca Monducci \n" +"Language-Team: Italian \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, Entrambi, Nessuno" + +#. Type: select +#. Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "Quale server web si desidera riconfigurare automaticamente?" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "" +"w3c-markup-validator deve controllare se il server web è configurato " +"correttamente." + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"w3c-markup-validator supporta qualsiasi server web che può richiamare degli " +"script CGI e che permette Server Side Includes (SSI) ma questo programma di " +"configurazione automatica supporta solo Apache e Apache-SSL. Se si sta " +"usando un altro server è necessario configurarlo manualmente prima di poter " +"usare il pacchetto." --- w3c-markup-validator-0.6.7.orig/debian/po/nl.po +++ w3c-markup-validator-0.6.7/debian/po/nl.po @@ -0,0 +1,58 @@ +# +# Translators, if you are not familiar with the PO format, gettext +# documentation is worth reading, especially sections dedicated to +# this format, e.g. by running: +# info -n '(gettext)PO Files' +# info -n '(gettext)Header Entry' +# +# Some information specific to po-debconf are available at +# /usr/share/doc/po-debconf/README-trans +# or http://www.debian.org/intl/l10n/po-debconf/README-trans +# +# Developers do not need to manually edit POT or PO files. +# +msgid "" +msgstr "" +"Project-Id-Version: w3c-markup-validator\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2004-08-08 21:27+1000\n" +"PO-Revision-Date: 2003-08-11 12:41+0100\n" +"Last-Translator: Bart Cornelis \n" +"Language-Team: dutch \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=iso-8859-1\n" +"Content-Transfer-Encoding: 8bit\n" + +#. Type: select +#. Choices +#: ../templates:3 +msgid "Apache, Apache-SSL, Both, None" +msgstr "Apache, Apache-SSL, Beide, Geen" + +#. Type: select +#. 
Description +#: ../templates:5 +msgid "Which web server would you like to reconfigure automatically?" +msgstr "Welke webserver(s) wilt u automatisch laten herconfigureren?" + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator needs to check if your webserver is configured properly." +msgstr "w3c-markup-validator gaat na of uw webserver goed geconfigureerd is." + +#. Type: select +#. Description +#: ../templates:5 +msgid "" +"w3c-markup-validator supports any web server that can call CGI scripts and " +"allows Server Side Includes (SSI), but this automatic configuration process " +"only supports Apache and Apache-SSL. If you use another server, you will " +"have to configure it manually before being able to use the package." +msgstr "" +"w3c-markup-validator ondersteund elke webserver die in staat is om cgi-" +"scripts en server-side-includes (SSI) te gebruiken. Het automatische " +"configuratie process ondersteund daarentegen enkel Apache en Apache-SSL. " +"Indien u een andere server gebruikt dient u deze handmatig te configureren " +"voor u van dit pakket gebruik kunt maken." --- w3c-markup-validator-0.6.7.orig/debian/po/POTFILES.in +++ w3c-markup-validator-0.6.7/debian/po/POTFILES.in @@ -0,0 +1 @@ +[type: gettext/rfc822deb] templates --- w3c-markup-validator-0.6.7.orig/debian/README.config +++ w3c-markup-validator-0.6.7/debian/README.config @@ -0,0 +1,42 @@ +The w3c-markup-validator requires a web server that can at least call +CGI scripts and allows Server Side Includes (SSI) in the Apache way. + +If you're using the Debian package of Apache or Apache-SSL (1.3.x), the +installation process of this package can automatically update the +webserver's configuration (using the wwwconfig-common program) so that +the package works. If you're using any other server (eg Apache2), you will +have to configure it manually. + +To help you, the modifications that are added to the Apache webserver are +stored in /etc/w3c/apache.conf, which is reproduced below in its current +version: + +--- +ScriptAlias /w3c-markup-validator/check /usr/lib/cgi-bin/check +ScriptAlias /w3c-markup-validator/checklink /usr/lib/cgi-bin/checklink +Alias /w3c-markup-validator /usr/share/w3c-markup-validator/html + + + Options +Includes +MultiViews + AddHandler server-parsed .html + +--- + +If you are manually configuring Apache2, the modifications needed are +slightly different: + +--- +ScriptAlias /w3c-markup-validator/check /usr/lib/cgi-bin/check +ScriptAlias /w3c-markup-validator/checklink /usr/lib/cgi-bin/checklink +Alias /w3c-markup-validator /usr/share/w3c-markup-validator/html + + + Options +Includes +MultiViews + AddOutputFilter INCLUDES .html + +--- + +You will also need to enable the necessary modules (including at least cgi +and includes) + + -- Frederic Schutz Tue, 22 Jul 2003 22:17:02 +1000 --- w3c-markup-validator-0.6.7.orig/debian/TODO +++ w3c-markup-validator-0.6.7/debian/TODO @@ -0,0 +1,6 @@ +At the moment, this package uses a customised SGML catalog instead of the +centralised catalog provided in Debian. 
This will be changed as soon as +possible, probably when the next upstream version is released (it will +include better configuration options for choosing the catalog) + + -- Frederic Schutz Sun, 9 Mar 2003 19:55:30 +1100 --- w3c-markup-validator-0.6.7.orig/debian/dirs +++ w3c-markup-validator-0.6.7/debian/dirs @@ -0,0 +1,11 @@ +etc/w3c +usr/bin +usr/lib/cgi-bin +usr/share/w3c-markup-validator +usr/share/w3c-markup-validator/catalog +usr/share/w3c-markup-validator/config +usr/share/w3c-markup-validator/html +usr/share/w3c-markup-validator/html/dev/tests +usr/share/w3c-markup-validator/html/docs +usr/share/w3c-markup-validator/html/images +usr/share/w3c-markup-validator/html/source --- w3c-markup-validator-0.6.7.orig/debian/docs +++ w3c-markup-validator-0.6.7/debian/docs @@ -0,0 +1 @@ + --- w3c-markup-validator-0.6.7.orig/debian/control +++ w3c-markup-validator-0.6.7/debian/control @@ -0,0 +1,20 @@ +Source: w3c-markup-validator +Section: web +Priority: optional +Maintainer: Frederic Schutz +Build-Depends-Indep: debhelper (>= 4.1.16), sharutils, perl +Standards-Version: 3.6.1.1 + +Package: w3c-markup-validator +Architecture: all +Depends: perl (>= 5.8), debconf (>= 0.5), apache (>= 1.3.29.0.1-1) | httpd, libwww-perl, liburi-perl, libhtml-parser-perl (>= 3.25), libtext-iconv-perl, libset-intspan-perl, libnet-ip-perl, libconfig-general-perl (>= 2.06), sgml-data, w3c-dtd-xhtml (>= 1.1-5), opensp (>= 1.5release), wwwconfig-common +Recommends: w3-dtd-mathml +Suggests: libterm-readkey-perl (>= 2.00) +Description: W3C Markup Validator + This is a CGI script which lets you enter the URL of a web page which will + be then checked against a validating SGML parser for conformance to official + W3C recommendations. Pages can also be uploaded. A link checker is also + included. + . + These are the same scripts that are available on the W3C web site, + http://validator.w3.org. --- w3c-markup-validator-0.6.7.orig/debian/prerm +++ w3c-markup-validator-0.6.7/debian/prerm @@ -0,0 +1,69 @@ +#! /bin/sh +# prerm script for w3c-markup-validator +# +# see: dh_installdeb(1) + +set -e + +. /usr/share/debconf/confmodule + +# summary of how this script can be called: +# * `remove' +# * `upgrade' +# * `failed-upgrade' +# * `remove' `in-favour' +# * `deconfigure' `in-favour' +# `removing' +# +# for details, see http://www.debian.org/doc/debian-policy/ or +# the debian-policy package + + +case "$1" in + upgrade) + ;; + remove|deconfigure) + + db_get w3c-markup-validator/webserver || true + webserver="$RET" + case "$webserver" in + Apache) webservers="apache";; + Apache-SSL) webservers="apache-ssl";; + Both) webservers="apache apache-ssl";; + *) webservers="";; + esac + + includefile=/etc/w3c/apache.conf + + for server in $webservers; do + test -d /etc/$server || continue + + if [ -s /etc/$server/conf.d/w3c-markup-validator.conf ]; then + rm /etc/$server/conf.d/w3c-markup-validator.conf + + restart="$server $restart" + fi + . /usr/share/wwwconfig-common/apache-uninclude_all.sh + [ "$statut" = "purge" ] && restart="$server $restart" + done + + servers="apache-ssl apache" + . /usr/share/wwwconfig-common/restart.sh + + ;; + failed-upgrade) + ;; + *) + echo "prerm called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. 
+ +#DEBHELPER# + +exit 0 + + --- w3c-markup-validator-0.6.7.orig/debian/rules +++ w3c-markup-validator-0.6.7/debian/rules @@ -0,0 +1,92 @@ +#!/usr/bin/make -f +# Sample debian/rules that uses debhelper. +# GNU copyright 1997 to 1999 by Joey Hess. + +# Uncomment this to turn on verbose mode. +#export DH_VERBOSE=1 + +### DH_COMPAT is not used anymore, see file debian/compat +# This is the debhelper compatibility version to use. +#export DH_COMPAT=4 + +PACKAGE=w3c-markup-validator + +configure: + +build: debian/po/templates.pot + +debian/po/templates.pot: debian/templates + @debconf-updatepo + +clean: + dh_testdir + dh_testroot + rm -f debian/checklink.1 + dh_clean + +install: build + dh_testdir + dh_testroot + dh_clean -k + dh_installdirs + + # Add here commands to install the package into debian/w3c-markup-validator. + install -o root -g root -m 0755 httpd/cgi-bin/check debian/${PACKAGE}/usr/lib/cgi-bin + install -o root -g root -m 0755 httpd/cgi-bin/checklink debian/${PACKAGE}/usr/lib/cgi-bin/checklink + ln -s ../lib/cgi-bin/checklink debian/${PACKAGE}/usr/bin/checklink + install -o root -g root -m 0644 htdocs/config/validator.conf debian/${PACKAGE}/etc/w3c/validator.conf + install -o root -g root -m 0644 htdocs/config/checklink.conf debian/${PACKAGE}/etc/w3c/checklink.conf + install -o root -g root -m 0644 httpd/conf/${PACKAGE}.conf debian/${PACKAGE}/etc/w3c/apache.conf + install -o root -g root -m 0644 catalog/sgml.soc debian/${PACKAGE}/usr/share/${PACKAGE}/catalog + install -o root -g root -m 0644 catalog/sgml.dcl debian/${PACKAGE}/usr/share/${PACKAGE}/catalog + install -o root -g root -m 0644 catalog/xml.soc debian/${PACKAGE}/usr/share/${PACKAGE}/catalog + install -o root -g root -m 0644 catalog/xml.dcl debian/${PACKAGE}/usr/share/${PACKAGE}/catalog + install -o root -g root -m 0644 htdocs/*.html debian/${PACKAGE}/usr/share/${PACKAGE}/html +# rm debian/${PACKAGE}/usr/share/${PACKAGE}/html/p3p.html + install -o root -g root -m 0644 htdocs/*.css debian/${PACKAGE}/usr/share/${PACKAGE}/html + install -o root -g root -m 0644 htdocs/config/*.cfg debian/${PACKAGE}/usr/share/${PACKAGE}/config +# rm debian/${PACKAGE}/usr/share/${PACKAGE}/config/check.cfg + install -o root -g root -m 0644 htdocs/dev/tests/* debian/${PACKAGE}/usr/share/${PACKAGE}/html/dev/tests + install -o root -g root -m 0644 htdocs/docs/*.html debian/${PACKAGE}/usr/share/${PACKAGE}/html/docs + install -o root -g root -m 0644 htdocs/source/*.html debian/${PACKAGE}/usr/share/${PACKAGE}/html/source + install -o root -g root -m 0644 htdocs/images/*.gif htdocs/images/*.png htdocs/images/*.jpg debian/${PACKAGE}/usr/share/${PACKAGE}/html/images + install -o root -g root -m 0644 htdocs/loadexplanation.js debian/${PACKAGE}/usr/share/${PACKAGE}/html/ + uudecode -o debian/${PACKAGE}/usr/share/${PACKAGE}/html/images/w3c_home.png htdocs/images/w3c_home.enc + uudecode -o debian/${PACKAGE}/usr/share/${PACKAGE}/html/images/qa-small.png htdocs/images/qa-small.enc + +# Build architecture-independent files here. 
+binary-indep: build install + dh_testdir + dh_testroot + dh_installdebconf + dh_installdocs -n debian/README.config + ln -s ../../${PACKAGE}/html/dev/tests debian/${PACKAGE}/usr/share/doc/${PACKAGE}/tests +# dh_installexamples +# dh_installmenu +# dh_installlogrotate +# dh_installemacsen +# dh_installpam +# dh_installmime +# dh_installinit +# dh_installcron + pod2man --center="W3C Link Checker" httpd/cgi-bin/checklink.pod >debian/checklink.1 + dh_installman debian/checklink.1 +# dh_installinfo + dh_installchangelogs +# dh_link +# dh_strip + dh_compress + dh_fixperms +# dh_makeshlibs + dh_installdeb + dh_perl -V + dh_shlibdeps + dh_gencontrol + dh_md5sums + dh_builddeb + +# We have no architecture-dependent files so this section is empty. +binary-arch: build install + +binary: binary-indep binary-arch +.PHONY: build clean binary-indep binary-arch binary install configure --- w3c-markup-validator-0.6.7.orig/debian/changelog +++ w3c-markup-validator-0.6.7/debian/changelog @@ -0,0 +1,120 @@ +w3c-markup-validator (0.6.7-2) unstable; urgency=low + + * Added documentation in README.Debian about validation of pages on + private IP addresses (Closes: Bug#268089) + * Corrected a few broken image links in footer.html and check + + -- Frederic Schutz Mon, 30 Aug 2004 16:36:18 +1000 + +w3c-markup-validator (0.6.7-1) unstable; urgency=low + + * New upstream release + * Updated internal SGML catalogs to use the new location of XML DTDs in + Debian. Requires w3c-dtd-xhtml >= 1.1-5. + * Updated linkchecker to version 4.0; in the future, it will be + provided as a separate package (w3c-linkchecker) + * Apache configuration now uses the conf.d directory (Closes: Bug#227855) + Requires apache (>= 1.3.29.0.1-1). + * debian/po: + + added Italian po-debconf translation, courtesy of Luca Monducci + (Closes: Bug#251741) + + added Catalan po-debconf translation, courtesy of Aleix Badia i Bosch + (Closes: Bug#254867) + + -- Frederic Schutz Mon, 9 Aug 2004 21:18:12 +1000 + +w3c-markup-validator (0.6.1-4) unstable; urgency=low + + * debian/control: + + added dependencies on libnet-ip-perl and libconfig-general-perl, now + required by checklink.pl + + added suggestion on libterm-readkey-perl, which can be used to enter + a password for checklink.pl + * httpd/cgi-bin/checklink.pl: updated to latest CVS version + + Unbreak --masquerade in checklink.pl (Closes: Bug#187791) + * check: backport fixes from CVS + + Untaint variables read from the configuration file (we must trust them + anyway) so that Perl (>= 5.8.1) does not yield an error when calling + the Open3 function. (Closes: Bug#217353) + * debian/po: + + added German po-debconf translation, translated by Pattrick + William, Tomas and Friedemann from the german Skolelinux team, courtesy + of . (Closes: Bug#223121) + + added Japanese po-debconf translation, courtesy of Hideki Yamane + (Closes: Bug#224184) + * Uploaded for Frederic by Jaldhar H. Vyas + + -- Jaldhar H. Vyas Wed, 31 Dec 2003 20:27:25 -0500 + +w3c-markup-validator (0.6.1-3) unstable; urgency=low + + * debian/checkink.1: removed, since the man page has been converted + to POD format and submitted upstream for inclusion in checklink.pl + * debian/dirs: removed etc/apache + * debian/control + + Now depends on Perl (>= 5.8) for correct handling of UNICODE characters + + Perl 5.8 provides libtime-hires-perl and libcgi-pm-perl in sufficient + version, so we don't need to depend on them anymore. 
+ + Build-depends-indep: debhelper (>= 4.1.16), and removed po-debconf, + as suggested by the po-debconf man page + + Updated to standards 3.6.1 (nothing to change) + + The W3C produces "recommendations", not standards -- changed the + description accordingly + * debian/po: added dutch po-debconf translation, courtesy of + . (Closes: Bug#204927) + * debian/README.config: added configuration snippet for Apache2 + * debian/rules + + Added target debian/po/templates.pot in rules files to make sure + that the PO files are always up to date when the debconf template + is changed, as suggested by the po-debconf man page + + Generate the manpage while building using POD in checklink.pl + * htdocs/*: backported small changes from CVS + * httpd/cgi-bin/checklink.pl (backported from CVS) + + Added POD documentation + + Stop if unknown option on the command line (Closes: Bug#187792) + + Added option "-?" for help, and clarified help about the + --location option (Closes: Bug#189642) + + Fix redirect loop detection (Closes: Bug#188372) + + -- Frederic Schutz Sun, 7 Sep 2003 01:36:01 +1000 + +w3c-markup-validator (0.6.1-2) unstable; urgency=low + + * Depends: apache|httpd instead of apache|apache-ssl. + If you use another webserver, you'll have to configure it manually, + see the new README.config documentation file. Closes: Bug#177965 + * Depends: w3c-dtd-xhtml to allow validation of XHTML (1.0, 1.1 and + basic) documents. DTDs were previously provided by the sgml-data + package. Closes: Bug#178529 + * Added TODO file. + * Converted the package to po-debconf for the translation of debconf + templates, requires debhelper (>= 4.1.13), po-debconf for build. + * Debhelper changes: + - uses dh_installman instead of the (deprecated) dh_installmanpages + - uses debian/compat file instead of DH_COMPAT + * Updated to standards 3.5.9.0 (nothing to change) + + -- Frederic Schutz Tue, 18 Mar 2003 11:56:21 +1100 + +w3c-markup-validator (0.6.1-1) unstable; urgency=low + + * New upstream release + * First official release for Debian, closes: Bug#166025 + * Converted the installation scripts to debconf and wwwconfig-common + * Added support for the MathML DTD present in Debian (the package now + Recommends: w3-dtd-mathml) + + -- Frederic Schutz Mon, 16 Dec 2002 22:06:03 +1100 + +w3c-markup-validator (0.6.0-1) unstable; urgency=low + + * New upstream release + * Added a manpage for checklink(1) + + -- Frederic Schutz Tue, 26 Nov 2002 22:54:11 +1100 + +w3c-markup-validator (0.0.20021103-1) unstable; urgency=low + + * Initial Release, from the upstream CVS + + -- Frederic Schutz Mon, 4 Nov 2002 22:14:00 +1100 --- w3c-markup-validator-0.6.7.orig/debian/postinst +++ w3c-markup-validator-0.6.7/debian/postinst @@ -0,0 +1,78 @@ +#! /bin/sh +# postinst script for w3c-markup-validator +# +# see: dh_installdeb(1) + +set -e + +. /usr/share/debconf/confmodule + +# summary of how this script can be called: +# * `configure' +# * `abort-upgrade' +# * `abort-remove' `in-favour' +# +# * `abort-deconfigure' `in-favour' +# `removing' +# +# for details, see http://www.debian.org/doc/debian-policy/ or +# the debian-policy package +# +# quoting from the policy: +# Any necessary prompting should almost always be confined to the +# post-installation script, and should be protected with a conditional +# so that unnecessary prompting doesn't happen if a package's +# installation fails and the `postinst' is called with `abort-upgrade', +# `abort-remove' or `abort-deconfigure'. 
+ +case "$1" in + configure|reconfigure) + + db_get w3c-markup-validator/webserver || true + webserver="$RET" + case "$webserver" in + Apache) webservers="apache";; + Apache-SSL) webservers="apache-ssl";; + Both) webservers="apache apache-ssl";; + *) webservers="";; + esac + + includefile=/etc/w3c/apache.conf + + for server in $webservers; do + test -d /etc/$server/conf.d || continue + + if [ ! -s /etc/$server/conf.d/w3c-markup-validator.conf ]; then + ln -s $includefile /etc/$server/conf.d/w3c-markup-validator.conf + # Transition from include-based configuration, if needed + . /usr/share/wwwconfig-common/apache-uninclude_all.sh + restart="$server $restart" + fi + + done + + servers="apache-ssl apache" + . /usr/share/wwwconfig-common/restart.sh + + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# + +db_stop + +exit 0 + + --- w3c-markup-validator-0.6.7.orig/debian/compat +++ w3c-markup-validator-0.6.7/debian/compat @@ -0,0 +1 @@ +4 --- w3c-markup-validator-0.6.7.orig/debian/config +++ w3c-markup-validator-0.6.7/debian/config @@ -0,0 +1,9 @@ +#!/bin/sh -e + +# Source debconf library. +. /usr/share/debconf/confmodule + +# +db_input medium w3c-markup-validator/webserver || true +db_go + --- w3c-markup-validator-0.6.7.orig/debian/README.Debian +++ w3c-markup-validator-0.6.7/debian/README.Debian @@ -0,0 +1,75 @@ +w3c-markup-validator for Debian +------------------------------- + +The markup validator is accessible locally at the URL + + http://localhost/w3c-markup-validator + +The checklink script can be used either as a command-line script +(/usr/bin/checklink), or as a CGI script at the URL + + http://localhost/w3c-markup-validator/checklink + +If you find a problem while using the validator, like a page +that doesn't validate even though it is correct, or any other error, +please try to validate it with the public W3C validator at +http://validator.w3.org/ before reporting a bug. + +If the page validates correctly on the W3C validator but doesn't on +your local copy, it may be due to one of the following reasons: + + - Debian and the W3C validator do not provide the same set of DTDs, + so if your document specifies a DTD that is not available in the + standard Debian packages, the validator will not be able to + check it. You can file a wishlist bug against sgml-data (please + send me a copy as well) to have it added. + + - there is (at least) one strange bug that makes opensp report + inexistant errors when it receives a file to check on its standard + input (as is the case with the validator), see bug# 170795 in the + Debian Bug Tracking System. If you experience this bug, please send + me a note with some information on your configuration. + +Note that by default, the validator refuses to consider pages that are +on a private IP address (eg 10.0.0.0, 192.168.0.0, etc), to avoid creating +a "backdoor" to the private network. If the validator is not accessible +from outside your private network and you want it to validate inside pages, +change the configuration variable "Allow Private IPs" from 0 to 1 in the +config file /etc/w3c/validator.conf. + +--- +A snippet of code (courtesy of Terje Bless ): + +Save this as e.g. 
"post.pl" and run it as "./post.pl file.html": + +#!/usr/bin/perl + +use LWP::UserAgent; +use HTTP::Request::Common 'POST'; + +print LWP::UserAgent + ->new + ->request( + POST 'http://localhost/w3-markup-validator/check', + Content_Type => 'form-data', + Content => [ + output => 'xml', + uploaded_file => [$ARGV[0]], + ] + )->as_string; +__END__ + +(or replace the URL with http://validator.w3.org/check for the official +W3 validator). + +The "output" parameter can be adjusted to suit you ("xml", "earl", "n3") +or omitted alltogether to get the HTML report. Other interesting +parameters are "charset" (value is any charset we support (quite a few)) +and "verbose" ("0" or "1") for verbose results in the HTML report only. + +Note that this does not imply that the XML (or n3, or earl) interfaces are +any less beta then they were. It's just a useful little snippet for +playing around with this. +--- + + -- Frederic Schutz , Mon, 4 Nov 2002 22:14:00 +1100 --- w3c-markup-validator-0.6.7.orig/debian/postrm +++ w3c-markup-validator-0.6.7/debian/postrm @@ -0,0 +1,41 @@ +#! /bin/sh +# postrm script for w3c-markup-validator +# +# see: dh_installdeb(1) + +set -e + +# summary of how this script can be called: +# * `remove' +# * `purge' +# * `upgrade' +# * `failed-upgrade' +# * `abort-install' +# * `abort-install' +# * `abort-upgrade' +# * `disappear' overwrit>r> +# for details, see http://www.debian.org/doc/debian-policy/ or +# the debian-policy package + + +case "$1" in + remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) + + ;; + + purge) + rmdir /etc/w3c 2>/dev/null || true + ;; + + *) + echo "postrm called with unknown argument \`$1'" >&2 + exit 1 + +esac + +# dh_installdeb will replace this with shell code automatically +# generated by other debhelper scripts. + +#DEBHELPER# + +exit 0 --- w3c-markup-validator-0.6.7.orig/debian/copyright +++ w3c-markup-validator-0.6.7/debian/copyright @@ -0,0 +1,97 @@ +This package was debianized by Frederic Schutz on +Mon, 4 Nov 2002 22:14:00 +1100. + +It was downloaded from the URL http://validator.w3.org/validator.tar.gz, +with some pieces coming from the project CVS at +anonymous@dev.w3.org:/sources/public (see details on the page +http://validator.w3.org/source/) + +Upstream Authors: Gerald Oskoboiny and others + +Copyright: the source code for the W3C markup validation service is +available under the terms of the W3C Software Copyright (compatible with +the GNU G P L), which can be found at the URL + + http://www.w3.org/Consortium/Legal/copyright-software-19980720 + +and is reproduced below: + + + W3C ® SOFTWARE NOTICE AND LICENSE + + Copyright © 1994-2002 [1]World Wide Web Consortium, ([2]Massachusetts + Institute of Technology, [3]Institut National de Recherche en Informatique + et en Automatique, [4]Keio University). All Rights Reserved. + http://www.w3.org/Consortium/Legal/ + + This W3C work (including software, documents, or other related items) + is being provided by the copyright holders under the following + license. By obtaining, using and/or copying this work, you (the + licensee) agree that you have read, understood, and will comply with + the following terms and conditions: + + Permission to use, copy, modify, and distribute this software and its + documentation, with or without modification, for any purpose and + without fee or royalty is hereby granted, provided that you include + the following on ALL copies of the software and documentation or + portions thereof, including modifications, that you make: + 1. 
The full text of this NOTICE in a location viewable to users of + the redistributed or derivative work. + 2. Any pre-existing intellectual property disclaimers, notices, or + terms and conditions. If none exist, a short notice of the + following form (hypertext is preferred, text is permitted) should + be used within the body of any redistributed or derivative code: + "Copyright © [$date-of-software] [5]World Wide Web Consortium, + ([6]Massachusetts Institute of Technology, [7]Institut National de + Recherche en Informatique et en Automatique, [8]Keio University). + All Rights Reserved. http://www.w3.org/Consortium/Legal/" + 3. Notice of any changes or modifications to the W3C files, including + the date changes were made. (We recommend you provide URIs to the + location from which the code is derived.) + THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT + HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, + INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS + FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR + DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, + TRADEMARKS OR OTHER RIGHTS. + + COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL + OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR + DOCUMENTATION. + + The name and trademarks of copyright holders may NOT be used in + advertising or publicity pertaining to the software without specific, + written prior permission. Title to copyright in this software and any + associated documentation will at all times remain with copyright + holders. + + ____________________________________ + + This formulation of W3C's notice and license became active on August + 14 1998 so as to improve compatibility with G P L. This version ensures + that W3C software licensing terms are no more restrictive than G P L and + consequently W3C software may be distributed in G P L packages. See the + [9]older formulation for the policy prior to this date. Please see our + [10]Copyright FAQ for common questions about using materials from our + site, including specific terms and conditions for packages like + libwww, Amaya, and Jigsaw. Other questions about this notice can be + directed to [11]site-policy@w3.org. + + + [12]webmaster + (last updated $Date: 2002/02/13 14:08:32 $) + +References + + 1. http://www.w3.org/ + 2. http://www.lcs.mit.edu/ + 3. http://www.inria.fr/ + 4. http://www.keio.ac.jp/ + 5. http://www.w3.org/ + 6. http://www.lcs.mit.edu/ + 7. http://www.inria.fr/ + 8. http://www.keio.ac.jp/ + 9. http://www.w3.org/Consortium/Legal/copyright-software-19980519.html + 10. http://www.w3.org/Consortium/Legal/IPR-FAQ.html + 11. mailto:site-policy@w3.org + 12. http://www.w3.org/Help/Webmaster.html --- w3c-markup-validator-0.6.7.orig/debian/templates +++ w3c-markup-validator-0.6.7/debian/templates @@ -0,0 +1,13 @@ +Template: w3c-markup-validator/webserver +Type: select +_Choices: Apache, Apache-SSL, Both, None +Default: ${webserver} +_Description: Which web server would you like to reconfigure automatically? + w3c-markup-validator needs to check if your webserver is configured + properly. + . + w3c-markup-validator supports any web server that can call CGI scripts and + allows Server Side Includes (SSI), but this automatic configuration + process only supports Apache and Apache-SSL. If you use another server, + you will have to configure it manually before being able to use the + package. 
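
For web servers that this debconf question cannot configure automatically, the manual setup boils down to the same two steps the postinst above performs for Apache: make the fragment shipped as /etc/w3c/apache.conf visible to the server, then reload it. A minimal sketch, assuming a server that reads an Apache-style conf.d directory (the /etc/apache/conf.d path and the init script name are assumptions to adapt, not something the package guarantees):

# Sketch only: mirror what debian/postinst does for Apache/Apache-SSL.
# Adjust the conf.d path and the reload command for the server in use.
ln -s /etc/w3c/apache.conf /etc/apache/conf.d/w3c-markup-validator.conf
/etc/init.d/apache reload
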
--- w3c-markup-validator-0.6.7.orig/catalog/xml.dcl +++ w3c-markup-validator-0.6.7/catalog/xml.dcl @@ -0,0 +1,191 @@ +" + PIC "?>" + SHORTREF NONE + + NAMES + SGMLREF + + QUANTITY + NONE -- Quantities are not restricted in XML -- + + ENTITIES + "amp" 38 + "lt" 60 + "gt" 62 + "quot" 34 + "apos" 39 + + FEATURES + MINIMIZE + DATATAG NO + OMITTAG NO + RANK NO + SHORTTAG + STARTTAG + EMPTY NO + UNCLOSED NO + NETENABL IMMEDNET + ENDTAG + EMPTY NO + UNCLOSED NO + ATTRIB + DEFAULT YES + OMITNAME NO + VALUE NO + EMPTYNRM YES + IMPLYDEF + ATTLIST NO -- VALID: was YES -- + DOCTYPE NO + ELEMENT NO -- VALID: was YES -- + ENTITY NO + NOTATION NO -- VALID: was YES -- + LINK + SIMPLE NO + IMPLICIT NO + EXPLICIT NO + OTHER + CONCUR NO + SUBDOC NO + FORMAL NO + URN NO + KEEPRSRE YES + VALIDITY TYPE -- VALID: was NOASSERT -- + ENTITIES + REF ANY + INTEGRAL YES + + APPINFO NONE + + SEEALSO "ISO 8879//NOTATION Extensible Markup Language (XML) 1.0//EN" +> + --- w3c-markup-validator-0.6.7.orig/catalog/xml.soc +++ w3c-markup-validator-0.6.7/catalog/xml.soc @@ -0,0 +1,87 @@ +SGMLDECL "/usr/share/sgml/declaration/xml.dcl" +OVERRIDE YES + +PUBLIC "-//W3C//DTD Specification V2.0//EN" "http://www.w3.org/XML/1998/06/xmlspec-v20.dtd" +SYSTEM "spec.dtd" "http://www.w3.org/XML/1998/06/xmlspec-v20.dtd" + +-- MathMLL 2.0 Catalog Data File -- +-- MathML 2.0 DTD driver -- + +PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" + /usr/share/xml/schema/w3c/mathml/dtd/xhtml-math11-f.dtd +PUBLIC "-//W3C//DTD MathML 2.0//EN" + /usr/share/xml/schema/w3c/mathml/dtd/mathml2.dtd + +SYSTEM "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd" + "/usr/share/xml/schema/w3c/mathml/dtd/xhtml-math11-f.dtd" +SYSTEM "http://www.w3.org/TR/MathML2/dtd/mathml2.dtd" + "/usr/share/xml/schema/w3c/mathml/dtd/mathml2.dtd" + +-- XHTML + MathML + SVG -- +-- PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" + "xhtml-math-svg-flat-20020809.dtd" -- + +-- PUBLIC "-//W3C//DTD SVG 20010719//EN" PR-SVG-20010719/svg10.dtd -- +PUBLIC "-//W3C//DTD SVG 1.0//EN" "/usr/xml/svg/svg10.dtd" + +-- XHTML 1.0/1.1/Basic Catalog Data File -- + +-- XHTML 1.1 DTD modular driver file -- +-- note that this uses the local, flattened version of the DTD. 
If you want + your catalog to use the master version of the XHTML Modules, change the + entry to reference xhtml11.dtd instead of xhtml11-flat.dtd +-- + +PUBLIC "-//W3C//DTD XHTML 1.1//EN" /usr/share/xml/xhtml/schema/dtd/1.1/xhtml11-flat.dtd + + +-- XHTML 1.1 framework modules -- +PUBLIC "-//W3C//ENTITIES XHTML 1.1 Document Model 1.0//EN" /usr/share/xml/xhtml/schema/dtd/1.1/xhtml11-model-1.mod + + -- Oasis entity catalog for Extensible HTML 1.0 -- + +PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" /usr/share/xml/xhtml/schema/dtd/1.0/xhtml1-strict.dtd +PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" /usr/share/xml/xhtml/schema/dtd/1.0/xhtml1-transitional.dtd +PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" /usr/share/xml/shtml/schema/dtd/1.0/xhtml1-frameset.dtd + + + -- ISO latin 1 entity set for Extensible HTML (XML 1.0 format) -- +PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" /usr/share/xml/entities/xhtml/xhtml-lat1.ent +PUBLIC "-//W3C//ENTITIES Symbols for XHTML//EN" /usr/share/xml/entities/xhtml/xhtml-symbol.ent +PUBLIC "-//W3C//ENTITIES Special for XHTML//EN" /usr/share/xml/entities/xhtml/xhtml-special.ent + + -- XHTML Basic DTD modular driver file -- +PUBLIC "-//W3C//DTD XHTML Basic 1.0//EN" /usr/share/xml/xhtml/schema/dtd/basic/xhtml-basic10.dtd" + + -- XHTML Basic framework module -- +-- PUBLIC "-//W3C//ENTITIES XHTML Basic 1.0 Document Model 1.0//EN" "REC-xhtml-basic-20001219/xhtml-basic10-model-1.mod" -- + +-- SGML Open Catalog file for SMIL 1.0/2.0 + + This is SMIL 1.0/2.0. + Copyright 2000 W3C (MIT, INRIA, Keio), All Rights Reserved. +-- +-- DTD drivers -- +-- PUBLIC "-//W3C//DTD SMIL 1.0//EN" REC-smil-19980615/smil10.dtd -- +-- PUBLIC "-//W3C//DTD SMIL 2.0//EN" PR-smil20-20010605/SMIL20.dtd -- +-- PUBLIC "-//W3C//DTD SMIL 2.0 Basic//EN" PR-smil20-20010605/SMIL20Basic.dtd -- + +-- Framework utilities --- +-- PUBLIC "-//W3C//ENTITIES SMIL 2.0 Modular Framework 1.0//EN" PR-smil20-20010605/smil-framework-1.mod -- +-- PUBLIC "-//W3C//ENTITIES SMIL 2.0 Datatypes 1.0//EN" PR-smil20-20010605/smil-datatypes-1.mod -- +-- PUBLIC "-//W3C//ENTITIES SMIL 2.0 Qualified Names 1.0//EN" PR-smil20-20010605/smil-qname-1.mod -- +-- PUBLIC "-//W3C//ENTITIES SMIL 2.0 Common Attributes 1.0//EN" PR-smil20-20010605/smil-attribs-1.mod -- +-- PUBLIC "-//W3C//ENTITIES SMIL 2.0 Document Model 1.0//EN" PR-smil20-20010605/smil-model-1.mod -- + +-- SMIL 2.0 elements from section modules --- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Document Structure//EN" PR-smil20-20010605/SMIL-struct.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Animation//EN" PR-smil20-20010605/SMIL-anim.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Content Control//EN" PR-smil20-20010605/SMIL-control.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Document Metainformation//EN" PR-smil20-20010605/SMIL-metainformation.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Layout//EN" PR-smil20-20010605/SMIL-layout.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Linking//EN" PR-smil20-20010605/SMIL-link.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Media Objects//EN" PR-smil20-20010605/SMIL-media.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Timing//EN" PR-smil20-20010605/SMIL-timing.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Transition//EN" PR-smil20-20010605/SMIL-transition.mod -- +-- PUBLIC "-//W3C//ELEMENTS SMIL 2.0 Integration//EN" PR-smil20-20010605/SMIL-integrate.mod -- + --- w3c-markup-validator-0.6.7.orig/catalog/sgml.dcl +++ w3c-markup-validator-0.6.7/catalog/sgml.dcl @@ -0,0 +1,81 @@ + --- w3c-markup-validator-0.6.7.orig/catalog/sgml.soc +++ 
w3c-markup-validator-0.6.7/catalog/sgml.soc @@ -0,0 +1,151 @@ + + -- SGML catalog for the W3C MarkUp Validation Service -- + -- $Id: sgml.soc,v 1.3 2002/10/27 15:36:19 ville Exp $ -- + +OVERRIDE YES -- prefer public identifiers to system identifiers -- +SGMLDECL /usr/share/sgml/html/dtd/4.01/HTML4.decl + + -- ISO latin 1 entity set for HTML -- +-- PUBLIC "-//IETF//ENTITIES Added Latin 1 for HTML//EN" html-latin.sgml -- +-- PUBLIC "ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML" ISOlat1.ent -- +-- PUBLIC "ISO 8879-1986//ENTITIES Added Latin 1//EN" ISOlat1.ent -- + + -- fake HTMLicons reference -- +-- PUBLIC "-//IETF//ENTITIES icons for HTML//EN" html-icons.sgml -- + + -- fake HTMLmath reference -- +-- PUBLIC "-//IETF//ENTITIES Math and Greek for HTML//EN" html-math.sgml -- + + -- Ways to refer to Level 3: most general to most specific -- +PUBLIC "-//IETF//DTD HTML//EN//3.0" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//W3O//DTD W3 HTML 3.0//EN//" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//W3O//DTD W3 HTML 3.0//EN" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//W3C//DTD HTML 3 1995-03-24//EN" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//IETF//DTD HTML 3.0//EN" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//IETF//DTD HTML 3.0//EN//" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//IETF//DTD HTML Level 3//EN" /usr/share/sgml/html/dtd/html-3.dtd +PUBLIC "-//IETF//DTD HTML Level 3//EN//3.0" /usr/share/sgml/html/dtd/html-3.dtd + + -- AdvaSoft's more up-to-date DTD for 3.0; see html-3-as.dtd for info -- +-- PUBLIC "-//AS//DTD HTML 3.0 asWedit + extensions//EN" html-3-as.dtd -- +-- PUBLIC "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//EN" html-3-as.dtd -- + + -- Ways to refer to strict Level 3: most general to most specific -- +PUBLIC "-//IETF//DTD HTML Strict//EN//3.0" /usr/share/sgml/html/dtd/html-3s.dtd +PUBLIC "-//W3O//DTD W3 HTML Strict 3.0//EN//" /usr/share/sgml/html/dtd/html-3s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 3//EN" /usr/share/sgml/html/dtd/html-3s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 3//EN//3.0" /usr/share/sgml/html/dtd/html-3s.dtd + + -- Ways to refer to Level 2: most general to most specific -- +PUBLIC "HTML" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML//EN" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML//EN//2.0" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML 2.0//EN" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML Level 2//EN" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML Level 2//EN//2.0" /usr/share/sgml/html/dtd/html.dtd +PUBLIC "-//IETF//DTD HTML 2.0 Level 2//EN" /usr/share/sgml/html/dtd/html.dtd + + -- Ways to refer to Level 1: most general to most specific -- +PUBLIC "-//IETF//DTD HTML Level 1//EN" /usr/share/sgml/html/dtd/html-1.dtd +PUBLIC "-//IETF//DTD HTML Level 1//EN//2.0" /usr/share/sgml/html/dtd/html-1.dtd +PUBLIC "-//IETF//DTD HTML 2.0 Level 1//EN" /usr/share/sgml/html/dtd/html-1.dtd + + -- Ways to refer to Level 0: most general to most specific -- +PUBLIC "-//IETF//DTD HTML Level 0//EN" /usr/share/sgml/html/dtd/html-0.dtd +PUBLIC "-//IETF//DTD HTML Level 0//EN//2.0" /usr/share/sgml/html/dtd/html-0.dtd + + -- Ways to refer to Strict Level 2 -- +PUBLIC "-//IETF//DTD HTML Strict//EN" /usr/share/sgml/html/dtd/html-s.dtd +PUBLIC "-//IETF//DTD HTML Strict//EN//2.0" /usr/share/sgml/html/dtd/html-s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 2//EN" /usr/share/sgml/html/dtd/html-s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 2//EN//2.0" /usr/share/sgml/html/dtd/html-s.dtd 
+PUBLIC "-//IETF//DTD HTML 2.0 Strict//EN" /usr/share/sgml/html/dtd/html-s.dtd +PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 2//EN" /usr/share/sgml/html/dtd/html-s.dtd + + -- Ways to refer to Strict Level 1 -- +PUBLIC "-//IETF//DTD HTML Strict Level 1//EN" /usr/share/sgml/html/dtd/html-1s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 1//EN//2.0" /usr/share/sgml/html/dtd/html-1s.dtd +PUBLIC "-//IETF//DTD HTML 2.0 Strict Level 1//EN" /usr/share/sgml/html/dtd/html-1s.dtd + + -- Ways to refer to Strict Level 0 -- +PUBLIC "-//IETF//DTD HTML Strict Level 0//EN" /usr/share/sgml/html/dtd/html-0s.dtd +PUBLIC "-//IETF//DTD HTML Strict Level 0//EN//2.0" /usr/share/sgml/html/dtd/html-0s.dtd + + -- Ways to refer to Netscape extensions HTML -- +PUBLIC "-//WebTechs//DTD Mozilla HTML//EN" /usr/share/sgml/html/dtd/html-mcom.dtd +PUBLIC "-//WebTechs//DTD Mozilla HTML 2.0//EN" /usr/share/sgml/html/dtd/html-mcom.dtd +PUBLIC "-//Netscape Comm. Corp.//DTD HTML//EN" /usr/share/sgml/html/dtd/html-mcom.dtd +PUBLIC "-//Netscape Comm. Corp.//DTD HTML//EN" /usr/share/sgml/html/dtd/html-mcom.dtd +PUBLIC "-//Netscape Comm. Corp.//DTD Strict HTML//EN" /usr/share/sgml/html/dtd/html-mcoms.dtd + + -- Ways to refer to Microsoft extensions HTML -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 2.0 HTML//EN" iehtml.dtd -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//EN" iehtml-s.dtd -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 2.0 Tables//EN" ietables.dtd -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 3.0 HTML//EN" ie30.dtd -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//EN" ie30-s.dtd -- +-- PUBLIC "-//Microsoft//DTD Internet Explorer 3.0 Tables//EN" ie30tables.dtd -- + + -- Ways to refer to Sun Microsystems HotJava extensions -- +PUBLIC "-//Sun Microsystems Corp.//DTD HotJava HTML//EN" /usr/share/sgml/html/dtd/html-hj.dtd +PUBLIC "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//EN" /usr/share/sgml/html/dtd/html-hjs.dtd + + -- Ways to refer to HTML 2.1 variants -- +-- PUBLIC "-//IETF//DTD HTML 2.1E//EN" html-2.1e.dtd -- + + -- O'Reilly & Associates DTD; see http://www.ora.com/standards/html/ -- +-- PUBLIC "-//O'Reilly and Associates//DTD HTML Extended 1.0//EN" oreilly-html.dtd -- +-- PUBLIC "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//EN" oreilly-html-relaxed.dtd -- +-- PUBLIC "-//O'Reilly and Associates//DTD HTML 2.0//EN" oreilly-rfc1866.dtd -- +-- PUBLIC "-//O'Reilly and Associates//ELEMENTS Table Fragment 1.0//EN" oreilly-draft-table.elements -- +-- PUBLIC "-//O'Reilly and Associates//ENTITIES Additional HTML 1.0//EN" oreilly-additional.entities -- + + -- Ways to Refer to SoftQuad HTML 2.0 + extensions -- +-- PUBLIC "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//EN" html-sq.dtd -- + + -- Spyglass HTML 2.0 Extended DTD -- +-- PUBLIC "-//Spyglass//DTD HTML 2.0 Extended//EN" spyglass/html2x.dtd -- +-- PUBLIC "ISO/IEC 10179:1995//NOTATION DSSSL Style Language//EN" spyglass/dsssl.not -- +-- PUBLIC "-//IETF//NOTATION W3C Style Language//EN" spyglass/w3csty.not -- +-- PUBLIC "-//IETF//NOTATION Cascading Style Sheet Language//EN" spyglass/css.not -- + + -- "HTML Pro" -- +-- PUBLIC "ISO 8879-1986//ENTITIES Added Latin 2//EN" pro/usr/local/lib/sgml/iso_8879-1986/entities/added_latin_2 -- +-- PUBLIC "ISO 8879-1986//ENTITIES Publishing//EN" pro/usr/local/lib/sgml/iso_8879-1986/entities/publishing -- +-- PUBLIC "ISO 8879-1986//ENTITIES General Technical//EN" pro/usr/local/lib/sgml/iso_8879-1986/entities/general_technical -- +-- PUBLIC "ISO 8879-1986//ENTITIES Numeric and 
Special Graphic//EN" pro/usr/local/lib/sgml/iso_8879-1986/entities/numeric_and_special_graphic -- +-- PUBLIC "+//Silmaril//DTD HTML Pro v0r11 19970101//EN" pro/html/dtds/htmlpro.dtd -- + + -- experimental DTDs -- +-- PUBLIC "-//W3C//DTD HTML Experimental 19960712//EN" pro/usr/local/lib/sgml/w3c/dtd/html_experimental_19960712 -- + + -- HTML 3.2 -- +PUBLIC "-//W3C//DTD HTML 3.2//EN" /usr/share/sgml/html/dtd/html-3.2.dtd +PUBLIC "-//W3C//DTD HTML 3.2 Final//EN" /usr/share/sgml/html/dtd/html-3.2.dtd +PUBLIC "-//W3C//DTD HTML 3.2 Draft//EN" /usr/share/sgml/html/dtd/html-3.2.dtd + + -- HTML 3.2 + Style -- +PUBLIC "-//W3C//DTD HTML Experimental 970421//EN" /usr/share/sgml/html/dtd/html-970421.dtd +PUBLIC "-//W3C//DTD HTML 3.2S Draft//EN" /usr/share/sgml/html/dtd/html-970421.dtd + + -- HTML 4.0 -- +PUBLIC "-//W3C//DTD HTML 4.0//EN" /usr/share/sgml/html/dtd/4.0/strict.dtd +PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" /usr/share/sgml/html/dtd/4.0/loose.dtd +PUBLIC "-//W3C//DTD HTML 4.0 Frameset//EN" /usr/share/sgml/html/dtd/4.0/frameset.dtd + + -- HTML 4.01 -- +PUBLIC "-//W3C//DTD HTML 4.01//EN" /usr/share/sgml/html/dtd/4.01/strict.dtd +PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" /usr/share/sgml/html/dtd/4.01/loose.dtd +PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" /usr/share/sgml/html/dtd/4.01/frameset.dtd +PUBLIC "-//W3C//ENTITIES Latin1//EN//HTML" /usr/share/sgml/html/entities/HTMLlat1.ent +PUBLIC "-//W3C//ENTITIES Latin 1//EN//HTML" /usr/share/sgml/html/entities/HTMLlat1.ent +PUBLIC "-//W3C//ENTITIES Special//EN//HTML" /usr/share/sgml/html/entities/HTMLspecial.ent +PUBLIC "-//W3C//ENTITIES Symbols//EN//HTML" /usr/share/sgml/html/entities/HTMLsymbol.ent + + -- ISO-HTML -- +PUBLIC "ISO/IEC 15445:2000//DTD HyperText Markup Language//EN" /usr/share/sgml/html/dtd/iso-15445/15445.dtd +PUBLIC "ISO/IEC 15445:2000//DTD HTML//EN" /usr/share/sgml/html/dtd/iso-15445/15445.dtd +PUBLIC "-//W3C//ENTITIES Full Latin 1//EN//HTML" /usr/share/sgml/html/entities/HTMLlat1.ent +PUBLIC "-//W3C//ENTITIES Symbolic//EN//HTML" /usr/share/sgml/html/entities/HTMLsymbol.ent + +-- PUBLIC "-//bebop.net//DTD HTML Apple Help 1.0//EN" AppleHelp1.0.dtd --
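
The sgml.soc catalog above is only useful if the OpenSP parser (pulled in through the opensp dependency in debian/control) can resolve a document's public identifier through it. A quick sanity check outside the CGI script is to run the parser directly against a local file; this is an illustrative sketch rather than anything the package ships, and it assumes the catalog location used by debian/rules above:

# Sketch: parse a local HTML file with OpenSP using the installed catalog;
# -s suppresses normal output so only errors and warnings are reported.
# "page.html" is a placeholder file name.
onsgmls -s -c /usr/share/w3c-markup-validator/catalog/sgml.soc page.html

If the document's public identifier is not mapped in the catalog, or the corresponding DTD package is not installed, onsgmls typically complains that it cannot locate the DTD, which is the same situation described in README.Debian above for documents the local validator cannot check.
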