#!/usr/bin/perl
# GenSite--Generate a static HTML-only web site from templates
# $Id: GenSite.pl 1.12 2003/12/22 06:27:43 JP Exp root $
# $Log: GenSite.pl $
# Revision 1.12 2003/12/22 06:27:43 JP
# Added TO DO notes, changed www4 to www for production!
#
# Revision 1.11 2003/12/01 07:26:35 JP
# Moved &CheckLinks after GenSite variable interpolation.
# Made find (-f) case insensitive.
# Moved body margin to CSS from HTML
#
# Revision 1.10 2003/11/23 07:14:33 JP
# Now calculate level once and save it
# Fix broken link count off by 1
#
# Revision 1.9 2003/11/23 04:31:12 JP
# Add s///s to CheckLinks find URL regex
# Renamed $anchor to $url in Checklinks to make more sense
# Renamed $text to $label in Checklinks to make more sense
# Bugfix to correctly omit CVS/RCS directories
# Bugfix for -b in usage and options
#
# Revision 1.8 2003/11/17 07:49:13 JP
# Changed $TemplateDir to www.pages after moving template into CVS
# Changed EM to JPATjpsdomainDOTcom
# Added subroutine name to more error messages.
#
# Revision 1.7 2003/11/13 08:03:04 JP
# Allow $WebSiteDir and $TemplateDir to be the same for easy admin
# Allow -f without -r, and display the results
# Add s///s to -f (so-called 'single line mode', but really lets . match \n)
# Started to add another debug level
#
# Revision 1.6 2003/11/12 07:51:55 JP
# Added search and replace function
# A little misc cleanup and corrections
#
# Revision 1.5 2003/11/10 07:36:47 JP
# Added link count feedback at end of run time
# Added better link checking run time feedback
# Added "global" flag for page interpolation substitutions (bug fix)
#
# Revision 1.4 2003/11/09 10:24:57 JP
# Added 'meta name=GENERATOR'
# Added ability to suppress missing meta Keyword or Desc warnings by using a dash in the template
# Added ability to only process one file with -i (for template testing)
# Added simple local link (file) and remote (URL) checking
#
# Revision 1.3 2003/11/06 07:42:18 JP
# Re-implemented interpolation safely
# Allow pages to be generated but UNLISTED in the nav menu
# Fix level counting
#
# Revision 1.2 2003/11/05 07:23:58 JP
# Allow entries in NavOrder.txt to refer to external sites in Nav
# Suppress the warning about entries in NavOrder without pages if trailing -
# Added the "more comments" section
# Added the interpolation code, but that needs work
#
# Revision 1.1 2003/11/04 07:18:02 JP
# Made some fatal errors only warnings
# Made HTML Meta stuff optional
# Added a simple check for a body in the template
# Added more comments
# Fixed Nav Menu links for headings with no page
#
# Revision 1.0 2003/11/03 07:51:42 JP
# Initial Revision
#
##########################################################################
##### TO DOs
#
# * Only copy CSS file if necessary
# * Finish adding another debug level (now 0-3)
# * Change variables to better/more meaningful names
# * Change to a generic open and slurp file subroutine?
# * WARN when external link and not target=_blank, except if $Sitexxxx and $HTMLxxxx
# * Add local bookmark checks
# * Debug why some sites don't work with RLC (remote link checking).
#       LWP User-Agent setting???
#       Add RLC-Ignore.txt???
# * Correct handling for & in URLs
#
##########################################################################
$ver = '$Revision: 1.12 $'; # JP Vossen
##########################################################################

# Program name without any leading directory part; used in every message.
(($myname = $0) =~ s/^.*(\/|\\)//ig); # remove up to last "\" or "/"

$Greeting =  ("$myname $ver Copyright 2003 JP Vossen (http://www.jpsdomain.org/)\n");
$Greeting .= (" Licensed under the GNU GENERAL PUBLIC LICENSE:\n");
$Greeting .= (" See http://www.gnu.org/copyleft/details.\n");

# Show the usage notes when any argument contains '?', or starts with -h or
# --help.  Each argument is tested on its own so that help is also recognised
# when it is the FIRST argument (matching against the joined "@ARGV" string
# with a leading space missed that case).
if (grep { /\?/ || /^-h/ || /^--help/ } @ARGV) {
    print STDERR ("\n$Greeting\n\n");
    # NOTE: this heredoc interpolates (for $myname), so every other literal
    # '$' in the text must be backslash-escaped or it prints as an empty
    # string--the defaults are not assigned until after option parsing.
    print STDERR <<"EoN";    # Usage notes
Usage: $myname [OPTIONS]

    -w {website}   = Web site destination directory (www).
    -i {page}      = Single template to process.
    -t {template}  = Template source directory (www.pages).
    -s {sitename}  = Name of the site (jpsdomain.org).
    -u {site URL}  = Public URL of site (http://www.jpsdomain.org/).
    -c {CSS file}  = The CSS file (jpsdomain.css).
    -b {BG file}   = The body cell background image file.
    -g {image}     = The Google logo image file (images/Google_Logo_40gry.gif).
    -n {file}      = The Nav menu order file (NavOrder.txt).
    -S {suffix}    = HTML page suffix (html).
    -T {suffix}    = Template file suffix (page).
    -e {name}      = Contact E-Mail Name (jp{at}jpsdo{at}jpsdomain{dot}org).
    -E {address}   = Contact E-Mail Address (defaults to the -e name).
    -I {on | off}  = Set page body variable interpolation on or off.
    -l {local | remote | both | off } = Do some simple link checking (off).
    -f {term} -r {term} = Global find and replace a string in the templates.
    -D {0-3}       = Print debug messages to STDERR (0=off, 2=most verbose).
    -q             = Be quiet about it.

Generate a static HTML-only web site from template pages. Runs on Windows
and UNIX. Can regenerate a 'live' site if it has access to the live
destination directory, or can run in offline mode to regenerate files for
upload to a hosting site.

ONLY deals with HTML files and will only overwrite destination HTML files
for which there is a source template. In other words, binary or other files
such as images are NOT dealt with, and you can have HTML pages in the
destination which are outside the scope of GenSite.pl.

The Navigation menu on the left can be sorted alphabetically, or in a
defined order. If you do not want a defined order, edit this script and
uncomment '\$NavOrderPage = undef;'. If NavOrderPage (-n) is defined but the
file does not exist or is empty, it will be created in alphabetical order
as a template. Edit the file as needed and re-run.

The Navigation menu is limited to two levels, a top level and a sub-level.
Top level items are bold, sub-level items are not bold, and are prefixed
with '-'. The level is indicated in the template file by prefixing the Nav
Name with a dash (-).

If page body variable interpolation (which is implemented as a simple
substitution) is enabled it allows the use of the following variables in
the body of templated pages: \$SiteName, \$SiteURL, \$EmailName,
\$EmailAddress and \$SiteCopyright. It will slow down program execution a
little bit, and will get confused if any of the above are listed in your
pages.

Search and replace is the only routine that touches the template files, so
be CAREFUL with it. Make backups! You have been warned!

BUGS: I got a little carried away with using dashes in the templates files
to mean special things.
EoN
    die ("\n");
} # end of usage

##########################################################################
##### More Comments
# This is a very simple program created by me, for me. It works for my
# needs for my site, but I have specific needs and constraints. For more
# full featured Perl-based template web site creation tools, check out:
#       http://perl.apache.org/embperl/
#       http://www.masonhq.com/
#
# Or, use an entire web site framework like:
#       http://mamboserver.com/
#       http://slashcode.com/
#
# My main constraint is that the ISP where most of the site is hosted does
# not support SSI, CGI, or anything else that would facilitate a modern
# site. I have various reasons for staying there, not the least of which is
# inertia. So I needed a simple solution that would create a navigable site
# without any of those technologies. Also, after the last redesign JavaScript
# fiasco, I wanted totally PURE HTML. Since the site is mostly static, that
# was fine, except for being able to actually maintain it. So I wrote this.
#
# The other trivia is that the site (www-t and www) directories are hosted
# on Linux, running Samba. But I access them mostly from a Windows
# workstation, which is where I run this script from as well. When the site
# is regenerated, I can manually kick off an rsync from the Linux box to
# the ISP. Or even better, I can edit the templates and do nothing. Cron on
# the Linux box will re-gen the site and upload it as needed. Nice!
#
# Debug Levels (NOT FINISHED YET!!!)
#   0   Off
#   1   More detail
#   2   Find or find and replace output
#   3   ?????
#   4   ?????
##########################################################################

use Getopt::Std;    # Use Perl5 built-in program argument handler

# Define possible args.  Every letter the script reads as $opt_X must be
# listed here (a trailing ':' means the option takes a value), otherwise
# getopts rejects it and the variable is never set.  The spec used to list
# 'i:' twice and omit I, e, E, g and n even though $opt_I, $opt_e, $opt_E,
# $opt_g and $opt_n are all consulted when the defaults are assigned.
# 'N:' is retained only so existing command lines that pass -N still parse.
getopts('w:i:t:s:u:c:b:g:n:N:S:T:I:e:E:l:f:r:D:q');

use File::Find;     # Used to find the files in the templates dir
use File::Path;     # Used to make directories
use File::Copy;     # Used to copy files

# Set defaults
# The following two can be the same directory, for easy admin, or
# different dirs, for easy use with CVS. Do what ever is easier and makes
# more sense to you.
# Each setting takes its command line option when one was given (and is
# true), otherwise the built-in default.  An option letter only has an
# effect if the getopts() spec accepts it.
$WebSiteDir     = $opt_w || 'www';                          # Destination dir for gen templates
$TemplateDir    = $opt_t || 'www.pages';                    # Source dir for templates
$SinglePage     = $opt_i;                                   # Single template to process
$SiteName       = $opt_s || 'jpsdomain.org';
$SiteURL        = $opt_u || 'http://www.jpsdomain.org/';
$HTMLSuffix     = $opt_S || 'html';                         # Could be 'htm' also
$TemplateSuffix = $opt_T || 'page';                         # Template file extension
$CSSPage        = $opt_c || "jpsdomain.css";                # Name of CSS file
$BodyCellBG     = $opt_b || "images/blutxtr1.jpg";          # Name of BG image for 'body' cell
$Google         = $opt_g || "images/Google_Logo_40gry.gif"; # Google logo
$NavOrderPage   = $opt_n || "NavOrder.txt";                 # Name of Nav Menu order file
# $NavOrderPage = undef;                                    # Set this for an alphabetical Nav menu
$PageBodyInterp = $opt_I || "on";
$LinkCheck      = $opt_l || "off";                          # local, remote, both, off
$EmailName      = $opt_e || "JPATjpsdomainDOTcom";
$EmailAddress   = $opt_E || "$EmailName";
$SiteCopyright  = "Copyright © 1995-2003, JP Vossen. All rights reserved.";

@Templates   = ();      # Global define for list of template files
@Directories = ();      # Global define for list of site directories
@NavKeys     = ();      # Global define for Nav keys/sort order
$PageBody    = '';      # Global define for the body of the page

# Start the link status hashes genuinely empty.  (Assigning '' to a hash
# creates a single bogus entry whose key is the empty string.)
%RemoteLinkStatus = ();
%LocalLinkStatus  = ();

$BrokenLocalLinkCount  = 1; # Start at 1 because it's easier to read
$BrokenRemoteLinkCount = 1; # in the debug output when correcting errors
$RemoteLinkCount       = 0; # Have to subtract 1 in the counts though
$LocalLinkCount        = 0;

##########################################################################
##### MAIN

if (! $opt_q) { print ("\n$Greeting\n"); }

# Do a sanity check for find and replace
if ((defined $opt_r) and (! $opt_f)) {
    die ("$myname: Can't have -r without -f!\n");
}

# If we are just doing a single page, make sure it exists
if (defined($SinglePage)) {
    if ($opt_D > 1) { warn ("$myname:main: SinglePage = '$SinglePage' before s!\!/!.\n"); }
    $SinglePage =~ s!\\!\/!g;   # Replace DOS '\' with UNIX/Perl '/'
    if ($opt_D > 1) { warn ("$myname:main: SinglePage = '$SinglePage' after s!\!/!.\n"); }

    if (-f $SinglePage) {
        if (! $opt_q) { print ("Looking for single page '$SinglePage'...\n"); }
    } elsif (-f "$SinglePage.$TemplateSuffix") {
        # The name was given without its template suffix; add it.
        if (! $opt_q) { print ("Looking for page '$SinglePage' + '.$TemplateSuffix'...\n"); }
        $SinglePage = "$SinglePage.$TemplateSuffix";
    } else {
        die ("$myname: '$SinglePage' is not readable!\n");
    } # end of file checks
} # end of make sure single page exists

if (! $opt_f) { # If doing find and replace, we don't do ANYTHING else...
    # Only load LWP if we have to--it's big
    # NOTE(review): 'use' is executed at compile time, so LWP::Simple is in
    # fact ALWAYS loaded, whatever -f or -l say; the condition below has no
    # effect (which is also why "both" works although only "remote" is
    # tested).  Left unchanged on purpose: switching to a run-time
    # require/import could break calls to get/head elsewhere in the file.
    if ($LinkCheck eq "remote") { use LWP::Simple qw(get head); }
} # If we do a find and replace, we don't do ANYTHING else...

if (! $opt_q) { print ("Building template list from '$TemplateDir/*.$TemplateSuffix' files...\n"); }
find(\&BuildTemplateList, $TemplateDir);    # Build a list of *.page files in the templates dirs

if (! $opt_f) { # If doing find and replace, we don't do ANYTHING else...
    if (! $opt_q) { print ("Reading templates from '$TemplateDir/*.$TemplateSuffix' files...\n"); }
    foreach $template (@Templates) {
        &ReadTemplateFile('Details');       # Read the details lines from each template
    } # end of reading template details

    if (! $opt_q) { print ("Building the master navigation menu...\n"); }
    &BuildMasterNav;                        # Build the master navigation table

    if (! $opt_q) { print ("Building directory structure in '$WebSiteDir'...\n"); }
    &MakeSiteDirs;                          # Make sure site sub-dirs exist

    if ($WebSiteDir ne $TemplateDir) {      # Only need to copy if the dirs are DIFFERENT
        if (! $opt_q) { print ("Copying CSS file '$TemplateDir/$CSSPage' to '$WebSiteDir/$CSSPage'...\n"); }
        copy ("$TemplateDir/$CSSPage", "$WebSiteDir/$CSSPage")
            or die ("$myname:main: could not copy $TemplateDir/$CSSPage to $WebSiteDir/$CSSPage: $!\n");
    } # end of check to see if we copy the CSS file

    if (! $opt_q) {
        print ("Page body interpolation is: $PageBodyInterp.\n");
        print ("Link checks are: $LinkCheck.\n");
        if (($LinkCheck eq "remote") or ($LinkCheck eq "both")) {
            print ("Remote Link checks require internet connectivity and can be VERY SLOW!\n");
        } # end of whine about remote link checks
        print ("Writing the new '$WebSiteDir/*.$HTMLSuffix' files...\n");
    } # end of some user feedback
} # If we do a find and replace, we don't do ANYTHING else...

# Are we writing a single page, or the entire site?
if (defined($SinglePage)) {
    if (! $opt_q) { print ("Processing single page '$SinglePage'...\n"); }
    @Templates = $SinglePage;               # JUST do this page
} # Single page or entire site?

if (! $opt_f) { # If doing find and replace, we don't do ANYTHING else...
    foreach $template (@Templates) {
        &ReadTemplateFile('');              # Read the body of template file
        &WriteHTMLFile;                     # Write the generated web site page
    } # end of write the new site pages

    if (! $opt_q) {
        print ("\n\a$myname built ",@Templates+0," pages in ",time()-$^T," seconds.\n");
        unless ($LinkCheck eq "off") { print ("\tChecked\tTotal\tGood\tBad\n"); }

        # The broken-link counters start at 1 (see their initialisation), so
        # the real number of bad links is counter - 1 and the number of good
        # links is the total minus that.  (The old "Total - Broken - 1"
        # under-reported the good links by two.)
        # NOTE(review): assumes the *LinkCount totals count every link
        # checked, good or bad -- confirm against CheckLinks.
        if (($LinkCheck eq "remote") or ($LinkCheck eq "both")) {
            my $BadRemote = $BrokenRemoteLinkCount - 1;
            print ("\tRemote\t$RemoteLinkCount\t", $RemoteLinkCount - $BadRemote,"\t", $BadRemote,"\n");
        } # end of remote link summary
        if (($LinkCheck eq "local") or ($LinkCheck eq "both")) {
            my $BadLocal = $BrokenLocalLinkCount - 1;
            print ("\tLocal\t$LocalLinkCount\t", $LocalLinkCount - $BadLocal,"\t", $BadLocal,"\n");
        } # end of local link summary
    } # end of ending feedback
} else {
    # Now that we have, somewhat clumsily, only done the things necessary
    # to prep for the find and replace, actually DO the work
    &FindandReplace;
    if (! $opt_q) { print ("\n\a$myname processed ",@Templates+0," pages in ",time()-$^T," seconds.\n"); }
} # If we do a find and replace, we don't do ANYTHING else...

##### End of MAIN
##########################################################################

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
sub FindandReplace {
    my $termasfound;
    my $search = $opt_f;    # Assign better variables

    if (defined $opt_r) {
        if (! $opt_q) { print ("Doing find and replace:\n"); }
        if ($opt_D > 0) { warn ("\n$myname:FindandReplace: 1 search = ~$search~, replace = ~$replace~\n"); }
    } else {
        if (!
$opt_q) { print ("Doing find:\n"); } if ($opt_D > 0) { warn ("\n$myname:FindandReplace: 1 search = ~$search~\n"); } } # end of if we are doing a replace $search = eval "qq{$search}"; # Interpolate if (defined $opt_r) { $replace = $opt_r; $replace = eval "qq{$replace}"; # Interpolate if ($opt_D > 0) { warn ("\n$myname:FindandReplace: 2 search = ~$search~, replace = ~$replace~\n"); } } else { if ($opt_D > 0) { warn ("\n$myname:FindandReplace: 2 search = ~$search~\n"); } } # end of if we are doing a replace my $irs = $/; # Save the default input record separator undef($/); # Undefine input record separator foreach $template (@Templates) { $count = 0; # Just to be sure... # Open and read an entire template file if ($opt_D > 0) { warn ("\n$myname:FindandReplace: Reading template '$template'\n"); } open (TEMPLATE, "$template") or die ("$myname:FindandReplace: error opening '$template' for input: $!\n"); $Page =