#!/usr/bin/perl -w
# csv2tab.pl--Convert CSV to TAB delimited

# $Id: csv2tab.pl 1521 2008-09-27 20:01:13Z jp $
# $URL: file:///i:/home/SVN/util/csv2tab.pl $

use strict;
use Getopt::Std;
use File::Temp qw(tmpfile);     # anonymous, auto-deleted temp files for clipboard I/O

my $VERSION   = '$Version: 3.5 $';
my $COPYRIGHT = 'Copyright 2002-2008 JP Vossen (http://www.jpsdomain.org/)';
my $LICENSE   = 'GNU GENERAL PUBLIC LICENSE';
my $USAGE     = '';             # Placeholder for usage info below

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
((my $PROGRAM = $0) =~ s/^.*(\/|\\)//ig);   # remove up to last "\" or "/"

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Print the usage text to STDERR and exit.
# This sub is here for quick documentation purposes. Other subs at bottom.
# Called like:  Usage ({exit code})
# Params:       exit code; defaults to 1 only when NOT supplied, so that
#               Usage(0) really exits with status 0.
# Returns:      never (calls exit)
sub Usage {
    my $exit_code = defined $_[0] ? $_[0] : 1;

    # Unlike sh, Perl does not have a built in way to skip leading
    # TABs (but not spaces) to allow indenting in HERE docs.  So we cheat:
    # one leading TAB per line is stripped, so the text below may be
    # TAB-indented in the source without affecting the output.
    # (No sprintf here: there are no format arguments, and a '%' in the
    # interpolated program name would otherwise be misinterpreted.)
    ($USAGE = <<"EoN") =~ s/^\t//gm;
NAME
    ${PROGRAM}--Convert CSV to TAB delimited

SYNOPSIS
    $PROGRAM [OPTIONS] [-i | -w] [-o | -W]

OPTIONS
    -i = Input file (otherwise STDIN)
    -o = Output file (otherwise STDOUT)
    -w = Take input from the Windows Clipboard instead of a file.
    -W = Write output to the Windows Clipboard instead of a file.
    -d {delimiter} = Use the specified delimiter instead of TAB.
    -h = This usage
    -v = Be verbose
    -V = Show version, copyright and license information
    -q = Ignored for backward compatability.

    Examples:
        $PROGRAM -i file | cut -f 5

DESCRIPTION ($VERSION)
    Parse a CSV formatted file and output a TAB delimited file.  This is
    very handy for working with data in a spreadsheet (it can be faster
    than dealing with the spreadsheet's import CSV wizard) or for using
    tools like 'cut -d' which will not work when a field contains a CSV
    legally quoted comma.

AUTHOR / BUG REPORTS
    JP Vossen (jp {at} jpsdomain {dot} org)
    http://www.jpsdomain.org/

COPYRIGHT & LICENSE
    $COPYRIGHT
    $LICENSE

SEE ALSO
    "Parsing CSV files" on page 212 of Mastering Regular Expressions, 2nd
    (http://regex.info/ and http://www.oreilly.com/catalog/regex2/index.html)
EoN

    print STDERR ("$USAGE");    # Print the usage
    exit $exit_code;            # exit with the specified error code
} # end of usage

# Declare everything to keep -w and use strict happy
my ($INFILE, $OUTFILE, $aline, $delimiter, $outline, @arecord);
our ($opt_i, $opt_o, $opt_w, $opt_W, $opt_h, $opt_v, $opt_V, $opt_d, $opt_q);

# -i, -o and -d take an argument (trailing ':'); everything else is a flag.
# -q is accepted and ignored for backward compatibility.
getopts('i:o:wWhvVd:q');

Usage(0)   if $opt_h;
Version(0) if $opt_V;

# Set output delimiter (input is CSV): use the specified delimiter or TAB.
# Tested with defined/length rather than truth so that "-d 0" is honoured.
$delimiter = (defined $opt_d and length $opt_d) ? $opt_d : "\t";

Open_IO();      # Open input and output files

if ($opt_v) { print STDERR ("$PROGRAM version $VERSION\n\t$COPYRIGHT\n\t$LICENSE\n"); }

##########################################################################
# Main
while ($aline = <$INFILE>) {
    chomp($aline);

    @arecord = parse_csv_mre2($aline);
    $outline = join($delimiter, @arecord);
    print $OUTFILE "$outline\n";
} # end of while input
# End of main
##########################################################################

if ($opt_W) {
    Send_to_Clipboard();    # Send output directly into the Clipboard
}
else {
    # Buffered write errors (e.g. disk full) only surface at close time.
    close($OUTFILE)
        or die ("$PROGRAM: error closing '$opt_o' after output: $!\n");
}

if ($opt_v) { print STDERR ("\n\a$PROGRAM finished in ",time()-$^T," seconds.\n"); }


# Subroutines
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Emit version and other information to STDOUT and exit.
# Called like:  Version ({exit code})
# Params:       exit code; defaults to 1 only when NOT supplied, so that
#               Version(0) really exits with status 0.
# Returns:      never (calls exit)
sub Version {
    my $exit_code = defined $_[0] ? $_[0] : 1;
    print ("$PROGRAM version $VERSION\n\t$COPYRIGHT\n\t$LICENSE\n");
    exit $exit_code;        # exit with the specified error code
} # end of sub Version


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Load Win32::Clipboard at run time, so the script still compiles and runs
# on systems where the module is unavailable.
# Called like:  Load_Clipboard()
# Returns:      nothing; dies with a clear message if the module cannot load
sub Load_Clipboard {
    eval { require Win32::Clipboard; 1 }
        or die ("$PROGRAM: can't load Win32::Clipboard: $@\n");
    return;
} # end of sub Load_Clipboard


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Open input and output files, STDIN/STDOUT or Windows Clipboard.
# Reads the $opt_i/$opt_o/$opt_w/$opt_W options and sets the file-level
# handles $INFILE and $OUTFILE.
# Called like:  Open_IO()
# Returns:      nothing; dies on any failure
sub Open_IO {

    if (! $opt_i) { $opt_i = "-"; }     # If no input file specified, use STDIN
    if (! $opt_o) { $opt_o = "-"; }     # If no output file specified, use STDOUT

    # Input
    if ($opt_w and ($^O eq "MSWin32")) {
        # We're getting input from the Windows Clipboard
        Load_Clipboard();
        my $cboard = Win32::Clipboard::GetText();   # (Have to) Read entire clipboard contents
        $cboard = '' unless defined $cboard;        # nothing usable on the clipboard
        $cboard =~ s/\r//g;     # Remove odd CRs ("\r"), if any, the clipboard sticks in

        # Dump the clipboard into a secure temp file that's automatically
        # deleted when we're finished, then rewind it so the main loop can
        # read $INFILE as normal.
        $INFILE = tmpfile() || die ("$PROGRAM: error creating temp file for -w: $!\n");
        print $INFILE ("$cboard");
        seek($INFILE, 0, 0) or die ("$PROGRAM: error couldn't rewind temp INPUT file: $!\n");
    } elsif ($opt_w and ($^O ne "MSWin32")) {
        die ("$PROGRAM: can't use -w on Linux or Unix!  What're you thinking?!?\n");
    } elsif ($opt_i eq "-") {
        # "-" means STDIN; handled explicitly because 3-arg open treats
        # the name literally.
        $INFILE = \*STDIN;
    } else {
        # Regular old input.  3-arg open: the file name can never be
        # interpreted as a mode or a pipe command.
        open ($INFILE, '<', $opt_i) or die ("$PROGRAM: error opening '$opt_i' for input: $!\n");
    } # end of get input from clipboard

    # Output
    if ($opt_W and ($^O eq "MSWin32")) {
        # We're sending the output directly into the Clipboard, via a
        # secure temp file that's automatically deleted when we're finished.
        Load_Clipboard();
        $OUTFILE = tmpfile() || die ("$PROGRAM: error creating temp file for -W: $!\n");
    } elsif ($opt_W and ($^O ne "MSWin32")) {
        die ("$PROGRAM: can't use -W on Linux or Unix!  What're you thinking?!?\n");
    } elsif ($opt_o eq "-") {
        # "-" means STDOUT
        $OUTFILE = \*STDOUT;
    } else {
        # Regular old output
        # Note use of indirect file handle (e.g. '$' on $OUTFILE), needed for temp file
        open ($OUTFILE, '>', $opt_o) or die ("$PROGRAM: error opening '$opt_o' for output: $!\n");
    } # end of if using clipboard

    return;
} # end of sub Open_IO


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Copy everything written to the temp $OUTFILE into the Windows Clipboard.
# Expects Open_IO() to have run with -W, so $OUTFILE is the temp file and
# Win32::Clipboard is loaded.
# Called like:  Send_to_Clipboard()
# Returns:      nothing
sub Send_to_Clipboard {
    seek($OUTFILE, 0, 0) or die ("$PROGRAM: error couldn't rewind temp OUTPUT file: $!\n");

    # Slurp the whole file; 'local' restores the input record separator
    # on leaving the block instead of clobbering it globally.
    my $cboard = do { local $/; <$OUTFILE> };
    $cboard = '' unless defined $cboard;    # empty output => empty clipboard text

    Win32::Clipboard::Set("$cboard");       # Send it to the clipboard
    return;
} # end of sub Send_to_Clipboard


#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# Regex to parse CSV from _Mastering_Regular_Expressions,_Second_Edition_;
# page 271.  See http://regex.info/ esp. http://regex.info/dlisting.cgi?id=1253)
# Called like:  @arecord = parse_csv_mre2 ($aline);
# Params:       one line of CSV text (already chomped)
# Returns:      a list of the parsed fields, with the enclosing quotes
#               removed and doubled quotes ("") collapsed to a single ".
#               An empty list if called with no arguments.
# Note:         each match must start exactly where the previous one ended
#               (\G), so parsing stops at the first position where neither
#               alternative matches; any text after that point is dropped.
sub parse_csv_mre2 {
    if (scalar @_ == 0) { return(); }

    my $line       = $_[0];
    my @parsedline = ();
    my $field      = '';

    while ($line =~ m{
            \G(?:^|,)           # start of line, or the comma ending the previous field
            (?:
                # Either a double-quoted field (with "" for each ")...
                "               # field's opening quote
                ( (?> [^"]* ) (?> "" [^"]* )* )
                "               # field's closing quote
                # ..or...
            |
                # ... some non-quote/non-comma text....
                ( [^",]* )
            )
        }gx)
    {
        # OK, done with regex, NOW what...
        if (defined $2) {
            # Got some non-quote/non-comma text
            $field = $2;
        } else {
            # Got a quoted field: un-double any embedded quotes
            $field = $1;
            $field =~ s/""/"/g;
        }
        push (@parsedline, $field);
    } # end of while block

    return (@parsedline);
} # end of sub parse_csv_mre2