# For the converting of Blogger archives into cleaner MT import files.
#
# A script by sysadmin1138
#
# This script expires March 26th 2010, when Blogger terminates
# the ability to use FTP to publish. At that point, wget/curl
# will be needed to retrieve the files needed. You're on your
# own for that.
#
# The Blogger template needs to be changed to the following lines:
#
#
# EAUTHOR:::yourmtusername
# ETITLE:::<$BlogItemSubject$>
# EDATE:::<$BlogItemDateTime$>
# EBODY:::<$BlogItemBody$>
#
# CAUTHOR:::<$BlogCommentAuthor$>
# CDATE:::<$BlogCommentDateTime$>
# CBODY:::<$BlogCommentBody$>
#
#
#
# This sets up the files the way the script expects.
# Blogger Timestamps need to be in MM/DD/YYYY HH:MM AM/PM mode.
#
# NOTE(review): the empty comment lines around the template above presumably
# held Blogger template tags that were lost from this copy -- confirm against
# a working template before using it.

use strict;

# Separators required by the MT import format: one closes each section of an
# entry, the other closes the entry itself.
my $SectionMarker="-----";
my $EntryMarker="\n--------";

# Input (Blogger archive) and output (MT import) file names and handles.
# The handles are package globs because the processing loop refers to them
# by bareword.
my $InName;
my $OutName;
local *InFile;
local *OutFile;

# Working state shared with the processing loop further down the file.
my $EAuthor;
my $CAuthor;
my $AIndex;
my $ATemp;
my $Title;
my $EDate;
my $CDate;
my $PCategory;
my @Category;
my $Label;
my $Labels;
my $LabelIndex;
my $EndLabelIndex;
my $LabelSize;
my $CUrl;
my $EBody;
my $CBody;
my $PLine;
my @Split;
my $spat=":::";
my $EntryCount=0;

# Provide usage information. Exactly two arguments are required: the archive
# to read and the import file to write.
if (@ARGV != 2) {
    die "\nUsage:\tblogger2mt.pl <blogger_archive_file> <mt_import_file>\n\nExample:\n\tblogger2mt.pl 2001_10_01_archive.txt ~/www/cgi-bin/mt/import/2001_10_01_mtimport.txt\n\n";
}

# Open the files we'll be working with
$InName=$ARGV[0];
$OutName=$ARGV[1];

# Opening the output with ">" truncates it, so pointing both arguments at the
# same path would destroy the archive before a single line is read.
if ($InName eq $OutName) {
    die "Input and output must be different files (both are '$InName').\n";
}

# Fail loudly if either file cannot be opened; an unchecked open would make
# the run silently process zero entries.
open(InFile, "<:utf8", $InName)
    or die "Cannot open input file '$InName' for reading: $!\n";
open(OutFile, ">:utf8", $OutName)
    or die "Cannot open output file '$OutName' for writing: $!\n";

# Start processing the file!
# Marker that separates a post body from the label links Blogger appends to it.
# NOTE(review): this value (two newlines) can never occur inside a single
# chomped input line, so as written no labels are ever split off the body.
# It was presumably an HTML tag that was stripped from this copy of the
# script -- restore the real marker from a published archive before relying
# on category import.
my $LabelBlockMarker = "\n\n";

# Read the archive one line at a time. Every interesting line has the form
# TAG:::value; lines with any other tag (or no tag) are ignored. Entry fields
# are collected until EBODY arrives, at which point the entry is written out.
# Comment fields are collected until CBODY arrives, which writes the comment.
while (defined($PLine = readline(InFile))) {
    chomp($PLine);

    # Limit the split to two fields so that a ":::" occurring inside a title
    # or body stays part of the value instead of cutting it short.
    my ($tag, $value) = split(/:::/, $PLine, 2);
    $tag   = '' unless defined $tag;
    $value = '' unless defined $value;

    if ($tag eq "EAUTHOR") {
        # This is the first line of an entry, so clear the previous entry.
        undef($EDate);
        undef($Title);
        undef($EBody);
        undef($PCategory);
        @Category = ();
        $EAuthor = $value;
        $EntryCount++;
    }
    elsif ($tag eq "ETITLE") {
        $Title = $value;
    }
    elsif ($tag eq "EDATE") {
        $EDate = $value;
    }
    elsif ($tag eq "CAUTHOR") {
        # $CUrl stays undefined when the author is not a link.
        ($CAuthor, $CUrl) = _parse_comment_author($value);
    }
    elsif ($tag eq "CDATE") {
        $CDate = $value;
    }
    elsif ($tag eq "CBODY") {
        $CBody = $value;

        # Since the body is the last thing in the comment, commit the
        # comment block.
        print OutFile "\nCOMMENT:\n";
        print OutFile "AUTHOR: $CAuthor\n";
        print OutFile "URL: $CUrl\n" if defined($CUrl);
        print OutFile "DATE: $CDate\n";
        print OutFile "$CBody\n";
        print OutFile "$SectionMarker\n";
    }
    elsif ($tag eq "EBODY") {
        # First, separate the body of the post from any attached labels.
        my $marker_at  = index($value, $LabelBlockMarker);
        my $label_html = '';
        if ($marker_at != -1) {
            $EBody      = substr($value, 0, $marker_at);
            $label_html = substr($value, $marker_at);
        }
        else {
            $EBody = $value;
        }

        # Then turn any labels into categories; the first one found for the
        # entry becomes its primary category.
        push(@Category, _extract_labels($label_html));
        $PCategory = $Category[0] if @Category;

        # Finally, output the post so far. Comments will be handled
        # separately.
        print OutFile "$EntryMarker\nAUTHOR: $EAuthor\n";
        print OutFile "TITLE: $Title\n";
        print OutFile "STATUS: Draft\n";
        if (@Category) {
            print OutFile "PRIMARY CATEGORY: $PCategory\n";
            for my $category (@Category) {
                print OutFile "CATEGORY: $category\n";
            }
        }
        print OutFile "DATE: $EDate\n$SectionMarker\n";
        print OutFile "BODY:\n$EBody\n";
        print OutFile "$SectionMarker\n";
        print OutFile "EXTENDED BODY:\n\n$SectionMarker\n";
        print OutFile "EXCERPT:\n\n$SectionMarker\n";
        print OutFile "KEYWORDS:\n\n$SectionMarker\n";
    }
}

# Finalize the file
print OutFile "\n$EntryMarker\n";

# Close up files. Buffered write errors (full disk, quota) only surface when
# the output handle is closed, so that close must be checked.
close(InFile);
close(OutFile)
    or die "Error while finishing output file '$OutName': $!\n";

print "Processed $EntryCount entries in file $InName.\n";

# Split the CAUTHOR value into a display name and an optional URL.
# Takes the raw value, which is either plain text or HTML containing a link.
# Returns (name, url); url is undef when the value has no href attribute.
sub _parse_comment_author {
    my ($raw) = @_;

    # Accept either quote style around the href value.
    my $url;
    if ($raw =~ m{href\s*=\s*(["'])(.*?)\1}) {
        $url = $2;
    }

    # The name is whatever text remains once the tags are removed. This
    # leaves an unlinked name such as "Anonymous" intact.
    (my $name = $raw) =~ s/<[^>]*>//g;

    return ($name, $url);
}

# Collect the text of every label link in a chunk of label HTML.
# A label starts right after 'txt">' (the tail of the link's href) and runs
# to the closing anchor tag. Returns the labels in document order, or an
# empty list when there are none.
sub _extract_labels {
    my ($label_html) = @_;
    my @labels = $label_html =~ m{txt">(.*?)</a>}g;
    return @labels;
}