#!/usr/bin/perl

# Cyrillic Mail Filter
# Version 1.03
# Part of "Cyrillic Software Suite"
# Get docs and newest version from
#     http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt
# You may install this script on your site for free
# To obtain permission for redistribution or any other usage
# contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

use strict;
use warnings;

our $VERSION = '1.03';

=head1 NAME

cyr-mail-filter.pl v1.03 - Cyrillic Mail Filter to convert email across cyrillic charsets

=cut

use Mail::Internet;
use Mail::Field;
use MIME::Field::ContType;
use MIME::Words;
use Lingua::DetectCharset;
use Convert::Cyrillic;
use Getopt::Std;

our $ScriptName    = "Cyrillic Mail Filter";
our $ScriptVersion = $VERSION;

=head1 DESCRIPTION

Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).

Filters email messages and converts the content across cyrillic charsets
accordingly to specified options.

This script addresses the following problems of cyrillization:

=over

=item *

Some people can not read cyrillic and need to get mail transliterated.

=item *

Most mail software does not correctly set the C<charset> parameter in headers.

=item *

Many mail readers can't handle cyrillic in the C<Subject> field.

=back

=head1 SYNOPSIS

cyr-mail-filter.pl [B<-s>] B<-f>[I<charset>] B<-t>I<charset>

=head1 OPTIONS

=over

=item -s

Always transliterate subject

=item -f{AUTO,WIN,KOI8,ISO}

If _F_rom charset is not specified, will use charset from 'Content-Type'
header for conversions. Otherwise will ignore the header. AUTO will detect
the charset automatically from email body. WIN, KOI8 or ISO will enforce
the source charset accordingly.

=item -t{VOL,WIN,KOI8,ISO}

Convert text _T_o charset, where charset is one of WIN, KOI8, ISO or
VOL (e.g. volapuk or translit).

=back

=cut

# Content-Type charset name -> encoding name understood by Convert::Cyrillic.
my %ENC_OF_CHARSET = (
    'koi8-r'       => 'KOI8',
    'windows-1251' => 'WIN',
    'iso-8859-5'   => 'ISO',
);

# Destination encoding name -> charset to advertise in Content-Type.
# VOL (transliteration) has no charset of its own, hence undef.
my %CHARSET_OF_ENC = (
    'KOI8' => 'koi8-r',
    'WIN'  => 'windows-1251',
    'ISO'  => 'iso-8859-5',
    'VOL'  => undef,
);

# Source encodings that may be forced with -f (besides AUTO and a bare -f).
my %IS_FORCED_SRC_ENC = map { $_ => 1 } qw(WIN KOI8 ISO);

# Usage()
#   Prints the help text to STDERR and terminates the process with exit
#   status 1. Never returns.
sub Usage {
    print STDERR <<"END_USAGE";

$ScriptName v$ScriptVersion.
Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).

Filters email messages and converts the content across cyrillic charsets
accrodingly to specified options.

USAGE: cyr-mail-filter.pl [-s] -f[<charset>] -t<charset>

  -s  - Always transliterate _S_ubject.

  -f{AUTO,WIN,KOI8,ISO} - If _F_rom charset is not specified, will use
        charset from 'Content-Type' header for conversions. Otherwise will
        ignore the header. AUTO will detect the charset automatically from
        email body. WIN, KOI8 or ISO will enforce the source charset
        accordingly.

  -t{VOL,WIN,KOI8,ISO} - Convert text _T_o charset, where charset is one of
        WIN, KOI8, ISO or VOL (e.g. volapuk or translit).
END_USAGE
    exit(1);
}

# _normalize_args(@args) -> @args
#   Getopt::Std has no notion of an option whose value is optional, yet a
#   bare "-f" (meaning "trust the Content-Type header") is documented.
#   Give every bare "-f" an explicit empty value so that getopts() does not
#   swallow whatever argument follows it (e.g. "-f -tWIN"). A detached value
#   ("-f AUTO") is left alone. Arguments after "--" are passed through.
sub _normalize_args {
    my @pending = @_;
    my @normalized;

    while (@pending) {
        my $arg = shift @pending;
        push @normalized, $arg;

        if ($arg eq '--') {
            push @normalized, @pending;
            last;
        }

        # "-f" on its own, or at the end of a "-sf" cluster.
        next unless $arg =~ /\A-s*f\z/;
        push @normalized, q{} if !@pending || $pending[0] =~ /\A-/;
    }

    return @normalized;
}

Usage() unless @ARGV;

# -s is a plain flag; -f and -t carry a value. (The original getopt("stf")
# call made -s consume the following argument as its value.)
my %opts;
@ARGV = _normalize_args(@ARGV);
getopts('sf:t:', \%opts) or Usage();

my $translit_subject = exists $opts{'s'} ? 1 : 0;

# -f is mandatory; an empty value selects "use the Content-Type header".
Usage() unless exists $opts{'f'};
my $src_param = defined $opts{'f'} ? $opts{'f'} : q{};
Usage()
    unless $src_param eq q{}
        || $src_param eq 'AUTO'
        || $IS_FORCED_SRC_ENC{$src_param};

# -t is mandatory and must name a known destination encoding.
my $dst_enc = defined $opts{'t'} ? $opts{'t'} : q{};
Usage() unless exists $CHARSET_OF_ENC{$dst_enc};

# Options are valid: only now consume the message from STDIN.
my $msg    = Mail::Internet->new(\*STDIN);
my $header = $msg->head();
my $body   = $msg->body();

my $content_type = Mail::Field->extract('Content-Type', $header);
$content_type = Mail::Field->new('Content-Type', 'text/plain')
    unless defined $content_type;

# Work out the encoding the message is currently in.
my $src_enc;
if ($src_param eq 'AUTO') {
    $src_enc = Lingua::DetectCharset::Detect(join(' ', @{$body}));

    # Disable any translations if the email is not in cyrillic
    $dst_enc = 'VOL' if $src_enc eq 'ENG';
}
elsif ($src_param eq q{}) {
    # Trust the declared charset; anything unrecognised is treated as VOL.
    $src_enc = $ENC_OF_CHARSET{ $content_type->charset } || 'VOL';
}
else {
    $src_enc = $src_param;
}

my $dst_charset = $CHARSET_OF_ENC{$dst_enc};
my $is_converted = $src_enc ne 'ENG';

if ($is_converted) {
    # Each element is aliased, so the body is rewritten in place.
    for my $line (@{$body}) {
        $line = Convert::Cyrillic::cstocs($src_enc, $dst_enc, $line);
    }

    # Scalar context: only the first Subject line. A message without a
    # Subject is left without one instead of gaining an empty header.
    my $raw_subject = $header->get('Subject');
    if (defined $raw_subject) {
        my $subject = MIME::Words::decode_mimewords($raw_subject);
        $header->replace(
            'Subject',
            Convert::Cyrillic::cstocs(
                $src_enc,
                $translit_subject ? 'VOL' : $dst_enc,
                $subject,
            ),
        );
    }
}

$header->replace('X-Removed-Content-Type', $content_type->stringify);

# Only advertise a new charset when the body was actually converted, so that
# untranslated mail keeps its declared charset.
# NOTE(review): param() is the setter documented for MIME::Field::ParamVal;
# charset() is used in this script only as a reader - confirm against the
# installed MIME-tools version.
$content_type->param('charset', $dst_charset) if $is_converted;
$header->replace('Content-Type', $content_type->stringify);

$header->add('X-Converted-By',
    "$ScriptName v$ScriptVersion ($src_enc -> $dst_enc), get your own from http://www.neystadt.org/cyrillic/");

$msg->print();

__END__

=head1 EXAMPLES

On UNIX to create mail account B<to-win>, which automatically translates all
incoming mail, you can use the following:

    to-win: "|./cyr-mail-filter.pl -fAUTO -tWIN -s | resend my-email@jopa.org"

Good use also is to create majordomo mailing list that auto-transliterates
all incoming mail:

    ksp-lat: "|/usr/lib/majordomo/wrapper ./cyr-mail-filter.pl -fAUTO -tVOL -s | ./resend -l ksp-lat ksp-lat-resend"

=head1 PREREQUISITES

This script requires the C<Mail::Internet> (MailTools), C<MIME::Words>
(MIME-tools), C<Lingua::DetectCharset> and C<Convert::Cyrillic> modules
available from CPAN or at http://www.neystadt.org/cyrillic/.

=pod OSNAMES

any

=cut