#!/usr/bin/perl

# Version of this script (package global).
$VERSION = '1.02';

# Cyrillic Mail Filter
# Version 1.02
# Part of "Cyrillic Software Suite"
# Get docs and newest version from
#       http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt <http://www.neystadt.org/john/>
# You may install this script on your site for free
# To obtain permission for redistribution or any other usage
#       contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.

=head1 NAME

cyr-mail-filter.pl v1.02 - Cyrillic Mail Filter to convert email across cyrillic charsets

=cut

use Mail::Internet;
use Mail::Field;
use MIME::Field::ContType;
use Lingua::DetectCharset;
use Convert::Cyrillic;
use Getopt::Std;

# Identification strings; they appear in the usage text and in the
# X-Converted-By header added to every processed message.
$VERSION       = '1.02';
$ScriptVersion = $VERSION;
$ScriptName    = "Cyrillic Mail Filter";

# Invoked without any command-line arguments: show the usage text.
Usage () unless @ARGV;

=head1 DESCRIPTION

Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).
Filters email messages and converts the content across cyrillic charsets according to the specified options.

This script addresses the following problems of cyrillization:

=over

=item *

Some people can not read cyrillic and need to get mail transliterated.

=item *

Most mail software does not set the B<Content-Type> header correctly.

=item *

Many mail readers can't handle cyrillic in B<Subject> field.

=back

=head1 SYNOPSIS

cyr-mail-filter.pl [B<-s>] B<-f>[<charset>] B<-t><charset>

=head1 OPTIONS

=over

=item -s

Always transliterate subject

=item -f{AUTO,WIN,KOI8,ISO}

If B<-f> is given without a charset, the _F_rom charset is taken from the 'Content-Type' header of the message.
Otherwise the header is ignored: AUTO detects the charset automatically from the email body, while WIN, KOI8 or ISO
enforce the given source charset.

=item -t{VOL,WIN,KOI8,ISO}

Convert text _T_o charset, where charset is one of WIN, KOI8, ISO or VOL (i.e. volapuk or translit).

=back

=cut

# Usage ()
#
# Prints the program banner and the command-line summary to STDERR and then
# terminates the script with exit status 1.  Takes no arguments and never
# returns.  Reads the globals $ScriptName and $ScriptVersion.
sub Usage {
	# The blank first line of the here-document is intentional: the message
	# starts with an empty line.
	print STDERR <<"END_OF_USAGE";

$ScriptName v$ScriptVersion. Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).
Filters email messages and converts the content across cyrillic charsets according to the specified options.

USAGE: <filter> [-s] -f[<charset>] -t<charset>

	-s			- Always transliterate _S_ubject.
	-f{AUTO,WIN,KOI8,ISO}	- If _F_rom charset is not specified, will use charset from 'Content-Type' header
					for conversions. Otherwise will ignore the header. AUTO will detect the charset
					automatically from email body. WIN, KOI8 or ISO will enforce the source charset
					accordingly.
	-t{VOL,WIN,KOI8,ISO}	- Convert text _T_o charset, where charset is one of WIN, KOI8, ISO or
					VOL (e.g. volapuk or translit).
END_OF_USAGE

	exit (1);
}

# ---------------------------------------------------------------------------
# Main program: read one message from STDIN, convert its body and Subject
# from the source encoding to the destination encoding, adjust the
# Content-Type header and print the resulting message.
# ---------------------------------------------------------------------------

# Only -t and -f carry a value.  -s is a plain flag and must not be listed
# here, otherwise it would swallow the argument that follows it.
my %opts;
getopt ("tf", \%opts);

# Getopt::Std cannot express an optional value, so a bare "-f" that is
# followed by another switch receives that switch as its value.  Hand the
# swallowed switch back to the parser and record -f as "given without a
# charset".
if (defined $opts {'f'} && $opts {'f'} =~ /^-/) {
	unshift @ARGV, $opts {'f'};
	$opts {'f'} = undef;
	getopt ("tf", \%opts);
}

my $paramTranslitSubj = exists $opts {'s'} ? 1 : 0;
my $paramDstEnc = $opts {'t'};
# 1 marks "-f given without a charset"; undef means -f was not given at all.
my $paramSrcEnc = exists $opts {'f'} ? ($opts {'f'} || 1) : undef;

my $msg = Mail::Internet->new (\*STDIN);

my $header = $msg->head ();
my $body = $msg->body ();

# Fall back to a plain-text Content-Type when the message carries none.
my $ContentType = Mail::Field->extract ('Content-Type', $header);
$ContentType = Mail::Field->new ('Content-Type', 'text/plain')
	if !defined $ContentType;

# Determine the source encoding.
my $SrcEnc;
if (!defined $paramSrcEnc) {
	# -f is mandatory, with or without a charset.
	Usage ();
} elsif ($paramSrcEnc eq 'AUTO') {
	$SrcEnc = Lingua::DetectCharset::Detect (join (' ', @{$body}));

	# Disable any translations if the email is not in cyrillic: the
	# conversion below is skipped for 'ENG', and VOL maps to no charset.
	$paramDstEnc = 'VOL' if $SrcEnc eq 'ENG';
} elsif ($paramSrcEnc eq 'WIN' || $paramSrcEnc eq 'KOI8' || $paramSrcEnc eq 'ISO') {
	$SrcEnc = $paramSrcEnc;
} elsif ($paramSrcEnc eq '1') {
	my %Cs2Enc = ('koi8-r' => 'KOI8', 'windows-1251' => 'WIN', 'iso-8859-5' => 'ISO');

	# Charset names are case-insensitive, so normalise before the lookup.
	my $charset = $ContentType->charset;
	$charset = defined $charset ? lc $charset : '';

	# Any charset not in the table is treated as VOL.
	$SrcEnc = $Cs2Enc {$charset} || 'VOL';
} else {
	Usage ();
}

# Determine the destination encoding and the charset name to advertise.
my ($DstCharset, $DstEnc);
my %Enc2Cs = ('KOI8' => 'koi8-r', 'WIN' => 'windows-1251', 'ISO' => 'iso-8859-5', 'VOL' => undef);

if (defined $paramDstEnc && exists $Enc2Cs {$paramDstEnc}) {
	$DstEnc = $paramDstEnc;
	$DstCharset = $Enc2Cs {$DstEnc};
} else {
	Usage ();
}

if ($SrcEnc ne 'ENG') {
	# $line aliases the array element, so the body is converted in place.
	for my $line (@{$body}) {
		$line = Convert::Cyrillic::cstocs ($SrcEnc, $DstEnc, $line);
	}

	# Convert the Subject only when there is one, so that a message without
	# a Subject does not get an empty one added.  With -s the Subject always
	# goes to VOL, whatever the destination encoding is.
	my $Subject = $header->get ('Subject');
	if (defined $Subject) {
		$header->replace ('Subject', Convert::Cyrillic::cstocs ($SrcEnc, $paramTranslitSubj ? 'VOL' : $DstEnc, $Subject));
	}
}

# Save the Content-Type as it was before the charset change, then advertise
# the destination charset.
$header->replace ('X-Removed-Content-Type', $ContentType->stringify);
$ContentType->charset ($DstCharset);
$header->replace ('Content-Type', $ContentType->stringify);
$header->add ('X-Converted-By', "$ScriptName v$ScriptVersion ($SrcEnc -> $DstEnc), get your own from http://www.neystadt.org/cyrillic/");

$msg->print ();

__END__

=head1 EXAMPLES

On UNIX to create mail account B<to-win>, which automatically translates all incoming mail, you can use the following:

	to-win: "|./cyr-mail-filter.pl -fAUTO -tWIN -s | resend my-email@jopa.org"

Another good use is to create a majordomo mailing list that auto-transliterates all incoming mail:

	ksp-lat: "|/usr/lib/majordomo/wrapper ./cyr-mail-filter.pl -fAUTO -tVOL -s | ./resend -l ksp-lat ksp-lat-resend"

=head1 PREREQUISITES

This script requires the C<MIME>, C<Mail>, C<Lingua::DetectCharset>, C<Convert::Cyrillic> and
C<HTTP::Headers::UserAgent> modules available from CPAN or at http://www.neystadt.org/cyrillic/.

=pod OSNAMES

any

=cut
