#!/usr/local/bin/perl

# 
# newsgrep: NetNews grep
# 
#     [1998/11/06] OSHIRO Naoki. pnspool をコピー
# 
#     $Log:$
#

#
# Todo:
#  [1998/12/24]
#  ・References を見て関連発言を出力する
#  ・@Newsgroup のみの出力？(grep -l 相当)
#  ・ヘッダの抑制出力/任意選択
#  ・指定日以降/以前の出力
#

$newsgrep_ver="0.01";

############################################################
#
require "open2.pl";
require "jcode.pl";
require "getopts.pl";

# Command-line switches (as described by the original author):
#   -i  ignore case              -l  list only
#   -b  verbose                  -h  head search only
#   -v PATTERN  ignore groups    -m N  (max article)
&Getopts("hm:v:ilb");

# From here on $opt_i holds the regex modifier letter, not a boolean.
$opt_i="i" if $opt_i;

# At least the search word is mandatory.
unless (@ARGV) {
    print STDERR "Usage: newsgrep word (groups...)\n";
    exit;
}

# First argument: the search word.  Remaining arguments: newsgroup
# patterns, OR-ed together into one regex.  With no group argument the
# pattern '.' is used, which matches every non-empty line of the group list.
($search_word, @g)=@ARGV;
$search_group=@g ? join('|', @g) : '.';
$ignore_group=$opt_v;

# Target code passed to jcode'convert before searching.
$jcode='euc';
# NNTPSERVER from the environment wins over the built-in default host.
$nntpserver=$ENV{NNTPSERVER} || "news.cc.u-ryukyu.ac.jp";
#$newsrc="$ENV{HOME}/.newsrc";
#$newsrc="$ENV{HOME}/.newsrc";

############################################################
#
# Main session: talk NNTP to $nntpserver through a telnet child process,
# collect the group list, scan the articles, then say QUIT and drain the
# connection.
#
# NOTE(review): $nntpserver (possibly taken from the NNTPSERVER environment
# variable) is interpolated into a shell command line unescaped.
$pid=&open2(IN, OUT, "telnet $nntpserver nntp") || die "Cannot connect to $nntpserver.\n";

# Skip whatever precedes the server greeting.  Both 200 (posting allowed)
# and 201 (no posting allowed) mean the server is ready; accepting only 200
# would leave the script waiting forever on a read-only server.
while (1) {
    if (eof(IN)) {
	# Connection closed before any greeting: report on STDERR and
	# exit with a failure status.
	print STDERR "Cannot login to NNTP server ($nntpserver).\n";
	exit 1;
    }
    $_=<IN>;
    last if (/^(200|201) /);
    # NOTE(review): lines containing "211 "/"500 " or starting with "." are
    # echoed back to the server; the purpose is not evident from this file.
    if (/(211|500) |^\./) {
	print OUT "$_\n";
    }
}
&get_group_list;
&get_article;
print OUT "QUIT\n";
# Read until the server closes the connection.
while (<IN>) {
    ;
}
close(IN);
close(OUT);

############################################################
#
# get_group_list
#
# Sends LIST on OUT and fills the global hash %group from the reply read
# on IN.  Each selected line "name max min mark" is stored as
#     $group{name} = "max min mark"
# A line is selected when it matches $search_group and, if $ignore_group
# is set, does not match $ignore_group.
#
# Takes no arguments and returns nothing useful; on a non-215 status the
# status line is reported on STDERR and %group is left empty.
sub get_group_list {
    local($group, $max, $min, $mark);
    # Start from a truly empty hash (assigning "" would create an entry
    # whose key is the empty string).
    %group=();
    $|=1;
    print OUT "LIST\n";

    # The first reply line is the status line, not a newsgroup; consume it
    # here so that it can never be stored as a group named "215".
    return if (eof(IN));
    $_=<IN>;
    unless (/^215 /) {
	print STDERR "Error: $_";
	return;
    }

    while (<IN>) {
	last if /^\./;		# "." terminates the list
	chop;
	next if ($ignore_group && /$ignore_group/);
	next unless /$search_group/;
	($group, $max, $min, $mark)=split(' ', $_);
	$group{$group}="$max $min $mark";
    }

# Disabled code kept from the original: would add groups from $newsrc.
if (0) {
    open(F, "<$newsrc") || return;
    while (<F>) {
	next unless /^(.+)[:!]\s*(.+)$/;
	($group, $read)=($1, $2);
	next unless /^ura\./;
#	print OUT "GROUP $group\n";
#	$_=<IN>;
#	($code, $num, $min, $max)=split(' ', $_);
	$group{$group}="$max $min y";
    }
    close(F);
}
}

# get_article
#
# For every newsgroup in the global %group: select it with GROUP, then for
# each article number from min to max fetch the headers (HEAD) and the body
# (BODY) and hand the accumulated text to search_output.
#
# Reads/writes the NNTP connection on IN/OUT.  $group and $article are
# deliberately package variables: they are passed to search_output as
# typeglobs.  Returns early (nothing) if the connection reaches EOF.
#
# Options used: $opt_m (limit of the article range), $opt_b (progress on
# STDERR), $opt_h (additionally search right after the headers).
sub get_article {
    local($i, $cnt, $group, $max, $min, $mark);
    local(%mess_id);
    foreach $group (keys(%group)) {
	# An empty name cannot be a newsgroup; do not send "GROUP " at all.
	next if ($group eq "");

	print OUT "GROUP $group\n";
	return if (eof(IN));
	# The server answers GROUP with exactly one status line.  Waiting
	# for further lines after a failure would block forever.
	$_=<IN>;
	unless (/^211 /) {
	    print STDERR "Error: ${_}Error: $group\n";
	    next;
	}

	($max, $min, $mark)=split(' ', $group{$group});
	($max, $min)=($max+0, $min+0);
	$path=&open_group_path($group);
	$|=1;
	# NOTE(review): this keeps articles $min .. $min+$opt_m, i.e. up to
	# $opt_m+1 of the lowest-numbered ones -- confirm that is intended.
	if ($opt_m) {
	    $max_pre=$min+$opt_m;
	    $max=$max_pre if ($max > $max_pre);
	}
	print STDERR "$group($min,$max)" if $opt_b;
	for ($i=$min; $i<=$max; $i++) {
	    # Fetch the headers.
	    print OUT "HEAD $i\n";
	    return if (eof(IN));
	    $_=<IN>;
	    next unless /^22\d /;
	    $article="";
	    $mess_id="";
	    while (<IN>) {
		last if /^\..?$/;
		$article.=&strdecode64($_);
		$mess_id=$1 if /Message-ID: <(.+)>/;
	    }
	    $article.="\n";
	    # Skip articles already seen under the same Message-ID.
	    next if ($mess_id && $mess_id{$mess_id}++>0);

	    # Output on a header match (only with $opt_h).
	    # NOTE(review): the body is still fetched and searched below, so
	    # with -h a matching article can be printed twice.
	    &search_output(*group, *article, $search) if $opt_h;

	    # Fetch the article body.
	    print OUT "BODY $i\n";
	    return if (eof(IN));
	    $_=<IN>;
	    next unless (/^22\d /);
	    $cnt=0;
	    while (1) {
		return if (eof(IN));
		$_=<IN>;
		last if /^\.\xd?$/;
		# Undo NNTP dot-stuffing: a data line starting with "." is
		# transmitted with one extra leading ".".
		s/^\.\././;
		$article.=$_;
		if ($cnt++ == 100) {
		    print STDERR "." if $opt_b;
		    $cnt=0;
		}
	    }
	    # Output the article if it matches.
	    &search_output(*group, *article, $search);
	}
	print STDERR "\n" if $opt_b;
    }
}

# search_output(*group, *article, $search)
#
# Searches $article for the global $search_word (case-insensitively when
# $opt_i is set) and reports a hit on STDOUT:
#   - with $opt_l: prints the group name and leaves the caller's article
#     loop via "last" (one line per group);
#   - otherwise: prints a "From " separator line followed by the article.
#
# $group and $article are received as typeglobs; $search is accepted for
# compatibility but not used.  The global $tmp_article is set to the
# unconverted article text, as before.
sub search_output {
    local(*group, *article, $search)=@_;
    local($euc_article, $hit);

    $tmp_article=$article;
      # [1998/11/18]
      # Text that stays converted to EUC by jcode comes out garbled when it
      # is printed, yet without conversion it cannot be searched.  Hence the
      # search runs on a private converted copy and the caller's $article is
      # left untouched (it used to be converted in place, so with -h the
      # header part was converted a second time together with the body).
      # A MIME-encoded subject cannot be converted.
    $euc_article=$article;
    &jcode'convert(*euc_article, $jcode) if $jcode;

    # eval only shields against an invalid user-supplied pattern: such a
    # pattern simply never matches.
    $hit=eval {
	$opt_i ? $euc_article=~/$search_word/i : $euc_article=~/$search_word/;
    };
    if ($hit) {
	if ($opt_l) {
	    print "$group\n";
	    # Not inside a loop of this sub: this ends the innermost loop
	    # running in the caller.
	    last;
	} else {
            print &make_mailheader();
	    print "$tmp_article\n";
	}
    }
}

# open_group_path($group)
#
# Maps a dotted newsgroup name to a relative path, one component per name
# part: "a.b.c" becomes "./a/b/c"; an empty name yields ".".
# The name parts are also left in the global @path.  Nothing is created on
# disk (the mkdir step of the original was already commented out).
sub open_group_path {
    my ($group)=@_;
    @path=split(/\./, $group);
    return join("/", ".", @path);
}

# make_mailheader($owner, $date)
#
# Returns the separator line "From OWNER  DATE\n" that is printed in front
# of every matching article.
#   $owner  sender name; defaults to "newsgrep" when empty.
#   $date   date text; defaults to the current local time formatted as
#           "Www Mmm dd hh:mm:ss yyyy" (day space-padded, time zero-padded).
#
# All time fields are lexical, so the caller's $min, $hour, $year, ... are
# no longer overwritten as a side effect.
sub make_mailheader {
    my ($owner, $date)=@_;

    $owner="newsgrep" unless ($owner);
    unless ($date) {
        my ($sec, $min, $hour, $mday, $mon, $year, $wday)=localtime(time);
        my $week=(qw(Sun Mon Tue Wed Thu Fri Sat))[$wday];
        my $month=(qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec))[$mon];
        $date=sprintf("%s %s %2d %02d:%02d:%02d %d",
                      $week, $month, $mday, $hour, $min, $sec, $year+1900);
    }
    return "From $owner  $date\n";
}

############################################################
#
# MIME library from m2m package: yoneyama@bbs.co.jp
# strdecode64(string)
#
# Returns STRING with every "=?ISO-2022-JP?B?....?=" encoded word replaced
# by the base64-decoded payload (via decode64); all other text is copied
# unchanged.  Charset and encoding tokens are matched case-insensitively.
# An encoded word without a closing "?=" extends to the end of the string.
sub strdecode64 # string
{
    my ($src) = @_;
    my $result = "";
    my $rest = $src;
    my ($end, $payload, $tmp);

    # Peel off one "plain text + encoded word" pair per iteration.
    while ($rest =~ /^(.*?)=\?iso-2022-jp\?b\?(.*)$/is) {
        $result .= $1;
        $rest = $2;
        $end = index($rest, "?=");
        if ($end < 0) {
            $payload = $rest;
            $rest = "";
        } else {
            $payload = substr($rest, 0, $end);
            $rest = substr($rest, $end + 2);
        }
        $tmp = &decode64($payload);
        # Drop a trailing NUL byte left over from base64 padding.
        if (length($tmp) > 0 && substr($tmp, -1) eq "\0") {
            chop($tmp);
        }
        $result .= $tmp;
    }
    return($result . $rest);
}

# strencode64(string)
#
# Returns STRING with every run that starts at an "ESC $ x" sequence and
# ends with the next "ESC ( x" sequence (or at the end of the string)
# replaced by "=?ISO-2022-JP?B?" + encode64(run) + "?=".  Everything
# outside such runs is copied character by character.
sub strencode64 # string
{
    my ($src) = @_;
    my $total = length($src);
    my $result = "";
    my $pos = 0;
    my $end;

    while ($pos < $total) {
        # Ordinary character: copy it and move on.
        unless (substr($src, $pos, 3) =~ /^\033\$./) {
            $result .= substr($src, $pos, 1);
            $pos++;
            next;
        }

        # Escaped run: find where it ends, closing sequence included.
        $end = $pos;
        while ($end < $total) {
            if (substr($src, $end, 3) =~ /^\033\(./) {
                $end += 3;
                last;
            }
            $end++;
        }
        $result .= "=?ISO-2022-JP?B?"
                 . &encode64(substr($src, $pos, $end - $pos))
                 . "?=";
        $pos = $end;
    }
    return($result);
}

# decode64(string)
#
# Decodes base64 text and returns the resulting byte string.
# Decoding stops at the first character outside the base64 alphabet
# (this includes "=" padding and line ends) or at the end of the input.
# A final group of 2 or 3 symbols yields 1 or 2 bytes; a single left-over
# symbol carries fewer than 8 bits and yields nothing.
sub decode64 # string
{
    my ($src) = @_;
    my $base64 =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    my $result = "";
    my ($i, $ch, $value, @sextet);

    for ($i = 0; $i < length($src); $i += 4) {
        # Collect the valid 6-bit values of this group of up to four symbols.
        @sextet = ();
        foreach $ch (split(//, substr($src, $i, 4))) {
            $value = index($base64, $ch);
            last if ($value < 0);
            push(@sextet, $value);
        }

        last if (@sextet < 2);
        $result .= pack("C", ($sextet[0] << 2) | ($sextet[1] >> 4));
        last if (@sextet < 3);
        $result .= pack("C", (($sextet[1] & 0x0f) << 4) | ($sextet[2] >> 2));
        last if (@sextet < 4);
        $result .= pack("C", (($sextet[2] & 0x03) << 6) | $sextet[3]);
    }
    return($result);
}

# encode64(string)
#
# Returns the base64 encoding of STRING (treated as a sequence of bytes),
# padded with "=" to a multiple of four characters, on a single line.
# Empty input yields the empty string.
sub encode64 # string
{
    my ($src) = @_;
    my $base64 =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    my $result = "";
    my $total = length($src);
    my ($i, $left, $c1, $c2, $c3);

    for ($i = 0; $i < $total; $i += 3) {
        $left = $total - $i;            # bytes still to encode, >= 1
        $c1 = unpack("C", substr($src, $i, 1));
        $result .= substr($base64, $c1 >> 2, 1);

        if ($left == 1) {
            # One byte left: two symbols plus two pad characters.
            $result .= substr($base64, ($c1 & 3) << 4, 1);
            $result .= "==";
            last;
        }

        $c2 = unpack("C", substr($src, $i + 1, 1));
        $result .= substr($base64, (($c1 & 3) << 4) | ($c2 >> 4), 1);

        if ($left == 2) {
            # Two bytes left: three symbols plus one pad character.
            $result .= substr($base64, ($c2 & 0x0f) << 2, 1);
            $result .= "=";
            last;
        }

        $c3 = unpack("C", substr($src, $i + 2, 1));
        $result .= substr($base64, (($c2 & 0x0f) << 2) | ($c3 >> 6), 1);
        $result .= substr($base64, $c3 & 0x3f, 1);
    }
    return($result);
}

# end of newsgrep

