#!/usr/bin/perl
use strict;
use warnings;
use MediaWiki::Bot;
use Data::Dumper;
use LWP;
use LWP::UserAgent;
use Encode;
use URI::Escape;
use Storable qw(nstore retrieve);
use Carp;
$SIG{__DIE__} = sub { confess; };
use encoding 'utf8';
my $wiki;
my $lwp;
my $WIKINAME = $ARGV[0] or die "usage: ./bot.pl <Wiki>";
my $conf;
if($WIKINAME eq "AK") {
$conf = {
'wiki' => { 'host' => 'wiki.vorratsdatenspeicherung.de', 'path' => '/' },
'bot' => { 'user' => 'Drahflow\'s Bot', 'pass' => 'geheim' },
};
} elsif($WIKINAME eq "Piraten") {
$conf = {
'wiki' => { 'host' => 'wiki.piratenpartei.de', 'path' => 'wiki/' },
'bot' => { 'user' => 'Drahflow\'s Bot', 'pass' => 'geheim' },
};
} else {
die "Unknown wiki: $WIKINAME";
}
sub cycleConnection {
$wiki->logout() if($wiki);
$wiki = MediaWiki::Bot->new({
'assert' => 'user',
'agent' => "Drahflow's Wiki Bot",
'protocol' => 'http',
'host' => $conf->{'wiki'}->{'host'},
'path' => $conf->{'wiki'}->{'path'},
'login_data' => {
'username' => $conf->{'bot'}->{'user'},
'password' => $conf->{'bot'}->{'pass'}
},
'debug' => 1000,
}) or die "Wiki init failed";
$lwp = LWP::UserAgent->new();
$lwp->agent("Drahflow's Wiki Bot");
}
cycleConnection();
while(my $command = <STDIN>) {
chomp $command;
last if($command eq "q" or $command eq "quit");
print "len: " . length($command) . "\n";
my $force = 0;
if ($command =~ /^!(.*)/) {
$force = 1;
$command = $1;
}
my $error;
do {
print "executing: $command\n";
eval {
dumpContent($1) if($command =~ /^DUMP ([^|]*)$/);
execTest() if($command eq 'TEST');
uploadSource() if($command eq 'CUPLOAD');
cleanupRedirect($1, $2, $3? 1: 0) if($command =~ /^CREDIR ([^|]*)\|?((?:del)?)\|?((?:auto)?)$/);
cleanupDoubleRedirect() if($command =~ /^CDBLREDIR$/);
checkout($1) if($command =~ /^MVOUT ([^|]*)$/);
checkin($1, $2) if($command =~ /^MVIN ([^|]*)\|?([^|]*)$/);
syncin($1, $2, $3) if($command =~ /^MVSYN ([^|]*)\|?([^|]*)\|?([^|]*)$/);
storein($1, $2) if($command =~ /^MVSTORE ([^|]*)\|([^|]*)$/);
copyout($1) if($command =~ /^GET ([^|]*)$/);
masscopyout($1,$2) if($command =~ /^MGET ([^|]*)\|?((?:follow)?)$/);
fetchCategoryMatrix($1, $2) if($command =~ /^GETC ([^|]*)\|(.*)$/);
checkToDoUsage() if($command =~ /^QTODO$/);
checkLanguageSync() if($command =~ /^QLANG$/);
moveCategory($1, $2) if($command =~ /^CMV ([^|]*)\|?([^|]*)$/);
addCategories($1, $2) if($command =~ /^CADD (.*)\|\|(.*)$/);
putCategoryMatrix($1, $2, $3) if($command =~ /^CPUT ([^|]*)(?:\|([^|]+))?(?:\|(auto|ask))?$/);
pirateBoardUpdate() if($command =~ /^CPP$/);
};
$error = $@;
print $error if $error
} while ($error and $force);
}
$wiki->logout();
sub loadSure {
my ($name, $mode) = @_;
die "no mode given" unless $mode;
my $content = $wiki->get_text($name);
unless(defined $content) {
die "could not load $name";
}
print "Page $name loaded.\n";
return $content;
}
sub loadCategorySure {
my ($name) = @_;
unless($name =~ /Kategorie:|Category:/) {
die "category name must be given with prefix";
}
my $req = HTTP::Request->new(
'GET' => 'http://' . $conf->{'wiki'}->{'host'} . '/' . $name . '?redirect=no');
my $res = $lwp->request($req);
if(not $res->is_success()) {
die "could not load $name";
}
my ($subcatsPart) = $res->content() =~ /\n<h2>Unterkategorien(.*?)\n<h2/s;
my ($articlesPart) = $res->content() =~ /\n<h2>Seiten in der Kategorie(.*?)\nVon/s;
my $subcats = [];
my $articles = [];
while(defined $subcatsPart and $subcatsPart =~ s/.*?<a href="\/([^"]+)" title="([^"]+)">//) {
push @$subcats, $2;
}
while(defined $articlesPart and $articlesPart =~ s/.*?<a href="\/([^"]+)" title="([^"]+)">//) {
push @$articles, $2;
}
print "Category $name loaded.\n";
return $articles, $subcats;
}
sub looksOffLimit {
my ($name) = @_;
my $result = (
$name =~ /Ortsgruppe/ or
$name =~ /^Benutzer/ or
$name =~ /^Presse:/
);
}
sub saveSure {
my ($name, $text, $summary, $minor) = @_;
if(looksOffLimit($name)) {
askConfirmation("Page " . $name . " looks like it should be left alone");
}
die "no summary given" unless $summary;
$wiki->edit({
'page' => $name,
'text' => $text,
'summary' => $summary,
'is_minor' => $minor? 1: 0,
}) or die "could not save " . $name;
print "Page " . $name . " saved.\n";
}
sub askConfirmation {
my ($message) = @_;
while(1) {
print "==> $message, continue [N/y]\n";
my $answer = <STDIN>;
chomp $answer;
if($answer eq '' or $answer eq 'n') {
die "User confirmation failed.";
}
if($answer eq 'y') {
last;
}
}
}
sub dumpContent {
my ($name) = @_;
die "no name given" unless $name;
my $text = loadSure($name, "r");
print Dumper($text);
}
sub execTest {
my $text = loadSure('Benutzer:Drahflow/Sandkasten', "rw");
print Dumper($text);
saveSure(
'Benutzer:Drahflow/Sandkasten',
$text . 'Minimaler Testlauf',
'Testing [[Benutzer:Drahflow]]\'s Bot',
);
}
sub uploadSource {
open SRC, '<', 'bot.pl' or die "cannot open bot.pl: $!";
my $text = ' <no' . 'wiki>' .
(join '', map { s/'pass' => 'geheim']*'/'pass' => 'geheim'/; $_ } <SRC>) .
'</no' . 'wiki>';
close SRC;
saveSure(
'Benutzer:Drahflow\'s Bot/Source',
$text,
'[[Benutzer:Drahflow\'s Bot|Mich selbst]] hochladen'
);
}
sub fetchIncoming {
my ($name) = @_;
my @incoming = map { $_->{'title'} } $wiki->what_links_here($name);
print Dumper(\@incoming);
return @incoming;
}
sub cleanupRedirect {
my ($name, $del, $auto) = @_;
die "no name given" unless $name;
my $content = loadSure($name, "r");
$content =~ m!#(?:redirect|weiterleitung):? ?\[\[([^\]|]+)( ?\|([^\]]+))?\]\]!i or die "could not find redirect";
my ($redirect, undef, $redirectDisplay) = ($1, $2, $3);
$redirectDisplay = $name unless defined $redirectDisplay;
$redirectDisplay =~ s/_/ /g;
print "Redirect to: $redirect, Display: $redirectDisplay\n";
my $fix = sub {
my ($in) = @_;
return if($auto and looksOffLimit($in));
my $content = loadSure($in, "rw");
my $any = 0;
my $mask = $name;
$mask =~ s/[ _]/[ _]/g;
$mask =~ s/\(/\\(/g;
$mask =~ s/\)/\\)/g;
$mask =~ s/\\/\\\\/g;
while($content =~ m!\[\[$mask(#[^ ]*)?( ?\|([^\]]+))?\]\]!s) {
my ($anchor, undef, $display) = ($1, $2, $3);
if(not defined $anchor) {
$anchor = '';
}
if(not defined $display) {
$display = $redirectDisplay;
}
print "Displayname: $display\n";
$content =~ s!\[\[$mask(#[^ ]*)?( ?(\|[^\]]+)?)\]\]![[$redirect$anchor\|$display]]!;
print "Link on $in fixed.\n";
++$any;
}
if($any) {
unless($auto) {
askConfirmation("Page $in will be saved");
}
saveSure(
$in,
$content,
"Weiterleitungs-Cleanup, Link von $name auf $redirect verbogen"
);
} else {
if($auto) {
warn "incoming link not found" if(not $any);
} else {
die "incoming link not found" if(not $any);
}
}
};
my @incoming = fetchIncoming($name);
my $templateFixed = 0;
foreach my $in (
sort { looksOffLimit($a) <=> looksOffLimit($b) }
grep { $_ =~ /^Vorlage:/ }
@incoming) {
&$fix($in);
$templateFixed = 1;
}
if($templateFixed) {
cycleConnection();
@incoming = fetchIncoming($name);
}
foreach my $in (
sort { looksOffLimit($a) <=> looksOffLimit($b) }
grep { $_ !~ /^Vorlage:/ }
@incoming) {
&$fix($in);
}
if($del) {
$content = loadSure($name, "rw");
if($content =~ m!^#redirect ?\[\[$redirect\]\]$!si) {
$content .= "\n{{Vorlage:Drahflow/Löschen/Weiterleitung}}";
print "Inserted deletion remark.\n";
}
askConfirmation("Page $name will be saved");
saveSure(
$name,
$content,
"Weiterleitungs-Cleanup, Weiterleitung zum Löschen eingetragen");
}
print "Done.\n";
}
sub cleanupDoubleRedirect {
my $req = HTTP::Request->new(
'GET' => 'http://' . $conf->{'wiki'}->{'host'} . '/Spezial:Doppelte_Weiterleitungen');
my $res = $lwp->request($req);
if(not $res->is_success()) {
die "could not load Spezial:Doppelte_Weiterleitungen";
}
my @names =
map { uri_unescape($_) }
map { $_ =~ qr{.*<li><a href="/index\.php\?title=([^"]+)&redirect=no".*?→.*?<a href="/index\.php\?title=([^"]+)&redirect=no".*}; ($1, $2) }
grep { /^<li>/ }
split /\n/, $res->content();
for(my $i = 0; $i < @names; ++$i) {
eval {
cleanupRedirect($names[$i], 0, 1);
};
warn if($@);
print "($i / " . scalar(@names) . ")\n";
}
print "Done.\n";
}
sub sanitizeFilename {
my ($name) = @_;
$name =~ s/-/--/g;
$name =~ s!/!-+!g;
return "checkout/$name";
}
sub checkout {
my ($name) = @_;
die "no name given" unless $name;
my $content = loadSure($name, "rw");
if($content =~ /{{ *InArbeit/ or
$content =~ /{{ *Vorlage: *InArbeit/) {
askConfirmation("Page $name is tagged with {{Vorlage:InArbeit}}");
}
my $origContent = $content;
$content =
"{{Vorlage:InArbeit|[[Benutzer:Drahflow]]}}\n" . $content;
saveSure($name, $content, "{{:Vorlage:InArbeit}} gesetzt", 1);
my $filename = sanitizeFilename($name);
open PAGE, '>:utf8', $filename or die "cannot open $filename: $!";
print PAGE $origContent;
close PAGE;
print "Done.\n";
}
sub copyout {
my ($name) = @_;
die "no name given" unless $name;
my $content = loadSure($name, "rw");
my $filename = sanitizeFilename($name);
open PAGE, '>:utf8', $filename or die "cannot open $filename: $!";
print PAGE $content;
close PAGE;
print "Done.\n";
}
sub masscopyout {
my ($filename,$follow) = @_;
die "no filename given" unless $filename;
my @failures;
open LIST, '<:utf8', $filename or die "cannot open $filename: $!";
my $i = 0;
while(my $name = <LIST>) {
chomp $name;
if($name =~ /%/) {
$name = decode('utf8', encode('utf8', uri_unescape(encode('utf8', $name))));
}
eval {
my $content = loadSure($name, "r");
if($follow and $content =~ m!#(?:redirect|weiterleitung):? ?\[\[([^\]|]+)( ?\|([^\]]+))?\]\]!i) {
$name = $1;
if($name =~ /%/) {
$name = decode('utf8', encode('utf8', uri_unescape(encode('utf8', $name))));
}
$content = loadSure($name, "r");
}
my $filename = sanitizeFilename(sprintf("%06d", $i));
open PAGE, '>:utf8', $filename or die "cannot open $filename: $!";
print PAGE $content;
close PAGE;
};
if($@) {
push @failures, [$i, $name];
}
$i++;
sleep 3;
}
close LIST;
print "Failures:\n";
foreach my $fail (@failures) {
printf "%06d: %s\n", @$fail;
}
print "Done.\n";
}
sub checkin {
my ($name, $reason) = @_;
die "no name given" unless $name;
die "no reason given" unless $reason;
my $filename = sanitizeFilename($name);
open PAGE, '<:utf8', $filename or die "cannot open $filename: $!";
my $origContent = join('', <PAGE>);
close PAGE;
my $content = loadSure($name, "rw");
if($content !~ /^{{Vorlage:InArbeit|\[\[Benutzer:Drahflow\]\]}}/s) {
askConfirmation("Page $name is not tagged as being edited by you");
}
saveSure($name, $origContent, $reason);
unlink $filename;
print "Done.\n";
}
sub syncin {
my ($filename, $name, $reason) = @_;
die "no file given" unless $filename;
die "no name given" unless $name;
die "no reason given" unless $reason;
while(1) {
eval {
open PAGE, '<:utf8', $filename or die "cannot open $filename: $!";
my $origContent = join('', <PAGE>);
close PAGE;
saveSure($name, $origContent, $reason);
print "Synced.\n";
};
if($@) {
print $@;
}
sleep 15;
}
}
sub storein {
my ($filename, $reason) = @_;
die "no file given" unless $filename;
die "no reason given" unless $reason;
my $data = retrieve($filename) or die "could not retrieve from $filename: $!";
foreach my $d (@$data) {
eval {
saveSure($d->{'wiki_location'}, $d->{'wiki_content'}, $reason);
print "Stored " . $d->{'wiki_location'} . "\n";
};
if($@) {
print $@;
}
}
}
sub getTemplateUsers {
my ($name) = @_;
return map { $_->{'title'} } $wiki->list_transclusions($name);
}
sub checkToDoUsage {
my @users = getTemplateUsers("Vorlage:ToDo");
my @problems;
foreach my $user (@users) {
my $content = loadSure($user, "r");
if($content =~ /{{Vorlage: *ToDo/) {
push @problems, $user;
}
}
foreach my $user (@problems) {
print "Problematic usage: $user\n";
}
print "Done.\n";
}
sub moveCategory {
my ($from, $to) = @_;
die "no from category given" unless $from;
die "no to category given" unless $to;
my ($articles, $subcats) = loadCategorySure($from);
my @problems;
foreach my $entry (@$articles, @$subcats) {
my $content = loadSure($entry, "rw");
my $success = 0;
if($content =~ /\[\[$to(?:\|[^\]|]*)?\]\]/) {
if($content =~ s/\[\[$from(?:\|[^\]|]*)?\]\]//) {
$success = 1;
}
} else {
if($content =~ s/\[\[$from((?:\|[^\]|]*)?)\]\]/[[$to$1]]/) {
$success = 1;
}
}
if($success) {
saveSure(
$entry,
$content,
"Kategorie-Umbenennung, von $from nach $to");
} else {
push @problems, $entry;
}
}
foreach my $entry (@problems) {
print "Problematic usage: $entry\n";
}
print "Done.\n";
}
sub fetchCategoryMatrix {
my ($name, $categories) = @_;
my @categories = split /\|/, $categories;
die "no category given" unless @categories;
my %articles;
my $filename = sanitizeFilename($name);
open OUTPUT, '>:utf8', $filename or die "cannot open $filename: $!";
foreach my $cat (@categories) {
my ($articles, $subcats) = loadCategorySure($cat);
print OUTPUT "$cat\n";
foreach my $article (@$articles, @$subcats) {
$articles{$article}->{$cat} = 1;
}
}
print OUTPUT "\n";
foreach my $article (sort keys %articles) {
foreach my $cat (@categories) {
print OUTPUT $articles{$article}->{$cat}? "x ": " ";
}
print OUTPUT " " . $article . "\n";
}
close OUTPUT;
print "Done.\n";
}
sub putCategoryMatrix {
my ($name, $reason, $mode) = @_;
die "no reason has been given" unless $reason;
my $filename = sanitizeFilename($name);
my @categories;
open INPUT, '<:utf8', $filename or die "cannot open $filename: $!";
while(my $line = <INPUT>) {
chomp $line;
last if($line eq "");
push @categories, $line;
}
my %articlesNew;
while(my $line = <INPUT>) {
chomp $line;
my $article = substr($line, 1 + 2 * @categories);
my $i = 0;
foreach my $cat (@categories) {
$articlesNew{$article}->{$cat} = (substr($line, $i, 1) eq 'x');
$i += 2;
}
}
close INPUT;
my %articlesCurrent;
foreach my $cat (@categories) {
my ($articles, $subcats) = loadCategorySure($cat);
foreach my $article (@$articles, @$subcats) {
$articlesCurrent{$article}->{$cat} = 1;
}
}
my @problems;
# only articles mentioned in the file are touched
foreach my $article (sort keys %articlesNew) {
my @toInsert;
my @toDelete;
# only categories mentioned in the file are touched
foreach my $newCat (@categories) {
next unless $articlesNew{$article}->{$newCat};
push @toInsert, $newCat unless $articlesCurrent{$article}->{$newCat};
}
# only categories mentioned in the file are touched
foreach my $curCat (@categories) {
unless($articlesCurrent{$article}->{$curCat}) {
next;
}
push @toDelete, $curCat unless $articlesNew{$article}->{$curCat};
}
if(@toInsert or @toDelete) {
print "Modifying $article\n" .
" Insert: @toInsert\n" .
" Delete: @toDelete\n";
if(defined $mode) {
if($mode eq 'ask') {
askConfirmation("$article will be changed as above");
}
my $content = loadSure($article, "rw");
my $success = 1;
my $catDisplay = undef;
foreach my $cat (@toDelete) {
my $catRegex = $cat;
$catRegex =~ s/ /[ _]/g;
if($content =~ s/\n\[\[$catRegex((?:\|[^\]|]*)?)\]\]\n/\n/s) {
$catDisplay = substr($1, 1);
} elsif ($content =~ s/\[\[$catRegex((?:\|[^\]|]*)?)\]\]//) {
$catDisplay = substr($1, 1);
} else {
push @problems, "$article <-> $cat\n";
$success = 0;
}
}
foreach my $cat (@toInsert) {
unless($content =~ /\[\[$cat((?:\|[^\]|]*)?)\]\]/) {
unless($content =~ /\n$/s) {
$content .= "\n";
}
if(defined $catDisplay) {
$content .= "[[$cat|$catDisplay]]";
} else {
$content .= "[[$cat]]";
}
}
}
if($success) {
saveSure($article, $content, "Kategorie-Zuordnungs-Upload, $reason");
}
}
}
}
foreach my $entry (@problems) {
print "Problematic usage: $entry\n";
}
}
sub checkLanguageSync {
my %users = map { ($_, $_) } getTemplateUsers("Vorlage:Mehrsprachig");
my @problems;
while(%users) {
my ($first) = keys %users;
my $content = loadSure($first, "r");
unless($content =~ /{{(Vorlage:)? *Mehrsprachig\b(.*)}}/s) {
delete $users{$first};
push @problems, "Could not find template call: $first";
next;
}
my $parameters = $2;
unless($parameters =~ /\bsynchronisiert *= *1/) {
delete $users{$first};
next;
}
my @otherPages;
push @otherPages, $1 if($parameters =~ /\bde *= *([^|{}]*)/s);
push @otherPages, $1 if($parameters =~ /\ben *= *([^|{}]*)/s);
@otherPages = grep { $_ ne $first } map { chomp; $_ } @otherPages;
if(@otherPages < 1) {
delete $users{$first};
push @problems, "Synchronization group of less than 2 on $first";
next;
}
OTHERS: foreach my $other (@otherPages) {
my @firstLines = split /\n/, $content;
my $otherContent = loadSure($other, "r");
unless($otherContent =~ /{{(Vorlage:)? *Mehrsprachig\b(.*)}}/s) {
push @problems, "Could not find template call: $other";
next;
}
my $parameters = $2;
unless($parameters =~ /\bsynchronisiert *= *1/) {
next;
}
my @otherLines = split /\n/, $otherContent;
if(@firstLines != @otherLines) {
push @problems, "Line counts differ between $first and $other";
last OTHERS;
}
@firstLines = map { length($_)? 1: 0; } @firstLines;
@otherLines = map { length($_)? 1: 0; } @otherLines;
for(my $i = 0; $i < @firstLines; ++$i) {
if($firstLines[$i] ne $otherLines[$i]) {
push @problems, "Line " . ($i + 1) .
" differs between $first and $other";
last OTHERS;
}
}
}
delete $users{$first};
foreach my $name (@otherPages) {
delete $users{$name};
}
}
foreach my $entry (@problems) {
print "$entry\n";
}
print "Done.\n";
}
sub addCategories {
my ($categories, $names) = @_;
my @categories = split(/\|/, $categories);
my @names = split(/\|/, $names);
die "no categories given" unless @categories;
die "no pages given" unless @names;
my %pagesInCat;
foreach my $cat (@categories) {
my $correctPages = loadCategorySure($cat);
$pagesInCat{$cat} = $correctPages;
}
foreach my $name (@names) {
my $content = loadSure($name, "rw");
my $changes = 0;
foreach my $cat (@categories) {
next if(grep { $_ eq $name } @{$pagesInCat{$cat}});
$content .= "\n[[$cat]]";
print "$cat added.\n";
$changes = 1;
}
if($changes) {
saveSure(
$name,
$content,
"Kategorie hinzugefügt");
}
}
print "Done.\n";
}
sub pirateBoardUpdate {
my @files = glob("/home/drahflow/piraten/vorstand/*-protokoll");
my %decisions;
foreach my $file (@files) {
open FILE, '<:utf8', $file or die "cannot read $file: $!";
my $lastLine;
while(my $line = <FILE>) {
chomp $line;
if($line =~ /(#\d\d\d\d-\d\d-\d\d.\d+)/) {
my $decision = $1;
if(length($line) < 80) {
$line = $lastLine . $line;
}
$line =~ s/$decision//g;
$decisions{$decision} = $line;
print $line, "\n";
}
$lastLine = $line;
}
close FILE;
}
my $content = loadSure("Landesverband_Niedersachsen/Vorstand/Beschlüsse", "rw");
foreach my $decision (sort keys %decisions) {
next if($content =~ $decision);
$content .= <<EOWIKI;
'''$decision''': ''Unkategorisiert'': $decisions{$decision}
EOWIKI
}
saveSure(
"Landesverband_Niedersachsen/Vorstand/Beschlüsse",
$content,
"Update der Vorstandsbeschlüsse");
$content = <<EOWIKI;
[[Kategorie:Landesverband Niedersachsen]]
'''Achtung''': Die Inhalte dieser Seite halte [[Benutzer:Drahflow|ich]] auf
meinem eigenen Rechner autorativ. Änderungen werden nach der nächsten
Vorstandstelko unbesehen gelöscht.
EOWIKI
my $name;
open FILE, '<:utf8', '/home/drahflow/piraten/vorstand/todo' or die "open fail: ~/piraten/vorstand/todo: $!";
while(my $line = <FILE>) {
chomp $line;
if($line =~ /^([A-Z].*)$/) {
if(defined $name) {
$content .= "}}\n";
}
$name = $1;
$content .= "{{Kasten grau|$name|\n";
} elsif($line =~ /^ +(.*)$/) {
my $task = $1;
$content .= "* $task\n";
}
}
if(defined $name) {
$content .= "}}\n";
}
close FILE;
saveSure(
"Landesverband_Niedersachsen/Vorstand/TODO",
$content,
"Update der VorstandsTODOs");
print "Done.\n";
}
#TODO: Something within a directory which is also a name of a category should
# belong to said category
#TODO: Nothing should belong to a category and also directly to some category
# above it.
#TODO: Everything should have a category
#TODO: Categories should not be cyclic