User:CLT20RecordsUpdateBot/Source/update.php

<?php
 
ini_set('display_errors', 0);
ini_set('max_execution_time', 2500);

# Log every PHP error except notices as one pipe-delimited line in error_log.txt:
#     code|message|file|line
# The message is escaped so that it cannot contain a field separator ("|") or a raw line break.
set_error_handler(
    function($code, $msg, $file, $line) {
        if ( strpos($msg, 'DOMDocument') !== false ) {  # Do not log HTML parsing warnings
            return false;  # Returning false passes the error on to PHP's standard error handler
        }
        file_put_contents(
            'error_log.txt',
            # "\r\n" is replaced before "\n" so that a Windows line break becomes a single <br />
            $code . '|' . (str_replace(['&', '|', "\r\n", "\n"], ['&amp;', '&#124;', '<br />', '<br />'], $msg)) . '|' . $file . '|' . $line . "\r\n",
            FILE_APPEND
        );
    }, E_ALL ^ E_NOTICE
);
 
# Unless this run resumes a previously failed edit, start from a clean slate:
# remove the status log, the error log and the backup of the unsaved wikitext.
if ( ! @$_GET['resume'] ) {
    foreach ( ['status.txt', 'error_log.txt', 'edit_failed_backup.txt'] as $staleFile ) {
        if ( file_exists($staleFile) ) {
            unlink($staleFile);
        }
    }
    unset($staleFile);  # Do not leave the loop variable behind in the global scope
}
 
# Send one request to the English Wikipedia API and return the raw response body.
#
# A random "requestid" parameter is added to every request (to the body for POST requests,
# to the query string otherwise). The function pauses for 3 seconds after each request.
#
# $method   - HTTP method ('GET' or 'POST')
# $headers  - header lines ("Name: value"); array keys are ignored, so string keys are allowed
# $getdata  - parameters for the query string
# $postdata - parameters for the request body (only sent with POST requests)
#
# Returns the response body as a string, or false if the request failed.
function queryWikiAPI($method, $headers = [], $getdata = [], $postdata = []) {

    $wikiAPIPath = 'https://en.wikipedia.org/w/api.php';

    # Add a request ID
    if ( $method == 'POST' ) {
        $postdata['requestid'] = mt_rand();
    }
    else {
        $getdata['requestid'] = mt_rand();
    }

    $httpOptions = [
        'method' => $method,
    ];

    # Only POST requests carry a body (and the headers describing it)
    if ( $method == 'POST' ) {
        # Encode the body once so that Content-Length always describes the bytes actually sent.
        # The separator is given explicitly because http_build_query() otherwise falls back to
        # the arg_separator.output ini setting, which may not be a plain "&".
        $requestBody = http_build_query($postdata, '', '&');

        $headers[] = 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8';
        $headers[] = 'Content-Length: ' . strlen($requestBody);

        $httpOptions['content'] = $requestBody;
    }

    $httpOptions['header'] = implode("\r\n", $headers);

    $uri = $wikiAPIPath . ($getdata ? ('?' . http_build_query($getdata, '', '&')) : '');

    $result = file_get_contents($uri, false, stream_context_create(['http' => $httpOptions]));
    sleep(3);

    return $result;

}
 
# Header lines sent with every API request
$wikiAPIRequestHeaders = [];
$wikiAPIRequestHeaders[] = 'Accept: text/xml';
$wikiAPIRequestHeaders[] = 'DNT: 1';
$wikiAPIRequestHeaders[] = 'User-Agent: ';  # Sensitive information removed

# Moment at which this run started (Unix timestamp)
$startTime = time();
 
# Log in
# Log the bot in through the two-step "action=login" flow of the API.
#
# The first request yields a login token, a cookie prefix and a session ID. The second request
# repeats the credentials together with that token. On success the session and user cookies
# are stored in $wikiAPIRequestHeaders['cookie'], so later requests made with these headers
# carry them. The script is ended with die() as soon as any step fails.
#
# Globals written: $wikiAPIRequestHeaders, $username, $password
function CLT20RecordsUpdateBot_login() {

    global $wikiAPIRequestHeaders, $wikiAPIEditToken, $username, $password;

    # Username and password
    $username = 'CLT20RecordsUpdateBot';
    $password = '';  // Password removed

    # Validate a response of action=login and return its <login> element.
    # Ends the script if the request failed, the response is not valid XML,
    # the API reported an error, or the <login> element is missing.
    $readLoginElement = function($response) {
        if ( $response === false ) {
            die('Failed to log in: Query to Wikipedia API failed');
        }

        $XMLDOMDoc = new DOMDocument();
        if ( $response === '' || ! $XMLDOMDoc->loadXML($response) ) {
            die('Failed to log in: Invalid response from Wikipedia API');
        }

        $errors = $XMLDOMDoc->getElementsByTagName('error');
        if ( $errors->length ) {
            $errorCode = $errors->item(0)->getAttribute('code');
            $errorMessage = $errors->item(0)->getAttribute('info');
            die("[{$errorCode}] {$errorMessage}");
        }

        $loginElement = $XMLDOMDoc->getElementsByTagName('login')->item(0);
        if ( ! $loginElement ) {  # Would otherwise cause a fatal error on the first getAttribute() call
            die('Failed to log in: Unexpected response from Wikipedia API');
        }

        return $loginElement;
    };

    # First request: obtain the login token and the session
    $loginInfo = $readLoginElement(queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'login',
            'lgname' => $username,
            'lgpassword' => $password,
        ]
    ));

    $cookiePrefix = $loginInfo->getAttribute('cookieprefix');
    $sessionID = $loginInfo->getAttribute('sessionid');
    $loginToken = $loginInfo->getAttribute('token');

    # Construct the sessionID cookie

    $wikiAPIRequestHeaders['cookie'] = "Cookie: {$cookiePrefix}_session={$sessionID}";
    # Use a unique 'cookie' key rather than a numeric key, so that additional headers can be added to $wikiAPIRequestHeaders
    # without deleting this one. It does not break the implode() function used to assemble the headers

    # Send a second request with the login token
    $loginInfo = $readLoginElement(queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'login',
            'lgname' => $username,
            'lgpassword' => $password,
            'lgtoken' => $loginToken,
        ]
    ));

    $loginResult = $loginInfo->getAttribute('result');
    if ( $loginResult != 'Success' ) {
        die("Login unsuccessful (result: {$loginResult})");
    }

    $loginUserName = $loginInfo->getAttribute('lgusername');
    $loginUserID = $loginInfo->getAttribute('lguserid');
    $loginToken = $loginInfo->getAttribute('lgtoken');

    # Set additional cookies after login

    $wikiAPIRequestHeaders['cookie'] .= "; {$cookiePrefix}UserName={$loginUserName}; {$cookiePrefix}UserID={$loginUserID}; {$cookiePrefix}Token={$loginToken}";

}
CLT20RecordsUpdateBot_login();

 
# From this point on the bot is logged in: register a shutdown callback that sends
# an "action=logout" request whenever the script ends, whatever the reason
register_shutdown_function(
    function() {
        $logoutQuery = [
            'format' => 'xml',
            'action' => 'logout',
        ];
        # Read the headers when the callback runs, so that the cookies set at login are included
        queryWikiAPI('GET', $GLOBALS['wikiAPIRequestHeaders'], $logoutQuery);
    }
);
 
 
# Get the text of the page, the latest revision timestamp and edit token
$PageTitle = 'List of Champions League Twenty20 records and statistics';

# Fetch everything needed to edit $PageTitle and store it in globals:
#   $wikiAPIEditToken     - edit token for the page
#   $PageText             - wikitext of the latest revision
#   $PageLatestRevisionTS - timestamp of the latest revision (sent as "basetimestamp" when editing)
#
# Before that, the bot's "new messages" flag is queried; the script stops if the flag is set.
# A failure of that query only raises a warning. A failure to obtain the page info ends the script.
function CLT20RecordsUpdateBot_getPageInfo() {

    global $wikiAPIRequestHeaders, $wikiAPIEditToken, $PageTitle, $PageText, $PageLatestRevisionTS, $username, $password;

    # Before proceeding, check for any new messages on the user talk page
    $hasNewMessagesResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,
        [
            'format' => 'xml',
            'action' => 'query',
            'meta' => 'userinfo',
            'uiprop' => 'hasmsg',
        ]
    );

    # Nothing in this check may stop the script except an actual new message, so the response
    # is only parsed when there is one, and every element is verified before it is used
    if ( $hasNewMessagesResult === false ) {
        trigger_error('Cannot get info about new talk page messages: Query to Wikipedia API failed', E_USER_WARNING);
    }
    else {
        $XMLDOMDoc = new DOMDocument();

        if ( $hasNewMessagesResult === '' || ! $XMLDOMDoc->loadXML($hasNewMessagesResult) ) {
            trigger_error('Cannot get info about new talk page messages: Invalid response from Wikipedia API', E_USER_WARNING);
        }
        elseif ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
            $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
            $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
            trigger_error("Cannot get info about new talk page messages: Error: [{$errorCode}] {$errorMessage}", E_USER_WARNING);
        }
        else {
            $userInfo = $XMLDOMDoc->getElementsByTagName('userinfo')->item(0);

            if ( $userInfo && $userInfo->hasAttribute('messages') ) {
                die('New message on user talk page (<a href="https://en.wikipedia.org/wiki/User_talk:' . urlencode($username) . '" target="_blank">view</a> | '
                    . '<a href="https://en.wikipedia.org/w/index.php?title=User_talk:' . urlencode($username) . '&amp;diff=cur" target="_blank">last edit</a>)');
            }
        }
    }

    $getPageInfoResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,
        [
            'action' => 'query',
            'format' => 'xml',
            'prop' => 'info|revisions',
            'titles' => $PageTitle,
            'intoken' => 'edit',
            'rvprop' => 'content|timestamp'
        ]
    );
    if ( $getPageInfoResult === false ) {
        die('Failed to obtain page text: Query to Wikipedia API failed');
    }

    $XMLDOMDoc = new DOMDocument();
    if ( $getPageInfoResult === '' || ! $XMLDOMDoc->loadXML($getPageInfoResult) ) {
        die('Failed to obtain page text: Invalid response from Wikipedia API');
    }

    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        die("[{$errorCode}] {$errorMessage}");
    }

    $pagesInfo = $XMLDOMDoc->getElementsByTagName('pages')->item(0);
    $pageInfo = $pagesInfo ? $pagesInfo->getElementsByTagName('page')->item(0) : null;
    if ( ! $pageInfo ) {
        die('Failed to obtain page text: Unexpected response from Wikipedia API');
    }

    # Stop if the page is missing
    if ( $pageInfo->hasAttribute('missing') ) {
        die('Failed to obtain page text (page does not exist or has been deleted)');
    }

    # Get the edit token
    # A token that consists only of "+\" or does not contain "+\" at all is rejected
    $wikiAPIEditToken = $pageInfo->getAttribute('edittoken');
    if ( $wikiAPIEditToken == '+\\' || strpos($wikiAPIEditToken, '+\\') === false ) {
        die('Bad edit token obtained');
    }

    $revisionInfo = $pageInfo->getElementsByTagName('rev')->item(0);
    if ( ! $revisionInfo ) {
        die('Failed to obtain page text: No revision returned by Wikipedia API');
    }

    # textContent is an empty string for an empty revision, whereas reading the first
    # child node would fail because such a <rev> element has no child nodes
    $PageText = $revisionInfo->textContent;
    $PageLatestRevisionTS = $revisionInfo->getAttribute('timestamp');

}

CLT20RecordsUpdateBot_getPageInfo();
 
# A redirect page must not be edited
if ( preg_match('/^#\s*+REDIRECT\s*+\[\[.*\]\]/isu', $PageText) === 1 ) {
    die('Redirect page obtained');
}

# Honour {{bots}} and {{nobots}} templates. The bot is excluded when any of these holds:
$botIsExcluded =
    # 1. {{nobots}}, or {{bots}} with deny=all or allow=none
    preg_match('/\{\{\s*+(?:[Nn]obots|[Bb]ots\s*+\|(?:.*?\|)?(?:deny\s*+\=\s*+all|allow\s*+\=\s*+none))/su', $PageText)
    # 2. {{bots}} with a deny list that names this bot
    || preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?deny\s*+\=(?:[^\|]*?,)?\s*+CLT20RecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText)
    # 3. {{bots}} with an allow list that does not name this bot
    || (
        preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=[^\|]*?(?:\||\}\})/su', $PageText)
        && ! preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=(?:[^\|]*?,)?\s*+CLT20RecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText)
    );

if ( $botIsExcluded ) {
    die('A {{bots}} or {{nobots}} template does not allow CLT20RecordsUpdateBot to edit this page');
}
unset($botIsExcluded);
 
 
# If the "resume" GET parameter is true, get the text of the backup file and use it to edit.
# This backup file is saved in the event of an edit conflict or other error when editing
# so that all updates do not have to be redone in the next attempt.
if ( ! empty($_GET['resume']) ) {
    # Check for the file first so that a missing backup does not also raise a PHP warning
    $PageText = is_file('edit_failed_backup.txt') ? file_get_contents('edit_failed_backup.txt') : false;
    if ( $PageText === false ) {
        die("Cannot find the backup file");
    }

    # Set the edit conflict detection time to the start time of the script.
    # The value must be an ISO 8601 timestamp (dashes in the date part), and because the
    # trailing "Z" denotes UTC it has to be produced with gmdate() rather than date()
    $PageLatestRevisionTS = gmdate('Y-m-d\TH:i:s\Z', $startTime);

    # The edit function reads the global $endTime to compute the duration shown in the edit
    # summary. It is normally set after the updates have run, a step which this branch skips
    $endTime = time();

    CLT20RecordsUpdateBot_editPage();

    # Only reached if the edit succeeded (the edit function ends the script otherwise)
    unlink('edit_failed_backup.txt');
    exit;
}
 
 
# Encode areas which should not be edited
# These will be decoded with html_entity_decode() before the wikitext is sent back to the server
#
# preg_replace_callback() returns null when the regex engine fails (for example when a PCRE limit
# is hit). Each result is therefore checked, so that the page text can never silently become
# null and be saved as an empty page.

# HTML comments
$PageText = preg_replace_callback('/\<\!--(.*?)--\>/us',
                                    function($match) {
                                        return '<!--' . str_replace(['&', '<', '>', '{', '}', '|', '!', '='],
                                                                    ['&amp;', '&lt;', '&gt;', '&#123;', '&#125;', '&#124;', '&#33;', '&#61;'],
                                                                    $match[1]) . '-->';
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode HTML comments in the page text (PCRE error code ' . preg_last_error() . ')');
}

# Tags where wikitext is not parsed
$PageText = preg_replace_callback('/(\<(nowiki|pre|math|source|syntaxhighlight)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us',  # Allow attributes only if there is a space after the tag name
                                    function($match) {
                                        return $match[1] . str_replace(['&', '<', '>', '{', '}', '|', '!', '='],
                                                                       ['&amp;', '&lt;', '&gt;', '&#123;', '&#125;', '&#124;', '&#33;', '&#61;' ],
                                                                       $match[3]) . '</' . $match[2] . '>' ;
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode unparsed tags in the page text (PCRE error code ' . preg_last_error() . ')');
}

# Characters in template calls which may conflict with header and table syntax
# (the pattern is recursive, so nested template calls are matched together with the outer call)
$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?<!\{)\{|\}(?!\})|(?R))*?\}\}/u',
                                    function($match) {
                                        return str_replace(['&', '|', '!', '='], ['&amp;', '&#124;', '&#33;', '&#61;'], $match[0]);
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode template calls in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# Page text is obtained and encoded, now update it

$updateStartTime = time();

include 'StatsUpdateFunctions.php';

# Filter the stats GET parameter
# Remove non-existent function names and place valid ones in correct order
# (the order is that of the keys of $StatsUpdateFunctions, not that of the request).
# A missing or non-string parameter is treated as an empty request instead of being passed to explode()
$requestedStats = ( isset($_GET['stats']) && is_string($_GET['stats']) )
    ? explode('|', $_GET['stats'])
    : [];

$StatsToUpdate = array_values(array_intersect(
    array_keys($StatsUpdateFunctions),
    $requestedStats
));

# Start updating
foreach ( $StatsToUpdate as $funcName ) {

    # An exception thrown by one update function is logged and counted as a failure of that
    # function only; the remaining functions still run
    try {
        $funcCallResult = call_user_func($StatsUpdateFunctions[$funcName]);
    }
    catch ( Exception $error ) {
        trigger_error('Exception thrown: <div class="exception-msg">' . $error->getMessage() . "</div>in function {$funcName}", E_USER_WARNING);
        $funcCallResult = false;
    }

    # One line per function: name|1 if the result was truthy, name|0 otherwise
    file_put_contents('status.txt', $funcName . '|' . ((int) $funcCallResult) . "\r\n", FILE_APPEND);

}
unset($funcName, $funcCallResult, $requestedStats);
 
# Decode encoded comments, nowiki tags etc. before committing the edit
# Decoding runs in the reverse order of encoding: template calls, then unparsed tags, then HTML comments.
#
# preg_replace_callback() returns null when the regex engine fails. Each result is checked,
# because a null page text would otherwise be sent to the server as the new content of the page.

$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?<!\{)\{|\}(?!\})|(?R))*?\}\}/u',
                                    function($match) {
                                        return html_entity_decode($match[0], ENT_QUOTES | ENT_HTML5, 'UTF-8');
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode template calls in the page text (PCRE error code ' . preg_last_error() . ')');
}

$PageText = preg_replace_callback('/(\<(syntaxhighlight|source|math|pre|nowiki)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us',
                                    function($match) {
                                        return $match[1] . html_entity_decode($match[3], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '</' . $match[2] . '>' ;
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode unparsed tags in the page text (PCRE error code ' . preg_last_error() . ')');
}

$PageText = preg_replace_callback('/\<\!--(.*?)--\>/us',
                                    function($match) {
                                        return '<!--' . html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '-->';
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode HTML comments in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
 
# Updating finished, now edit

$endTime = time();


# Save $PageText as the new content of $PageTitle.
#
# The edit summary reports how many update functions succeeded and failed (read from status.txt)
# and how long the run took. On success, "#<old revision ID>|<new revision ID>" is printed.
# On any failure the wikitext is saved to edit_failed_backup.txt and the script is ended with
# die(). The saved text can be retrieved by adding &resume=1 to the URL.
#
# Globals read: $wikiAPIEditToken, $wikiAPIRequestHeaders, $PageTitle, $PageText,
#               $PageLatestRevisionTS, $startTime, $endTime
function CLT20RecordsUpdateBot_editPage() {

    global $wikiAPIEditToken, $wikiAPIRequestHeaders, $PageTitle, $PageText, $PageLatestRevisionTS, $startTime, $endTime;

    # Get the update results (to be used in the edit summary)
    # Check for the file first so that a missing status file does not also raise a PHP warning
    $updateResults = is_file('status.txt') ? file('status.txt') : false;

    if ( $updateResults !== false ) {
        $successfulUpdates = 0;
        $failedUpdates = 0;

        foreach ( $updateResults as $line ) {
            $fields = explode('|', trim($line));  # Each line is "function name|1" or "function name|0"
            if ( ! isset($fields[1]) ) {
                continue;  # Ignore blank or malformed lines
            }
            if ( $fields[1] == 1 ) {
                $successfulUpdates++;
            }
            elseif ( $fields[1] == 0 ) {
                $failedUpdates++;
            }
        }

        # $endTime is not yet set if this function is called before the updating code has run
        # (as the resume branch does); fall back to the current time instead of computing
        # a negative duration
        $finishTime = isset($endTime) ? $endTime : time();
        $elapsedSeconds = max(0, $finishTime - $startTime);
        $updateTime = ((int) ($elapsedSeconds / 60)) . ':' . str_pad((string) ($elapsedSeconds % 60), 2, '0', STR_PAD_LEFT);

        $editSummary = "[[WP:BOT|Bot]]: Updating statistics ({$successfulUpdates} updates successful, {$failedUpdates} failed, {$updateTime})";
    }
    else {  # Use a generic edit summary if the status file is not available for some reason
        $editSummary = "[[WP:BOT|Bot]]: Updating statistics";
    }

    # Edit the page
    $editPageResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'edit',
            'title' => $PageTitle,
            'summary' => $editSummary,
            'text' => $PageText,
            'basetimestamp' => $PageLatestRevisionTS,
            'nocreate' => true,
            'md5' => md5($PageText),
            'token' => $wikiAPIEditToken,
        ]
    );

    # From here on, every failure saves the wikitext to the backup file before ending the script
    if ( $editPageResult === false ) {
        file_put_contents('edit_failed_backup.txt', $PageText);
        die('Failed to edit: Query to Wikipedia API failed');
    }

    $XMLDOMDoc = new DOMDocument();
    if ( $editPageResult === '' || ! $XMLDOMDoc->loadXML($editPageResult) ) {
        file_put_contents('edit_failed_backup.txt', $PageText);
        die('Failed to edit: Invalid response from Wikipedia API');
    }

    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');

        file_put_contents('edit_failed_backup.txt', $PageText);

        die("[{$errorCode}] {$errorMessage}");
    }

    $editInfo = $XMLDOMDoc->getElementsByTagName('edit')->item(0);

    # A missing <edit> element is treated like an unsuccessful result
    if ( ! $editInfo || $editInfo->getAttribute('result') != 'Success' ) {
        file_put_contents('edit_failed_backup.txt', $PageText);
        die('Failed to edit: Unknown error');
    }

    $oldRevision = $editInfo->getAttribute('oldrevid');
    $newRevision = $editInfo->getAttribute('newrevid');

    echo "#{$oldRevision}|{$newRevision}";

}

CLT20RecordsUpdateBot_editPage();
 
?>

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for presentation here. We strive to provide accurate and relevant information; however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.