User:Polygnotus/Scripts/ExternalLinkMonitor.js

// <nowiki>
/**
 * External Link Monitor for Wikipedia (Enhanced with CiteHighlighter Integration)
 * Monitors recent changes that add external links to enwiki articles
 * Filters out: bot edits, top 10k domains, users with >500 edits
 * Colors links based on CiteHighlighter source ratings
 */

(function() {
    'use strict';

    // Bootstrap. Both the Tools-menu link (mediawiki.util) and the monitor page
    // (mw.Api, from mediawiki.api) depend on ResourceLoader modules, so nothing
    // runs until the modules are loaded AND the DOM is ready. Previously the
    // monitor was started on DOM-ready alone, which could execute before
    // mediawiki.api had been loaded.
    mw.loader.using(['mediawiki.util', 'mediawiki.api']).then(function() {
        $(function() {
            // Add link to Tools menu
            mw.util.addPortletLink(
                'p-tb',
                mw.util.getUrl('Special:BlankPage/ExternalLinkMonitor'),
                'External Link Monitor',
                't-extlinkmonitor',
                'Monitor external links added to articles'
            );

            // Only build the monitor UI on Special:BlankPage/ExternalLinkMonitor
            const onMonitorPage =
                mw.config.get('wgCanonicalSpecialPageName') === 'Blankpage' &&
                mw.config.get('wgPageName') === 'Special:BlankPage/ExternalLinkMonitor';
            if (onMonitorPage) {
                initMonitor();
            }
        });
    });

    function initMonitor() {
        $('#firstHeading').text('External Link Monitor');
        document.title = 'External Link Monitor';
        
        const $container = $('#mw-content-text');
        $container.html(`
            <div id="elm-container">
                <div id="elm-status">Loading source ratings and top 10k domains...</div>
                <div id="elm-controls" style="margin: 15px 0; display: none;">
                    <button id="elm-start" style="padding: 8px 16px; font-size: 14px; cursor: pointer;">Start Monitoring</button>
                    <button id="elm-stop" style="padding: 8px 16px; font-size: 14px; cursor: pointer; display: none;">Stop Monitoring</button>
                    <span id="elm-count" style="margin-left: 15px; font-weight: bold;"></span>
                </div>
                <div id="elm-legend" style="margin: 15px 0; padding: 10px; border: 1px solid #ccc; background: #f9f9f9; display: none;">
                    <strong>Color Legend:</strong>
                    <span style="margin-left: 10px; padding: 3px 8px; background: limegreen;">MEDRS</span>
                    <span style="margin-left: 10px; padding: 3px 8px; background: lightgreen;">Reliable</span>
                    <span style="margin-left: 10px; padding: 3px 8px; background: khaki;">Caution</span>
                    <span style="margin-left: 10px; padding: 3px 8px; background: lightcoral;">Unreliable</span>
                    <span style="margin-left: 10px; padding: 3px 8px; background: #ffcfd5;">Preprint</span>
                    <span style="margin-left: 10px; padding: 3px 8px; background: #ffb347;">Unreliable Word</span>
                </div>
                <div id="elm-results" style="margin-top: 20px; font-family: monospace; font-size: 12px;"></div>
            </div>
        `);

        loadSourceRatings();
    }

    // ---- Shared state for the monitor ----

    // CiteHighlighter data: rating lists (filled in by loadSourceRatings) and
    // the URL substrings that flag a link as an "unreliable word" match.
    let sourceRatings = {};
    let unreliableWords = [];

    // Top 10k domains, kept in a Set so membership checks are O(1).
    const top10kDomains = new Set();

    // Live-stream bookkeeping: the open EventSource (if any), whether we are
    // currently monitoring, and how many matching edits have been shown.
    let eventSource = null;
    let isMonitoring = false;
    let editCount = 0;

    // Background colour per rating key (mapping taken from CiteHighlighter).
    const colors = {
        medrs: 'limegreen',
        green: 'lightgreen',
        yellow: 'khaki',
        red: 'lightcoral',
        preprint: '#ffcfd5',
        unreliableWord: '#ffb347',
        doi: 'transparent'
    };

    /**
     * Load the CiteHighlighter source ratings from their on-wiki JSON page,
     * initialise the unreliable-word list, then continue with the top 10k
     * domain list.
     *
     * Any failure (API rejection, missing page/revision, invalid JSON) is
     * reported in #elm-status and logged; the domain list is not loaded in
     * that case. Never rejects.
     *
     * @returns {Promise<void>}
     */
    async function loadSourceRatings() {
        try {
            // Load CiteHighlighter source ratings
            const api = new mw.Api();
            const sourcesResponse = await api.get({
                action: 'query',
                prop: 'revisions',
                titles: 'User:Novem Linguae/Scripts/CiteHighlighter/SourcesJSON.js',
                rvprop: 'content',
                rvslots: 'main',
                formatversion: 2
            });

            // Walk the response defensively: a missing page has no `revisions`
            // array, which used to surface as an opaque TypeError.
            const pages = sourcesResponse && sourcesResponse.query && sourcesResponse.query.pages;
            const page = pages && pages[0];
            const revision = page && page.revisions && page.revisions[0];
            const mainSlot = revision && revision.slots && revision.slots.main;
            if (!mainSlot || typeof mainSlot.content !== 'string') {
                throw new Error('source ratings page is missing or has no readable content');
            }

            sourceRatings = JSON.parse(mainSlot.content);
            console.log('Loaded source ratings:', sourceRatings);

            // Load unreliable words
            unreliableWords = getUnreliableWords();

            // Now load top 10k domains
            loadTop10kDomains();
        } catch (error) {
            // mw.Api rejects with a plain string error code rather than an
            // Error object, so `error.message` alone would print "undefined".
            const reason = error instanceof Error ? error.message : String(error);
            $('#elm-status').text('Error loading source ratings: ' + reason);
            console.error('Error loading source ratings:', error);
        }
    }

    /**
     * Build the list of URL substrings that mark a link as an
     * "unreliable word" match. A fresh array is returned on every call.
     *
     * @returns {string[]} Lower-case substrings, in their original order.
     */
    function getUnreliableWords() {
        const words = [
            '/comment', 'about-me', 'about-us', '/about/',
            'acquire', 'announce',
            'blog', 'blogspot', 'businesswire',
            'caard', 'contact-us', 'contactus',
            'essay',
            'fandom', '/forum/',
            'google.com/search',
            'innovative',
            'newswire',
            'podcast', '/post/', 'preprint', 'press-release', 'pressrelease', 'prnews',
            'railfan',
            'sponsored',
            'thread',
            'user-review',
            'viewtopic',
            'weebly', 'wix', 'wordpress', '/wp-'
        ];
        return words;
    }

    /**
     * Fetch the on-wiki top 10k domain list, feed it to parseDomainList, and
     * reveal the controls and legend once it is available.
     *
     * If the page is missing, has no readable revision, or the request fails,
     * an error is shown in #elm-status and the controls stay hidden.
     */
    function loadTop10kDomains() {
        const api = new mw.Api();

        api.get({
            action: 'query',
            prop: 'revisions',
            titles: 'User:Polygnotus/Data/Top10kDomains',
            rvprop: 'content',
            rvslots: 'main',
            formatversion: 2
        }).done(function(data) {
            // A missing page is still returned in `pages` but carries no
            // `revisions`; indexing it blindly threw inside this callback and
            // left the status stuck on "Loading...".
            const pages = data && data.query && data.query.pages;
            const page = pages && pages[0];
            const revision = page && page.revisions && page.revisions[0];
            const mainSlot = revision && revision.slots && revision.slots.main;

            if (!mainSlot || typeof mainSlot.content !== 'string') {
                $('#elm-status').text('Error: Could not load domain list.');
                return;
            }

            parseDomainList(mainSlot.content);
            $('#elm-status').text('Ready to monitor. Source ratings and top 10k domains loaded.');
            $('#elm-controls').show();
            $('#elm-legend').show();
            setupEventHandlers();
        }).fail(function(code) {
            console.error('Failed to fetch domain list:', code);
            $('#elm-status').text('Error: Failed to fetch domain list from API.');
        });
    }

    /**
     * Add every domain found in a newline-separated list to top10kDomains.
     * Entries are trimmed and lower-cased; blank lines and lines starting
     * with '#' are skipped.
     *
     * @param {string} content Raw text of the domain list page.
     */
    function parseDomainList(content) {
        content
            .split('\n')
            .map((rawLine) => rawLine.trim().toLowerCase())
            .filter((entry) => entry !== '' && !entry.startsWith('#'))
            .forEach((entry) => {
                top10kDomains.add(entry);
            });

        console.log('Loaded top 10k domains:', top10kDomains.size);
    }

    /**
     * Tell whether a URL's host, or any parent domain of it, is in
     * top10kDomains. A leading "www." is ignored.
     *
     * @param {string} url Absolute URL to test.
     * @returns {boolean} false when no suffix matches or the URL cannot be parsed.
     */
    function isTop10kDomain(url) {
        try {
            const rawHost = new URL(url).hostname.toLowerCase();
            const host = rawHost.startsWith('www.') ? rawHost.substring(4) : rawHost;
            const labels = host.split('.');

            // Try the full host first, then each shorter suffix (a.b.c, b.c, c).
            return labels.some((_, start) => top10kDomains.has(labels.slice(start).join('.')));
        } catch (e) {
            return false;
        }
    }

    /**
     * Classify a URL against the unreliable-word list and the loaded source
     * ratings.
     *
     * The unreliable-word check runs first, as a substring test on the whole
     * lower-cased URL. Otherwise the host (minus a leading "www.") is compared
     * with each rating list in the order red, yellow, green, medrs, doi,
     * preprint; a host matches an entry when it equals it or is a subdomain
     * of it.
     *
     * @param {string} url Absolute URL to classify.
     * @returns {?{color: string, label: string}} The first match, or null when
     *     nothing matches or anything goes wrong while classifying.
     */
    function getSourceRating(url) {
        // Insertion order doubles as the lookup priority.
        const labelByRating = {
            red: 'Unreliable',
            yellow: 'Caution',
            green: 'Reliable',
            medrs: 'MEDRS',
            doi: 'DOI',
            preprint: 'Preprint'
        };

        try {
            const rawHost = new URL(url).hostname.toLowerCase();
            // Remove www. prefix
            const host = rawHost.startsWith('www.') ? rawHost.substring(4) : rawHost;

            const loweredUrl = url.toLowerCase();
            for (const fragment of unreliableWords) {
                if (loweredUrl.includes(fragment)) {
                    return { color: 'unreliableWord', label: 'Unreliable Word' };
                }
            }

            const matchesHost = (domain) => host === domain || host.endsWith('.' + domain);

            for (const [rating, label] of Object.entries(labelByRating)) {
                const domains = sourceRatings[rating];
                if (!domains) {
                    continue;
                }
                for (const domain of domains) {
                    if (matchesHost(domain)) {
                        return { color: rating, label: label };
                    }
                }
            }

            return null;
        } catch (e) {
            return null;
        }
    }

    /**
     * Wire the Start and Stop buttons to their click handlers.
     */
    function setupEventHandlers() {
        const clickHandlers = [
            ['#elm-start', startMonitoring],
            ['#elm-stop', stopMonitoring]
        ];

        for (const [selector, handler] of clickHandlers) {
            $(selector).on('click', handler);
        }
    }

    /**
     * Start monitoring: reset the counter and results, switch the buttons,
     * and open an EventSource on the Wikimedia page-links-change stream.
     * Each message is JSON-parsed and handed to processChange.
     *
     * Does nothing when monitoring is already active.
     */
    function startMonitoring() {
        if (isMonitoring) return;

        isMonitoring = true;
        editCount = 0;
        $('#elm-start').hide();
        $('#elm-stop').show();
        $('#elm-status').text('Monitoring active...');
        $('#elm-results').empty();
        updateCount();

        // Connect to Wikimedia EventStreams
        const streamUrl = 'https://stream.wikimedia.org/v2/stream/mediawiki.page-links-change';
        const source = new EventSource(streamUrl);
        eventSource = source;

        source.onopen = function() {
            console.log('EventStream connection opened');
        };

        source.onerror = function(e) {
            console.error('EventStream error:', e);

            // Ignore errors from a stream that has been stopped or replaced.
            if (!isMonitoring || eventSource !== source) {
                return;
            }

            // An EventSource only retries while it is CONNECTING. Once it is
            // CLOSED the browser has given up, so claiming "Reconnecting..."
            // would leave the UI stuck in the active state.
            if (source.readyState === EventSource.CLOSED) {
                appendResult('Connection closed. Monitoring stopped.', 'error');
                stopMonitoring();
                return;
            }

            appendResult('Connection error. Reconnecting...', 'error');
        };

        source.onmessage = function(event) {
            try {
                const data = JSON.parse(event.data);
                processChange(data);
            } catch (e) {
                console.error('Error parsing event data:', e);
            }
        };
    }

    /**
     * Stop monitoring: swap the Start/Stop buttons back, update the status
     * line, and close the open event stream if there is one.
     * Does nothing when monitoring is not active.
     */
    function stopMonitoring() {
        if (isMonitoring) {
            isMonitoring = false;

            $('#elm-start').show();
            $('#elm-stop').hide();
            $('#elm-status').text('Monitoring stopped.');

            const activeStream = eventSource;
            if (activeStream) {
                activeStream.close();
                eventSource = null;
            }
        }
    }

    /**
     * Decide whether one page-links-change event should be shown.
     *
     * An event is dropped unless it is from en.wikipedia.org, has a non-bot
     * performer whose edit count is not above 500, is in namespace 0, and
     * adds at least one external link outside the top 10k domains. Surviving
     * links are paired with their source rating and passed to displayEdit.
     *
     * @param {Object} data Parsed event payload from the stream.
     */
    function processChange(data) {
        // Only enwiki
        const uri = data.meta && data.meta.uri;
        const fromEnwiki = Boolean(uri) && uri.startsWith('https://en.wikipedia.org');
        if (!fromEnwiki) return;

        // Need a performer, and it must not be a bot
        const performer = data.performer;
        if (!performer || performer.user_is_bot) return;

        // Skip editors with more than 500 edits
        if (performer.user_edit_count > 500) return;

        // Main namespace only
        if (data.page_namespace !== 0) return;

        const addedLinks = data.added_links;
        if (!addedLinks || addedLinks.length === 0) return;

        // Keep external links that are not on a top 10k domain
        const validLinks = [];
        for (const { external, link } of addedLinks) {
            if (!external || !link || isTop10kDomain(link)) {
                continue;
            }
            validLinks.push({ url: link, rating: getSourceRating(link) });
        }

        if (validLinks.length > 0) {
            displayEdit(data, validLinks, performer);
        }
    }

    /**
     * Decode percent-escapes in a URL for display purposes only.
     * Malformed escape sequences make decodeURIComponent throw, so the raw
     * URL is returned unchanged in that case.
     *
     * @param {string} url
     * @returns {string}
     */
    function decodeUrlForDisplay(url) {
        try {
            return decodeURIComponent(url);
        } catch (e) {
            return url;
        }
    }

    /**
     * Format an event timestamp as an ISO string, or '' when it is absent or
     * unparseable (toISOString throws on an invalid Date).
     *
     * @param {string|undefined} dt
     * @returns {string}
     */
    function formatEventTimestamp(dt) {
        if (!dt) return '';
        const parsed = new Date(dt);
        return Number.isNaN(parsed.getTime()) ? '' : parsed.toISOString();
    }

    /**
     * Render one matching edit at the top of #elm-results and bump the edit
     * counter. Only the 50 most recent result blocks are kept.
     *
     * Every value taken from the event payload is HTML-escaped before being
     * interpolated. Each link's href keeps the URL exactly as received;
     * only the visible text is percent-decoded, because decoding the target
     * (e.g. %23 -> #, %3F -> ?) would change where the link points.
     *
     * @param {Object} data Event payload (reads page_title, rev_id, meta.dt).
     * @param {Array<{url: string, rating: ?{color: string, label: string}}>} links
     * @param {Object} performer Event performer (reads user_text, user_id,
     *     user_edit_count, user_registration_dt).
     */
    function displayEdit(data, links, performer) {
        editCount++;
        updateCount();

        const pageTitle = data.page_title || 'Unknown';
        const pageTitleEncoded = encodeURIComponent(pageTitle).replace(/%20/g, '_');
        const revId = escapeHtml(data.rev_id || 0);
        const timestamp = escapeHtml(formatEventTimestamp(data.meta && data.meta.dt));

        const userName = performer.user_text || 'Unknown';
        const userId = escapeHtml(performer.user_id || 0);
        const userEditCount = escapeHtml(performer.user_edit_count || 0);
        const userRegistration = escapeHtml(performer.user_registration_dt || 'Unknown');

        let html = '<div style="margin-bottom: 20px; padding: 10px; border: 1px solid #ccc; background: #f9f9f9;">';
        html += `<div style="font-weight: bold; margin-bottom: 5px;">[${timestamp}]</div>`;
        html += `<div><a href="https://en.wikipedia.org/wiki/${pageTitleEncoded}" target="_blank">${escapeHtml(pageTitle)}</a></div>`;
        html += `<div><a href="https://en.wikipedia.org/w/index.php?title=${pageTitleEncoded}&amp;diff=${revId}" target="_blank">Diff ${revId}</a></div>`;
        html += `<div style="margin-top: 8px;">User: <a href="https://en.wikipedia.org/wiki/User:${encodeURIComponent(userName)}" target="_blank">${escapeHtml(userName)}</a> `;
        html += `(ID: ${userId}, Edits: ${userEditCount}, Registered: ${userRegistration})</div>`;
        html += '<div style="margin-top: 8px; font-weight: bold;">Added links:</div>';
        html += '<ul style="margin: 5px 0;">';
        for (const link of links) {
            // Unknown rating keys fall back to no highlight instead of "undefined".
            const bgColor = (link.rating && colors[link.rating.color]) || 'transparent';
            const label = link.rating ? ` [${link.rating.label}]` : '';
            const href = escapeHtml(link.url);
            const visibleUrl = escapeHtml(decodeUrlForDisplay(link.url));
            html += `<li style="background-color: ${bgColor}; padding: 3px; margin: 2px 0;">`;
            // noopener/noreferrer: these pages are untrusted and open in a new tab.
            html += `<a href="${href}" target="_blank" rel="nofollow noopener noreferrer">${visibleUrl}</a>`;
            if (label) {
                html += `<span style="font-weight: bold; margin-left: 5px;">${escapeHtml(label)}</span>`;
            }
            html += '</li>';
        }
        html += '</ul>';
        html += '</div>';

        $('#elm-results').prepend(html);

        // Keep only last 50 results
        const results = $('#elm-results > div');
        if (results.length > 50) {
            results.slice(50).remove();
        }
    }

    /**
     * Refresh the "Edits found" counter next to the control buttons.
     */
    function updateCount() {
        const summary = `Edits found: ${editCount}`;
        const $counter = $('#elm-count');
        $counter.text(summary);
    }

    /**
     * Put a one-line notice at the top of the results area.
     *
     * @param {string} message Notice text; HTML-escaped before insertion.
     * @param {string} type 'error' renders in red, anything else in green.
     */
    function appendResult(message, type) {
        let color = 'green';
        if (type === 'error') {
            color = 'red';
        }

        const notice = `<div style="color: ${color}; margin-bottom: 10px;">${escapeHtml(message)}</div>`;
        $('#elm-results').prepend(notice);
    }

    /**
     * Escape the five HTML-significant characters (& < > " ') so a value can
     * be placed in element content or a quoted attribute.
     *
     * @param {*} text Value to escape; converted with String() first.
     * @returns {string}
     */
    function escapeHtml(text) {
        // '&' goes first so the ampersands of the entities added by the
        // later replacements are not escaped a second time.
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#039;');
    }

})();
// </nowiki>

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted for re-presentation. We strive to provide accurate and relevant information; however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.