User:ChristieBot/GANbot.py

'''
Copyright (c) 2022 Mike Christie
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
'''
# Third party modules
import pywikibot
pywikibot.config.max_retries=2
import heapq
import re
import time
from collections import defaultdict
from dataclasses import dataclass, field
from typing import Any
from Messages import MessageKind, MessageScope, BotMessage, report_message

@dataclass
class BotContext:
    """Handles and lookups shared by the per-article helpers during one run.

    The fields typed ``Any`` hold objects from third-party or project modules
    (database connection, pywikibot site, profiler); ``Any`` replaces the
    builtin function ``any``, which is not a type.
    """
    # Connection to the GAN database; handed to GAN.log() and the Nom helpers.
    gan_conn: Any
    # pywikibot site object used to build Page objects.
    site: Any
    # Full title of the nominee category, compared against talk-page category titles.
    GAN_CAT: str
    # Active nominations recorded by the previous run: title -> row dict.
    last_run_noms: dict
    # The titles (keys) of last_run_noms.
    last_run_articles_set: set
    # Titles found in the nominee category when the run started.
    cat_articles_set: set
    # Subtopic lookup returned by Topic.initialize().
    subtopics: dict
    # Profiler that collects timings through its add() method.
    prof: Any

@dataclass
class RunState:
    """Mutable bookkeeping accumulated while the run walks the article list."""
    # Set to True by the handlers whenever something changed that requires an update.
    update_needed: bool = False
    # Human-readable reasons appended alongside each update_needed = True.
    update_reasons: list[str] = field(default_factory=list)
    # Set to True after a nomination state has been saved (save_nomination_state).
    active_table_dirty: bool = False
    # topic name -> bool; handlers set a topic's entry to True when one of its nominations changed.
    topic_update_needed: dict = field(default_factory=dict)
    # topic name -> list of edit summaries; handlers append nom.edit_summary here.
    topic_PFX_log: dict = field(default_factory=dict)
    # Edit summaries collected across all topics; starts as [''] (one empty entry).
    update_log: list[str] = field(default_factory=lambda: [''])
    # Starts as ['']; presumably the equivalent log for pledge updates -- TODO confirm against its users.
    pledge_update_log: list[str] = field(default_factory=lambda: [''])

@dataclass
class NominationActionFlags:
    """Which follow-up actions execute_nomination_actions() should perform for one nomination."""
    # Save the nomination state (and mark the active table dirty).
    new_nomination_state: bool = False
    # Record a new review via nom.add_a_review().
    new_review: bool = False
    # Transclude via nom.transclude().
    transclusion_needed: bool = False
    # Call nom.add_GA_star().
    promotion_needed: bool = False
    # Notify the nominator via nom.tell_nominator().
    tell_nominator_needed: bool = False

@dataclass
class NominationActionResult:
    """Outcome of evaluating one article: the nomination plus the actions it needs."""
    # The nomination object the actions apply to (typed Any; the builtin ``any`` is not a type).
    nom: Any
    # The follow-up actions to execute for ``nom``.
    flags: NominationActionFlags

@dataclass
class TalkPageState:
    """Snapshot of an article's talk page as read at the start of its processing."""
    # The talk page object (typed Any; the builtin ``any`` is not a type).
    talk_page: Any
    # Wikitext of the talk page.
    talk_text: str
    # True when the wikitext contains a GA nominee template.
    has_ga_nominee: bool
    # True when the talk page counts as being in the nominee category.
    in_cat: bool

class RunProfiler:
    """Accumulate timings per label and remember the slowest titled samples.

    Attributes:
        totals: label -> summed elapsed time (reported as seconds).
        counts: label -> number of samples recorded.
        top: label -> min-heap of ``(elapsed, title)`` holding at most
            ``topn`` entries, i.e. the slowest titled samples seen so far.
        topn: how many slowest samples to keep per label; a value of zero
            or less disables the "slowest" tracking entirely.
    """

    def __init__(self, topn=10):
        self.totals = defaultdict(float)
        self.counts = defaultdict(int)
        self.top = defaultdict(list)  # label -> min-heap of (elapsed, title)
        self.topn = topn

    def add(self, label, elapsed, title=None):
        """Record one sample of ``elapsed`` for ``label``.

        When ``title`` is given, the sample also competes for a place among
        the ``topn`` slowest samples of that label.
        """
        self.totals[label] += elapsed
        self.counts[label] += 1
        # With topn <= 0 there is nothing to keep; without this guard the
        # else-branch below would index an empty heap and raise IndexError.
        if title is None or self.topn <= 0:
            return
        heap = self.top[label]
        item = (elapsed, title)
        if len(heap) < self.topn:
            heapq.heappush(heap, item)
        elif elapsed > heap[0][0]:
            # heap[0] is the fastest of the retained samples; evict it only
            # for a strictly slower newcomer.
            heapq.heapreplace(heap, item)

    def report(self):
        """Return a multi-line ``[PROFILE]`` summary, labels ordered by total time descending."""
        lines = ["[PROFILE] totals / avg_ms / count"]
        for label, total in sorted(self.totals.items(), key=lambda kv: kv[1], reverse=True):
            n = self.counts[label]
            avg_ms = (total / n * 1000.0) if n else 0.0
            lines.append(f"[PROFILE] {label}: total={total:.2f}s avg={avg_ms:.1f}ms n={n}")
        for label, heap in self.top.items():
            # nlargest yields the retained samples slowest-first and is empty
            # for an empty heap or a non-positive topn.
            slowest = heapq.nlargest(self.topn, heap)
            if not slowest:
                continue
            lines.append(f"[PROFILE] slowest {label}:")
            for elapsed, title in slowest:
                lines.append(f"[PROFILE]   {elapsed:.2f}s  {title}")
        return "\n".join(lines)
        
import datetime
import sys
import os
import pymysql
import configparser
import operator
import mwparserfromhell
import traceback

# Local modules
sys.path.append('./www/python/src') # Not needed if I run from that directory
import GA
from GA import (
    Topic,
    Subtopic,
    Nom,
    Review_stats,
    WBGAN,
    Active_nomination,
    GAN,
    Name_changes,
    Nom_list,
    save_page,
    save_page_report_operational,
)
import GA_config
from GA_history import GAH, FailedGA, GAnominee, Article_History, GA_article_page, GARlink, GA_talk_page, GA_sub_page, GA_history_Exception, GAO

# Config helpers
def load_replica_config():
    """Load and validate the ``replica.my.cnf`` credentials file.

    The file is looked up in ``$HOME`` (falling back to ``~`` when ``HOME``
    is unset or empty).

    Returns:
        configparser.ConfigParser: parsed file, guaranteed to contain a
        ``[client]`` section with ``user`` and ``password`` keys.  Values are
        returned literally (no ``%`` interpolation).

    Exits:
        Calls ``sys.exit(2)`` after printing a ``[FATAL]`` message when the
        file is missing, cannot be parsed, or lacks the expected keys.
    """
    home = os.environ.get('HOME')  # get environment variable $HOME
    replica_path = os.path.join(home, 'replica.my.cnf') if home else os.path.expanduser('~/replica.my.cnf')

    if not os.path.exists(replica_path):
        print(f"[FATAL] replica.my.cnf file not found at: {replica_path}")
        print("        Create it per Toolforge docs, and ensure it contains a [client] section with user/password.")
        sys.exit(2)

    # Credentials are literal strings: disable interpolation so a '%' in a
    # password does not raise InterpolationSyntaxError when the value is read.
    config = configparser.ConfigParser(interpolation=None)
    try:
        config.read(replica_path)
    except configparser.Error as e:
        # A syntactically broken file gets the same clear fatal message as a
        # file with missing keys, rather than an unhandled parser exception.
        print(f"[FATAL] Invalid credentials file format in: {replica_path}")
        print(f"        {type(e).__name__}: {e}")
        print("        Expected a [client] section containing user=... and password=...")
        sys.exit(2)

    # Validate expected keys early so failures are clear.
    if 'client' not in config or 'user' not in config['client'] or 'password' not in config['client']:
        print(f"[FATAL] Invalid credentials file format in: {replica_path}")
        print("        Expected a [client] section containing user=... and password=...")
        sys.exit(2)

    return config

# Set up the connection to the GAN database.
# Both values are read by get_gan_conn(), which passes them to pymysql and
# echoes them in its fatal error message if the connection fails.
database = "s55175__ganfilter"  # name of the database to open
host = "tools.db.svc.eqiad.wmflabs"  # hostname of the database server

def get_gan_conn(cfg):
    """Open a pymysql connection to the GAN database.

    Args:
        cfg: mapping with a ``'client'`` section holding ``'user'`` and
            ``'password'`` (as returned by load_replica_config()).

    Returns:
        The open pymysql connection to ``database`` on ``host``.

    Exits:
        Any failure while reading the credentials or connecting is reported
        with a ``[FATAL]`` message and ends the process via ``sys.exit(3)``.
    """
    try:
        credentials = cfg['client']
        connection = pymysql.connections.Connection(
            host=host,
            database=database,
            user=credentials['user'],
            password=credentials['password'],
        )
    except Exception as err:
        print(f"[FATAL] Could not connect to GAN database '{database}' on host '{host}'.")
        print(f"        Error: {type(err).__name__}: {err}")
        sys.exit(3)
    return connection



def _fatal(stage, exc, gan_conn=None):
    # Print a clear message for Toolforge filelog / failure emails
    print(f"[FATAL] Unhandled exception during: {stage}")
    print(f"        {type(exc).__name__}: {exc}")
    traceback.print_exc()

    # Best-effort write to audit log (won't mask the original exception if logging fails)
    if gan_conn is not None:
        try:
            GAN.log(gan_conn, "GANbot", stage, f"{type(exc).__name__}: {exc}")
        except Exception:
            pass

def main():
    start_time = time.perf_counter()
    gan_conn = None
    run_ok = False
    prof = RunProfiler(topn=10)
    try:
        config = load_replica_config()
        gan_conn = get_gan_conn(config)

        # Create an initial copy of the review stats.  This will be used to provide the values to store in the nomination audit trail so that we know how many GAs and reviews were
        # showing on the GAN page when the review was started.
        t0 = time.perf_counter()
        review_stats = Review_stats(gan_conn)   # Refresh the GA reviewing stats dictionary from the database
        prof.add("review stats", time.perf_counter() - t0)

        # Create the name changes lookup from the database
        users = {}
        t0 = time.perf_counter()
        name_changes = Name_changes.get_name_changes(gan_conn)
        prof.add("get name changes", time.perf_counter() - t0)

        #t = datetime.datetime.utcnow()
        GAN.log(gan_conn, "GANbot","Initializing","Starting run")

        # Initialize some variables
        GA_config.transient_operational_issue = False
        GA_config.current_bug_messages = []
        site = pywikibot.Site('en','wikipedia')
        GAN_CAT = "Category:Good article nominees"
        t0 = time.perf_counter()
        wbgan = WBGAN.get_wbgan(config, gan_conn) # The WBGAN database that holds the number of promoted GAs by each nominator.  Maintained by another tool.
        prof.add("get WBGAN", time.perf_counter() - t0)
        #GAN.log(gan_conn, None, "WBGAN","GAN count for History6042 is " + str(wbgan.get_GA_count("History6042")))

        [topics, subtopics] = Topic.initialize()

        # Clear the error page
        GAN.clear_errors()

        single_title = GA_config.single_title # If is_live = False, then single_title can be used to restrict a run to a single article to speed up testing of changes

        # Next step is get the list of articles we have to look at in this pass.  This is the union of two lists:
        # 1. All the articles that currently have a {{GA nominee}} template on their talk page; and
        # 2. All articles currently in the active_nominations table.
        cat_articles = set()
        cat = pywikibot.Category(site,'Good article nominees') # We're going to iterate over these; this is the contents of GAN.
        try:
            for x in cat.articles():
                title = x.title()[5:]
                cat_articles.add(title)
        except (pywikibot.exceptions.ServerError, pywikibot.exceptions.APIError, pywikibot.exceptions.Error) as e:
            GAN.report_operational_issue(
                gan_conn,
                "GANbot:exceptions",
                "Category:Good article nominees",
                f"Error scanning cat.articles(): {type(e).__name__}: {e}",
            )
            raise

        GAN.log(gan_conn, "GANbot", "Process active nominations table", "Starting")
        t0 = time.perf_counter()
        last_run_noms = Active_nomination.get_active_nominations_dict(gan_conn)  # title -> row dict
        prof.add("get active nominations", time.perf_counter() - t0)
        last_run_articles_set = set(last_run_noms.keys())
        ctx = BotContext(
            gan_conn=gan_conn,
            site=site,
            GAN_CAT=GAN_CAT,
            last_run_noms=last_run_noms,
            last_run_articles_set=last_run_articles_set,
            cat_articles_set=cat_articles,
            subtopics=subtopics,
            prof=prof,
        )
        GAN.log(gan_conn, "GANbot", "Process active nominations table",
                f"Obtained {len(ctx.last_run_articles_set)} active nominations")
        all_articles = sorted(cat_articles | ctx.last_run_articles_set)  # Stable order for repeatable runs
        GAN.log(gan_conn, "GANbot", "Process active nominations table", "Created list of articles")
        state = RunState()
        for t in topics:
            state.topic_update_needed[t] = False
            state.topic_PFX_log[t] = ['']

        noms = Nom_list() # This will be loaded with the nominations that will print on GAN.
        GAN.log(gan_conn, "GANbot", "all_articles loop", f"About to start loop over {len(all_articles)} articles")

        def talk_in_gan_category(ctx, talk_page, gan_cat_title):
            """
            Report whether talk_page is, right now, a member of the category
            titled gan_cat_title.

            The page's categories are streamed and the scan stops at the first
            match.  If pywikibot raises while listing them, the problem is
            reported as an operational issue and False is returned.
            """
            def _category_title(category):
                # Entries normally expose title(); anything else is compared by its str().
                if hasattr(category, "title"):
                    return category.title()
                return str(category)

            try:
                page_categories = talk_page.categories(with_sort_key=False, total=None, content=False)
                return any(_category_title(entry) == gan_cat_title for entry in page_categories)
            except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as err:
                GAN.report_operational_issue(
                    ctx.gan_conn,
                    "GANbot:exceptions",
                    talk_page.title(),
                    f"Error checking GAN category membership: {type(err).__name__}: {err}",
                )
                return False

        def get_talk_page_state(ctx, title):
            """
            Read "Talk:<title>" and summarise what the bot needs to know about it.

            Returns a TalkPageState, or None when the talk page text could not be
            read (the failure is reported as an operational issue).  A page whose
            text contains a GA nominee template is taken to be in the category;
            otherwise a fresh category-membership check is made.
            """
            page = pywikibot.Page(ctx.site, "Talk:" + title)

            try:
                started = time.perf_counter()
                wikitext = page.text
                ctx.prof.add("talk_page_text", time.perf_counter() - started, title=title)
            except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as err:
                GAN.report_operational_issue(
                    ctx.gan_conn,
                    "GANbot:exceptions",
                    title,
                    f"Error reading talk page text: {type(err).__name__}: {err}",
                )
                return None

            nominee_match = re.search(r"\{\{\s*ga\s*nominee\b", wikitext, flags=re.IGNORECASE)
            template_present = nominee_match is not None

            # The template puts the page in the category, so only ask the API
            # when the template is absent.
            in_category = True
            if not template_present:
                started = time.perf_counter()
                in_category = talk_in_gan_category(ctx, page, ctx.GAN_CAT)
                ctx.prof.add("talk_in_gan_category", time.perf_counter() - started, title=title)

            return TalkPageState(
                talk_page=page,
                talk_text=wikitext,
                has_ga_nominee=template_present,
                in_cat=in_category,
            )

        def topic_for_subtopic(subtopic):
            """Return the name of the topic owning subtopic, or 'Miscellaneous' when subtopics has no (truthy) entry for it."""
            entry = subtopics.get(subtopic)
            if entry:
                return entry.topic.name
            return 'Miscellaneous'

        def make_nom_from_params(params, title, entry_msgs):
            """
            Build a Nom for title from the parameters parsed off its talk page.

            The nominator name is cleaned, a known subtopic variant is mapped to
            its canonical name, and the topic is derived from the subtopic.
            """
            cleaned_nominator = Nom.clean_user_name(params['nominator'])
            review_status = params['status']
            review_page = params['page']

            # normalize subtopic variants (only string values can be variants)
            canonical_subtopic = params['subtopic']
            if isinstance(canonical_subtopic, str):
                lowered = canonical_subtopic.lower()
                if lowered in Subtopic.subtopic_var_dict:
                    canonical_subtopic = Subtopic.subtopic_var_dict[lowered]

            owning_topic = topic_for_subtopic(canonical_subtopic)
            return Nom(
                gan_conn,
                owning_topic,
                canonical_subtopic,
                title,
                review_status,
                review_page,
                cleaned_nominator,
                params['timestamp'],
                params['note'],
                params['shortdesc'],
                msgs=entry_msgs,
                populate_article_page_id=True,
                prof=ctx.prof,
            )

        def make_nom_from_row(row, entry_msgs=None):
            """
            Rebuild a Nom from a stored active-nomination row (a dict).

            Missing or empty 'subtopic', 'note' and 'shortdesc' values become '';
            a subtopic that is not a key of subtopics maps to the topic
            'Miscellaneous'.  entry_msgs defaults to a fresh empty list.
            """
            stored_subtopic = row.get('subtopic') or ''
            if stored_subtopic in subtopics:
                owning_topic = subtopics[stored_subtopic].topic.name
            else:
                owning_topic = 'Miscellaneous'

            return Nom(
                gan_conn,
                owning_topic,
                stored_subtopic,
                row['title'],
                row['status'],
                row['page'],
                row['nominator'],
                row['nomination_ts'],
                row.get('note') or '',
                row.get('shortdesc') or '',
                msgs=entry_msgs or [],
                article_page_id=row.get('article_page_id'),
                prof=ctx.prof,
            )

        def build_nom_from_current_talk_page(ctx, talk_page, title, talk_text, entry_msgs, noms):
            """
            Parse the GA nominee template on a talk page into a Nom and add it to noms.

            Returns the new Nom, or None (after reporting an editor error) when
            the page carries more than one GA nominee template or the template
            parameters could not be retrieved.
            """
            def _report_editor_error(short_text, gan_error_text):
                # Both failure modes are reported as nomination-scoped editor errors.
                report_message(
                    ctx.gan_conn,
                    BotMessage(
                        kind=MessageKind.EDITOR_ERROR,
                        scope=MessageScope.NOMINATION,
                        short_text=short_text,
                        source="build_nom_from_current_talk_page",
                        title=title,
                        nomination_msgs=entry_msgs,
                        gan_error_text=gan_error_text,
                    ),
                )

            # Cheap regex count first; only a suspected duplicate is confirmed
            # with the real parser, which avoids false positives
            # (comments/nowiki/etc.).
            template_count = len(re.findall(r"\{\{\s*ga\s*nominee\b", talk_text, flags=re.IGNORECASE))
            if template_count > 1:
                template_count = sum(
                    1
                    for template in mwparserfromhell.parse(talk_text).filter_templates()
                    if template.name.strip().lower() == 'ga nominee'
                )

            if template_count > 1:
                _report_editor_error(
                    "Multiple GA nominee templates on talk page",
                    f"Multiple GA nomination templates found on talk page for [[{title}]]",
                )
                return None

            started = time.perf_counter()
            params = GAN.get_params(
                ctx.gan_conn,
                talk_page,
                article_text=talk_text,
                nomination_msgs=entry_msgs,
                prof=ctx.prof,
            )
            ctx.prof.add("get_params", time.perf_counter() - started, title=title)
            GAN.log(ctx.gan_conn, "GANbot:all_articles loop", title, f"Retrieved params: {params}")

            if params is None:
                _report_editor_error(
                    "Malformed GA nominee template",
                    f"Malformed GA nomination for [[{title}]]",
                )
                return None

            GAN.check_params(params, title, nomination_msgs=entry_msgs)

            built = make_nom_from_params(params, title, entry_msgs)
            noms.add(built)
            return built

        def execute_nomination_actions(
            ctx,
            state,
            title,
            nom,
            flags,
            name_changes,
            wbgan,
            review_stats,
        ):
            """
            Carry out the actions requested by flags for nom, in a fixed order:
            transclusion, new review, state save (plus the failed-GA oldid when
            the status is 'F'), GA star, and finally nominator notification.

            Saving the nomination state marks state.active_table_dirty.
            """
            def _timed(label, action):
                # Run action() and record its duration under label for this title.
                started = time.perf_counter()
                action()
                ctx.prof.add(label, time.perf_counter() - started, title=title)

            if flags.transclusion_needed:
                _timed("transclude", lambda: nom.transclude(ctx.gan_conn))

            if flags.new_review:
                nom.add_a_review(ctx.gan_conn)

            if flags.new_nomination_state:
                _timed(
                    "save_nomination_state",
                    lambda: nom.save_nomination_state(ctx.gan_conn, name_changes, wbgan, review_stats),
                )
                state.active_table_dirty = True

                if nom.status == 'F':
                    _timed("add_failedga_oldid", lambda: nom.add_failedga_oldid(ctx.gan_conn))

            if flags.promotion_needed:
                nom.add_GA_star(ctx.gan_conn)  # Also updates the oldid if needed

            if flags.tell_nominator_needed:
                GAN.log(ctx.gan_conn, "tell_nominator", title, "Calling nom.tell_nominator()")
                nom.tell_nominator(ctx.gan_conn)

        def handle_closed_or_removed_nomination(ctx, state, title, talk_page, old_nom):
            """
            Handle an article that was an active nomination last run but is no
            longer in the GAN category.

            Args:
                ctx: shared BotContext.
                state: RunState; marked as needing an update and given the
                    nomination's edit summary.
                title: article title.
                talk_page: the article's talk page object.
                old_nom: the row cached from the last run, or None if it was
                    missing from the bulk cache (a DB lookup is then tried).

            Returns:
                NominationActionResult with the closing status applied, or None
                when no stored row can be found or the nomination has merely
                moved to a new title.
            """
            row = old_nom
            if row is None:
                GAN.log(ctx.gan_conn, "GANbot:in_last", title, "old_nom missing from bulk cache; falling back to DB lookup")
                row = Active_nomination.get_active_nomination(ctx.gan_conn, title)
                if row is None:
                    GAN.log(ctx.gan_conn, "GANbot:in_last", title, "DB lookup also failed; skipping")
                    return None

            moved_title = detect_moved_nomination_target(ctx, row)
            if moved_title is not None:
                GAN.log(
                    ctx.gan_conn,
                    "GANbot:in_last",
                    title,
                    f"Detected moved active review from {title} to {moved_title}; suppressing fail/remove handling",
                )
                return None

            state.update_needed = True
            state.update_reasons.append("Not in cat, in last run")

            nom = make_nom_from_row(row)
            flags = NominationActionFlags(new_nomination_state=True)

            # Default outcome is 'F' with a notification; a GA talk page makes it
            # 'P' (plus promotion), and a missing review page makes it 'X'
            # without any notification.
            new_status = 'F'
            flags.tell_nominator_needed = True

            if Nom.is_a_GA(talk_page):
                new_status = 'P'
                flags.promotion_needed = True
                flags.tell_nominator_needed = True
            elif not nom.review_page_exists:
                new_status = 'X'
                flags.tell_nominator_needed = False

            # Read the previous status from `row`, not `old_nom`: on the DB
            # fallback path old_nom is None and subscripting it would raise
            # TypeError.
            old_status = row['status']

            if old_status == '' and new_status != 'X':
                flags.transclusion_needed = True
                flags.new_review = True

            nom.new_status_message(new_status, old_status)
            nom.update_status(new_status)
            state.update_log.append(nom.edit_summary)
            state.topic_PFX_log[nom.topic].append(nom.edit_summary)

            return NominationActionResult(nom=nom, flags=flags)

        def handle_new_nomination(ctx, state, nom):
            """
            Decide what to do for a nomination that was not active under this
            title in the last run.

            If another active nomination already exists for the same article page
            id under a different title, the review is treated as moved: only the
            nomination state is saved.  Otherwise it is a genuinely new
            nomination, and an already-existing review page additionally requests
            a new review and transclusion (plus notification for status 'H'/'2').

            Returns the NominationActionFlags to execute.
            """
            existing = Active_nomination.get_active_nomination_by_article_page_id(
                ctx.gan_conn,
                nom.article_page_id,
            )
            is_move = existing is not None and existing["title"] != nom.title

            if is_move:
                GAN.log(
                    ctx.gan_conn,
                    "GANbot:in_cat",
                    nom.title,
                    f"Detected moved active review from {existing['title']} to {nom.title}; suppressing new-review notification",
                )
                reason = "Active review moved to a new title"
            else:
                reason = "This is a new nomination"

            state.update_needed = True
            state.update_reasons.append(reason)
            state.topic_update_needed[nom.topic] = True

            actions = NominationActionFlags(new_nomination_state=True)
            if is_move:
                # Save the new active nomination state, but do not notify or transclude as if a new review started.
                return actions

            nom.new_status_message(nom.status, None)

            if nom.review_page_exists:
                GAN.log(
                    ctx.gan_conn,
                    "GANbot:in_cat",
                    nom.title,
                    "New nomination and review page already exists; setting transclusion_needed and new_review to True",
                )
                actions.new_review = True
                actions.transclusion_needed = True
                if nom.status in ('H', '2'):
                    actions.tell_nominator_needed = True

            return actions

        def handle_page_number_change(ctx, state, title, nom, old_nom, name_changes, wbgan, review_stats):
            """
            Handle a nomination whose review page number differs from the stored one.

            The stored nomination is rebuilt, set to 'F' if its review page
            exists, saved, and its nominator is told.  The current nomination
            then gets a status message, and the returned flags request a new
            review and transclusion when its review page already exists
            (notification only for status 'H').
            """
            log_source = "GANbot:in_cat:page number has changed"
            GAN.log(
                ctx.gan_conn,
                log_source,
                title,
                f"Two nominations found with different page numbers ({old_nom['page']} and {nom.page_num})",
            )

            superseded = make_nom_from_row(old_nom)

            if superseded.review_page_exists:
                GAN.log(ctx.gan_conn, log_source, title, "Old nomination has a review page so failing it")
                superseded.new_status_message('F', old_nom['status'])
                superseded.update_status('F')

            save_started = time.perf_counter()
            superseded.save_nomination_state(ctx.gan_conn, name_changes, wbgan, review_stats)
            ctx.prof.add("save_nomination_state", time.perf_counter() - save_started, title=title)
            state.active_table_dirty = True

            superseded.tell_nominator(ctx.gan_conn)

            nom.new_status_message(nom.status, None)

            if not nom.review_page_exists:
                return NominationActionFlags()

            GAN.log(
                ctx.gan_conn,
                log_source,
                title,
                "Review page already exists for the new nomination; setting transclusion_needed and new_review to True",
            )
            return NominationActionFlags(
                new_review=True,
                transclusion_needed=True,
                tell_nominator_needed=(nom.status == 'H'),
            )

        def handle_same_page_nomination_change(ctx, nom, old_nom, matches):
            """
            Work out the follow-up actions for a changed nomination whose review
            page number is unchanged.

            Args:
                ctx: shared BotContext (used for logging).
                nom: the nomination as currently parsed from the talk page.
                old_nom: the stored row from the last run (dict-style access).
                matches: field name -> comparison result, as returned by
                    nom.compare(old_nom); entries equal to False mark changed fields.

            Returns:
                NominationActionFlags with new_review, transclusion_needed and
                tell_nominator_needed set as required.  As a side effect the
                nomination's status message/timestamps or edit summary are updated.
            """
            flags = NominationActionFlags()

            # A different, non-None reviewer counts as a new review.
            # NOTE(review): an empty-string reviewer that differs from the stored
            # one also takes this branch -- confirm that is intended.
            if nom.reviewer != old_nom['reviewer'] and nom.reviewer is not None:
                flags.tell_nominator_needed = True
                flags.new_review = True

            # Review page exists but the stored status was blank: transclude and notify.
            if nom.review_page_exists and old_nom['status'] == '':
                GAN.log(
                    ctx.gan_conn,
                    "GANbot:in_cat:match values is False",
                    nom.title,
                    "Nomination has updated parameters, the review page exists, and the old status is blank: setting transclusion_needed and tell_nominator_needed to True",
                )
                flags.tell_nominator_needed = True
                flags.transclusion_needed = True

            if matches['status'] == False:
                # Status changed: record the transition and refresh timestamps.
                nom.new_status_message(nom.status, old_nom['status'])
                nom.update_timestamps(old_nom['status'])
                if nom.status in ['P', 'F', 'H'] and nom.status != old_nom['status']:
                    flags.tell_nominator_needed = True
            else:
                # Status unchanged: only the edit summary needs choosing.
                mismatches = [x for x in matches if matches[x] == False]
                mismatches_string = "/".join(mismatches)

                if (nom.reviewer is None or nom.reviewer == '') and old_nom['reviewer'] != '':
                    nom.edit_summary = f"No longer on review [[{nom.title}]]"
                elif not flags.transclusion_needed:
                    # When transclusion is pending the edit summary is left untouched.
                    if nom.reviewer is not None and nom.reviewer != '' and old_nom['reviewer'] == '':
                        nom.edit_summary = f"On review [[{nom.title}]] by {nom.reviewer}"
                    else:
                        nom.edit_summary = f"Updated [[{nom.title}]] ({mismatches_string})"

            return flags

        def detect_moved_nomination_target(ctx, old_nom):
            """
            Return the article's current title if the stored nomination appears to
            have moved to a new title that is still nominated; otherwise None.

            None is returned when the row has no article page id, the id resolves
            to no title or to the stored title, the new talk page cannot be read
            (reported as an operational issue), or the new talk page neither
            carries a GA nominee template nor sits in the GAN category.
            """
            previous_title = old_nom["title"]
            page_id = old_nom.get("article_page_id")
            if page_id is None:
                return None

            live_title = Nom.get_title_from_article_page_id(page_id)
            if live_title is None or live_title == previous_title:
                return None

            # Do a fresh check on the moved title's talk page rather than relying on
            # ctx.cat_articles_set, which was snapshotted earlier in the run and may
            # be stale if the article was moved after the run began.
            moved_talk = pywikibot.Page(ctx.site, "Talk:" + live_title)
            try:
                moved_talk_text = moved_talk.text
            except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as err:
                GAN.report_operational_issue(
                    ctx.gan_conn,
                    "GANbot:exceptions",
                    live_title,
                    f"Error reading talk page while checking moved nomination target: {type(err).__name__}: {err}",
                )
                return None

            template_found = re.search(
                r"\{\{\s*ga\s*nominee\b", moved_talk_text, flags=re.IGNORECASE
            ) is not None

            # The template alone is enough; the category-membership check is only
            # the fallback when the template is not found.
            if template_found or talk_in_gan_category(ctx, moved_talk, ctx.GAN_CAT):
                return live_title

            return None

        def evaluate_existing_nomination_change(
            ctx,
            state,
            title,
            nom,
            old_nom,
            name_changes,
            wbgan,
            review_stats,
        ):
            """
            Compare a still-nominated article with its stored row and decide what,
            if anything, has to be done.

            Returns NominationActionFlags when something changed (or when nothing
            changed but a review page has appeared for a blank-status nomination),
            and None when there is nothing to do.
            """
            matches = nom.compare(old_nom)

            if False not in matches.values():
                # Nothing differs from the stored row; the only remaining trigger
                # is a review page appearing for a nomination with blank status.
                if nom.status == '' and nom.review_page_exists:
                    GAN.log(
                        ctx.gan_conn,
                        "GANbot:in_cat:match values is True",
                        title,
                        "Nomination has no new parameters but the review page now exists; setting transclusion_needed, new_review, tell_nominator_needed, and new_nomination_state to True",
                    )
                    return NominationActionFlags(
                        new_nomination_state=True,
                        new_review=True,
                        transclusion_needed=True,
                        tell_nominator_needed=True,
                    )
                return None

            change_reason = "Pre-existing nomination and something has changed"
            GAN.log(ctx.gan_conn, "GANbot:in_cat", title, change_reason)
            state.update_needed = True
            state.update_reasons.append(change_reason)
            state.topic_update_needed[nom.topic] = True

            if nom.page_num != str(old_nom['page']):
                detail = handle_page_number_change(
                    ctx,
                    state,
                    title,
                    nom,
                    old_nom,
                    name_changes,
                    wbgan,
                    review_stats,
                )
            else:
                detail = handle_same_page_nomination_change(ctx, nom, old_nom, matches)

            # The state is always saved; the handler decides the other three flags.
            return NominationActionFlags(
                new_nomination_state=True,
                new_review=detail.new_review,
                transclusion_needed=detail.transclusion_needed,
                tell_nominator_needed=detail.tell_nominator_needed,
            )

        def handle_nomination_still_in_category(
            ctx,
            state,
            title,
            talk_page,
            talk_text,
            has_ga_nominee,
            old_nom,
            noms,
            name_changes,
            wbgan,
            review_stats,
        ):
            """
            Process an article whose talk page is currently in the GAN category.

            Returns a NominationActionResult (nomination plus flags, where "no
            action" is an all-False flag set), or None when the GA nominee
            template is missing or a nomination could not be built from the page.
            """
            GAN.log(ctx.gan_conn, "GANbot:all_articles loop", title, "In the in_cat branch")

            nomination_msgs = []

            if not has_ga_nominee:
                missing_template = BotMessage(
                    kind=MessageKind.EDITOR_ERROR,
                    scope=MessageScope.NOMINATION,
                    short_text="In GAN category but GA nominee template missing",
                    source="handle_nomination_still_in_category",
                    title=title,
                    gan_error_text=(
                        f"[[{title}]] is in the GAN category but has no GA nominee template on its talk page\n"
                    ),
                )
                report_message(ctx.gan_conn, missing_template)
                return None

            current_nom = build_nom_from_current_talk_page(
                ctx, talk_page, title, talk_text, nomination_msgs, noms
            )
            if current_nom is None:
                return None

            # A non-blank status without a review page is inconsistent: blank it.
            if not current_nom.review_page_exists and current_nom.status != '':
                GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "no review page and inconsistent status")
                current_nom.update_status('')

            known_from_last_run = title in ctx.last_run_articles_set and old_nom is not None
            if known_from_last_run:
                GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "This nomination is not new")
                actions = evaluate_existing_nomination_change(
                    ctx,
                    state,
                    title,
                    current_nom,
                    old_nom,
                    name_changes,
                    wbgan,
                    review_stats,
                )
            else:
                GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "This is a new nomination")
                actions = handle_new_nomination(ctx, state, current_nom)

            if actions is None:
                actions = NominationActionFlags()

            return NominationActionResult(nom=current_nom, flags=actions)

        def process_title(ctx, state, title, ctr):
            """Process a single article title from the combined article list.

            Fetches the talk page state, decides whether the article is still in
            the GAN category or has left it since the last run, delegates to the
            matching handler, and then executes whatever actions the returned
            flags request.

            `ctr` is the 1-based position in the loop and is only used to log
            progress every tenth article. `noms`, `name_changes`, `wbgan` and
            `review_stats` come from the enclosing scope.

            Returns None. The "process_title_total" timing is recorded only when
            the title is processed all the way through; every early return skips it.
            """
            started_at = time.perf_counter()
            GAN.log(ctx.gan_conn, "GANbot:all_articles loop",title,"Starting")
            if ctr % 10 == 0:
                GAN.log(ctx.gan_conn, "GANbot:all_articles loop", None, f"Processed {ctr} articles")
            was_active_last_run = title in ctx.last_run_articles_set

            talk_state = get_talk_page_state(ctx, title)
            if talk_state is None:
                return

            talk_page, talk_text, has_ga_nominee, in_cat = (
                talk_state.talk_page,
                talk_state.talk_text,
                talk_state.has_ga_nominee,
                talk_state.in_cat,
            )

            # old_nom stays None if the title was not active last run, and will also be
            # None if there was a problem retrieving the old nomination.
            # If the old nom has a lower page number than the new nom, the old nom was
            # probably passed/failed and a new nom created between runs (e.g. a review
            # was found to be invalid, so it was failed and the article renominated).
            # Things to check: two nominee templates?  Page number incremented by 1?
            # If OK, destroy nom and run as if we were just doing the fail pass.  That
            # gets rid of the record in active_nominations and the next pass will pick
            # up the new nom.
            old_nom = ctx.last_run_noms.get(title) if was_active_last_run else None

            if not (in_cat or was_active_last_run):
                # Not in category now and wasn't active last run: nothing to do.
                return

            if in_cat:
                result = handle_nomination_still_in_category(
                    ctx,
                    state,
                    title,
                    talk_page,
                    talk_text,
                    has_ga_nominee,
                    old_nom,
                    noms,
                    name_changes,
                    wbgan,
                    review_stats,
                )
            else:
                # Active last run but no longer in the category.
                result = handle_closed_or_removed_nomination(ctx, state, title, talk_page, old_nom)
            if result is None:
                return

            # Setup is done; the nomination comes from whichever handler ran above.
            nom = result.nom
            handler_flags = result.flags
            # Copy the five flags into a fresh object rather than passing the
            # handler's flags object through.
            action_flags = NominationActionFlags(
                new_nomination_state=handler_flags.new_nomination_state,
                new_review=handler_flags.new_review,
                transclusion_needed=handler_flags.transclusion_needed,
                promotion_needed=handler_flags.promotion_needed,
                tell_nominator_needed=handler_flags.tell_nominator_needed,
            )

            execute_nomination_actions(
                ctx,
                state,
                title,
                nom,
                action_flags,
                name_changes,
                wbgan,
                review_stats,
            )
            ctx.prof.add("process_title_total", time.perf_counter() - started_at, title=title)

        #GAN.log(gan_conn, "GANbot:after loop, tracking param counts", "N/A", str(GAN.param_info))
        # Outside the loop, all noms processed, so here we should be able to rebuild some things
        for ctr, title in enumerate(all_articles, start=1):
            process_title(ctx, state, title, ctr)
        if state.active_table_dirty:
            Active_nomination.update_active_nominations(gan_conn) # Reload the active nominations table
        else:
            GAN.log(gan_conn, "GANbot:state", "N/A", "active_nominations rebuild skipped (no changes)")
        Review_stats.update_statistics(gan_conn) # update the GA reviewing stats table in SQL and update the GA reviewing stats page on Wikipedia
        review_stats = Review_stats(gan_conn)   # Refresh the GA reviewing stats dictionary from the database
        # The next loops are going to generate the GAN page
        target = 'Wikipedia'
        sort_order = GA_config.sort_order
        print_list = []
        pledge_list = []

        collapsed = [] # This will hold noms that should be displayed in a collapsed list at the end of each topic
        noms_by_user = {}
        # Get the user information before trying to print anything, since this can change sort order
        for n in noms.noms:
            # As we look at each nomination, add the nominator and reviewer GA and review counts to the user dictionary if they're not already in it.
            #print("About to call update_users; nom is " + n.title)
            n.update_users(gan_conn, users, name_changes, review_stats, wbgan)
            # Build a dictionary of noms by user as we go for use in building the collapse list
            if n.nominator in noms_by_user:
                noms_by_user[n.nominator].append({'title': n.title, 'nomination_ts': n.nomination_ts})
            else:
                noms_by_user[n.nominator] = [{'title': n.title, 'nomination_ts': n.nomination_ts}]

        # Sort each user's list by the nomination date
        for u in noms_by_user:
            noms_by_user[u] = sorted(noms_by_user[u], key=lambda d: d['nomination_ts'])
            if len(noms_by_user[u]) > 20:
                add_to_collapse = noms_by_user[u][20:]
                for c in add_to_collapse:
                    collapsed.append(c['title'])

        #  If something is in collapsed, don't print it, put it in collapsed_topic.  Then at the end of the topic, print those inside a collapse template.

        # Now assemble the GARs
        # Each row is of the form  # {{GARentry|Terry Pratchett|1}} GA nominator: [[User:Example]]; GAR created: <date>
        # Create a dictionary with {'<subtopic>': {article_title: {GARpage=, orignom= , GARdate= }}
        # Then in the print code, add the GARs after the collapsed sections
        try:
            gar_dict = GAH.get_gar_dict(gan_conn, config)
        except (pywikibot.exceptions.ServerError,
                pywikibot.exceptions.APIError,
                pywikibot.exceptions.Error) as e:
            GAN.report_operational_issue(
                gan_conn,
                "GANbot:exceptions",
                "GAR dictionary",
                f"Error building GAR dictionary: {type(e).__name__}: {e}",
            )
            gar_dict = {}

        for k in topics: # We print the sections in topic order
            #print("In topic loop for " + k)
            topic = topics[k]
            topic_print_list = []
            topic_update_log = state.topic_PFX_log[k]
            #print("Printing topic " + topic.name)
            topic_noms = [x for x in noms.noms if x.topic == topic.name]
            print_list.append(topic.header(target))
            topic_print_list.append(topic.header(target))
            for subtopic in topic.subtopics:
                #print("Printing subtopic " + subtopic.name)
                subtopic_noms = [x for x in topic_noms if x.subtopic == subtopic.name]
                print_list.append(subtopic.section_header())
                topic_print_list.append(subtopic.section_header())
                #print("About to sort" + subtopic.name)
                subtopic_noms = sorted(subtopic_noms, key=operator.attrgetter(sort_order), reverse=False)
                #print("Sorted " + subtopic.name)
                subtopic_gars = []
                if subtopic.name in gar_dict:
                    subtopic_gars = gar_dict[subtopic.name]
                collapsed_topic = []
                for nom in subtopic_noms:
                    #print(" In first for loop, printing " + nom.title + "; edit_summary is " + nom.edit_summary)
                    if nom.title in collapsed:
                        #print("putting " + nom.title + " in collapsed")
                        collapsed_topic.append(nom)
                    else:
                        #print(nom.title + " is not in collapsed")
                        nom_text = nom.print_GAN_entry()
                        print_list.append(nom_text)
                        topic_print_list.append(nom_text)
                        if nom.has_a_pledge() and nom.status not in ('H','R','2'):
                            pledge_list.append(nom_text)
                        if nom.edit_summary != "No change" and nom.edit_summary != "":
                            state.update_log.append(nom.edit_summary)
                            topic_update_log.append(nom.edit_summary)
                            if nom.has_a_pledge():
                                state.pledge_update_log.append(nom.edit_summary)
                #print("At end of first for for " + subtopic.name + "; collapsed_topic has length " + len(collapsed_topic))
                if len(collapsed_topic) > 0:
                    #print("In the collapsed_topic if")
                    print_list.append("{{cot|Additional nominations}}")
                    print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
                    topic_print_list.append("{{cot|Additional nominations}}")
                    topic_print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
                    for nom in collapsed_topic:
                        #print("In the for loop inside collapsed topic")
                        nom_text = nom.print_GAN_entry()
                        print_list.append(nom_text)
                        topic_print_list.append(nom_text)
                        if nom.edit_summary != "No change" and nom.edit_summary != "":
                            state.update_log.append(nom.edit_summary)
                            topic_update_log.append(nom.edit_summary)
                            if nom.has_a_pledge():
                                state.pledge_update_log.append(nom.edit_summary)
                    print_list.append("{{cob}}")
                    topic_print_list.append("{{cob}}")
                if len(subtopic_gars) > 0:
                    #print_list.append("=== " + subtopic.name + " reassessments ===\n")
                    print_list.append("==== Reassessments ====\n")
                    gars = gar_dict[subtopic.name]
                    gars = sorted(gars, key = lambda d: d[1]['GARdate'])
                    for gar in subtopic_gars:
                        gar_line = "# {{GARentry|" + gar[0] + "|" + gar[1]['GARpage']
                        shortdesc = gar[1]['shortdesc']
                        if shortdesc is not None:
                            gar_line += "|shortdesc=" + shortdesc
                        gar_line += "}}"
                        orignom = gar[1]['orignom']
                        if orignom is not None:
                            gar_line += " GA nominator: [[User:" + orignom + "|" + orignom + "]]"
                            user = pywikibot.User(site, "User:" + orignom)
                            if user is not None:
                                try:
                                    ule = user.last_edit
                                except pywikibot.exceptions.APIError as e:
                                    GAN.report_operational_issue(
                                        gan_conn,
                                        "GANbot:exceptions",
                                        gar[0],
                                        f"API error getting GAR nominator's last edits for nominator {orignom}: {type(e).__name__}: {e}",
                                    )
                                    ule = None
                                if ule is not None:
                                    ule_ts = ule[2]
                                    ule_dt = GAH.convert_timestamp_to_datetime(ule_ts)
                                    ule_delta = datetime.datetime.utcnow() - ule_dt
                                    if ule_delta.days > 21:
                                        gar_line += " (inactive for " + str(ule_delta.days) + " days)"
                                    gar_line += "."
                        GARdate = gar[1]['GARdate']
                        if GARdate is not None:
                            gar_line += " GAR created: " + GARdate.strftime("%H:%M, %-d %B %Y (UTC)")
                        print_list.append(gar_line)
            # Save topic page
            page = pywikibot.Page(site, GA_config.strings['GA topic pages base'] + topic.name)
            topic_page_text = '\n'.join(topic_print_list)
            page.text = topic_page_text
            topic_update_text = "\n".join(topic_update_log)
            if len(topic_update_log) == 0:
                topic_update_text = "Update metrics"
            if state.topic_update_needed[topic.name]:
                t0 = time.perf_counter()
                if save_page_report_operational(
                    gan_conn,
                    page,
                    topic_update_text,
                    "GANbot:exceptions",
                    GA_config.strings['GA topic pages base'] + topic.name,
                    "Error saving topic page",
                ):
                    prof.add("save_topic_page", time.perf_counter() - t0)

        t0 = time.perf_counter()
        GAN.write_backlog_by_sort_order(gan_conn, noms.noms, sort_order)
        prof.add("write_backlog_by_sort_order", time.perf_counter() - t0)
        t0 = time.perf_counter()
        GAN.mark_superseded_reviews(gan_conn)
        prof.add("mark_superseded_reviews", time.perf_counter() - t0)
        GAN.log(gan_conn,"GANbot", "N/A","About to run integrity checks")
        t0 = time.perf_counter()
        GAN.integrity_checks(gan_conn)
        prof.add("integrity_checks", time.perf_counter() - t0)

        error_edit_summary = ''
        current_errors_text = ''
        def format_errors_list(errors):
            """Render error messages as a "* "-prefixed bullet list.

            Each entry is stripped of surrounding whitespace, loses one leading
            "*" if it already carries a bullet, and is emitted as "* <text>".
            Falsy entries are skipped, and so are entries with no text left after
            cleaning (whitespace-only or a bare "*"), so the result never
            contains an empty bullet.

            Args:
                errors: iterable of message strings (falsy items are ignored).

            Returns:
                The bullets joined with newlines; "" when nothing remains.
            """
            bullets = []
            for raw in errors:
                if not raw:
                    continue
                # Remove at most one existing bullet marker so an already
                # bulleted entry is not double-bulleted.
                text = raw.strip().removeprefix("*").strip()
                if not text:
                    # Nothing but whitespace/bullet: do not emit an empty "* " line.
                    continue
                bullets.append(f"* {text}")
            return "\n".join(bullets)

        if len(GA_config.current_errors) > 0:
            current_errors_text = format_errors_list(GA_config.current_errors)
            error_edit_summary = "[[" + GA_config.strings['GAN errors page'] + "|Errors listed!]] "

        page_text = '\n'.join(print_list)
        page = pywikibot.Page(site,GA_config.strings['GAN page'])
        page.text=page_text
        page.text = GA_config.strings['GAN top text'] + page_text + GA_config.strings['GAN bottom text']

        update_text = "\n".join(state.update_log)
        if len(state.update_log) == 0:
            update_text = "Update metrics"

        pledge_page = pywikibot.Page(site, GA_config.strings['Open pledges page'])
        pledge_page_text = '\n'.join(pledge_list)
        pledge_page.text = pledge_page_text
        pledge_update_text = "\n".join(state.pledge_update_log)
        if pledge_update_text == '':
            pledge_update_text = "Updating list of open pledges"

        #state.update_needed = True       # Uncomment this to force a page update to test layout and sort changes; the page will not update otherwise unless a nomination changes status in some way.
        GAN.log(gan_conn,"GANbot", "N/A","About to save main GAN page")
        if state.update_needed:
            t0 = time.perf_counter()
            if save_page_report_operational(
                gan_conn,
                page,
                error_edit_summary + update_text,
                "GANbot:exceptions",
                GA_config.strings['GAN page'],
                "Error saving main GAN page",
            ):
                prof.add("save main GAN page", time.perf_counter() - t0)

            t0 = time.perf_counter()
            if save_page_report_operational(
                gan_conn,
                pledge_page,
                pledge_update_text,
                "GANbot:exceptions",
                GA_config.strings['Open pledges page'],
                "Error saving pledge page",
            ):
                prof.add("save pledge page", time.perf_counter() - t0)

        GAN.log(gan_conn,"GANbot", "N/A","About to write errors page")
        GAN.write_errors_page(current_errors_text)

        GAN.log(gan_conn, "GANbot", "N/A", "About to write bugs page")
        if len(GA_config.current_bug_messages) > 0:
            bug_page_text = "\n\n".join(GA_config.current_bug_messages)
        else:
            bug_page_text = ""
        GAN.write_bugs_page(bug_page_text)

        GAN.log(gan_conn, "GANbot", "N/A", "About to update operational status page")
        if not GA_config.transient_operational_issue:
            GAN.clear_operational_status()

        GAN.log(gan_conn,"GANbot", "N/A","About to write sortable table")
        # Now write the sortable table
        t0 = time.perf_counter()
        print_list = []
        print_list.append('{| class="wikitable sortable"')
        print_list.append('!Article')
        print_list.append('!Status')
        print_list.append('!Nominator')
        print_list.append('!Subtopic')
        print_list.append('!{{Tooltip|Age|Age in days}}')
        print_list.append('!{{Tooltip|Revs|Reviews}}')
        print_list.append('!{{Tooltip|GAs|Promoted GAs}}')
        print_list.append('!{{Tooltip|R/G|Reviews per GA}}')
        print_list.append('!Note')
        for n in noms.noms:
            print_list.append("|-")
            print_list.append("|[[" + n.title + "]]")
            if n.status == "":
                print_list.append("|")
            else:
                print_list.append("|" + n.status_string)
            print_list.append("| [[User:" + n.nominator + "|" + n.nominator + "]]")
            print_list.append("|[[Wikipedia:Good article nominations#" + n.subtopic + "|" + n.subtopic + "]]")
            print_list.append("|" + str(n.age_in_days))
            print_list.append("|" + str(n.nominator_reviews))
            print_list.append("|" + str(n.nominator_GAs))
            print_list.append("|" + str(n.R_over_G))
            print_list.append("|" + n.note)

        print_list.append("|}")
        sortable_table_text = "\n".join(print_list)
        prof.add("build sortable table", time.perf_counter() - t0)
        page = pywikibot.Page(site,GA_config.strings['GAN as a single table']) 
        page.text=sortable_table_text
        if state.update_needed:
            t0 = time.perf_counter()
            if save_page_report_operational(
                gan_conn,
                page,
                error_edit_summary + update_text,
                "GANbot:exceptions",
                GA_config.strings['GAN as a single table'],
                "Error saving sortable GAN page",
            ):
                prof.add("save sortable GAN page", time.perf_counter() - t0)
        run_ok = True
    except Exception as e:
        _fatal("main run", e, gan_conn)
        sys.exit(1)
    finally:
        elapsed = time.perf_counter() - start_time

        runtime_log_counters = getattr(GA_config, "runtime_log_counters", False)
        runtime_log_profile_summary = getattr(GA_config, "runtime_log_profile_summary", False)

        status = "OK" if run_ok else "FAILED"

        print(
            "[RUN_END] "
            f"status={status} "
            f"runtime_s={elapsed:.2f} "
            f"saves={GA.SAVES_ATTEMPTED}/{GA.SAVES_SUCCEEDED}/{GA.SAVES_FAILED} "
            f"op_issues={1 if GA_config.transient_operational_issue else 0} "
            f"wiki_errors={len(GA_config.current_errors)} "
            f"bug_msgs={len(GA_config.current_bug_messages)}",
            flush=True,
        )

        if runtime_log_counters:
            print(
                f"[FAST_PARSE] used={GAN.fast_parse_used} fallback={GAN.fast_parse_fallback}",
                flush=True,
            )

        if runtime_log_profile_summary:
            print(prof.report(), flush=True)

        try:
            if gan_conn is not None:
                gan_conn.close()
        except Exception:
            pass


# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted and restated. We strive to provide accurate and relevant information; however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.