User:ChristieBot/GANbot.py
'''
Copyright (c) 2022 Mike Christie
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
'''
# Third party modules
import pywikibot
pywikibot.config.max_retries=2
import re
import time
from collections import defaultdict
import heapq
from dataclasses import dataclass, field
from typing import Any
from Messages import MessageKind, MessageScope, BotMessage, report_message
@dataclass
class BotContext:
    """Handles and lookups shared by the per-article helpers during one run.

    Built once in ``main()`` and passed to each helper as ``ctx``.
    """

    # Open connection to the GAN database.
    gan_conn: Any
    # pywikibot site object the run operates on.
    site: Any
    # Full title of the GA nominees category.
    GAN_CAT: str
    # title -> row dict for the nominations recorded as active by the last run.
    last_run_noms: dict
    # The titles (keys) of last_run_noms.
    last_run_articles_set: set
    # Titles collected from the nominees category at the start of the run.
    cat_articles_set: set
    # Subtopic lookup keyed by subtopic name, as returned by Topic.initialize().
    subtopics: dict
    # RunProfiler that accumulates timings for the run.
    prof: Any
@dataclass
class RunState:
    """Mutable bookkeeping accumulated while the articles are processed."""

    # Set once any nomination change has been detected during the run.
    update_needed: bool = False
    # Human-readable reasons appended alongside update_needed.
    update_reasons: list[str] = field(default_factory=list)
    # Set when nomination state was saved, so the active table needs a rebuild.
    active_table_dirty: bool = False
    # topic -> bool: whether that topic's page has to be saved.
    topic_update_needed: dict = field(default_factory=dict)
    # topic -> list of edit summaries collected for that topic.
    topic_PFX_log: dict = field(default_factory=dict)
    # Edit-summary logs; each starts with a single empty string entry.
    update_log: list[str] = field(default_factory=lambda: [""])
    pledge_update_log: list[str] = field(default_factory=lambda: [""])
@dataclass
class NominationActionFlags:
    """Switches telling execute_nomination_actions what to do for one nomination.

    Every flag defaults to False, i.e. "do nothing".
    """

    # Save the nomination state to the database.
    new_nomination_state: bool = False
    # Record a new review for the nomination.
    new_review: bool = False
    # Transclude the review page.
    transclusion_needed: bool = False
    # Add the GA star to the article.
    promotion_needed: bool = False
    # Notify the nominator.
    tell_nominator_needed: bool = False
@dataclass
class NominationActionResult:
    """Outcome of evaluating one article: the nomination plus the actions to run."""

    # The nomination object the actions apply to (a GA.Nom in this file).
    nom: Any
    # Which follow-up actions execute_nomination_actions should perform.
    flags: NominationActionFlags
@dataclass
class TalkPageState:
    """Snapshot of an article's talk page as read by get_talk_page_state."""

    # The pywikibot page object for "Talk:<title>".
    talk_page: Any
    # Wikitext of the talk page.
    talk_text: str
    # True when the text contains a {{GA nominee}} template opening.
    has_ga_nominee: bool
    # True when the page counts as being in the GA nominees category.
    in_cat: bool
class RunProfiler:
    """Accumulates timings per label and remembers the slowest titled samples."""

    def __init__(self, topn=10):
        self.topn = topn
        self.totals = defaultdict(float)  # label -> summed elapsed seconds
        self.counts = defaultdict(int)  # label -> number of samples
        self.top = defaultdict(list)  # label -> min-heap of (elapsed, title)

    def add(self, label, elapsed, title=None):
        """Record one sample of ``elapsed`` seconds under ``label``.

        When ``title`` is given the sample also competes for a place among
        the ``topn`` slowest samples kept for that label.
        """
        self.totals[label] += elapsed
        self.counts[label] += 1
        if title is None:
            return
        heap = self.top[label]
        sample = (elapsed, title)
        if len(heap) < self.topn:
            heapq.heappush(heap, sample)
        elif elapsed > heap[0][0]:
            # Heap is full: evict the fastest retained sample.
            heapq.heapreplace(heap, sample)

    def report(self):
        """Return a multi-line text summary of totals and slowest samples."""
        out = ["[PROFILE] totals / avg_ms / count"]
        # Labels with the largest accumulated time come first.
        by_total = sorted(self.totals.items(), key=lambda kv: kv[1], reverse=True)
        for label, total in by_total:
            n = self.counts[label]
            avg_ms = (total / n * 1000.0) if n else 0.0
            out.append(f"[PROFILE] {label}: total={total:.2f}s avg={avg_ms:.1f}ms n={n}")
        for label, heap in self.top.items():
            if heap:
                out.append(f"[PROFILE] slowest {label}:")
                slowest_first = sorted(heap, reverse=True)[: self.topn]
                out.extend(f"[PROFILE] {t:.2f}s {title}" for t, title in slowest_first)
        return "\n".join(out)
import datetime
import sys
import os
import pymysql
import configparser
import operator
import mwparserfromhell
import traceback
# Local modules
sys.path.append('./www/python/src') # Not needed if I run from that directory
import GA
from GA import (
Topic,
Subtopic,
Nom,
Review_stats,
WBGAN,
Active_nomination,
GAN,
Name_changes,
Nom_list,
save_page,
save_page_report_operational,
)
import GA_config
from GA_history import GAH, FailedGA, GAnominee, Article_History, GA_article_page, GARlink, GA_talk_page, GA_sub_page, GA_history_Exception, GAO
# Config helpers
def load_replica_config():
    """Read and validate the Toolforge ``replica.my.cnf`` credentials file.

    The file is looked up in ``$HOME`` when that variable is set, otherwise
    under ``~``. Returns the parsed ``ConfigParser``. Exits the process with
    status 2 when the file is missing or lacks a ``[client]`` section with
    ``user`` and ``password`` keys.
    """
    home_dir = os.environ.get('HOME')  # get environment variable $HOME
    if home_dir:
        replica_path = os.path.join(home_dir, 'replica.my.cnf')
    else:
        replica_path = os.path.expanduser('~/replica.my.cnf')

    if not os.path.exists(replica_path):
        print(f"[FATAL] replica.my.cnf file not found at: {replica_path}")
        print(" Create it per Toolforge docs, and ensure it contains a [client] section with user/password.")
        sys.exit(2)

    # Read Toolforge replica credentials
    config = configparser.ConfigParser()
    config.read(replica_path)

    # Validate expected keys early so failures are clear.
    has_credentials = (
        'client' in config
        and 'user' in config['client']
        and 'password' in config['client']
    )
    if not has_credentials:
        print(f"[FATAL] Invalid credentials file format in: {replica_path}")
        print(" Expected a [client] section containing user=... and password=...")
        sys.exit(2)
    return config
# Connection settings for the GAN database
database = "s55175__ganfilter"
host = "tools.db.svc.eqiad.wmflabs"


def get_gan_conn(cfg):
    """Open a pymysql connection to the GAN database.

    ``cfg`` must provide ``cfg['client']['user']`` and
    ``cfg['client']['password']``. Any failure while building the connection
    is printed and the process exits with status 3.
    """
    try:
        credentials = cfg['client']
        conn = pymysql.connections.Connection(
            user=credentials['user'],
            password=credentials['password'],
            database=database,
            host=host,
        )
    except Exception as e:
        print(f"[FATAL] Could not connect to GAN database '{database}' on host '{host}'.")
        print(f" Error: {type(e).__name__}: {e}")
        sys.exit(3)
    return conn
def _fatal(stage, exc, gan_conn=None):
# Print a clear message for Toolforge filelog / failure emails
print(f"[FATAL] Unhandled exception during: {stage}")
print(f" {type(exc).__name__}: {exc}")
traceback.print_exc()
# Best-effort write to audit log (won't mask the original exception if logging fails)
if gan_conn is not None:
try:
GAN.log(gan_conn, "GANbot", stage, f"{type(exc).__name__}: {exc}")
except Exception:
pass
def main():
start_time = time.perf_counter()
gan_conn = None
run_ok = False
prof = RunProfiler(topn=10)
try:
config = load_replica_config()
gan_conn = get_gan_conn(config)
# Create an initial copy of the review stats. This will be used to provide the values to store in the nomination audit trail so that we know how many GAs and reviews were
# showing on the GAN page when the review was started.
t0 = time.perf_counter()
review_stats = Review_stats(gan_conn) # Refresh the GA reviewing stats dictionary from the database
prof.add("review stats", time.perf_counter() - t0)
# Create the name changes lookup from the database
users = {}
t0 = time.perf_counter()
name_changes = Name_changes.get_name_changes(gan_conn)
prof.add("get name changes", time.perf_counter() - t0)
#t = datetime.datetime.utcnow()
GAN.log(gan_conn, "GANbot","Initializing","Starting run")
# Initialize some variables
GA_config.transient_operational_issue = False
GA_config.current_bug_messages = []
site = pywikibot.Site('en','wikipedia')
GAN_CAT = "Category:Good article nominees"
t0 = time.perf_counter()
wbgan = WBGAN.get_wbgan(config, gan_conn) # The WBGAN database that holds the number of promoted GAs by each nominator. Maintained by another tool.
prof.add("get WBGAN", time.perf_counter() - t0)
#GAN.log(gan_conn, None, "WBGAN","GAN count for History6042 is " + str(wbgan.get_GA_count("History6042")))
[topics, subtopics] = Topic.initialize()
# Clear the error page
GAN.clear_errors()
single_title = GA_config.single_title # If is_live = False, then single_title can be used to restrict a run to a single article to speed up testing of changes
# Next step is get the list of articles we have to look at in this pass. This is the union of two lists:
# 1. All the articles that currently have a {{GA nominee}} template on their talk page; and
# 2. All articles currently in the active_nominations table.
cat_articles = set()
cat = pywikibot.Category(site,'Good article nominees') # We're going to iterate over these; this is the contents of GAN.
try:
for x in cat.articles():
title = x.title()[5:]
cat_articles.add(title)
except (pywikibot.exceptions.ServerError, pywikibot.exceptions.APIError, pywikibot.exceptions.Error) as e:
GAN.report_operational_issue(
gan_conn,
"GANbot:exceptions",
"Category:Good article nominees",
f"Error scanning cat.articles(): {type(e).__name__}: {e}",
)
raise
GAN.log(gan_conn, "GANbot", "Process active nominations table", "Starting")
t0 = time.perf_counter()
last_run_noms = Active_nomination.get_active_nominations_dict(gan_conn) # title -> row dict
prof.add("get active nominations", time.perf_counter() - t0)
last_run_articles_set = set(last_run_noms.keys())
ctx = BotContext(
gan_conn=gan_conn,
site=site,
GAN_CAT=GAN_CAT,
last_run_noms=last_run_noms,
last_run_articles_set=last_run_articles_set,
cat_articles_set=cat_articles,
subtopics=subtopics,
prof=prof,
)
GAN.log(gan_conn, "GANbot", "Process active nominations table",
f"Obtained {len(ctx.last_run_articles_set)} active nominations")
all_articles = sorted(cat_articles | ctx.last_run_articles_set) # Stable order for repeatable runs
GAN.log(gan_conn, "GANbot", "Process active nominations table", "Created list of articles")
state = RunState()
for t in topics:
state.topic_update_needed[t] = False
state.topic_PFX_log[t] = ['']
noms = Nom_list() # This will be loaded with the nominations that will print on GAN.
GAN.log(gan_conn, "GANbot", "all_articles loop", f"About to start loop over {len(all_articles)} articles")
def talk_in_gan_category(ctx, talk_page, gan_cat_title):
    """
    Report whether talk_page is currently a member of gan_cat_title.

    Iterates talk_page.categories() and stops at the first category whose
    title equals gan_cat_title, so no intermediate collection is built.
    If pywikibot raises while listing the categories, the problem is reported
    as an operational issue and False is returned.
    """
    try:
        page_categories = talk_page.categories(with_sort_key=False, total=None, content=False)
        return any(
            (cat.title() if hasattr(cat, "title") else str(cat)) == gan_cat_title
            for cat in page_categories
        )
    except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as e:
        GAN.report_operational_issue(
            ctx.gan_conn,
            "GANbot:exceptions",
            talk_page.title(),
            f"Error checking GAN category membership: {type(e).__name__}: {e}",
        )
        return False
def get_talk_page_state(ctx, title):
    """Read "Talk:<title>" and summarise its nomination-related state.

    Returns a TalkPageState, or None when the page text could not be read
    (the failure is reported as an operational issue).
    """
    page = pywikibot.Page(ctx.site, "Talk:" + title)
    try:
        started = time.perf_counter()
        text = page.text
        ctx.prof.add("talk_page_text", time.perf_counter() - started, title=title)
    except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as e:
        GAN.report_operational_issue(
            ctx.gan_conn,
            "GANbot:exceptions",
            title,
            f"Error reading talk page text: {type(e).__name__}: {e}",
        )
        return None

    template_present = re.search(
        r"\{\{\s*ga\s*nominee\b", text, flags=re.IGNORECASE
    ) is not None

    # A nominee template in the text is taken as category membership; the
    # category lookup is only made when the template is absent.
    member = template_present
    if not member:
        started = time.perf_counter()
        member = talk_in_gan_category(ctx, page, ctx.GAN_CAT)
        ctx.prof.add("talk_in_gan_category", time.perf_counter() - started, title=title)

    return TalkPageState(
        talk_page=page,
        talk_text=text,
        has_ga_nominee=template_present,
        in_cat=member,
    )
def topic_for_subtopic(subtopic):
    """Return the name of the topic that owns ``subtopic``.

    Falls back to 'Miscellaneous' when the subtopic is not in ``subtopics``.
    """
    entry = subtopics.get(subtopic)
    if entry:
        return entry.topic.name
    return 'Miscellaneous'
def make_nom_from_params(params, title, entry_msgs):
    """Build a Nom for ``title`` from parsed GA nominee template parameters.

    ``params`` must contain the keys 'nominator', 'status', 'page',
    'subtopic', 'timestamp', 'note' and 'shortdesc'. ``entry_msgs`` is passed
    to the Nom as its message list.
    """
    nominator = Nom.clean_user_name(params['nominator'])
    status, page_num = params['status'], params['page']

    # normalize subtopic variants
    subtopic = params['subtopic']
    if isinstance(subtopic, str):
        variant_key = subtopic.lower()
        if variant_key in Subtopic.subtopic_var_dict:
            subtopic = Subtopic.subtopic_var_dict[variant_key]

    nom_options = dict(
        msgs=entry_msgs,
        populate_article_page_id=True,
        prof=ctx.prof,
    )
    return Nom(
        gan_conn,
        topic_for_subtopic(subtopic),
        subtopic,
        title,
        status,
        page_num,
        nominator,
        params['timestamp'],
        params['note'],
        params['shortdesc'],
        **nom_options,
    )
def make_nom_from_row(row, entry_msgs=None):
    """Build a Nom from a stored active-nomination row.

    ``row`` is a dict with at least 'title', 'status', 'page', 'nominator'
    and 'nomination_ts'; 'subtopic', 'note', 'shortdesc' and
    'article_page_id' are optional. A missing or unknown subtopic maps to the
    'Miscellaneous' topic.
    """
    subtopic = row.get('subtopic') or ''
    topic = subtopics[subtopic].topic.name if subtopic in subtopics else 'Miscellaneous'
    note = row.get('note') or ''
    shortdesc = row.get('shortdesc') or ''
    return Nom(
        gan_conn,
        topic,
        subtopic,
        row['title'],
        row['status'],
        row['page'],
        row['nominator'],
        row['nomination_ts'],
        note,
        shortdesc,
        msgs=entry_msgs or [],
        article_page_id=row.get('article_page_id'),
        prof=ctx.prof,
    )
def build_nom_from_current_talk_page(ctx, talk_page, title, talk_text, entry_msgs, noms):
    """Create a Nom from the GA nominee template on the talk page and add it to ``noms``.

    Returns the new Nom, or None (after reporting an editor error) when the
    page carries more than one GA nominee template or the template could not
    be parsed into parameters.
    """
    # Cheap regex count first; the parser is only consulted when the regex
    # suggests duplicates, to rule out false positives (comments/nowiki/etc.).
    template_count = len(re.findall(r"\{\{\s*ga\s*nominee\b", talk_text, flags=re.IGNORECASE))
    if template_count > 1:
        parsed = mwparserfromhell.parse(talk_text)
        template_count = sum(
            1
            for tpl in parsed.filter_templates()
            if tpl.name.strip().lower() == 'ga nominee'
        )

    if template_count > 1:
        duplicate_msg = BotMessage(
            kind=MessageKind.EDITOR_ERROR,
            scope=MessageScope.NOMINATION,
            short_text="Multiple GA nominee templates on talk page",
            source="build_nom_from_current_talk_page",
            title=title,
            nomination_msgs=entry_msgs,
            gan_error_text=f"Multiple GA nomination templates found on talk page for [[{title}]]",
        )
        report_message(ctx.gan_conn, duplicate_msg)
        return None

    started = time.perf_counter()
    params = GAN.get_params(
        ctx.gan_conn,
        talk_page,
        article_text=talk_text,
        nomination_msgs=entry_msgs,
        prof=ctx.prof,
    )
    ctx.prof.add("get_params", time.perf_counter() - started, title=title)
    GAN.log(ctx.gan_conn, "GANbot:all_articles loop", title, f"Retrieved params: {params}")

    if params is None:
        malformed_msg = BotMessage(
            kind=MessageKind.EDITOR_ERROR,
            scope=MessageScope.NOMINATION,
            short_text="Malformed GA nominee template",
            source="build_nom_from_current_talk_page",
            title=title,
            nomination_msgs=entry_msgs,
            gan_error_text=f"Malformed GA nomination for [[{title}]]",
        )
        report_message(ctx.gan_conn, malformed_msg)
        return None

    GAN.check_params(params, title, nomination_msgs=entry_msgs)
    built = make_nom_from_params(params, title, entry_msgs)
    noms.add(built)
    return built
def execute_nomination_actions(
    ctx,
    state,
    title,
    nom,
    flags,
    name_changes,
    wbgan,
    review_stats,
):
    """Carry out the actions selected in ``flags`` for ``nom``, in a fixed order.

    Order: transclude, add review, save nomination state (plus the failed-GA
    oldid when the status is 'F'), add GA star, tell nominator. Saving the
    nomination state also marks ``state.active_table_dirty``.
    """
    conn = ctx.gan_conn

    def timed(label, action, *args):
        # Run action(*args) and record how long it took under label.
        started = time.perf_counter()
        action(*args)
        ctx.prof.add(label, time.perf_counter() - started, title=title)

    if flags.transclusion_needed:
        timed("transclude", nom.transclude, conn)
    if flags.new_review:
        nom.add_a_review(conn)
    if flags.new_nomination_state:
        timed("save_nomination_state", nom.save_nomination_state, conn, name_changes, wbgan, review_stats)
        state.active_table_dirty = True
        if nom.status == 'F':
            timed("add_failedga_oldid", nom.add_failedga_oldid, conn)
    if flags.promotion_needed:
        nom.add_GA_star(conn)  # Also updates the oldid if needed
    if flags.tell_nominator_needed:
        GAN.log(conn, "tell_nominator", title, "Calling nom.tell_nominator()")
        nom.tell_nominator(conn)
def handle_closed_or_removed_nomination(ctx, state, title, talk_page, old_nom):
    """Handle an article that was active last run but is no longer in the GAN category.

    ``old_nom`` is the row from the bulk cache and may be None, in which case
    the row is looked up in the database. Returns a NominationActionResult
    describing the follow-up actions, or None when the row cannot be found or
    the nomination has merely moved to a new title.

    The new status is 'F' by default, 'P' when ``Nom.is_a_GA(talk_page)`` is
    true, and 'X' when no review page exists for the nomination.
    """
    row = old_nom
    if row is None:
        GAN.log(ctx.gan_conn, "GANbot:in_last", title, "old_nom missing from bulk cache; falling back to DB lookup")
        row = Active_nomination.get_active_nomination(ctx.gan_conn, title)
        if row is None:
            GAN.log(ctx.gan_conn, "GANbot:in_last", title, "DB lookup also failed; skipping")
            return None

    moved_title = detect_moved_nomination_target(ctx, row)
    if moved_title is not None:
        GAN.log(
            ctx.gan_conn,
            "GANbot:in_last",
            title,
            f"Detected moved active review from {title} to {moved_title}; suppressing fail/remove handling",
        )
        return None

    state.update_needed = True
    state.update_reasons.append("Not in cat, in last run")
    nom = make_nom_from_row(row)

    # Read the previous status from the resolved row: old_nom itself is None
    # whenever the database fallback above was used.
    prior_status = row['status']

    flags = NominationActionFlags(new_nomination_state=True)
    new_status = 'F'
    flags.tell_nominator_needed = True
    if Nom.is_a_GA(talk_page):
        new_status = 'P'
        flags.promotion_needed = True
    elif not nom.review_page_exists:
        new_status = 'X'
        flags.tell_nominator_needed = False

    # The previous status was blank, so no review had been recorded yet:
    # record and transclude it now, unless there is no review page at all.
    if prior_status == '' and new_status != 'X':
        flags.transclusion_needed = True
        flags.new_review = True

    nom.new_status_message(new_status, prior_status)
    nom.update_status(new_status)
    state.update_log.append(nom.edit_summary)
    state.topic_PFX_log[nom.topic].append(nom.edit_summary)
    return NominationActionResult(nom=nom, flags=flags)
def handle_new_nomination(ctx, state, nom):
    """Decide the actions for a nomination that was not active under this title last run.

    When another active nomination exists for the same article page id under a
    different title, the nomination is treated as a page move: its state is
    saved but no review or notification actions are requested. Otherwise it is
    treated as a genuinely new nomination. Returns a NominationActionFlags.
    """
    prior_nom = Active_nomination.get_active_nomination_by_article_page_id(
        ctx.gan_conn,
        nom.article_page_id,
    )
    moved = prior_nom is not None and prior_nom["title"] != nom.title

    if moved:
        GAN.log(
            ctx.gan_conn,
            "GANbot:in_cat",
            nom.title,
            f"Detected moved active review from {prior_nom['title']} to {nom.title}; suppressing new-review notification",
        )
        reason = "Active review moved to a new title"
    else:
        reason = "This is a new nomination"

    state.update_needed = True
    state.update_reasons.append(reason)
    state.topic_update_needed[nom.topic] = True

    if moved:
        # Save the new active nomination state, but do not notify or transclude as if a new review started.
        return NominationActionFlags(new_nomination_state=True)

    nom.new_status_message(nom.status, None)
    if not nom.review_page_exists:
        return NominationActionFlags(new_nomination_state=True)

    GAN.log(
        ctx.gan_conn,
        "GANbot:in_cat",
        nom.title,
        "New nomination and review page already exists; setting transclusion_needed and new_review to True",
    )
    return NominationActionFlags(
        new_nomination_state=True,
        new_review=True,
        transclusion_needed=True,
        tell_nominator_needed=nom.status in ['H', '2'],
    )
def handle_page_number_change(ctx, state, title, nom, old_nom, name_changes, wbgan, review_stats):
    """Handle a nomination whose review page number differs from last run's.

    If the previous nomination has a review page it is set to status 'F',
    saved, and its nominator is told. The current nomination then gets a
    fresh status message. Returns NominationActionFlags for the current
    nomination (new_nomination_state is left for the caller to set).
    """
    log_source = "GANbot:in_cat:page number has changed"
    GAN.log(
        ctx.gan_conn,
        log_source,
        title,
        f"Two nominations found with different page numbers ({old_nom['page']} and {nom.page_num})",
    )

    superseded = make_nom_from_row(old_nom)
    if superseded.review_page_exists:
        GAN.log(
            ctx.gan_conn,
            log_source,
            title,
            "Old nomination has a review page so failing it",
        )
        superseded.new_status_message('F', old_nom['status'])
        superseded.update_status('F')
        started = time.perf_counter()
        superseded.save_nomination_state(ctx.gan_conn, name_changes, wbgan, review_stats)
        ctx.prof.add("save_nomination_state", time.perf_counter() - started, title=title)
        state.active_table_dirty = True
        superseded.tell_nominator(ctx.gan_conn)

    nom.new_status_message(nom.status, None)
    if not nom.review_page_exists:
        return NominationActionFlags()

    GAN.log(
        ctx.gan_conn,
        log_source,
        title,
        "Review page already exists for the new nomination; setting transclusion_needed and new_review to True",
    )
    return NominationActionFlags(
        new_review=True,
        transclusion_needed=True,
        tell_nominator_needed=(nom.status == 'H'),
    )
def handle_same_page_nomination_change(ctx, nom, old_nom, matches):
    """Decide the actions for a changed nomination whose page number is unchanged.

    ``matches`` maps field names to booleans (True where ``nom`` agrees with
    ``old_nom``). Besides returning NominationActionFlags, this may update the
    nomination's status message, timestamps and ``edit_summary``.
    """
    flags = NominationActionFlags()
    # A different, non-None reviewer counts as a new review.
    if nom.reviewer != old_nom['reviewer'] and nom.reviewer is not None:
        flags.tell_nominator_needed = True
        flags.new_review = True
    # Review page exists but last run's status was blank: transclude and notify.
    if nom.review_page_exists and old_nom['status'] == '':
        GAN.log(
            ctx.gan_conn,
            "GANbot:in_cat:match values is False",
            nom.title,
            "Nomination has updated parameters, the review page exists, and the old status is blank: setting transclusion_needed and tell_nominator_needed to True",
        )
        flags.tell_nominator_needed = True
        flags.transclusion_needed = True
    if matches['status'] == False:
        # Status changed: refresh the status message and timestamps.
        nom.new_status_message(nom.status, old_nom['status'])
        nom.update_timestamps(old_nom['status'])
        if nom.status in ['P', 'F', 'H'] and nom.status != old_nom['status']:
            flags.tell_nominator_needed = True
    else:
        # Status unchanged: build an edit summary from whichever fields differ.
        mismatches = [x for x in matches if matches[x] == False]
        mismatches_string = "/".join(mismatches)
        if (nom.reviewer is None or nom.reviewer == '') and old_nom['reviewer'] != '':
            nom.edit_summary = f"No longer on review [[{nom.title}]]"
        elif not flags.transclusion_needed:
            # No edit summary is set here when a transclusion is pending.
            if nom.reviewer is not None and nom.reviewer != '' and old_nom['reviewer'] == '':
                nom.edit_summary = f"On review [[{nom.title}]] by {nom.reviewer}"
            else:
                nom.edit_summary = f"Updated [[{nom.title}]] ({mismatches_string})"
    return flags
def detect_moved_nomination_target(ctx, old_nom):
    """Return the new title if the nominated article appears to have been moved.

    Resolves the row's article page id to its current title. When that title
    differs from the stored one and the new title's talk page carries a GA
    nominee template, or is in the GAN category, the new title is returned.
    Returns None in every other case, including when the talk page cannot be
    read.
    """
    previous_title = old_nom["title"]
    page_id = old_nom.get("article_page_id")
    if page_id is None:
        return None

    current_title = Nom.get_title_from_article_page_id(page_id)
    if current_title is None or current_title == previous_title:
        return None

    # Do a fresh check on the moved title's talk page rather than relying on
    # ctx.cat_articles_set, which was snapshotted earlier in the run and may
    # be stale if the article was moved after the run began.
    moved_talk = pywikibot.Page(ctx.site, "Talk:" + current_title)
    try:
        moved_text = moved_talk.text
    except (pywikibot.exceptions.ServerError, pywikibot.exceptions.Error) as e:
        GAN.report_operational_issue(
            ctx.gan_conn,
            "GANbot:exceptions",
            current_title,
            f"Error reading talk page while checking moved nomination target: {type(e).__name__}: {e}",
        )
        return None

    if re.search(r"\{\{\s*ga\s*nominee\b", moved_text, flags=re.IGNORECASE) is not None:
        return current_title

    # Fall back to a fresh category-membership check on the talk page.
    return current_title if talk_in_gan_category(ctx, moved_talk, ctx.GAN_CAT) else None
def evaluate_existing_nomination_change(
    ctx,
    state,
    title,
    nom,
    old_nom,
    name_changes,
    wbgan,
    review_stats,
):
    """Compare a current nomination with last run's row and choose the actions.

    Returns NominationActionFlags when something changed, or when nothing
    changed but a review page now exists for a blank-status nomination.
    Returns None when there is nothing to do.
    """
    matches = nom.compare(old_nom)

    if False in matches.values():
        GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "Pre-existing nomination and something has changed")
        state.update_needed = True
        state.update_reasons.append("Pre-existing nomination and something has changed")
        state.topic_update_needed[nom.topic] = True

        if nom.page_num != str(old_nom['page']):
            detail = handle_page_number_change(
                ctx,
                state,
                title,
                nom,
                old_nom,
                name_changes,
                wbgan,
                review_stats,
            )
        else:
            detail = handle_same_page_nomination_change(ctx, nom, old_nom, matches)

        # Only these three flags are taken from the sub-handler; the state is
        # always saved and promotion is never requested on this path.
        return NominationActionFlags(
            new_nomination_state=True,
            new_review=detail.new_review,
            transclusion_needed=detail.transclusion_needed,
            tell_nominator_needed=detail.tell_nominator_needed,
        )

    if nom.status == '' and nom.review_page_exists:
        GAN.log(
            ctx.gan_conn,
            "GANbot:in_cat:match values is True",
            title,
            "Nomination has no new parameters but the review page now exists; setting transclusion_needed, new_review, tell_nominator_needed, and new_nomination_state to True",
        )
        return NominationActionFlags(
            new_nomination_state=True,
            new_review=True,
            transclusion_needed=True,
            tell_nominator_needed=True,
        )

    return None
def handle_nomination_still_in_category(
    ctx,
    state,
    title,
    talk_page,
    talk_text,
    has_ga_nominee,
    old_nom,
    noms,
    name_changes,
    wbgan,
    review_stats,
):
    """Handle an article whose talk page is currently in the GAN category.

    Builds the Nom from the talk page, then treats it either as a new
    nomination or as an existing one to compare with ``old_nom``. Returns a
    NominationActionResult, or None when the nominee template is missing or
    the nomination could not be built (both are reported as editor errors).
    """
    GAN.log(ctx.gan_conn, "GANbot:all_articles loop", title, "In the in_cat branch")
    entry_msgs = []

    if not has_ga_nominee:
        missing_template = BotMessage(
            kind=MessageKind.EDITOR_ERROR,
            scope=MessageScope.NOMINATION,
            short_text="In GAN category but GA nominee template missing",
            source="handle_nomination_still_in_category",
            title=title,
            gan_error_text=(
                f"[[{title}]] is in the GAN category but has no GA nominee template on its talk page\n"
            ),
        )
        report_message(ctx.gan_conn, missing_template)
        return None

    nom = build_nom_from_current_talk_page(ctx, talk_page, title, talk_text, entry_msgs, noms)
    if nom is None:
        return None

    # A non-blank status without a review page is inconsistent; blank it.
    if not nom.review_page_exists and nom.status != '':
        GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "no review page and inconsistent status")
        nom.update_status('')

    known_from_last_run = title in ctx.last_run_articles_set and old_nom is not None
    if known_from_last_run:
        GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "This nomination is not new")
        flags = evaluate_existing_nomination_change(
            ctx,
            state,
            title,
            nom,
            old_nom,
            name_changes,
            wbgan,
            review_stats,
        )
        if flags is None:
            # Nothing changed: hand back a result with every action disabled.
            flags = NominationActionFlags()
    else:
        GAN.log(ctx.gan_conn, "GANbot:in_cat", title, "This is a new nomination")
        flags = handle_new_nomination(ctx, state, nom)

    return NominationActionResult(nom=nom, flags=flags)
def process_title(ctx, state, title, ctr):
    """Process one article title: read its talk page, decide the actions, run them.

    ``ctr`` is the 1-based position in the article list and is only used for
    progress logging. Returns None; returns early (without recording the
    per-title total time) when the talk page cannot be read or there is
    nothing to do.
    """
    started = time.perf_counter()
    GAN.log(ctx.gan_conn, "GANbot:all_articles loop",title,"Starting")
    if ctr % 10 == 0:
        GAN.log(ctx.gan_conn, "GANbot:all_articles loop", None, f"Processed {ctr} articles")

    in_last = title in ctx.last_run_articles_set
    talk_state = get_talk_page_state(ctx, title)
    if talk_state is None:
        return

    # Note that old_nom will be None if there was a problem retrieving it.
    # NOTE (original author): if the old nom has a lower page number than the new nom, the old
    # nom has probably been passed/failed and a new nom created between runs. This can happen if
    # a review has been found to be invalid and the best thing to do is fail it and renominate.
    # Things to check: two nominee templates? Page number incremented by 1? If OK, destroy nom
    # and run as if we were just doing the fail pass. That will get rid of the record in
    # active_nominations and the next pass will pick up the new nom.
    old_nom = ctx.last_run_noms.get(title) if in_last else None

    if not (talk_state.in_cat or in_last):
        # Not in category now and wasn't active last run: nothing to do.
        return

    if talk_state.in_cat:
        result = handle_nomination_still_in_category(
            ctx,
            state,
            title,
            talk_state.talk_page,
            talk_state.talk_text,
            talk_state.has_ga_nominee,
            old_nom,
            noms,
            name_changes,
            wbgan,
            review_stats,
        )
    else:
        # Not in cat, in last run
        result = handle_closed_or_removed_nomination(ctx, state, title, talk_state.talk_page, old_nom)
    if result is None:
        return

    # Now we're past all the setup and can do whatever the flags tell us to do.
    decided = result.flags
    action_flags = NominationActionFlags(
        new_nomination_state=decided.new_nomination_state,
        new_review=decided.new_review,
        transclusion_needed=decided.transclusion_needed,
        promotion_needed=decided.promotion_needed,
        tell_nominator_needed=decided.tell_nominator_needed,
    )
    execute_nomination_actions(
        ctx,
        state,
        title,
        result.nom,
        action_flags,
        name_changes,
        wbgan,
        review_stats,
    )
    ctx.prof.add("process_title_total", time.perf_counter() - started, title=title)
#GAN.log(gan_conn, "GANbot:after loop, tracking param counts", "N/A", str(GAN.param_info))
# Process every article in turn. Once the loop below has finished, all noms have been processed and the tables and pages that follow can be rebuilt.
for ctr, title in enumerate(all_articles, start=1):
process_title(ctx, state, title, ctr)
if state.active_table_dirty:
Active_nomination.update_active_nominations(gan_conn) # Reload the active nominations table
else:
GAN.log(gan_conn, "GANbot:state", "N/A", "active_nominations rebuild skipped (no changes)")
Review_stats.update_statistics(gan_conn) # update the GA reviewing stats table in SQL and update the GA reviewing stats page on Wikipedia
review_stats = Review_stats(gan_conn) # Refresh the GA reviewing stats dictionary from the database
# The next loops are going to generate the GAN page
target = 'Wikipedia'
sort_order = GA_config.sort_order
print_list = []
pledge_list = []
collapsed = [] # This will hold noms that should be displayed in a collapsed list at the end of each topic
noms_by_user = {}
# Get the user information before trying to print anything, since this can change sort order
for n in noms.noms:
# As we look at each nomination, add the nominator and reviewer GA and review counts to the user dictionary if they're not already in it.
#print("About to call update_users; nom is " + n.title)
n.update_users(gan_conn, users, name_changes, review_stats, wbgan)
# Build a dictionary of noms by user as we go for use in building the collapse list
if n.nominator in noms_by_user:
noms_by_user[n.nominator].append({'title': n.title, 'nomination_ts': n.nomination_ts})
else:
noms_by_user[n.nominator] = [{'title': n.title, 'nomination_ts': n.nomination_ts}]
# Sort each user's list by the nomination date
for u in noms_by_user:
noms_by_user[u] = sorted(noms_by_user[u], key=lambda d: d['nomination_ts'])
if len(noms_by_user[u]) > 20:
add_to_collapse = noms_by_user[u][20:]
for c in add_to_collapse:
collapsed.append(c['title'])
# If something is in collapsed, don't print it, put it in collapsed_topic. Then at the end of the topic, print those inside a collapse template.
# Now assemble the GARs
# Each row is of the form # {{GARentry|Terry Pratchett|1}} GA nominator: [[User:Example]]; GAR created: <date>
# Create a dictionary with {'<subtopic>': {article_title: {GARpage=, orignom= , GARdate= }}
# Then in the print code, add the GARs after the collapsed sections
try:
gar_dict = GAH.get_gar_dict(gan_conn, config)
except (pywikibot.exceptions.ServerError,
pywikibot.exceptions.APIError,
pywikibot.exceptions.Error) as e:
GAN.report_operational_issue(
gan_conn,
"GANbot:exceptions",
"GAR dictionary",
f"Error building GAR dictionary: {type(e).__name__}: {e}",
)
gar_dict = {}
for k in topics: # We print the sections in topic order
#print("In topic loop for " + k)
topic = topics[k]
topic_print_list = []
topic_update_log = state.topic_PFX_log[k]
#print("Printing topic " + topic.name)
topic_noms = [x for x in noms.noms if x.topic == topic.name]
print_list.append(topic.header(target))
topic_print_list.append(topic.header(target))
for subtopic in topic.subtopics:
#print("Printing subtopic " + subtopic.name)
subtopic_noms = [x for x in topic_noms if x.subtopic == subtopic.name]
print_list.append(subtopic.section_header())
topic_print_list.append(subtopic.section_header())
#print("About to sort" + subtopic.name)
subtopic_noms = sorted(subtopic_noms, key=operator.attrgetter(sort_order), reverse=False)
#print("Sorted " + subtopic.name)
subtopic_gars = []
if subtopic.name in gar_dict:
subtopic_gars = gar_dict[subtopic.name]
collapsed_topic = []
for nom in subtopic_noms:
#print(" In first for loop, printing " + nom.title + "; edit_summary is " + nom.edit_summary)
if nom.title in collapsed:
#print("putting " + nom.title + " in collapsed")
collapsed_topic.append(nom)
else:
#print(nom.title + " is not in collapsed")
nom_text = nom.print_GAN_entry()
print_list.append(nom_text)
topic_print_list.append(nom_text)
if nom.has_a_pledge() and nom.status not in ('H','R','2'):
pledge_list.append(nom_text)
if nom.edit_summary != "No change" and nom.edit_summary != "":
state.update_log.append(nom.edit_summary)
topic_update_log.append(nom.edit_summary)
if nom.has_a_pledge():
state.pledge_update_log.append(nom.edit_summary)
#print("At end of first for for " + subtopic.name + "; collapsed_topic has length " + len(collapsed_topic))
if len(collapsed_topic) > 0:
#print("In the collapsed_topic if")
print_list.append("{{cot|Additional nominations}}")
print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
topic_print_list.append("{{cot|Additional nominations}}")
topic_print_list.append("* These are not displayed as the nominator currently has more than twenty active nominations")
for nom in collapsed_topic:
#print("In the for loop inside collapsed topic")
nom_text = nom.print_GAN_entry()
print_list.append(nom_text)
topic_print_list.append(nom_text)
if nom.edit_summary != "No change" and nom.edit_summary != "":
state.update_log.append(nom.edit_summary)
topic_update_log.append(nom.edit_summary)
if nom.has_a_pledge():
state.pledge_update_log.append(nom.edit_summary)
print_list.append("{{cob}}")
topic_print_list.append("{{cob}}")
if len(subtopic_gars) > 0:
#print_list.append("=== " + subtopic.name + " reassessments ===\n")
print_list.append("==== Reassessments ====\n")
gars = gar_dict[subtopic.name]
gars = sorted(gars, key = lambda d: d[1]['GARdate'])
for gar in subtopic_gars:
gar_line = "# {{GARentry|" + gar[0] + "|" + gar[1]['GARpage']
shortdesc = gar[1]['shortdesc']
if shortdesc is not None:
gar_line += "|shortdesc=" + shortdesc
gar_line += "}}"
orignom = gar[1]['orignom']
if orignom is not None:
gar_line += " GA nominator: [[User:" + orignom + "|" + orignom + "]]"
user = pywikibot.User(site, "User:" + orignom)
if user is not None:
try:
ule = user.last_edit
except pywikibot.exceptions.APIError as e:
GAN.report_operational_issue(
gan_conn,
"GANbot:exceptions",
gar[0],
f"API error getting GAR nominator's last edits for nominator {orignom}: {type(e).__name__}: {e}",
)
ule = None
if ule is not None:
ule_ts = ule[2]
ule_dt = GAH.convert_timestamp_to_datetime(ule_ts)
ule_delta = datetime.datetime.utcnow() - ule_dt
if ule_delta.days > 21:
gar_line += " (inactive for " + str(ule_delta.days) + " days)"
gar_line += "."
GARdate = gar[1]['GARdate']
if GARdate is not None:
gar_line += " GAR created: " + GARdate.strftime("%H:%M, %-d %B %Y (UTC)")
print_list.append(gar_line)
# Save topic page
page = pywikibot.Page(site, GA_config.strings['GA topic pages base'] + topic.name)
topic_page_text = '\n'.join(topic_print_list)
page.text = topic_page_text
topic_update_text = "\n".join(topic_update_log)
if len(topic_update_log) == 0:
topic_update_text = "Update metrics"
if state.topic_update_needed[topic.name]:
t0 = time.perf_counter()
if save_page_report_operational(
gan_conn,
page,
topic_update_text,
"GANbot:exceptions",
GA_config.strings['GA topic pages base'] + topic.name,
"Error saving topic page",
):
prof.add("save_topic_page", time.perf_counter() - t0)
t0 = time.perf_counter()
GAN.write_backlog_by_sort_order(gan_conn, noms.noms, sort_order)
prof.add("write_backlog_by_sort_order", time.perf_counter() - t0)
t0 = time.perf_counter()
GAN.mark_superseded_reviews(gan_conn)
prof.add("mark_superseded_reviews", time.perf_counter() - t0)
GAN.log(gan_conn,"GANbot", "N/A","About to run integrity checks")
t0 = time.perf_counter()
GAN.integrity_checks(gan_conn)
prof.add("integrity_checks", time.perf_counter() - t0)
error_edit_summary = ''
current_errors_text = ''
def format_errors_list(errors):
    """Render error messages as a wikitext bulleted list.

    Each entry is stripped of surrounding whitespace, has a single
    leading ``*`` bullet removed if one is already present, and is then
    emitted as ``* <text>``. Entries that are falsy, or that contain
    nothing but whitespace and/or a bare bullet, are skipped so the
    output never contains an empty bullet line.

    Args:
        errors: Iterable of error message strings; falsy entries
            (``None``, ``""``) are tolerated and ignored.

    Returns:
        The bullet lines joined with newlines, or ``""`` when no
        usable entries remain.
    """
    bullets = []
    for entry in errors:
        if not entry:
            continue
        text = entry.strip()
        # Remove any leading bullet if already present
        if text.startswith("*"):
            text = text[1:].strip()
        # A whitespace-only or bare-bullet entry would otherwise be
        # written out as an empty "* " line.
        if not text:
            continue
        bullets.append(f"* {text}")
    return "\n".join(bullets)
if len(GA_config.current_errors) > 0:
current_errors_text = format_errors_list(GA_config.current_errors)
error_edit_summary = "[[" + GA_config.strings['GAN errors page'] + "|Errors listed!]] "
page_text = '\n'.join(print_list)
page = pywikibot.Page(site,GA_config.strings['GAN page'])
page.text=page_text
page.text = GA_config.strings['GAN top text'] + page_text + GA_config.strings['GAN bottom text']
update_text = "\n".join(state.update_log)
if len(state.update_log) == 0:
update_text = "Update metrics"
pledge_page = pywikibot.Page(site, GA_config.strings['Open pledges page'])
pledge_page_text = '\n'.join(pledge_list)
pledge_page.text = pledge_page_text
pledge_update_text = "\n".join(state.pledge_update_log)
if pledge_update_text == '':
pledge_update_text = "Updating list of open pledges"
#state.update_needed = True # Uncomment this to force a page update to test layout and sort changes; the page will not update otherwise unless a nomination changes status in some way.
GAN.log(gan_conn,"GANbot", "N/A","About to save main GAN page")
if state.update_needed:
t0 = time.perf_counter()
if save_page_report_operational(
gan_conn,
page,
error_edit_summary + update_text,
"GANbot:exceptions",
GA_config.strings['GAN page'],
"Error saving main GAN page",
):
prof.add("save main GAN page", time.perf_counter() - t0)
t0 = time.perf_counter()
if save_page_report_operational(
gan_conn,
pledge_page,
pledge_update_text,
"GANbot:exceptions",
GA_config.strings['Open pledges page'],
"Error saving pledge page",
):
prof.add("save pledge page", time.perf_counter() - t0)
GAN.log(gan_conn,"GANbot", "N/A","About to write errors page")
GAN.write_errors_page(current_errors_text)
GAN.log(gan_conn, "GANbot", "N/A", "About to write bugs page")
if len(GA_config.current_bug_messages) > 0:
bug_page_text = "\n\n".join(GA_config.current_bug_messages)
else:
bug_page_text = ""
GAN.write_bugs_page(bug_page_text)
GAN.log(gan_conn, "GANbot", "N/A", "About to update operational status page")
if not GA_config.transient_operational_issue:
GAN.clear_operational_status()
GAN.log(gan_conn,"GANbot", "N/A","About to write sortable table")
# Now write the sortable table
t0 = time.perf_counter()
print_list = []
print_list.append('{| class="wikitable sortable"')
print_list.append('!Article')
print_list.append('!Status')
print_list.append('!Nominator')
print_list.append('!Subtopic')
print_list.append('!{{Tooltip|Age|Age in days}}')
print_list.append('!{{Tooltip|Revs|Reviews}}')
print_list.append('!{{Tooltip|GAs|Promoted GAs}}')
print_list.append('!{{Tooltip|R/G|Reviews per GA}}')
print_list.append('!Note')
for n in noms.noms:
print_list.append("|-")
print_list.append("|[[" + n.title + "]]")
if n.status == "":
print_list.append("|")
else:
print_list.append("|" + n.status_string)
print_list.append("| [[User:" + n.nominator + "|" + n.nominator + "]]")
print_list.append("|[[Wikipedia:Good article nominations#" + n.subtopic + "|" + n.subtopic + "]]")
print_list.append("|" + str(n.age_in_days))
print_list.append("|" + str(n.nominator_reviews))
print_list.append("|" + str(n.nominator_GAs))
print_list.append("|" + str(n.R_over_G))
print_list.append("|" + n.note)
print_list.append("|}")
sortable_table_text = "\n".join(print_list)
prof.add("build sortable table", time.perf_counter() - t0)
page = pywikibot.Page(site,GA_config.strings['GAN as a single table'])
page.text=sortable_table_text
if state.update_needed:
t0 = time.perf_counter()
if save_page_report_operational(
gan_conn,
page,
error_edit_summary + update_text,
"GANbot:exceptions",
GA_config.strings['GAN as a single table'],
"Error saving sortable GAN page",
):
prof.add("save sortable GAN page", time.perf_counter() - t0)
run_ok = True
except Exception as e:
_fatal("main run", e, gan_conn)
sys.exit(1)
finally:
elapsed = time.perf_counter() - start_time
runtime_log_counters = getattr(GA_config, "runtime_log_counters", False)
runtime_log_profile_summary = getattr(GA_config, "runtime_log_profile_summary", False)
status = "OK" if run_ok else "FAILED"
print(
"[RUN_END] "
f"status={status} "
f"runtime_s={elapsed:.2f} "
f"saves={GA.SAVES_ATTEMPTED}/{GA.SAVES_SUCCEEDED}/{GA.SAVES_FAILED} "
f"op_issues={1 if GA_config.transient_operational_issue else 0} "
f"wiki_errors={len(GA_config.current_errors)} "
f"bug_msgs={len(GA_config.current_bug_messages)}",
flush=True,
)
if runtime_log_counters:
print(
f"[FAST_PARSE] used={GAN.fast_parse_used} fallback={GAN.fast_parse_fallback}",
flush=True,
)
if runtime_log_profile_summary:
print(prof.report(), flush=True)
try:
if gan_conn is not None:
gan_conn.close()
except Exception:
pass
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced, in whole or in part, from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.