User:PotatoBot/Code/5

#!/usr/bin/python
# -*- coding: utf-8  -*-

import wikipedia as w
import codecs, catlib, pagegenerators, time, mysave, re
from datetime import date

# PotatoBot Task 5: Creates redirects from trade names to drug articles

# Brand-name strings that must never be turned into redirects. main() compares
# each brand name (after trimming any ' (', ' [' or ' Roche' suffix) against
# this tuple and skips the ones listed here. The empty string is listed so
# that a name reduced to nothing is skipped as well.
excludes = ('', u'Adiuretin', u'Amicacin', u'Apirelina', u'Bayer Brand of Azlocillin‎', u'Benzchinamide', u'BZQ', u'Brevital sodium', u'Cephaloglycine', u'Cephaoglycin acid', u'Chlorazepate', u'Chlorazepic acid', u'Cialis/Tadalafil', u'Cialis/Taladafil Hcl', u'Citrovorum factor', u'Clorazepic acid', u'Co-Vidarabine', u'Covidarabine', u'Dapropterin', u'Deacetyllanatoside C', u'Dextrin, Caloreen', u'Diovan HCT', u'Dl-Tranylcypromine', u'Dolasteron', u'Fish berry', u'Fondaparinux sodium', u'FR-008-III', u'Fujimycin', u'Gamatran citrate', u'Genzyme)', u'Glycocoll', u'Guanabenz(USAN)', u'Indian berry', u'IRINOTECAN, CPT-11', u'Karnitin', u'Kitasamycin A3', u'Kyselina jantarova', u'Lrbesartan', u'Lyophilized Cytoxan', u'Lysuride', u'Metamfetamine', u'Methenamide', u'Methoxamedrine', u'Metossamina', u'Metoxamina', u'Micardis HCT', u'Naphcillin', u'Navelbine Base', u'Oriental berry', u'Polymyxin E. Sulfate', u'Polymyxin E sulfate', u'Propilniazida', u'Pyridium Plus', u'Quetiapin hemifumarate', u'Sapropterin', u'Secremax, SecreFlo', u'Sodium sulamyd', u'Spongoadenosine', u'SU-11248', u'THIORPHAN', u'Trifluopromazine', u'Turimycin A5', u'Wellcome U3B', u'Xiaflextm',
)

# Approval-request (BRFA) number that mysave.savepage() appends to the
# "Requests for approval/PotatoBot " link in every edit summary.
# NOTE(review): assumed to be '5' because this script is PotatoBot task 5 --
# confirm against the title of the approval page.
BRFA_NO = '5'


def _load_drugcards(path):
	"""Read the DrugBank drug card dump and collect trade names per drug.

	path -- location of the UTF-8 encoded drug card text file

	Returns (db, log): db maps each generic name to [trade names, card number],
	and log holds one '# ...' wikitext line for every rejected card.
	"""
	db = {}
	log = ''
	# Initialised up front so that a stray #END_DRUGCARD appearing before the
	# first #BEGIN_DRUGCARD is reported instead of raising NameError.
	card_no, start_no, field = 0, 0, ''
	generic, names = '', []
	with codecs.open(path, 'r', 'utf-8') as f:
		for raw in f:
			line = raw.strip()
			if line.startswith('#BEGIN_DRUGCARD'):
				# The card number is read from characters 18-22 of the begin line
				card_no = int(line[18:23])
				start_no = card_no
				names = []
				generic = ''
			elif line.startswith('#END_DRUGCARD'):
				if generic == '':
					w.output('  \03{red}ERROR IN DRUGCARD %d: no generic name\03{default}' % card_no)
					log += '# Error in Drugcard %d (no generic name found)\n' % card_no
				elif card_no != start_no:
					# NOTE(review): the number is never re-read from the end tag,
					# so this comparison cannot currently fail.
					w.output('  \03{red}ERROR IN DRUGCARD %d: IDs do not match\03{default}' % card_no)
					log += '# %s: Error in Drugcard %d (IDs of start and end tag do not match)\n' % (generic, card_no)
				elif generic in db:
					w.output('  \03{red}ERROR IN DRUGCARDS %d, %d: generic name found twice\03{default}' % (card_no, db[generic][1]))
					log += '# %s: Error in Drugcards %d, %d (generic name found twice)\n' % (generic, card_no, db[generic][1])
				elif len(names) > 8:
					# Only drugs with more than eight usable trade names are kept
					db[generic] = [names, card_no]
				card_no = 0
			elif line.startswith('# '):
				# Field header line: drop the leading '# ' and the last character
				field = line[2:-1]
			elif line != '' and card_no != 0:
				if field == 'Generic_Name':
					generic = line
				elif field == 'Brand_Names' and line != 'Not Available' and 'hydrochloride' not in line.lower() and len(line) < 25:
					# Cut the name at the first suffix marker present, in this priority order
					for marker in (' (', ' [', ' Roche'):
						cut = line.find(marker)
						if cut > -1:
							line = line[:cut].strip()
							break
					if line not in excludes:
						names.append(line)
	return db, log


def _swap_template(text, match, replacement):
	"""Return text with the span covered by the regex match replaced."""
	return text[:match.start()] + replacement + text[match.end():]


def main():
	"""Create or retag redirects from DrugBank trade names to drug articles.

	Loads data/drugcards.txt, then for every trade name either creates a
	redirect to the drug article, corrects the redirect-category template on
	an existing redirect, or records the problem. The collected log is
	appended to User:PotatoBot/Lists/Trade names log.
	"""
	# Prepare log
	listout = '\n'
	Rtemplate = {False: '{{R from trade name}}', True: '{{R from alternative name}}'}
	brand_re = re.compile(r'\{\{\s*[Rr] from brand name\s*\}\}')
	alter_re = re.compile(r'\{\{\s*[Rr] from alternative name\s*\}\}')
	trade_re = re.compile(r'\{\{\s*[Rr] from trade name\s*\}\}')

	# Load DrugBank data
	db, load_log = _load_drugcards('data/drugcards.txt')
	listout += load_log
	w.output('DrugBank entries loaded: %d' % len(db))

	# Create redirects
	site = w.getSite()
	for drug in db:
		tradenames, card_no = db[drug]
		w.output('* %s (#%d)' % (drug, card_no))
		drugpage = mysave.resolveredir(w.Page(site, drug))
		if not drugpage.exists():
			drugpage = mysave.resolveredir(w.Page(site, drug.capitalize()))
		# A trade name equal to the article title with a different ending
		# (-a, -e, -o, -um or none) is tagged as an alternative name.
		title = drugpage.title()
		stem = title[:-1] if title.endswith('e') else title
		alt_names = (stem, stem + 'a', stem + 'e', stem + 'o', stem + 'um')
		notfound = []
		for tradename in tradenames:
			# Skip names that start with the generic name followed by a space
			if tradename.lower().startswith(drug.lower() + ' '):
				continue
			tradepage = w.Page(site, tradename)
			alt = tradename in alt_names
			new_tpl, other_tpl = Rtemplate[alt], Rtemplate[not alt]
			if tradepage.exists() and tradepage.isRedirectPage():
				tradetext = tradepage.get(get_redirect=True)
				brand = brand_re.search(tradetext)
				alter = alter_re.search(tradetext)
				trade = trade_re.search(tradetext)
				target = tradepage.getRedirectTarget()
				if target != drugpage:
					w.output('  \03{yellow}%s doesn\'t redirect to the right page (%s)?\03{default}' \
						% (tradepage.title(), drugpage.title()))
					listout += '# %s: redirects to %s instead of %s\n'\
						% (tradepage.aslink(), target.aslink(), drugpage.aslink())
				elif brand:
					listout += mysave.savepage(tradepage, _swap_template(tradetext, brand, new_tpl), BRFA_NO,
						'Replace {{R from brand name}} with ' + new_tpl, minor = True)
				elif alter and not alt:
					listout += mysave.savepage(tradepage, _swap_template(tradetext, alter, new_tpl), BRFA_NO,
						'Replace ' + other_tpl + ' with ' + new_tpl, minor = True)
				elif trade and alt:
					# The template being removed is the other one, so it is named first
					listout += mysave.savepage(tradepage, _swap_template(tradetext, trade, new_tpl), BRFA_NO,
						'Replace ' + other_tpl + ' with ' + new_tpl, minor = True)
				elif not trade and not alter:
					# Insert the template at the point where the text with category links removed ends
					minusCats = w.removeCategoryLinks(tradetext, site)
					listout += mysave.savepage(tradepage, minusCats + ' ' + new_tpl + tradetext[len(minusCats):], BRFA_NO,
						'Add ' + new_tpl, minor = True)
			elif drugpage.exists():
				listout += mysave.makeredir(tradepage, drugpage, BRFA_NO, new_tpl)
			else:
				notfound.append(tradepage.aslink())
		if notfound:
			listout += '# %s: target %s not found\n' % (', '.join(notfound), drugpage.aslink())

	# Output log
	listout += '\nTrade names from DrugBank completely included.'
	w.output('')
	logpage = w.Page(site, 'User:PotatoBot/Lists/Trade names log')
	mysave.savepage(logpage, logpage.get() + listout, BRFA_NO, 'Creating trade names log')

# Script entry point: run the task and always call w.stopme() afterwards,
# even when main() raises.
if __name__ == "__main__":
	try:
		main()
	finally:
		w.stopme()

mysave.py

#!/usr/bin/python
# -*- coding: utf-8  -*-

import pywikibot as w
import re

# Code for saving redirects and other pages

def savepage(page, text, BRFANo, summary = '', minor = False):
	"""Save text to a page and log exceptions.

	page -- page object to write to
	text -- new wikitext for the page
	BRFANo -- suffix of the approval page title linked from the edit summary
	summary -- edit summary; when empty, the current action text is left untouched
	minor -- passed to page.put() as minorEdit

	Returns '' on success, otherwise one newline-terminated '# ...' log line
	saying why the page was not saved.
	"""
	if summary != '':
		w.setAction(summary + '. See [[Wikipedia:Bots/Requests for approval/PotatoBot ' + BRFANo + '|approval]]. Report errors and suggestions at [[User talk:PotatoBot]].')
	# A title containing '#' addresses a section rather than a page
	if '#' in page.title():
		w.output('  \03{red}cannot save %s because it is a section\03{default}' % page.title())
		# Newline-terminated like every other log line, so entries do not run together
		return '# %s: this is a section title\n' % page.title(aslink=True)
	try:
		page.put(text, minorEdit = minor)
	except w.LockedPage:
		w.output('  \03{red}cannot save %s because it is locked\03{default}' % page.title())
		return '# %s: page was locked\n' % page.title(aslink=True)
	except w.EditConflict:
		w.output('  \03{red}cannot save %s because of edit conflict\03{default}' % page.title())
		return '# %s: edit conflict occurred\n' % page.title(aslink=True)
	except w.SpamfilterError as error:
		w.output('  \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % (page.title(), error.url))
		return '# %s: spam blacklist entry\n' % page.title(aslink=True)
	except Exception:
		# Any other failure is logged and the run continues; KeyboardInterrupt
		# and SystemExit are deliberately not caught here.
		w.output('  \03{red}unknown error on saving %s\03{default}' % page.title())
		return '# %s: unknown error occurred\n' % page.title(aslink=True)
	else:
		w.output('  \03{green}saving %s -> \03{gray}%s\03{default}' % (page.title(), text))
		return ''

def resolveredir(page):
	"""Return target if input is a redirect, else return input.

	If the redirect target cannot be obtained, a page titled
	'<title> (broken redirect)' is returned; if the redirect check itself
	fails, a page titled '<title> (bad link)' is returned.
	"""
	try:
		if not page.isRedirectPage():
			return page
		try:
			# Fetch the target once and reuse it for both the message and the result
			target = page.getRedirectTarget()
			w.output('  \03{gray}resolving redir %s to %s\03{default}'\
				% (page.title(), target.title()))
			return target
		except Exception:
			w.output('  \03{yellow}target %s is a broken redir\03{default}' % page.title())
			return w.Page(w.getSite(), page.title() + ' (broken redirect)')
	except Exception:
		# Only ordinary errors are treated as a bad link; KeyboardInterrupt and
		# SystemExit propagate so the bot can still be stopped.
		w.output('  \03{yellow}target %s is a bad link\03{default}' % page.title())
		return w.Page(w.getSite(), page.title() + ' (bad link)') # workaround for wikipedia.py breaking wikiasite: links

def makeredir(redirpage, page, BRFANo, templates = ''):
	"""Create a redirect and log existing page that isn't a redirect to the desired article.

	redirpage -- page that should become the redirect
	page -- desired target; resolved first if it is itself a redirect
	BRFANo -- passed through to savepage()
	templates -- wikitext appended after the redirect link

	Returns '' when nothing needs logging, the result of savepage() when the
	redirect is created, or a '# ...' log line when redirpage already exists
	with different content.
	"""
	page = resolveredir(page)
	if not redirpage.exists():
		# Create redirect, or write page name to list if an error occurs
		return savepage(redirpage, '#REDIRECT %s %s' % (page.title(aslink=True), templates), BRFANo, 'Redirect to ' + page.title(aslink=True))

	comment = ''
	dab = None
	if redirpage.isDisambig():
		comment = ' (disambiguation)'
		dab = redirpage
	if redirpage.isRedirectPage():
		try:
			target = redirpage.getRedirectTarget()
			if target.title() == page.title() or target.sectionFreeTitle() == page.title():
				# Already a redir to the desired article
				return ''
			elif target.isDisambig():
				comment = ' (redirect to disambiguation)'
				dab = target
			else:
				comment = ' (redirect)'
		except Exception:
			comment = ' (broken redir)'
	# dab is set in every branch that puts 'disambiguation' into the comment;
	# any() stops resolving linked pages at the first match.
	if 'disambiguation' in comment and any(resolveredir(linked) == page for linked in dab.linkedPages()):
		w.output('  link to %s already on dab page %s' % (page.title(), redirpage.title()))
		return ''
	elif redirpage.title() != page.title():
		w.output('  \03{yellow}redir to %s failed, page %s already exists\03{default}' % (page.title(), redirpage.title()))
		return '# %s: redirecting to %s failed, page already exists%s\n' % (redirpage.title(aslink=True), page.title(aslink=True), comment)
	else:
		return ''

def findATCs(page, includeVet = True):
	"""Look for ATC codes in infoboxes.

	page -- object whose templatesWithParams() returns (name, params) entries,
		params being a list of 'key=value' strings
	includeVet -- when false, 'ATCvet = yes' does not add the 'Q' prefix

	Returns (codes, ATCvetpos, prefixpos, suffixpos, supppos). codes lists the
	infobox code (prefix + suffix, with a leading 'Q' for veterinary codes)
	followed by the codes from ATC/ATCvet templates in the supplemental
	parameter. Each position is the index of that parameter within its
	template, or -1 when the parameter was not found.
	"""
	ATCvet, prefix, suffix, supp = False, '', '', ''
	ATCvetpos, prefixpos, suffixpos, supppos = -1, -1, -1, -1
	# The two supported infoboxes use different parameter names:
	# template name -> (prefix, suffix, supplemental)
	param_names = {
		'Drugbox': ('ATC_prefix', 'ATC_suffix', 'ATC_supplemental'),
		'Chembox Identifiers': ('ATCCode_prefix', 'ATCCode_suffix', 'ATC_Supplemental'),
	}
	for template in page.templatesWithParams():
		names = param_names.get(template[0])
		if names is None:
			continue
		prefix_key, suffix_key, supp_key = names
		for pos, param in enumerate(template[1]):
			key, _, val = param.partition('=')
			key, val = key.strip(), val.strip()
			if key == 'ATCvet':
				ATCvet = val == 'yes' and includeVet
				ATCvetpos = pos
			elif key == prefix_key and val.lower() != 'none':
				# A prefix of 'none' (any capitalisation) means there is no code
				prefix = val
				prefixpos = pos
			elif key == suffix_key:
				suffix = val
				suffixpos = pos
			elif key == supp_key:
				supp = val
				supppos = pos
	codes = []
	if prefix != '':
		codes.append(('Q' if ATCvet else '') + prefix + suffix)
	# Parse the templates contained in the supplemental parameter's text
	for supp_template in page.templatesWithParams(supp):
		if supp_template[0] in ('ATC', 'ATCvet'):
			vet_prefix = 'Q' if supp_template[0] == 'ATCvet' else ''
			codes.append(vet_prefix + supp_template[1][0] + supp_template[1][1])
	return (codes, ATCvetpos, prefixpos, suffixpos, supppos)

def addTemplateParam(page, newtemplates, BRFANo, summary = 'Updating template', minor = False):
	"""Rewrite template parameters in the page text to match newtemplates, then save.

	NOTE(review): this function looks unfinished and cannot run as written
	(see the inline notes); the description below is inferred from the code.

	page -- page object providing get() and templatesWithParams()
	newtemplates -- sequence indexed in parallel with page.templatesWithParams();
		entry i is compared with the page's i-th (name, parameter list) entry
	BRFANo, summary, minor -- passed through to savepage()

	Returns '' when the text is unchanged, a '# ...' log line when a template
	name differs from the one on the page, otherwise the result of savepage().
	"""
	text = page.get()
	oldtemplates = page.templatesWithParams()
	# Search position in text; meant to advance as templates and parameters are located
	pointer = 0
	# NOTE(review): newtemplates is indexed with the same i and j as oldtemplates,
	# so it is assumed to have at least as many templates and parameters.
	for i in range(len(oldtemplates)):
		# Locate the opening of the i-th template, accepting either case for the
		# first letter and space or underscore inside the name.
		# NOTE(review): the pattern emits the first letter twice (once in the
		# (Upper|lower) group and again as part of the full name), so it would
		# only match e.g. 'DDrugbox'; the name without its first character was
		# presumably intended. The name is also not regex-escaped.
		search1 = re.compile(r'\{\{\s*(%s|%s)%s\s*\|' % (oldtemplates[i][0][0].upper(), oldtemplates[i][0][0].lower(),\
			oldtemplates[i][0].replace(' ', '( |_)'))).search(text, pointer)
		if search1:
			# NOTE(review): no name `end` is defined in this function or in the
			# visible module; presumably search1.end() was meant.
			pointer = end() - 1
		if newtemplates[i] != oldtemplates[i]:
			if newtemplates[i][0].strip() == oldtemplates[i][0].strip():
				for j in range(len(oldtemplates[i][1])):
					oldparam = oldtemplates[i][1][j].partition('=')
					newparam = newtemplates[i][1][j].partition('=')
					# Todo: unnamed params #
					if newparam[0] == oldparam[0]:
						# Find the value of this named parameter after the pointer.
						# NOTE(review): .span(1) already returns a (start, end) tuple,
						# so the span(0)/span(1) calls below would raise TypeError;
						# span[0]/span[1] was presumably intended. search() can also
						# return None, which is not handled.
						span = re.compile(r'\|\s*%s\s*=\s*([^|}\s]*)\s*(}|\|)' % oldparam[0]).\
							search(text, pointer).span(1)
						pointer = span(1)
						if newparam[2].strip() != oldparam[2].strip():
							text = text[:span(0)] + newparam[2] + text[span(1):]
					else:
						# NOTE(review): text[:] is a full copy on both sides, so this
						# would place the parameter between two complete copies of the
						# text; the slice bounds appear to be missing.
						text = text[:] + newtemplates[i][1][j] + text[:]
						pointer = len(text[:] + newtemplates[i][1][j])
			else:
				w.output('\03{yellow}template list does not match page %s: %s vs. %s\03{default}' % \
					(page.title(), newtemplates[i][0].strip(), oldtemplates[i][0].strip()))
				return '# %s: template list did not match templates on page' % page.title(aslink=True)
	# Save only when something actually changed
	if text != page.get():
		return savepage(page, text, BRFANo, summary, minor)
	else:
		return ''

def fmtdate(date):
	"""Return the date as 'day Month year' with the English month name, e.g. '5 March 2010'."""
	# Index 0 is a placeholder so that date.month (1-12) can be used as the index directly
	month_names = (
		'',
		'January', 'February', 'March', 'April', 'May', 'June',
		'July', 'August', 'September', 'October', 'November', 'December',
	)
	day = date.day
	month_name = month_names[date.month]
	year = date.year
	return '%d %s %d' % (day, month_name, year)

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. This content is not intended to constitute professional advice. It is provided for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.