import pywikibot
from pywikibot import pagegenerators as pg
import codecs #used in logfiles, unicoded strings
import sys, re
import datetime
import time
import urllib.request
from urllib.parse import quote
from collections import defaultdict
# --- Configuration ---------------------------------------------------------
# Several settings are assigned twice on purpose: the first value is the
# historic Dutch-Wikipedia (nl) configuration, and the second assignment
# immediately overrides it with the Frisian-Wikipedia (fy) value that is
# actually used by this run of the script.
wheretoskip='Wikipedy:Links_nei_betsjuttingssiden/skips'  # on-wiki list of backlinks to ignore
sourcefromfile='https://tools.wmflabs.org/multichill/queries2/nlwp/links_naar_doorverwijspaginas.txt'  # old toolserver URL (overridden below)
sourcefromfile='https://multichill.toolforge.org/queries2/nlwp/links_naar_doorverwijspaginas.txt'  # current toolforge URL
# Target page (per language code) where the generated report is published.
wikiurl={'nl':u'Wikipedia:Links_naar_doorverwijspagina%27s/data','fy':'Wikipedy:Links_nei_betsjuttingssiden/data'}
linkstostr=u'https://nl.wikipedia.org/w/index.php?title=Speciaal%3AVerwijzingenNaarHier&namespace=0&target='  # nl "what links here" base URL (overridden below)
linkstostr= 'https://fy.wikipedia.org/w/index.php?title=Wiki%3AWat+is+hjirmei+keppele&namespace=0&target='  # fy "what links here" base URL
template = 'template:Dp'  # nl disambiguation template (overridden below)
template = 'Berjocht:Neibetsjuttings'  # fy disambiguation template
disamb_addition=' (doorverwijspagina)'  # nl dab-page title suffix (overridden below)
disamb_addition=' (betsjuttingsside)'  # fy dab-page title suffix
allowed_namespaces = [0]  # article (main) namespace only
treshold=1  # (sic: "threshold") dab pages with fewer backlinks than this are skipped
max_new_pages=100  # how many entries to pull from the new-pages feed
lng='fy'  # language code of the wiki being processed
linenr=0  # running row number of the output table (mutated by action_one_page)
def getSkiplinks(site):
    """Read the on-wiki skip list and return it as a mapping.

    The page named by ``wheretoskip`` holds bullet lines of the form
    ``* [[Dab page]] [[backlink to ignore]] [[another]] ...``: the first
    link on a line is a disambiguation page, the remaining links are
    backlinks that must not be counted for it.

    Returns a ``defaultdict(list)`` mapping dab-page title -> list of
    titles to skip (missing keys yield an empty list).
    """
    skiplinks = defaultdict(list)
    # Fix: patterns are now raw strings; '\*' and '\[' in plain strings are
    # invalid escapes and raise SyntaxWarning on current Python versions.
    linkre = re.compile(r"\[([^\[\|\]]*)[\]\|]")  # first target inside [[...]] or [[...|...]]
    try:
        skiplinkspage = pywikibot.Page(site, wheretoskip).get()
        # One skip rule per bullet line.
        for line in re.findall(r"\*.*", skiplinkspage):
            titles = linkre.findall(line)
            if len(titles) > 1:
                skiplinks[titles[0]] += titles[1:]
    except pywikibot.NoPage:  # NOTE: pywikibot.exceptions.NoPageError in newer releases
        pass  # no skip page exists yet -> nothing to skip
    return skiplinks
def getlinksfromfile(filename):
    """Fetch *filename* (a URL) and yield the target of every [[...]] link.

    The payload is scanned character by character: a '[[' pair opens a
    capture, the matching ']]' pair closes it and the text in between is
    yielded.  A fresh '[[' while already capturing restarts the capture,
    so the innermost opener wins.
    """
    with urllib.request.urlopen(filename) as response:
        payload = response.read().decode("utf-8")
    captured = ''
    capturing = False
    previous = ''
    for current in payload:
        if current == '[' and previous == '[':
            # second '[' of an opener: (re)start capturing
            capturing = True
            captured = ''
        if capturing:
            captured += current
        if current == ']' and previous == ']':
            # captured now looks like '[target]]' -> strip the brackets
            yield captured[1:-2]
            captured = ''
            capturing = False
        previous = current
def getnewpages(site):
    """Yield titles of mainspace pages created within the last 24 hours.

    Relies on NewpagesPageGenerator returning newest pages first: as soon
    as one page older than a day appears, everything after it is older
    too, so iteration stops.
    """
    for page in pg.NewpagesPageGenerator(site,0,max_new_pages):
        dt=page.oldest_revision  # first revision = page creation
        # today() is invoked on the Timestamp instance but behaves like the
        # classmethod: it returns the current time, so this is "now - created".
        # NOTE(review): today() is naive local time while the revision
        # timestamp is presumably UTC -- confirm there is no timezone skew.
        timediff=dt.timestamp.today()-dt.timestamp
        if (timediff<datetime.timedelta(24/24)): #page less 24 hours old (script runs every 24 hrs); 24/24 == 1.0 day
            if (page.namespace().id in allowed_namespaces):
                if page.exists():
                    yield(page.title())
        else:
            break  # newest-first feed: all remaining pages are older than 24h
def count_links(dppage):
    """Count mainspace backlinks of a dab page that still need repair.

    Backlinks are skipped when they are redirects, other dab pages, the
    "(betsjuttingsside)" twin of the dab page itself, or whitelisted in the
    module-level ``skiplinks`` mapping.  A page whose own title carries the
    dab suffix is never counted at all.
    """
    counted = 0
    if disamb_addition not in dppage.title():
        for ref in dppage.backlinks():
            if ref.namespace().id not in allowed_namespaces:
                continue
            if ref.isRedirectPage() or ref.isDisambig():
                continue
            if dppage.title() == ref.title() + disamb_addition:
                continue
            if ref.title() in skiplinks[dppage.title()]:
                continue
            counted += 1
    return counted
def process_one_disambiguation_page(site,pagetitle,result):
    """Count repairable backlinks of one dab page and record them in *result*.

    Titles that already carry the dab suffix are ignored.  A page is added
    to *result* (title -> backlink count) only when its count reaches the
    module-level ``treshold`` and it is not present yet.
    """
    if disamb_addition in pagetitle:
        return  # already a "(betsjuttingsside)" title: nothing to do
    dab = pywikibot.Page(site, pagetitle)  # title may use spaces or underscores
    hits = count_links(dab)
    if hits >= treshold and dab.title() not in result:
        result[dab.title()] = hits
def process_one_regular_page(site,pagetitle,result):
    """Scan one article's outgoing links and process every dab page found.

    Each linked page that is a disambiguation page is handed to
    process_one_disambiguation_page(), which updates *result* in place.
    """
    article = pywikibot.Page(site, pagetitle)
    for linked in article.linkedPages():
        if linked.isDisambig():
            process_one_disambiguation_page(site, linked.title(), result)
"""
"""
def get_one_line(page):
    """Yield the text of *page* one line at a time (trailing newline kept).

    Bug fix: the original version silently dropped any final line that did
    not end in a newline; that leftover tail is now yielded as well.
    """
    one_line = ''
    for x in page.text:
        one_line = one_line + x
        if (x == '\n'):
            yield one_line
            one_line = ''
    if one_line:
        # page text did not end with a newline: emit the remaining tail
        yield one_line
def count_wiki_links(line):
    """Count the wiki-link openers '[[' in *line*, capped at 10.

    Bug fix: the old loop searched the slice line[start+1:] but reused the
    slice-relative index against the original string, so on input such as
    'a[[b[[c' it could re-find the same occurrence and spin up to the cap.
    str.count() counts non-overlapping occurrences, which is exactly what
    the find-loop intended; the cap of 10 is preserved.
    """
    return min(line.count('[['), 10)
def check_one_page(page,show):
    """Return True when any single line of *page* contains 2+ wiki links.

    With *show* set, the first offending line is printed before returning.
    """
    for textline in get_one_line(page):
        multilink = count_wiki_links(textline) > 1
        if multilink:
            if show:
                print(textline)
            return True
    return False
def action_one_page(lng,dppage):
    """Build one wikitable row for a dab page, or return None below threshold.

    Counts the mainspace backlinks that still need fixing (same filter rules
    as count_links, but each hit is printed) and, when the count reaches the
    module-level ``treshold``, bumps the global row counter ``linenr`` and
    returns a formatted table row.  *lng* is currently unused.
    """
    global linenr
    hits = 0
    for backlink in dppage.backlinks():
        if backlink.namespace().id not in allowed_namespaces:
            continue
        if backlink.isRedirectPage() or backlink.isDisambig():
            continue
        if dppage.title() == backlink.title() + disamb_addition:
            continue
        if backlink.title() in skiplinks[dppage.title()]:
            continue
        hits += 1
        print('%s-%s-%s' % (hits, dppage.title(), backlink.title()))
    if hits < treshold:
        return(None)
    linenr += 1
    return('|%s||[[%s]]||%s||[%s%s links]' % (str(linenr), dppage.title(), str(hits), linkstostr, quote(dppage.title())))
def genPagesWithTemplate(sitecode,template):
    """Yield the mainspace pages that transclude *template* on wiki *sitecode*.

    Opens its own Site object from the language code, resolves the template
    page, and walks its transclusions filtered to namespace 0.  Every yielded
    title is echoed to stdout for progress tracking.
    """
    print('Start fetching pages')
    wiki = pywikibot.Site(sitecode)
    tplpage = pywikibot.Page(pywikibot.Link(template, wiki))
    candidates = pg.NamespaceFilterPageGenerator(tplpage.getReferences(), namespaces=[0])
    for candidate in candidates:
        # belt-and-braces: re-check the namespace even after the filter
        if candidate.namespace().id in allowed_namespaces:
            print('Yielding %s' % candidate.title())
            yield(candidate)
# ---------------------------------------------------------------------------
# Main script.  Collects every page transcluding the disambiguation template,
# counts the mainspace backlinks that still point at each one, and publishes
# the result as a sortable wikitable on wikiurl[lng].
# ---------------------------------------------------------------------------
starttime=time.strftime(time.ctime())  # ctime() output has no '%' directives, so strftime passes it through verbatim
print('Start: %s' % starttime)
site=pywikibot.Site(lng)
skiplinks=getSkiplinks(site)  # module-level global, read by count_links()/action_one_page()
#print(skiplinks); print(0/0)
result={}
# Dutch-language page header (historic); completely replaced by the Frisian
# header assembled a few lines further down.
wikistr = u'{{verwijzing2|WP:LND/D}}\n'
wikistr += u'Deze pagina wordt met regelmaat door een bot opnieuw gemaakt.\n'
wikistr += u'Zie de geschiedenis van de pagina wanneer, en door welke bot.\n'
wikistr += u'Als hier links zijn meegeteld die niet gerepareerd hoeven te worden, voeg die dan toe op [[%s]].\n' %(wheretoskip)
wikistr += u'{| class="wikitable sortable"\n|-\n! Artikel !! XtraLinks !! Aantal !! Links \n'
wikistr = u'{{verwijzing2|WP:LND/D}}\n'
# Frisian page header -- this is the text that is actually published.
wikistr=''
wikistr += 'Dizze side wurdt geregeldwei troch in bot opnij oanmakke.'
wikistr += 'Sjoch de skiednis fan de side wannear, en troch hokker bot.'
wikistr += 'At hjir links meiteld binne dy\'t net repareard hoege te wurden, foegje dy dan ta op [[Wikipedy:Links_nei_betsjuttingssiden/skips]].\n'
wikistr += u'{| class="wikitable sortable"\n|-\n! Artikel !! XtraLinks !! Oantal !! Links \n'
# Alternative input sources, currently disabled:
#for link in getlinksfromfile(sourcefromfile):
#    process_one_disambiguation_page(site,link,result)
#for link in getnewpages(site):
#    process_one_regular_page(site,link,result)
#process_one_regular_page(site,wikiurl[lng],result) #the actual page, refresh
if (True):  # kept as a manual toggle, mirroring the disabled branch below
    l=0
    for dppage in genPagesWithTemplate(lng,template):
        oneline = action_one_page(lng,dppage)  # one wikitable row, or None below the threshold
        if not(oneline is None):
            # NOTE(review): oneline is a str, so .title() titlecases the whole
            # table row -- dppage.title() was probably intended here; confirm.
            print('-->',oneline.title())
            result.update({oneline.title():1})
            wikistr += '|-\n%s\n' % oneline
        l+=1
        if l>1105: break #just for testing now a few
print('--------------------------------')
print(result)
print('--------------------------------')
if (False):  # disabled reporting path that renders the result dict instead
    for item in result:
        print(item)
        ptc=pywikibot.Page(site,item)
        if check_one_page(ptc,False):  # 'X' marks pages with 2+ links on one line
            xstr='X'
        else:
            xstr=''
        wikistr+='|-\n|[[%s]]||%s||%s||[%s%s link]\n' % (item,xstr,result[item],linkstostr,quote(item))
wikistr += '|}'  # close the wikitable
stoptime=time.strftime(time.ctime())
wikistr += '\n\n%s-%s' % (starttime,stoptime)  # footer: start/stop timestamps
print(wikistr)
#print(0/0)
pywikibot.Page(site,wikiurl[lng]).put(wikistr,summary='#dp-update')  # publish the report
print('Klaar')  # "Done"