Meidogger:Edoderoobot/dpdata fy.py

import pywikibot
from pywikibot import pagegenerators as pg
import re
import datetime
import time
import urllib.request
from urllib.parse import quote
from collections import defaultdict

wheretoskip = 'Wikipedy:Links_nei_betsjuttingssiden/skips'
# tools.wmflabs.org was superseded by toolforge.org:
sourcefromfile = 'https://multichill.toolforge.org/queries2/nlwp/links_naar_doorverwijspaginas.txt'
wikiurl = {'nl': 'Wikipedia:Links_naar_doorverwijspagina%27s/data',
           'fy': 'Wikipedy:Links_nei_betsjuttingssiden/data'}
# 'Wiki%3A' is the localised Special: prefix on fy.wikipedia; the nl equivalent was
# 'https://nl.wikipedia.org/w/index.php?title=Speciaal%3AVerwijzingenNaarHier&namespace=0&target='
linkstostr = 'https://fy.wikipedia.org/w/index.php?title=Wiki%3AWat+is+hjirmei+keppele&namespace=0&target='
template = 'Berjocht:Neibetsjuttings'    # nl equivalent: 'template:Dp'
disamb_addition = ' (betsjuttingsside)'  # nl equivalent: ' (doorverwijspagina)'
allowed_namespaces = [0]
threshold = 1        # dp pages with fewer backlinks than this are skipped
max_new_pages = 100
lng = 'fy'
linenr = 0

def getSkiplinks(site):
    """Read the skips page: maps a dp page title to linking pages that may be ignored."""
    skiplinks = defaultdict(list)
    try:
        skiplinkspage = pywikibot.Page(site, wheretoskip).get()
        lines = re.findall(r"\*.*", skiplinkspage)       # one bullet line per entry
        linkre = re.compile(r"\[([^\[\|\]]*)[\]\|]")     # title part of [[link]] or [[link|label]]
        for line in lines:
            titles = linkre.findall(line)
            if len(titles) > 1:
                skiplinks[titles[0]] += titles[1:]       # first link is the dp page, the rest are skips
    except pywikibot.NoPage:
        pass
    return skiplinks
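# Illustrative sketch (titles are made up): the skips page is expected to
# hold bullet lines whose first link names a disambiguation page and whose
# further links name pages that may keep linking to it, e.g.
#   * [[Berlyn (betsjuttingsside)]] [[Brandenburch]] [[Spree]]
# makes count_links() ignore the links from Brandenburch and Spree.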

def getlinksfromfile(filename):
    """Yield the titles of all [[wiki links]] found in the downloaded text file."""
    with urllib.request.urlopen(filename) as response:
        html = response.read().decode("utf-8")
        result = prevx = ''
        collect = False
        for x in html:
            if x == '[' and prevx == '[':   # second '[' of '[[' starts a link title
                collect = True
                result = ''
            if collect:
                result = result + x
            if x == ']' and prevx == ']':   # second ']' of ']]' ends it
                yield result[1:-2]          # strip the leading '[' and trailing ']]'
                result = ''
                collect = False
            prevx = x
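# Minimal usage sketch (hypothetical file contents): for a downloaded line
# "* [[Berlyn]] sjoch ek [[Hamburch]]" the scanner above yields
# 'Berlyn' and then 'Hamburch'.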

def getnewpages(site):
    """Yield titles of pages created in the last 24 hours (newest first)."""
    for page in pg.NewpagesPageGenerator(site, 0, max_new_pages):
        dt = page.oldest_revision
        # Revision timestamps are UTC, so compare with UTC now rather than local time.
        timediff = datetime.datetime.utcnow() - dt.timestamp
        if timediff < datetime.timedelta(days=1):   # the script runs every 24 hrs
            if page.namespace().id in allowed_namespaces and page.exists():
                yield page.title()
        else:
            break   # newer pages come first, so the rest is older

def count_links(dppage):
    """Count backlinks to a dp page, ignoring redirects, other dp pages and skips."""
    linksfound = 0
    if disamb_addition not in dppage.title():
        for onelink in dppage.backlinks():
            if (onelink.namespace().id in allowed_namespaces
                    and not onelink.isRedirectPage()
                    and not onelink.isDisambig()
                    and dppage.title() != onelink.title() + disamb_addition
                    and onelink.title() not in skiplinks[dppage.title()]):
                linksfound += 1
    return linksfound

def process_one_disambiguation_page(site, pagetitle, result):
    if disamb_addition not in pagetitle:
        dpPage = pywikibot.Page(site, pagetitle)   # pagetitle can contain spaces or underscores
        linksfound = count_links(dpPage)
        if linksfound >= threshold and dpPage.title() not in result:
            result[dpPage.title()] = linksfound

def process_one_regular_page(site, pagetitle, result):
    page = pywikibot.Page(site, pagetitle)
    for link in page.linkedPages():
        if link.isDisambig():
            process_one_disambiguation_page(site, link.title(), result)
def get_one_line(page):
    """Yield the page text line by line, keeping line ends."""
    for one_line in page.text.splitlines(keepends=True):
        yield one_line

def count_wiki_links(line):
    """Count the '[[' wiki-link openers in one line."""
    return line.count('[[')
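# Example with a hypothetical line of wikitext:
#   count_wiki_links('sjoch [[Berlyn]] en [[Parys]]') == 2
# check_one_page() below uses this to flag lines with more than one link.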

def check_one_page(page, show):
    """True when any single line of the page holds more than one wiki link."""
    for line in get_one_line(page):
        if count_wiki_links(line) > 1:
            if show:
                print(line)
            return True
    return False

def action_one_page(lng, dppage):
    """Count relevant backlinks; return a wikitable row, or None below the threshold."""
    global linenr

    linksfound = 0
    for onelink in dppage.backlinks():
        if (onelink.namespace().id in allowed_namespaces
                and not onelink.isRedirectPage()
                and not onelink.isDisambig()
                and dppage.title() != onelink.title() + disamb_addition
                and onelink.title() not in skiplinks[dppage.title()]):
            linksfound += 1
            print('%s-%s-%s' % (linksfound, dppage.title(), onelink.title()))
    if linksfound >= threshold:
        linenr += 1
        return '|%s||[[%s]]||%s||[%s%s links]' % (str(linenr), dppage.title(), str(linksfound), linkstostr, quote(dppage.title()))
    return None
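# Illustration only (page title and counts are hypothetical): a returned row
# looks like
#   |7||[[Berlyn]]||3||[https://fy.wikipedia.org/...&target=Berlyn links]
# and is appended to the report table by the main loop below.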


def genPagesWithTemplate(sitecode, template):
    """Yield all main-namespace pages that refer to the given template."""
    print('Start fetching pages')
    site = pywikibot.Site(sitecode)
    refPage = pywikibot.Page(pywikibot.Link(template, site))
    gen = pg.NamespaceFilterPageGenerator(refPage.getReferences(), namespaces=[0])
    for onepage in gen:
        if onepage.namespace().id in allowed_namespaces:
            print('Yielding %s' % onepage.title())
            yield onepage



starttime = time.ctime()
print('Start: %s' % starttime)
site = pywikibot.Site(lng)
skiplinks = getSkiplinks(site)
result={}
wikistr = 'Dizze side wurdt geregeldwei troch in bot opnij oanmakke.\n'
wikistr += 'Sjoch de skiednis fan de side wannear, en troch hokker bot.\n'
wikistr += 'At hjir links meiteld binne dy\'t net repareard hoege te wurden, foegje dy dan ta op [[%s]].\n' % wheretoskip
wikistr += '{| class="wikitable sortable"\n|-\n! Artikel !! XtraLinks !! Oantal !! Links \n'
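# Rendered, the header above opens the report like this:
#   {| class="wikitable sortable"
#   |-
#   ! Artikel !! XtraLinks !! Oantal !! Links
# Each qualifying dp page adds one '|-' row below; '|}' closes the table.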


# Alternative input sources, currently disabled:
#for link in getlinksfromfile(sourcefromfile):
#  process_one_disambiguation_page(site,link,result)
#for link in getnewpages(site):
#  process_one_regular_page(site,link,result)
#process_one_regular_page(site,wikiurl[lng],result)  # refresh the report page itself

rows = 0
for dppage in genPagesWithTemplate(lng, template):
    oneline = action_one_page(lng, dppage)
    if oneline is not None:
        print('-->', oneline)
        result[dppage.title()] = 1
        wikistr += '|-\n%s\n' % oneline
        rows += 1
        if rows > 1105:
            break   # safety cap while testing

print('--------------------------------')
print(result)
print('--------------------------------')
if False:   # disabled: builds the table from result when the file-based sources above are used
    for item in result:
        print(item)
        ptc = pywikibot.Page(site, item)
        xstr = 'X' if check_one_page(ptc, False) else ''
        wikistr += '|-\n|[[%s]]||%s||%s||[%s%s link]\n' % (item, xstr, result[item], linkstostr, quote(item))


wikistr += '|}'
stoptime = time.ctime()
wikistr += '\n\n%s-%s' % (starttime,stoptime)

print(wikistr)

pywikibot.Page(site,wikiurl[lng]).put(wikistr,summary='#dp-update')
print('Klaar')   # "Done"