# Copyright (C) 2018 Nathan TeBlunthuis

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
import re
from itertools import islice
from json.decoder import JSONDecodeError
from pprint import pprint

import requests


def write_logevents(logevents, out):
    """Write one tab-separated row per log event to ``out``.

    Each row has eleven columns, in the same order as the header written by
    ``get_events_for_wiki``: comment, logid, ns, pageid, timestamp, title,
    type, user, ancient, rights-new, rights-old.

    The rights columns are taken from ``logevent['rights']`` when that key is
    present, otherwise from ``logevent['params']['newgroups']`` /
    ``['oldgroups']`` (joined with commas).  When neither form is available
    the row is flagged ``ancient='true'`` and both rights columns are empty.

    Args:
        logevents: iterable of log-event dicts as found under
            ``rv['query']['logevents']`` in the API response.
        out: writable text file-like object.
    """
    for logevent in logevents:
        # if there is hidden information, we skip this one because there
        # is nothing to report
        if ('userhidden' in logevent
                or 'actionhidden' in logevent
                or 'commenthidden' in logevent):
            continue

        # Column order must match the header row: the 'ns', 'title' and
        # 'type' fields are part of that header and therefore of every row.
        le_output = [logevent['comment'],
                     str(logevent['logid']),
                     str(logevent['ns']),
                     str(logevent['pageid']),
                     logevent['timestamp'],
                     logevent['title'],
                     logevent['type'],
                     str(logevent['user'])]

        # Events without a "params" entry are treated as having no params.
        params = logevent.get("params", {})

        if "rights" in logevent:
            le_output.extend(['false',
                              logevent['rights']['new'],
                              logevent['rights']['old']])
        elif "newgroups" in params and "oldgroups" in params:
            le_output.extend(['false',
                              ','.join(params['newgroups']),
                              ','.join(params['oldgroups'])])
        else:
            # No rights information in either format: mark as "ancient".
            le_output.extend(['true', '', ''])

        out.write("\t".join(le_output) + "\n")
def get_events_for_wiki(wikiname, url, wikitype="wikia"):
    """Download a wiki's log events into ``logevents-2017/<wikiname>.tsv``.

    The output file is created (with its header row) before the first
    request is made, so a file exists even when the wiki turns out to be
    gone or to have no data.  Results are fetched page by page, following
    either the legacy ``query-continue`` or the current ``continue``
    continuation element of the API response.

    Args:
        wikiname: name used for the output file and in messages.
        url: base URL of the wiki; a trailing slash is added if missing.
        wikitype: ``"wikia"`` queries ``<url>api.php``; any other value
            queries ``<url>w/api.php``.

    Returns:
        None.  Problems are reported with ``print`` and end the download
        for this wiki.
    """
    # The redirect handling below recurses with a bare "scheme://host" URL,
    # so normalise the trailing slash before appending the API path.
    if not url.endswith('/'):
        url = url + '/'

    if wikitype == "wikia":
        api_url = url + 'api.php'
    else:  # wikitype == wikipedia
        api_url = url + "w/api.php"

    # NOTE(review): only the 'action' entry is visible in this copy of the
    # file.  'list' and 'format' follow from how the response is consumed
    # (rv['query']['logevents'], response.json()); 'letype', 'lelimit' and
    # 'ledir' are reconstructed -- confirm against the original script.
    query = {'action': 'query',
             'list': 'logevents',
             'letype': 'rights',
             'lelimit': '500',
             'format': 'json',
             'ledir': 'newer'}

    redirect_url = None

    #out = open("../wikipedias/adminlist_output/logevents/nobackup/%s.tsv" % wikiname, "w")
    with open("logevents-2017/%s.tsv" % wikiname, "w") as out:
        out.write("\t".join(['comment', 'logid', 'ns', 'pageid', 'timestamp',
                             'title', 'type', 'user', 'ancient',
                             'rights-new', 'rights-old\n']))

        response = requests.get(api_url, params=query)
        hit_url = response.url

        if wikitype == "wikia":
            re_str = r"^http://(community|www)\.wikia\.com/"
            if re.match(re_str, hit_url):
                # api_url 'http://community.wikia.com/wiki/Community_Central:Not_a_valid_Wikia':
                print("ERROR: %s no longer exists" % wikiname)
                return

            re_str = r"^(http|https)://.*\.wikia.com/api\.php"
            if not re.match(re_str, hit_url):
                # We were redirected somewhere that is not an api.php
                # endpoint: retry against the host we ended up on.
                re_str = r"^((http|https)://.*\.wikia\.com)"
                matches = re.findall(re_str, hit_url)
                if not matches or matches[0][0] + '/' == url:
                    # No usable host, or the same host again (which would
                    # recurse forever).
                    print("ERROR: %s redirected to an unusable URL: %s"
                          % (wikiname, hit_url))
                    return
                redirect_url = matches[0][0]

        if redirect_url is None:
            # Parse the response for every wikitype so that `rv` is always
            # bound before it is used below.
            try:
                ## this is the only way out
                rv = response.json()
            except ValueError:
                # json.JSONDecodeError (and the variants requests may
                # raise) are all subclasses of ValueError.
                print(" New Error! ")
                return

            ## check that we hit the right wiki
            try:
                logevents = rv['query']['logevents']
                write_logevents(logevents, out)
            except KeyError:
                print("ERROR: %s contains no logevent data" % wikiname)
                return

            while 'query-continue' in rv or 'continue' in rv:
                if 'query-continue' in rv:
                    query['lestart'] = rv['query-continue']['logevents']['lestart']
                else:
                    # Merge every continuation value into the request, as
                    # the API documentation prescribes, instead of sending
                    # the stringified dict as the 'continue' parameter.
                    query.update(rv['continue'])

                response = requests.get(api_url, params=query)
                rv = response.json()
                logevents = rv['query']['logevents']
                write_logevents(logevents, out)

    if redirect_url is not None:
        # The output file is closed by now; the retry reopens it.
        return get_events_for_wiki(wikiname, redirect_url, wikitype=wikitype)
# Make sure the output directory exists so a first run does not fail.
os.makedirs("logevents-2017", exist_ok=True)

# Names of the wikis that already have an output file; these are skipped
# below.  A set keeps the per-wiki membership test cheap.
files = {re.sub(r'\.tsv$', '', fname) for fname in os.listdir("logevents-2017")}

# iterate through the list of wikis
#for line in ["anime,http://anime.wikia.com/"]:
#for line in ["blogging,http://blogging.wikia.com/"]:

# NOTE(review): the statement that defined the islice() start offset `i` is
# not present in this copy of the file; 0 (start at the first line of the
# list) is assumed -- confirm against the original script.
i = 0

# for line in open("list_of_wikis.csv", "r").readlines():
with open("../wikis.needing.userroles.csv", "r") as wiki_list:
    for line in islice(wiki_list, i, None):
        # Drop the trailing newline so it does not end up in the URL.
        line = line.strip()
        if not line:
            continue

        (wiki, url) = line.split(",")

        print("Processing wiki: %s" % wiki)

        if wiki in files:
            print("SKIPPING: file \"%s\" already exists)" % wiki)
            continue

        if "wikia.com" in url:
            wikitype = "wikia"
        else:  # "wikipedia.org in url":
            wikitype = "wikipedia"

        get_events_for_wiki(wiki, url, wikitype=wikitype)