4 from ...util import none_or
5 from ..errors import MalformedResponse
6 from .collection import Collection
# Logger for the recent-changes collection, registered under the name below.
8 logger = logging.getLogger("mw.api.collections.recent_changes")
# Collection that issues the `list=recentchanges` query (see `_query`, which
# sends 'list': "recentchanges") and pages through the results (see `query`).
11 class RecentChanges(Collection):
13 Recent changes (revisions, page creations, registrations, moves, etc.)
# Pattern that `_check_rccontinue` matches `rccontinue` values against.  The
# visible part accepts a `YYYY-MM-DDTHH:MM:SSZ` timestamp followed by `|`.
# NOTE(review): the remainder of this expression is not visible here --
# confirm the full pattern before relying on this description.
16 RCCONTINUE = re.compile(r"([0-9]{4}-[0-9]{2}-[0-9]{2}T" +
17 r"[0-9]{2}:[0-9]{2}:[0-9]{2}Z|" +
# Passed as `levels` to `self._items` when `_query` builds `rcprop` from its
# `properties` argument.
# NOTE(review): this set literal continues past the lines visible here.
21 PROPERTIES = {'user', 'userid', 'comment', 'timestamp', 'title',
22 'ids', 'sizes', 'redirect', 'flags', 'loginfo',
# Passed as `levels` to `self._items` when `_query` builds `rcshow` from its
# `show` argument.  Presumably a leading "!" negates the filter (the `query`
# docs use '!anon' for "logged-in users") -- confirm against the API docs.
25 SHOW = {'minor', '!minor', 'bot', '!bot', 'anon', '!anon',
26 'redirect', '!redirect', 'patrolled', '!patrolled'}
# Passed to `self._items` together with the `type` argument when `_query`
# builds `rctype`.
28 TYPES = {'edit', 'external', 'new', 'log'}
# Allowed non-None values for `direction`; enforced by an assert in `_query`.
30 DIRECTIONS = {'newer', 'older'}
34 def _check_rccontinue(self, rccontinue):
35 if rccontinue is None:
37 elif self.RCCONTINUE.match(rccontinue):
41 "rccontinue {0} is not formatted correctly ".format(rccontinue) +
42 "'%Y-%m-%dT%H:%M:%SZ|<last_rcid>'"
def query(self, *args, limit=None, **kwargs):
    """
    Enumerate recent changes.
    See `<https://www.mediawiki.org/wiki/API:Recentchanges>`_

    Changes are fetched in batches of at most ``self.MAX_CHANGES`` and
    yielded one at a time.  Every argument other than `limit` is forwarded
    unchanged to :meth:`_query`.  Another batch is requested for as long as
    the previous call returned a continuation value together with a
    non-empty batch, and `limit` (if given) has not been reached.

    :Parameters:
        start : :class:`mw.Timestamp`
            The timestamp to start enumerating from
        end : :class:`mw.Timestamp`
            The timestamp to end enumerating
        direction : `str`
            One of the values in ``DIRECTIONS`` ("newer" or "older")
        namespace : `int`
            Filter log entries to only this namespace(s)
        user : `str`
            Only list changes by this user
        excludeuser : `str`
            Don't list changes by this user
        tag : `str`
            Only list changes tagged with this tag
        properties : set(`str`)
            Include additional pieces of information

            * user - Adds the user responsible for the edit and tags if they are an IP
            * userid - Adds the user id responsible for the edit
            * comment - Adds the comment for the edit
            * parsedcomment - Adds the parsed comment for the edit
            * flags - Adds flags for the edit
            * timestamp - Adds timestamp of the edit
            * title - Adds the page title of the edit
            * ids - Adds the page ID, recent changes ID and the new and old revision ID
            * sizes - Adds the new and old page length in bytes
            * redirect - Tags edit if page is a redirect
            * patrolled - Tags patrollable edits as being patrolled or unpatrolled
            * loginfo - Adds log information (logid, logtype, etc) to log entries
            * tags - Lists tags for the entry
            * sha1 - Adds the content checksum for entries associated with a revision
        token : `str`
            Which tokens to obtain for each change
        show : set(`str`)
            Show only items that meet this criteria. For example, to see
            only minor edits done by logged-in users, set
            show={'minor', '!anon'}.
        limit : `int`
            How many total changes to return
        type : set(`str`)
            Which types of changes to show
        toponly : `bool`
            Only list changes which are the latest revision
        rccontinue : `str`
            Use this to continue loading results from where you last left off

    :Returns:
        A generator of the change documents returned by :meth:`_query`
    """
    limit = none_or(limit, int)
    yielded = 0

    while True:
        # Ask for a full batch, or only for what is still missing when the
        # caller capped the total number of changes.
        if limit is None:
            batch_size = self.MAX_CHANGES
        else:
            batch_size = min(limit - yielded, self.MAX_CHANGES)
        kwargs['limit'] = batch_size

        rc_docs, rccontinue = self._query(*args, **kwargs)

        for rc_doc in rc_docs:
            yield rc_doc
            yielded += 1

            if limit is not None and yielded >= limit:
                return

        # Stop once there is nothing to continue from or the last batch
        # came back empty.
        if rccontinue is None or len(rc_docs) == 0:
            return

        kwargs['rccontinue'] = rccontinue
def _query(self, start=None, end=None, direction=None, namespace=None,
           user=None, excludeuser=None, tag=None, properties=None,
           token=None, show=None, limit=None, type=None,
           toponly=None, rccontinue=None):
    """
    Fetch a single batch of recent changes.

    Each argument is converted and sent as the ``rc``-prefixed request
    parameter of the same name (``start`` -> ``rcstart``, ``properties`` ->
    ``rcprop``, ``direction`` -> ``rcdir``, ...).

    :Parameters:
        start, end
            Converted with `str` when not `None`
        direction : `str`
            `None` or one of ``self.DIRECTIONS``
        namespace, limit : `int`
        user, excludeuser, tag, token : `str`
        properties, show, type
            Collections handed to ``self._items``
        toponly : `bool`
        rccontinue : `str`
            Checked with :meth:`_check_rccontinue` before being sent

    :Returns:
        A ``(rc_docs, rccontinue)`` pair.  `rc_docs` is the value of
        ``doc['query']['recentchanges']``.  `rccontinue` is the value from
        the response's ``'query-continue'`` entry when there is one.
        Otherwise, for a non-empty batch, it is built from the last
        change as ``"<timestamp>|<rcid + 1>"``.  For an empty batch the
        `rccontinue` argument is returned unchanged.

    :Raises:
        AssertionError
            when `direction` is neither `None` nor in ``self.DIRECTIONS``
        MalformedResponse
            when an expected key is missing from the response
    """
    params = {
        'action': "query",
        'list': "recentchanges"
    }

    params['rcstart'] = none_or(start, str)
    params['rcend'] = none_or(end, str)

    assert direction in {None} | self.DIRECTIONS, \
        "Direction must be one of {0}".format(self.DIRECTIONS)

    params['rcdir'] = direction
    params['rcnamespace'] = none_or(namespace, int)
    params['rcuser'] = none_or(user, str)
    params['rcexcludeuser'] = none_or(excludeuser, str)
    params['rctag'] = none_or(tag, str)
    params['rcprop'] = self._items(properties, levels=self.PROPERTIES)
    # Previously this sent `tag` a second time and ignored `token`.
    params['rctoken'] = none_or(token, str)
    params['rcshow'] = self._items(show, levels=self.SHOW)
    params['rclimit'] = none_or(limit, int)
    # NOTE(review): unlike rcprop/rcshow, TYPES is passed positionally here;
    # confirm that the second positional parameter of `_items` is `levels`.
    params['rctype'] = self._items(type, self.TYPES)
    params['rctoponly'] = none_or(toponly, bool)
    params['rccontinue'] = self._check_rccontinue(rccontinue)

    doc = self.session.get(params)

    try:
        rc_docs = doc['query']['recentchanges']

        if 'query-continue' in doc:
            rccontinue = \
                doc['query-continue']['recentchanges']['rccontinue']
        elif len(rc_docs) > 0:
            # No continuation in the response: build one that points just
            # past the last change received.
            rccontinue = "|".join([rc_docs[-1]['timestamp'],
                                   str(rc_docs[-1]['rcid'] + 1)])
        # Otherwise (empty batch) keep the rccontinue that was passed in.

    except KeyError as e:
        raise MalformedResponse(str(e), doc) from e

    return rc_docs, rccontinue