3 from ...util import none_or
4 from ..errors import MalformedResponse
5 from .collection import Collection
7 logger = logging.getLogger("mw.api.collections.revisions")
10 class Revisions(Collection):
12 A collection of revisions indexed by title, page_id and user_text.
13 Note that revisions of deleted pages are queryable via
14 :class:`mw.api.DeletedRevs`.
17 PROPERTIES = {'ids', 'flags', 'timestamp', 'user', 'userid', 'size',
18 'sha1', 'contentmodel', 'comment', 'parsedcomment',
19 'content', 'tags', 'flagged'}
21 DIFF_TO = {'prev', 'next', 'cur'}
23 # This is *not* the right way to do this, but it should work for all queries.
26 def get(self, rev_id, **kwargs):
28 Get a single revision based on its ID. Raises a :py:class:`KeyError`
29 if the rev_id cannot be found.
35 Passed to :py:meth:`query`
42 revs = list(self.query(revids={rev_id}, **kwargs))
45 raise KeyError(rev_id)
49 def query(self, *args, limit=None, **kwargs):
51 Get revision information.
52 See `<https://www.mediawiki.org/wiki/API:Properties#revisions_.2F_rv>`_
56 Which properties to get for each revision:
58 * ids - The ID of the revision
59 * flags - Revision flags (minor)
60 * timestamp - The timestamp of the revision
61 * user - User that made the revision
62 * userid - User id of revision creator
63 * size - Length (bytes) of the revision
64 * sha1 - SHA-1 (base 16) of the revision
65 * contentmodel - Content model id
66 * comment - Comment by the user for revision
67 * parsedcomment - Parsed comment by the user for the revision
68 * content - Text of the revision
69 * tags - Tags for the revision
71 Limit how many revisions will be returned
72 No more than 500 (5000 for bots) allowed
74 From which revision id to start enumeration (enum)
76 Stop revision enumeration on this revid
77 start : :class:`mw.Timestamp`
78 From which revision timestamp to start enumeration (enum)
79 end : :class:`mw.Timestamp`
80 Enumerate up to this timestamp
84 Only include revisions made by user_text
86 Exclude revisions made by user
88 Only list revisions tagged with this tag
89 expandtemplates : bool
90 Expand templates in revision content (requires "content" property)
92 Generate XML parse tree for revision content (requires "content" property)
94 Parse revision content (requires "content" property)
96 Only retrieve the content of this section number
98 Which tokens to obtain for each revision
100 * rollback - See `<https://www.mediawiki.org/wiki/API:Edit_-_Rollback#Token>`_
102 When more results are available, use this to continue
104 Revision ID to diff each revision to. Use "prev", "next" and
105 "cur" for the previous, next and current revision respectively
107 Text to diff each revision to. Only diffs a limited number of
108 revisions. Overrides diffto. If section is set, only that
109 section will be diffed against this text
111 Serialization format used for difftotext and expected for output of content
120 An iterator of rev dicts returned from the API.
123 revisions_yielded = 0
127 kwargs['limit'] = self.MAX_REVISIONS
129 kwargs['limit'] = min(limit - revisions_yielded, self.MAX_REVISIONS)
131 rev_docs, rvcontinue = self._query(*args, **kwargs)
135 revisions_yielded += 1
137 if limit != None and revisions_yielded >= limit:
141 if rvcontinue != None and len(rev_docs) > 0:
142 kwargs['rvcontinue'] = rvcontinue
147 def _query(self, revids=None, titles=None, pageids=None, properties=None,
148 limit=None, start_id=None, end_id=None, start=None,
149 end=None, direction=None, user=None, excludeuser=None,
150 tag=None, expandtemplates=None, generatexml=None,
151 parse=None, section=None, token=None, rvcontinue=None,
152 diffto=None, difftotext=None, contentformat=None):
# Performs a single revisions request: fills `params` from the keyword
# arguments, sends it with `self.session.get`, and returns the tuple
# `(rev_docs, rvcontinue)`.  A KeyError hit while reading the response is
# re-raised as MalformedResponse carrying the message and the response doc.
# NOTE(review): the initialisation of `params` and `rev_docs` and the opening
# `try:` are not visible in this excerpt.
#
# Selectors for what to look up; the two id lists are passed to `_items`
# with `type=int`.
160 params['revids'] = self._items(revids, type=int)
161 params['titles'] = self._items(titles)
162 params['pageids'] = self._items(pageids, type=int)
# `_items` receives `levels=self.PROPERTIES` here -- presumably to restrict the
# requested properties to that set; confirm against `Collection._items`.
164 params['rvprop'] = self._items(properties, levels=self.PROPERTIES)
166 if revids == None: # Can't have a limit unless revids is none
167 params['rvlimit'] = none_or(limit, int)
# Enumeration bounds, by revision id and by timestamp.
169 params['rvstartid'] = none_or(start_id, int)
170 params['rvendid'] = none_or(end_id, int)
171 params['rvstart'] = self._check_timestamp(start)
172 params['rvend'] = self._check_timestamp(end)
174 params['rvdir'] = self._check_direction(direction)
175 params['rvuser'] = none_or(user, str)
# NOTE(review): `user` above is converted with `str`, but `excludeuser` is
# converted with `int`.  If `excludeuser` is a user name like `user`, a
# non-numeric name would presumably fail the `int` conversion -- this likely
# should be `str`; confirm against the MediaWiki `rvexcludeuser` parameter.
176 params['rvexcludeuser'] = none_or(excludeuser, int)
177 params['rvtag'] = none_or(tag, str)
178 params['rvexpandtemplates'] = none_or(expandtemplates, bool)
179 params['rvgeneratexml'] = none_or(generatexml, bool)
180 params['rvparse'] = none_or(parse, bool)
181 params['rvsection'] = none_or(section, int)
182 params['rvtoken'] = none_or(token, str)
183 params['rvcontinue'] = none_or(rvcontinue, str)
184 params['rvdiffto'] = self._check_diffto(diffto)
185 params['rvdifftotext'] = none_or(difftotext, str)
186 params['rvcontentformat'] = none_or(contentformat, str)
188 doc = self.session.get(params)
# When the response carries a 'query-continue' section, pick up the token
# the caller must send back to fetch the next batch.
191 if 'query-continue' in doc:
192 rvcontinue = doc['query-continue']['revisions']['rvcontinue']
# A response without a 'pages' entry is treated as having no pages.
196 pages = doc['query'].get('pages', {}).values()
199 for page_doc in pages:
# Pages flagged 'missing' or carrying no 'revisions' contribute nothing.
200 if 'missing' in page_doc or 'revisions' not in page_doc: continue
# Detach the revision list from its page document, then attach the
# (now revision-free) page document to every revision under 'page'.
202 page_rev_docs = page_doc['revisions']
203 del page_doc['revisions']
205 for rev_doc in page_rev_docs:
206 rev_doc['page'] = page_doc
208 rev_docs.extend(page_rev_docs)
210 return rev_docs, rvcontinue
# Any key missing from the response surfaces as MalformedResponse, with the
# raw response attached.
212 except KeyError as e:
213 raise MalformedResponse(str(e), doc)
216 def _check_diffto(self, diffto):
217 if diffto == None or diffto in self.DIFF_TO: