-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
RefreshLinksJob.php
637 lines (577 loc) · 24.4 KB
/
RefreshLinksJob.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
use MediaWiki\Deferred\LinksUpdate\LinksUpdate;
use MediaWiki\Deferred\RefreshSecondaryDataUpdate;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageAssertionException;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Parser\ParserOutput;
use MediaWiki\Revision\RevisionRecord;
use MediaWiki\Revision\RevisionRenderer;
use MediaWiki\Title\Title;
use MediaWiki\User\User;
use Wikimedia\Rdbms\IDBAccessObject;
use Wikimedia\Stats\StatsFactory;
/**
* Job to update link tables for rerendered wiki pages.
*
* This job comes in a few variants:
*
* - a) Recursive jobs to update links for backlink pages for a given title.
* Scheduled by {@see LinksUpdate::queueRecursiveJobsForTable()}; used to
* refresh pages which link/transclude a given title.
* These jobs have (recursive:true,table:<table>) set. They just look up
* which pages link to the job title and schedule them as a set of non-recursive
* RefreshLinksJob jobs (and possible one new recursive job as a way of
* continuation).
* - b) Jobs to update links for a set of pages (the job title is ignored).
* These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
* - c) Jobs to update links for a single page (the job title).
* These jobs need no extra fields set.
*
* Job parameters for all jobs:
* - recursive (bool): When false, updates the current page. When true, updates
* the pages which link/transclude the current page.
* - triggeringRevisionId (int): The revision of the edit which caused the link
* refresh. For manually triggered updates, the last revision of the page (at the
* time of scheduling).
* - triggeringUser (array): The user who triggered the refresh, in the form of a
* [ 'userId' => int, 'userName' => string ] array. This is not necessarily the user
* who created the revision.
* - triggeredRecursive (bool): Set on all jobs which were partitioned from another,
* recursive job. For debugging.
* - Standard deduplication params (see {@see JobQueue::deduplicateRootJob()}).
* For recursive jobs:
* - table (string): Which table to use (imagelinks or templatelinks) when searching for
* affected pages.
* - range (array): Used for recursive jobs when some pages have already been partitioned
* into separate jobs. Contains the list of ranges that still need to be partitioned.
* See {@see BacklinkJobUtils::partitionBacklinkJob()}.
* - division: Number of times the job was partitioned already (for debugging).
* For non-recursive jobs:
* - pages (array): Associative array of [ <page ID> => [ <namespace>, <dbkey> ] ].
* Might be omitted, then the job title will be used.
* - isOpportunistic (bool): Set for opportunistic single-page updates. These are "free"
* updates that are queued when most of the work needed to be performed anyway for
* non-linkrefresh-related reasons, and can be more easily discarded if they don't seem
* useful. See {@see WikiPage::triggerOpportunisticLinksUpdate()}.
* - useRecursiveLinksUpdate (bool): When true, triggers recursive jobs for each page.
*
* Metrics:
* - `refreshlinks_superseded_updates_total`: The number of times the job was cancelled
* because the target page had already been refreshed by a different edit or job.
* The job is considered to have succeeded in this case.
*
* - `refreshlinks_warnings_total`: The number of times the job failed due to a recoverable issue.
* Possible `reason` label values include:
* - `lag_wait_failed`: The job timed out while waiting for replication.
*
* - `refreshlinks_failures_total`: The number of times the job failed.
* The `reason` label may be:
* - `page_not_found`: The target page did not exist.
* - `rev_not_current`: The target revision was no longer the latest revision for the target page.
* - `rev_not_found`: The target revision was not found.
* - `lock_failure`: The job failed to acquire an exclusive lock to refresh the target page.
*
* - `refreshlinks_parsercache_operations_total`: The number of times the job attempted
* to fetch parser output from the parser cache.
* Possible `status` label values include:
* - `cache_hit`: The parser output was found in the cache.
* - `cache_miss`: The parser output was not found in the cache.
*
* @ingroup JobQueue
* @see RefreshSecondaryDataUpdate
* @see WikiPage::doSecondaryDataUpdates()
*/
class RefreshLinksJob extends Job {
	/** @var int Lag safety margin when comparing root job times to last-refresh times */
	private const NORMAL_MAX_LAG = 10;
	/** @var int How many seconds to wait for replica DBs to catch up */
	private const LAG_WAIT_TIMEOUT = 15;

	/**
	 * @param PageIdentity $page The page to refresh (or, for recursive jobs, the
	 *  changed template/file whose backlinks should be refreshed)
	 * @param array $params Job parameters; see the class-level documentation
	 */
	public function __construct( PageIdentity $page, array $params ) {
		if ( empty( $params['pages'] ) && !$page->canExist() ) {
			// BC with the Title class
			throw new PageAssertionException(
				'The given PageIdentity {pageIdentity} does not represent a proper page',
				[ 'pageIdentity' => $page ]
			);
		}
		parent::__construct( 'refreshLinks', $page, $params );
		// Avoid the overhead of de-duplication when it would be pointless
		$this->removeDuplicates = (
			// Ranges rarely will line up
			!isset( $params['range'] ) &&
			// Multiple pages per job make matches unlikely
			!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
		);
		$this->params += [ 'causeAction' => 'RefreshLinksJob', 'causeAgent' => 'unknown' ];
		// Tell JobRunner to not automatically wrap run() in a transaction round.
		// Each runForTitle() call will manage its own rounds in order to run DataUpdates
		// and to avoid contention as well.
		$this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
	}

	/**
	 * @param PageIdentity $page
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newPrioritized( PageIdentity $page, array $params ) {
		$job = new self( $page, $params );
		$job->command = 'refreshLinksPrioritized';
		return $job;
	}

	/**
	 * @param PageIdentity $page
	 * @param array $params
	 * @return RefreshLinksJob
	 */
	public static function newDynamic( PageIdentity $page, array $params ) {
		$job = new self( $page, $params );
		$job->command = 'refreshLinksDynamic';
		return $job;
	}

	/**
	 * Dispatch to the recursive (variant a), multi-page (variant b), or
	 * single-page (variant c) code path depending on the job parameters;
	 * see the class-level documentation for the variants.
	 *
	 * @return bool Success
	 */
	public function run() {
		$ok = true;
		if ( !empty( $this->params['recursive'] ) ) {
			// Job to update all (or a range of) backlink pages for a page
			// When the base job branches, wait for the replica DBs to catch up to the primary.
			// From then on, we know that any template changes at the time the base job was
			// enqueued will be reflected in backlink page parses when the leaf jobs run.
			$services = MediaWikiServices::getInstance();
			if ( !isset( $this->params['range'] ) ) {
				$lbFactory = $services->getDBLoadBalancerFactory();
				if ( !$lbFactory->waitForReplication( [
					'timeout' => self::LAG_WAIT_TIMEOUT
				] ) ) {
					// only try so hard, keep going with what we have
					$stats = $services->getStatsFactory();
					$stats->getCounter( 'refreshlinks_warnings_total' )
						->setLabel( 'reason', 'lag_wait_failed' )
						->copyToStatsdAt( 'refreshlinks_warning.lag_wait_failed' )
						->increment();
				}
			}
			// Carry over information for de-duplication
			$extraParams = $this->getRootJobParams();
			$extraParams['triggeredRecursive'] = true;
			// Carry over cause information for logging
			$extraParams['causeAction'] = $this->params['causeAction'];
			$extraParams['causeAgent'] = $this->params['causeAgent'];
			// Convert this into no more than $wgUpdateRowsPerJob RefreshLinks per-title
			// jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
			$jobs = BacklinkJobUtils::partitionBacklinkJob(
				$this,
				$services->getMainConfig()->get( MainConfigNames::UpdateRowsPerJob ),
				1, // job-per-title
				[ 'params' => $extraParams ]
			);
			$services->getJobQueueGroup()->push( $jobs );
		} elseif ( isset( $this->params['pages'] ) ) {
			// Job to update link tables for a set of titles
			foreach ( $this->params['pages'] as [ $ns, $dbKey ] ) {
				$title = Title::makeTitleSafe( $ns, $dbKey );
				if ( $title && $title->canExist() ) {
					$ok = $this->runForTitle( $title ) && $ok;
				} else {
					$ok = false;
					$this->setLastError( "Invalid title ($ns,$dbKey)." );
				}
			}
		} else {
			// Job to update link tables for a given title
			$ok = $this->runForTitle( $this->title );
		}
		return $ok;
	}

	/**
	 * Refresh the link tables for a single page: acquire the per-page lock,
	 * obtain (or regenerate) the parser output, and run the secondary data updates.
	 *
	 * @param PageIdentity $pageIdentity
	 * @return bool Success; false requests a retry by the job queue
	 */
	protected function runForTitle( PageIdentity $pageIdentity ) {
		$services = MediaWikiServices::getInstance();
		$stats = $services->getStatsFactory();
		$renderer = $services->getRevisionRenderer();
		$parserCache = $services->getParserCache();
		$lbFactory = $services->getDBLoadBalancerFactory();
		$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
		// Load the page from the primary DB
		$page = $services->getWikiPageFactory()->newFromTitle( $pageIdentity );
		$page->loadPageData( IDBAccessObject::READ_LATEST );
		if ( !$page->exists() ) {
			// Probably due to concurrent deletion or renaming of the page
			$logger = LoggerFactory::getInstance( 'RefreshLinksJob' );
			$logger->warning(
				'The page does not exist. Perhaps it was deleted?',
				[
					'page_title' => $this->title->getPrefixedDBkey(),
					'job_params' => $this->getParams(),
					'job_metadata' => $this->getMetadata()
				]
			);
			$this->incrementFailureCounter( $stats, 'page_not_found' );
			// retry later to handle unlucky race condition
			return false;
		}
		// Serialize link update job by page ID so they see each others' changes.
		// The page ID and latest revision ID will be queried again after the lock
		// is acquired to bail if they are changed from that of loadPageData() above.
		// Serialize links updates by page ID so they see each others' changes
		$dbw = $lbFactory->getPrimaryDatabase();
		/** @noinspection PhpUnusedLocalVariableInspection */
		$scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(), 'job' );
		if ( $scopedLock === null ) {
			// Another job is already updating the page, likely for a prior revision (T170596)
			$this->setLastError( 'LinksUpdate already running for this page, try again later.' );
			$this->incrementFailureCounter( $stats, 'lock_failure' );
			// retry later when overlapping job for previous rev is done
			return false;
		}
		if ( $this->isAlreadyRefreshed( $page ) ) {
			// this job has been superseded, e.g. by overlapping recursive job
			// for a different template edit, or by direct edit or purge.
			$stats->getCounter( 'refreshlinks_superseded_updates_total' )
				->copyToStatsdAt( 'refreshlinks_outcome.good_update_superseded' )
				->increment();
			// treat as success
			return true;
		}
		// These can be fairly long-running jobs, while commitAndWaitForReplication
		// releases primary snapshots, let the replica release their snapshot as well
		$lbFactory->flushReplicaSnapshots( __METHOD__ );
		// Parse during a fresh transaction round for better read consistency
		$lbFactory->beginPrimaryChanges( __METHOD__ );
		$output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
		$options = $this->getDataUpdateOptions();
		$lbFactory->commitPrimaryChanges( __METHOD__ );
		if ( !$output ) {
			// probably raced out.
			// Specific refreshlinks_outcome metric sent by getCurrentRevisionIfUnchanged().
			// FIXME: Why do we retry this? Can this be a cancellation?
			return false;
		}
		// Tell DerivedPageDataUpdater to use this parser output
		$options['known-revision-output'] = $output;
		// Execute corresponding DataUpdates immediately
		$page->doSecondaryDataUpdates( $options );
		InfoAction::invalidateCache( $page );
		// NOTE: Since 2019 (f588586e) this no longer saves the new ParserOutput to the ParserCache!
		// This means the page will have to be rendered on-the-fly when it is next viewed.
		// This is to avoid spending limited ParserCache capacity on rarely visited pages.
		// TODO: Save the ParserOutput to ParserCache by calling WikiPage::updateParserCache()
		// for pages that are likely to benefit (T327162).
		// Commit any writes here in case this method is called in a loop.
		// In that case, the scoped lock will fail to be acquired.
		$lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
		return true;
	}

	/**
	 * @return string|null Minimum lag-safe TS_MW timestamp with regard to root job creation
	 */
	private function getLagAwareRootTimestamp() {
		// Get the timestamp of the change that triggered this job
		$rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
		if ( $rootTimestamp === null ) {
			return null;
		}
		if ( !empty( $this->params['isOpportunistic'] ) ) {
			// Neither clock skew nor DB snapshot/replica DB lag matter much for
			// such updates; focus on reusing the (often recently updated) cache
			$lagAwareTimestamp = $rootTimestamp;
		} else {
			// For transclusion updates, the template changes must be reflected
			$lagAwareTimestamp = wfTimestamp(
				TS_MW,
				(int)wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
			);
		}
		return $lagAwareTimestamp;
	}

	/**
	 * @param WikiPage $page
	 * @return bool Whether something updated the backlinks with data newer than this job
	 */
	private function isAlreadyRefreshed( WikiPage $page ) {
		$lagAwareTimestamp = $this->getLagAwareRootTimestamp();
		return ( $lagAwareTimestamp !== null && $page->getLinksTimestamp() > $lagAwareTimestamp );
	}

	/**
	 * @see DerivedPageDataUpdater::shouldGenerateHTMLOnEdit
	 * @return bool true if at least one of slots require rendering HTML on edit, false otherwise.
	 * This is needed for example in populating ParserCache.
	 */
	private function shouldGenerateHTMLOnEdit( RevisionRecord $revision ): bool {
		$services = MediaWikiServices::getInstance();
		foreach ( $revision->getSlots()->getSlotRoles() as $role ) {
			$slot = $revision->getSlots()->getSlot( $role );
			$contentHandler = $services->getContentHandlerFactory()->getContentHandler( $slot->getModel() );
			if ( $contentHandler->generateHTMLOnEdit() ) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Get the parser output if the page is unchanged from what was loaded in $page
	 *
	 * @param RevisionRenderer $renderer
	 * @param ParserCache $parserCache
	 * @param WikiPage $page Page already loaded with READ_LATEST
	 * @param StatsFactory $stats
	 * @return ParserOutput|null Combined output for all slots; might only contain metadata
	 */
	private function getParserOutput(
		RevisionRenderer $renderer,
		ParserCache $parserCache,
		WikiPage $page,
		StatsFactory $stats
	) {
		$revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
		if ( !$revision ) {
			// race condition?
			return null;
		}
		$cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
		$statsCounter = $stats->getCounter( 'refreshlinks_parsercache_operations_total' );
		if ( $cachedOutput && $this->canUseParserOutputFromCache( $cachedOutput, $revision ) ) {
			$statsCounter
				->setLabel( 'status', 'cache_hit' )
				->setLabel( 'html_changed', 'n/a' )
				->copyToStatsdAt( 'refreshlinks.parser_cached' )
				->increment();
			return $cachedOutput;
		}
		$causeAction = $this->params['causeAction'] ?? 'RefreshLinksJob';
		$parserOptions = $page->makeParserOptions( 'canonical' );
		// T371713: Temporary statistics collection code to determine
		// feasibility of Parsoid selective update
		$sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::ParsoidSelectiveUpdateSampleRate
		);
		$doSample = $sampleRate && mt_rand( 1, $sampleRate ) === 1;
		if ( $doSample && $cachedOutput === null ) {
			// In order to collect accurate statistics, check for
			// a dirty copy in the cache even if we wouldn't have
			// to otherwise.
			$cachedOutput = $parserCache->getDirty( $page, $parserOptions );
		}
		$renderedRevision = $renderer->getRenderedRevision(
			$revision,
			$parserOptions,
			null,
			[
				'audience' => $revision::RAW,
				'causeAction' => $causeAction,
				// Providing a previous parse potentially allows for
				// selective updates
				'previous-output' => $cachedOutput,
			]
		);
		$parseTimestamp = wfTimestampNow(); // timestamp that parsing started
		$output = $renderedRevision->getRevisionParserOutput( [
			// To avoid duplicate parses, this must match DerivedPageDataUpdater::shouldGenerateHTMLOnEdit() (T301309)
			'generate-html' => $this->shouldGenerateHTMLOnEdit( $revision )
		] );
		$output->setCacheTime( $parseTimestamp ); // notify LinksUpdate::doUpdate()
		// T371713: Temporary statistics collection code to determine
		// feasibility of Parsoid selective update
		if ( $doSample ) {
			$labels = [
				'source' => 'RefreshLinksJob',
				'type' => $cachedOutput === null ? 'full' : 'selective',
				'reason' => $causeAction,
				'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
				'opportunistic' => empty( $this->params['isOpportunistic'] ) ? 'false' : 'true',
			];
			$totalStat = $stats->getCounter( 'parsercache_selective_total' );
			$timeStat = $stats->getCounter( 'parsercache_selective_cpu_seconds' );
			foreach ( $labels as $key => $value ) {
				$totalStat->setLabel( $key, $value );
				$timeStat->setLabel( $key, $value );
			}
			$totalStat->increment();
			$timeStat->incrementBy( $output->getTimeProfile( 'cpu' ) );
		}
		// Collect stats on parses that don't actually change the page content.
		// In that case, we could abort here, and perhaps we could also avoid
		// triggering CDN purges (T369898).
		if ( !$cachedOutput ) {
			// There was no cached output
			$htmlChanged = 'unknown';
		} elseif ( $cachedOutput->getRawText() === $output->getRawText() ) {
			// We have cached output, but we couldn't be sure that it was still good.
			// So we parsed again, but the result turned out to be the same HTML as
			// before.
			$htmlChanged = 'no';
		} else {
			// Re-parsing yielded HTML different from the cached output.
			$htmlChanged = 'yes';
		}
		$statsCounter
			->setLabel( 'status', 'cache_miss' )
			->setLabel( 'html_changed', $htmlChanged )
			->copyToStatsdAt( 'refreshlinks.parser_uncached' )
			->increment();
		return $output;
	}

	/**
	 * Get the current revision record if it is unchanged from what was loaded in $page
	 *
	 * @param WikiPage $page Page already loaded with READ_LATEST
	 * @param StatsFactory $stats
	 * @return RevisionRecord|null The same instance that $page->getRevisionRecord() uses
	 */
	private function getCurrentRevisionIfUnchanged(
		WikiPage $page,
		StatsFactory $stats
	) {
		$title = $page->getTitle();
		// Get the latest ID since acquirePageLock() in runForTitle() flushed the transaction.
		// This is used to detect edits/moves after loadPageData() but before the scope lock.
		// This works around the chicken/egg problem of determining the scope lock key name
		$latest = $title->getLatestRevID( IDBAccessObject::READ_LATEST );
		$triggeringRevisionId = $this->params['triggeringRevisionId'] ?? null;
		if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
			// This job is obsolete and one for the latest revision will handle updates
			$this->incrementFailureCounter( $stats, 'rev_not_current' );
			$this->setLastError( "Revision $triggeringRevisionId is not current" );
			return null;
		}
		// Load the current revision. Note that $page should have loaded with READ_LATEST.
		// This instance will be reused in WikiPage::doSecondaryDataUpdates() later on.
		$revision = $page->getRevisionRecord();
		if ( !$revision ) {
			// revision just got deleted?
			$this->incrementFailureCounter( $stats, 'rev_not_found' );
			$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
			return null;
		} elseif ( $revision->getId() !== $latest || $revision->getPageId() !== $page->getId() ) {
			// Do not clobber over newer updates with older ones. If all jobs were FIFO and
			// serialized, it would be OK to update links based on older revisions since it
			// would eventually get to the latest. Since that is not the case (by design),
			// only update the link tables to a state matching the current revision's output.
			$this->incrementFailureCounter( $stats, 'rev_not_current' );
			$this->setLastError( "Revision {$revision->getId()} is not current" );
			return null;
		}
		return $revision;
	}

	/**
	 * Get the parser output from cache if it reflects the change that triggered this job
	 *
	 * @param ParserCache $parserCache
	 * @param WikiPage $page
	 * @param RevisionRecord $currentRevision
	 * @param StatsFactory $stats
	 * @return ParserOutput|null
	 */
	private function getParserOutputFromCache(
		ParserCache $parserCache,
		WikiPage $page,
		RevisionRecord $currentRevision,
		StatsFactory $stats
	): ?ParserOutput {
		// Parsoid can do selective updates, so it is always worth the I/O
		// to check for a previous parse.
		$parserOptions = $page->makeParserOptions( 'canonical' );
		if ( $parserOptions->getUseParsoid() ) {
			return $parserCache->getDirty( $page, $parserOptions ) ?: null;
		}
		// If page_touched changed after this root job, then it is likely that
		// any views of the pages already resulted in re-parses which are now in
		// cache. The cache can be reused to avoid expensive parsing in some cases.
		$rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
		if ( $rootTimestamp !== null ) {
			$opportunistic = !empty( $this->params['isOpportunistic'] );
			if ( $page->getTouched() >= $rootTimestamp || $opportunistic ) {
				// Cache is suspected to be up-to-date so it's worth the I/O of checking.
				// We call canUseParserOutputFromCache() later to check if it's usable.
				return $parserCache->getDirty( $page, $parserOptions ) ?: null;
			}
		}
		return null;
	}

	/**
	 * Check whether a cached parser output can stand in for a fresh parse
	 *
	 * @param ParserOutput $cachedOutput
	 * @param RevisionRecord $currentRevision
	 * @return bool True if the cached output was rendered from the current revision
	 *  and is no older than the lag-aware root job timestamp
	 */
	private function canUseParserOutputFromCache(
		ParserOutput $cachedOutput,
		RevisionRecord $currentRevision
	) {
		// As long as the cache rev ID matches the current rev ID and it reflects
		// the job's triggering change, then it is usable.
		return $cachedOutput->getCacheRevisionId() == $currentRevision->getId()
			&& $cachedOutput->getCacheTime() >= $this->getLagAwareRootTimestamp();
	}

	/**
	 * Increment the RefreshLinks failure counter metric with the given reason.
	 *
	 * @param StatsFactory $stats
	 * @param string $reason Well-known failure reason string
	 * @return void
	 */
	private function incrementFailureCounter( StatsFactory $stats, $reason ): void {
		$stats->getCounter( 'refreshlinks_failures_total' )
			->setLabel( 'reason', $reason )
			->copyToStatsdAt( "refreshlinks_outcome.bad_$reason" )
			->increment();
	}

	/**
	 * Build the options array for WikiPage::doSecondaryDataUpdates(),
	 * carrying over the recursion flag, cause info, and triggering user.
	 *
	 * @return array
	 */
	private function getDataUpdateOptions() {
		$options = [
			'recursive' => !empty( $this->params['useRecursiveLinksUpdate'] ),
			// Carry over cause so the update can do extra logging
			'causeAction' => $this->params['causeAction'],
			'causeAgent' => $this->params['causeAgent']
		];
		if ( !empty( $this->params['triggeringUser'] ) ) {
			$userInfo = $this->params['triggeringUser'];
			if ( $userInfo['userId'] ) {
				$options['triggeringUser'] = User::newFromId( $userInfo['userId'] );
			} else {
				// Anonymous, use the username
				$options['triggeringUser'] = User::newFromName( $userInfo['userName'], false );
			}
		}
		return $options;
	}

	/**
	 * Strip fields that must not affect duplicate detection (cause info, and
	 * the job title for multi-page jobs, where the title is the changed
	 * template/file rather than a page being refreshed).
	 *
	 * @return array Deduplication info map
	 */
	public function getDeduplicationInfo() {
		$info = parent::getDeduplicationInfo();
		unset( $info['causeAction'] );
		unset( $info['causeAgent'] );
		if ( is_array( $info['params'] ) ) {
			// For per-pages jobs, the job title is that of the template that changed
			// (or similar), so remove that since it ruins duplicate detection
			if ( isset( $info['params']['pages'] ) ) {
				unset( $info['namespace'] );
				unset( $info['title'] );
			}
		}
		return $info;
	}

	/**
	 * @return int Number of pages this job will actually refresh (0 for the
	 *  recursive variant, which only partitions work into other jobs)
	 */
	public function workItemCount() {
		if ( !empty( $this->params['recursive'] ) ) {
			return 0; // nothing actually refreshed
		} elseif ( isset( $this->params['pages'] ) ) {
			return count( $this->params['pages'] );
		}
		return 1; // one title
	}
}