User:Novem Linguae/Scripts/DraftCleaner.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page. |
Documentation for this user script can be added at User:Novem Linguae/Scripts/DraftCleaner. |
// <nowiki>
// === Compiled with Novem Linguae's publish.php script ======================
$(async function() {
// === main.js ======================================================
/* THIS SCRIPT IS BUGGY ABOUT 10% OF THE TIME. Be sure to check the diff that pops up before submitting.
- Adds "Run DraftCleaner" link to the left sidebar
- Top uses:
- remove extra line breaks (for example, 3 enters in a row)
- in the first sentence, bold the title
- convert curly quotes to regular quotes
- put <ref>s after periods
- clean external links out of the main article area (turn them into references)
- add ==References== section
- remove bold from headings
- Other uses:
- converts [inline external links] to <ref>s
- removes spaces in front of <ref>s
- get rid of any level 2 heading that contains the article's title
- converts =TitleHeading= to ==H2Heading==
- replaces Covid-19 with COVID-19
- removes enter characters between <ref>s
- trims whitespace at beginning and end
- remove self wikilinks to the article title
- convert ==Reference== to ==References==
- turn bare URLs into references
- fix errant spaces at beginning of lines, which makes a blockquote looking thing
- delete whitespace at the end of lines
- convert double spaces to single spaces
- remove blank heading
- in refs, turn short links into long links, so you can see the domain
- change year range dash to ndash
- if in draftspace, and draft in categories, disable the categories
- delete <br> tags; in drafts, these are usually poorly placed (this step is currently disabled in cleanDraft)
- fix empty references section
- right align images (this step is currently disabled in cleanDraft: too many false positives)
- remove whitespace if that is the only character on a line
- correct capitalization of see also, references, further reading, external links
- if article has headings but no lead, remove first heading
- replace unicode bullets with asterisks
Add one of the following to your User:yourName/common.js (at the top) to change the position where DraftCleaner puts its link:
window.draftCleanerPutInToolsMenu = true;
window.draftCleanerPutInMoreMenu = true;
This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/
( function () {
/**
 * Fetch the current wikitext of a page through the MediaWiki parse API.
 *
 * @param {string} title Page name including namespace; it is URL-encoded here.
 * @return {Promise<string>} The page's wikitext, or '' when wgCurRevisionId is falsy
 *   (the page has no current revision, i.e. it is deleted).
 * @throws {Error} When the API response carries no wikitext (for example an
 *   `error` payload). Previously this surfaced as a TypeError thrown inside the
 *   jQuery success callback instead of as a rejection of the returned promise.
 */
async function getWikicode( title ) {
	const pageIsDeleted = !mw.config.get( 'wgCurRevisionId' );
	if ( pageIsDeleted ) {
		return '';
	}

	// Keep the caller's argument untouched; encode into a separate local.
	const encodedTitle = encodeURIComponent( title );
	const result = await $.ajax( {
		url: 'https://en.wikipedia.org/w/api.php?action=parse&page=' + encodedTitle + '&prop=wikitext&formatversion=2&format=json',
		dataType: 'json'
	} );

	const hasWikitext = Boolean( result && result.parse ) && typeof result.parse.wikitext === 'string';
	if ( !hasWikitext ) {
		const info = result && result.error && result.error.info;
		throw new Error( 'DraftCleaner: could not load wikitext' + ( info ? ': ' + info : '' ) );
	}
	return result.parse.wikitext;
}
/**
 * Build a hidden POST form targeting index.php?action=submit for the page and
 * submit it, which takes the browser to the "Show changes" screen.
 *
 * @param {string} titleWithNamespaceAndUnderscores Page name; URL-encoded here.
 * @param {string} wikicode Value for the wpTextbox1 field.
 * @param {string} editSummary Value for the wpSummary field.
 */
function goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, editSummary ) {
	const encodedTitle = encodeURIComponent( titleWithNamespaceAndUnderscores );
	const baseURL = mw.config.get( 'wgServer' ) + mw.config.get( 'wgScriptPath' ) + '/';

	// Hidden fields, in the order they are appended to the form.
	const hiddenFields = [
		[ 'wpTextbox1', wikicode ],
		[ 'wpSummary', editSummary ],
		[ 'mode', 'preview' ],
		[ 'wpDiff', 'Show changes' ],
		[ 'wpUltimateParam', '1' ]
	];

	// https://stackoverflow.com/a/12464290/3480193
	const $form = $( `<form action="${ baseURL }index.php?title=${ encodedTitle }&action=submit" method="POST"/>` );
	for ( const [ fieldName, fieldValue ] of hiddenFields ) {
		$form.append( $( '<input type="hidden" name="' + fieldName + '">' ).val( fieldValue ) );
	}

	// it has to be added somewhere into the <body>
	$form.appendTo( $( document.body ) );
	$form.trigger( 'submit' );
}
/**
 * Read the current page's name from the MediaWiki config.
 *
 * @return {string} wgPageName: the page name including the namespace name,
 *   with spaces replaced by underscores.
 */
function getArticleName() {
	const pageName = mw.config.get( 'wgPageName' );
	return pageName;
}
// don't run when not viewing articles
const action = mw.config.get( 'wgAction' );
const isNotViewing = action != 'view';
if ( isNotViewing ) {
return;
}
// don't run when viewing diffs
const isDiff = mw.config.get( 'wgDiffNewId' );
if ( isDiff ) {
return;
}
// Don't run in virtual namespaces
const isVirtualNamespace = mw.config.get( 'wgNamespaceNumber' ) < 0;
if ( isVirtualNamespace ) {
return;
}
let menuID = 'p-navigation';
// @ts-ignore
if ( window.draftCleanerPutInToolsMenu ) {
menuID = 'p-tb';
// @ts-ignore
} else if ( window.draftCleanerPutInMoreMenu ) {
menuID = 'p-cactions';
}
const titleWithNamespaceAndUnderscores = getArticleName();
const namespaceNumber = mw.config.get( 'wgNamespaceNumber' );
let running = false;
// Add DraftCleaner to the toolbar
mw.loader.using( [ 'mediawiki.util' ], () => {
mw.util.addPortletLink( menuID, '#', 'Run DraftCleaner', 'DraftCleanerLink' );
$( '#DraftCleanerLink' ).on( 'click', async () => {
// prevent running the script while script is already in progress
if ( running ) {
return;
}
running = true;
mw.notify( 'Parsing page content...' );
// get page wikicode
const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace( /_/g, ' ' );
const originalWikicode = await getWikicode( titleWithNamespaceAndUnderscores );
let wikicode = originalWikicode;
const dc = new DraftCleaner();
wikicode = dc.cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces );
const needsChanges = wikicode != originalWikicode;
if ( needsChanges ) {
const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
await goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, summary );
} else {
mw.notify( 'No changes needed!' );
}
} );
} );
}() );
// === modules/DraftCleaner.js ======================================================
/**
 * Collection of regex-based wikitext cleanup steps. cleanDraft() runs them in
 * a fixed order; each step takes wikitext and returns the cleaned wikitext.
 * Several steps rely on the StringFilter class defined elsewhere in this file.
 */
class DraftCleaner {
	/**
	 * Run every enabled cleanup step, in order.
	 *
	 * @param {string} wikicode Page wikitext.
	 * @param {number} namespaceNumber Namespace of the page (0 is treated as mainspace, 118 as draftspace).
	 * @param {string} titleWithNamespaceAndSpaces Page title including namespace, spaces not underscores.
	 * @return {string} Cleaned wikitext.
	 */
	cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces ) {
		// run before other stuff
		wikicode = this.deleteSomeHTMLTags( wikicode );
		wikicode = this.deleteNonAFCDraftTags( wikicode );
		// Use the namespaceNumber argument rather than reading the global `mw`,
		// so this class honours its parameter and runs outside a MediaWiki page.
		wikicode = this.deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber );
		wikicode = this.fixWikilinksContainingURL( wikicode );
		wikicode = this.fixExternalLinksToWikipediaArticles( wikicode );
		wikicode = this.deleteWeirdUnicodeCharacters( wikicode );
		wikicode = this.trimEveryLine( wikicode );
		wikicode = this.convertH1ToH2( wikicode );
		wikicode = this.convertVeryLongHeadingToParagraph( wikicode );
		wikicode = this.deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.capitalizeCOVID19( wikicode );
		wikicode = this.removeBoldFromHeadings( wikicode );
		wikicode = this.convertReferenceToReferences( wikicode );
		wikicode = this.deleteMultipleReferenceTags( wikicode );
		wikicode = this.addReferencesSectionIfMissing( wikicode );
		wikicode = this.fixEmptyReferencesSection( wikicode );
		wikicode = this.deleteWhitespaceAtEndOfLines( wikicode );
		wikicode = this.convertSmartQuotesToRegularQuotes( wikicode );
		// wikicode = this.fixWordEmphasizedWithSingleQuotes(wikicode); // most of these appear in citations as names of newspaper articles, arguably should keep these single quotes
		wikicode = this.convertDoubleSpacesToSingleSpaces( wikicode );
		wikicode = this.deleteBlankHeadings( wikicode );
		wikicode = this.changeYearRangeDashToNDash( wikicode );
		wikicode = this.disableCategoriesInDraftspace( wikicode, namespaceNumber );
		// wikicode = this.deleteBRTagsOutsideInfoboxes(wikicode, namespaceNumber); // edge case in image captions, and probably other places
		// wikicode = this.rightAlignImages(wikicode); // commenting out, too many false positives in featured articles
		wikicode = this.correctCapitalizationOfEndMatterHeaders( wikicode );
		wikicode = this.ifNoLeadSectionDeleteFirstHeading( wikicode );
		wikicode = this.deleteCopyPastedEditAndEditSource( wikicode );
		wikicode = this.replaceUnicodeBulletsWithAsterisks( wikicode );
		wikicode = this.deleteEmptySections( wikicode );
		wikicode = this.fixHeadingsInAllCaps( wikicode );
		wikicode = this.deleteDuplicateReferencesSection( wikicode );
		wikicode = this.deleteBlankLinesBetweenBullets( wikicode );
		wikicode = this.removeUnderscoresFromWikilinks( wikicode );
		wikicode = this.fixPipedWikilinksWithIdenticalParameters( wikicode );
		wikicode = this.removeBorderFromImagesInInfoboxes( wikicode );
		wikicode = this.removeExtraAFCSubmissionTemplates( wikicode );
		wikicode = this.moveAFCSubmissionTemplatesToTop( wikicode );

		// Ideas not implemented yet:
		// all ==sections== should start with a capital letter
		// after swap, if citation has no spaces on either side, and is not touching two other citations, add a space on the right
		// strip [[File: from infobox's image field
		// example 1: | image = [[File:SAMIR 1626.png|thumb|Samir Mohanty]]
		// example 2: | image = [[File:SAMIR 1626.png]]
		// trim whitespace inside refs, e.g. <ref> abc </ref>
		// replace unreliable sources with {{cn}}.
		// if adjacent to other sources, just delete
		// if ref is used multiple times, account for that
		// duplicate citation fixer
		// move refs that are below {{Reflist}}, to above {{Reflist}}
		// move refs out of headings
		// delete AFC submission templates located mid-article, they end up self-hiding then appear as inexplicable whitespace. example: {{AfC submission|t||ts=20211212134609|u=Doezdemir|ns=118|demo=}}<!-- Important, do not remove this line before article has been created. -->
		// fix redundant wikilinks, e.g. [[Spotify|Spotify]]
		// change youtu.be to youtube.com, to avoid the blacklist. test: https://youtu.be/bnWHeRNIPiA
		// delete ©®™
		// convert all <references /> to {{Reflist}}. <references /> doesn't use two column format and looks weird with a large # of references
		// remove px from images, should use default

		// convert refs toward the end. we want deleteSomeHTMLTags() to run first, to get rid of tags around URLs
		wikicode = this.bareURLToRef( wikicode );
		wikicode = this.refShortLinkToLongLink( wikicode );
		wikicode = this.inlineExternalLinksToRefs( wikicode );
		wikicode = this.moveRefsOutsideOfItalics( wikicode );
		wikicode = this.deleteSpacesInFrontOfRefs( wikicode );
		wikicode = this.deleteNewLinesBetweenRefs( wikicode );
		wikicode = this.swapRefPeriodWithPeriodRef( wikicode );
		wikicode = this.swapRefCommaWithCommaRef( wikicode );

		// stuff we want to run at the end
		wikicode = this.fixDoublePeriod( wikicode ); // need test cases. I've seen this one not work.
		wikicode = this.boldArticleTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.trimEmptyLines( wikicode );
		wikicode = this.deleteMoreThanTwoEntersInARow( wikicode );
		return wikicode;
	}

	// surround bare URL's with <ref></ref>
	// Useful for seeing all URL's in the reflist section, and for CiteHighlighter ref quality highlighting
	bareURLToRef( wikicode ) {
		return wikicode.replace( /^(http[^\n ]*) {0,}$/gm, '<ref>$1</ref>' );
	}

	// in refs, turn [short links] into long links, so you can see the domain
	// also fixes <ref> link </ref> issues with extra spaces in the ref
	refShortLinkToLongLink( wikicode ) {
		// <ref>[https://test.com''Test'']</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^'\]]*)(''[^\]]*)\] {0,}(<\/ref>)/gm, '$1$2 $3$4' );
		// <ref>[https://test.com Test]</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3' );
		return wikicode;
	}

	// convert inline external links to references
	// Everything from the first "External link" / "Further reading" / "Links"
	// heading onward is left untouched.
	inlineExternalLinksToRefs( wikicode ) {
		const sectionsToSkip = [ 'External link', 'Further reading', 'Links' ];
		const regExString = '== ?(?:' + sectionsToSkip.join( '|' ) + ')';
		const hasSectionToSkip = wikicode.match( new RegExp( regExString, 'i' ) );

		let topHalf = wikicode;
		let bottomHalf = '';
		if ( hasSectionToSkip ) {
			const regExToSplitArticle = new RegExp( '((' + regExString + ').*$)', 'is' );
			bottomHalf = wikicode.match( regExToSplitArticle )[ 1 ];
			topHalf = wikicode.replace( regExToSplitArticle, '' );
		}

		const sf = new StringFilter();
		const converted = sf.surgicalReplaceOutsideTags(
			/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
			'$2<ref>$1</ref>',
			topHalf,
			[ '<ref', '{{' ],
			[ '</ref>', '/>', '}}' ]
		);
		return converted + bottomHalf;
	}

	/**
	 * ''Test<ref></ref>'' => ''Test''<ref></ref>
	 */
	moveRefsOutsideOfItalics( wikicode ) {
		return wikicode.replace( /''([^']+)(<ref>[^<]+<\/ref>)''/gm, '\'\'$1\'\'$2' );
	}

	// get rid of spaces in front of <refs>
	deleteSpacesInFrontOfRefs( wikicode ) {
		return wikicode.replace( /(?<!(?:\||=)) {1,}<ref/gm, '<ref' );
	}

	// get rid of any level 2 heading that contains the article's title
	// this takes care of 2 common cases: heading at the bottom next to the {{AFC Submission}} template, and heading at the top above the lead
	deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces ) {
		// First the title without a leading "Draft:", then the title exactly as given.
		const titlesToLookFor = [
			titleWithNamespaceAndSpaces.replace( /^Draft:/, '' ),
			titleWithNamespaceAndSpaces
		];
		for ( const title of titlesToLookFor ) {
			const regEx = new RegExp( '^== ?' + this._escapeRegEx( title ) + ' ?==\n', 'gmi' );
			wikicode = wikicode.replace( regEx, '' );
		}
		return wikicode;
	}

	// remove wikilinks to article name
	// Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
	unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces ) {
		const wikilinkToLookFor = this._escapeRegEx( titleWithNamespaceAndSpaces.replace( /^Draft:/, '' ) );
		const regEx = new RegExp( '\\[\\[(' + wikilinkToLookFor + ')\\]\\]', 'gm' );
		return wikicode.replace( regEx, '$1' );
	}

	// if located in the first paragraph, bold the article title
	boldArticleTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let titleToLookFor = titleWithNamespaceAndSpaces;
		titleToLookFor = titleToLookFor.replace( /^Draft:/, '' );
		// drop a trailing parenthetical disambiguator, e.g. " (band)"
		titleToLookFor = titleToLookFor.replace( / \(.*?\)$/, '' );
		titleToLookFor = this._escapeRegEx( titleToLookFor );

		// Don't bold the title if it's already bolded. Fixes a "bold twice" bug.
		const hasBoldedTitle = wikicode.match( new RegExp( `'''${ titleToLookFor }'''`, 'i' ) );
		if ( hasBoldedTitle ) {
			return wikicode;
		}

		// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image. Also, only replace first match.
		const regEx = new RegExp( '^(The )?(' + titleToLookFor + ')([ <,])', 'mi' );
		return wikicode.replace( regEx, "$1'''$2'''$3" );
	}

	// /covid-19/i -> COVID-19
	// Careful of this string in URLs.
	capitalizeCOVID19( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceOutsideTags( / covid-19/gmi, ' COVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /\ncovid-19/gmi, '\nCOVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		return wikicode;
	}

	// remove bold from headings
	removeBoldFromHeadings( wikicode ) {
		return wikicode.replace( /^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3' );
	}

	// remove enter characters between <ref>s
	deleteNewLinesBetweenRefs( wikicode ) {
		return wikicode.replace( /<\/ref>\n{1,}<ref>/gm, '</ref><ref>' );
	}

	// convert ==Reference== to ==References==
	convertReferenceToReferences( wikicode ) {
		return wikicode.replace( /^== ?Reference ?==$/gmi, '== References ==' );
	}

	// TOOL - swap ref period with period ref
	swapRefPeriodWithPeriodRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, '.$1\n' );
		return wikicode;
	}

	// same as swapRefPeriodWithPeriodRef, but for commas
	swapRefCommaWithCommaRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}), /gm, ',$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}),\n/gm, ',$1\n' );
		return wikicode;
	}

	// fix errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it)
	trimEveryLine( wikicode ) {
		return this._mapLines( wikicode, ( line ) => {
			const trimmed = line.trim();
			// don't trim lines that start with | or }. It is common in FAs to indent these a bit.
			const keepAsIs = trimmed.startsWith( '|' ) || trimmed.startsWith( '}' );
			return keepAsIs ? line : trimmed;
		} );
	}

	// add references section if missing
	addReferencesSectionIfMissing( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
		if ( hasRefSection || hasReflist ) {
			return wikicode;
		}

		// If the page ends with an AfC submission template, insert above it; otherwise append.
		const bottomAFCTemplate = /(\n{{AfC submission[^}]*}}\s*)$/;
		if ( wikicode.match( bottomAFCTemplate ) ) {
			return wikicode.replace( bottomAFCTemplate, '\n\n== References ==\n{{Reflist}}$1' );
		}
		return wikicode + '\n\n== References ==\n{{Reflist}}';
	}

	// fix empty references section
	fixEmptyReferencesSection( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
		if ( !hasReflist && hasRefSection ) {
			// zero-width match right after the heading: inserts, replaces nothing
			wikicode = wikicode.replace( /(?<=== ?References ?==)/gmi, '\n{{Reflist}}' );
		}
		return wikicode;
	}

	// delete whitespace at the end of lines
	// (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
	deleteWhitespaceAtEndOfLines( wikicode ) {
		return wikicode.replace( /[ \t]+\n(?!\|)(?!\}\})/g, '\n' );
	}

	// convert smart quotes to regular quotes (and the ellipsis character to three periods)
	// Text inside [[File: ... ]] is skipped.
	convertSmartQuotesToRegularQuotes( wikicode ) {
		const sf = new StringFilter();
		const conversions = [
			[ /”/g, '"' ],
			[ /“/g, '"' ],
			[ /‘/g, "'" ],
			[ /’/g, "'" ],
			[ /…/g, '...' ]
		];
		for ( const [ regEx, plain ] of conversions ) {
			wikicode = sf.surgicalReplaceOutsideTags( regEx, plain, wikicode, [ '[[File:' ], [ ']]' ] );
		}
		return wikicode;
	}

	// convert double spaces to single spaces (only after a period)
	convertDoubleSpacesToSingleSpaces( wikicode ) {
		return wikicode.replace( /\. {2,}/g, '. ' );
	}

	// remove blank heading
	deleteBlankHeadings( wikicode ) {
		return wikicode.replace( /\n={2,} {0,}={2,}\n/g, '\n' );
	}

	// Change year range dash to ndash. Skip text inside of [[File:
	changeYearRangeDashToNDash( wikicode ) {
		const sf = new StringFilter();
		// (1111-1111)
		wikicode = sf.surgicalReplaceOutsideTags( /(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		// 1839 - 1926)
		wikicode = sf.surgicalReplaceOutsideTags( /( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		return wikicode;
	}

	// if in draftspace, and draft has categories, disable the categories
	disableCategoriesInDraftspace( wikicode, namespace ) {
		// loose equality kept on purpose so a numeric string still matches
		const draft = ( namespace == 118 );
		if ( draft ) {
			wikicode = wikicode.replace( /:?(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2' );
		}
		// this one category is always restored to its enabled form
		wikicode = wikicode.replace( /\[\[:Category:Created via preloaddraft\]\]/gi, '[[Category:Created via preloaddraft]]' );
		return wikicode;
	}

	// delete <br> in drafts, these are usually poorly placed
	deleteBRTagsOutsideInfoboxes( wikicode ) {
		return this._mapLines( wikicode, ( line ) => {
			// Skip lines that start with { or |. This is the easiest way to detect infoboxes
			if ( line.startsWith( '{' ) || line.startsWith( '|' ) ) {
				return line;
			}
			return line.replace( /<br ?\/?>/gm, '' );
		} );
	}

	// right align images
	rightAlignImages( wikicode ) {
		return wikicode.replace( /(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2' );
	}

	// correct capitalization of see also, references, further reading, external links
	correctCapitalizationOfEndMatterHeaders( wikicode ) {
		const headers = [ 'References', 'External links', 'Further reading', 'See also' ];
		for ( const header of headers ) {
			const regEx = new RegExp( '^(== ?)' + header + '( ?==)$', 'gmi' );
			wikicode = wikicode.replace( regEx, '$1' + header + '$2' );
		}
		return wikicode;
	}

	// if article has headings but no lead, remove first heading
	// Every "==" line seen before the first line of text is dropped.
	ifNoLeadSectionDeleteFirstHeading( wikicode ) {
		const lines = wikicode.split( '\n' );
		const lastIndex = lines.length - 1;
		let output = '';
		let seenText = false;
		lines.forEach( ( line, index ) => {
			// empty lines, lines with templates, or lines with images do not count.
			const doesNotCount = line.startsWith( '{' ) || line.length === 0 || line.startsWith( '[[File:' );
			if ( !doesNotCount ) {
				if ( line.startsWith( '==' ) && !seenText ) {
					return; // delete this line (and its newline) by not putting it in the output string
				}
				seenText = true;
			}
			output += line;
			if ( index !== lastIndex ) {
				output += '\n';
			}
		} );
		return output;
	}

	// delete [edit], [edit source], and [editar] from headings
	deleteCopyPastedEditAndEditSource( wikicode ) {
		// Applied one after another, in this order.
		for ( const label of [ 'edit', 'edit source', 'editar' ] ) {
			const regEx = new RegExp( '\\[' + label + '\\]( ?={2,})$', 'gm' );
			wikicode = wikicode.replace( regEx, '$1' );
		}
		return wikicode;
	}

	// at beginning of lines, replace unicode bullets with asterisks
	replaceUnicodeBulletsWithAsterisks( wikicode ) {
		return wikicode.replace( /^\s{0,}[·•●]\s{0,}/gm, '* ' );
	}

	// remove whitespace if that is the only character on a line
	trimEmptyLines( wikicode ) {
		return wikicode.replace( /^\s*$/gm, '' );
	}

	// no more than 2 newlines (1 blank line) in a row. except stubs, which get 3 newlines (2 blank lines)
	// Note: AFCH does this too
	deleteMoreThanTwoEntersInARow( wikicode ) {
		wikicode = wikicode.replace( /\n{3,}/gm, '\n\n' );
		wikicode = wikicode.replace( /\n{2}(\{\{[^}]*stub\}\})/gi, '\n\n\n$1' );
		return wikicode;
	}

	// convert =TitleHeading= to ==H2Heading==
	convertH1ToH2( wikicode ) {
		return wikicode.replace( /^= ?([^=]*?) ?=$/gm, '== $1 ==' );
	}

	// a heading longer than 150 characters (and without a <ref) is turned into plain text
	convertVeryLongHeadingToParagraph( wikicode ) {
		return this._mapLines( wikicode, ( line ) => {
			const isVeryLongHeading = line.length > 150 && line.match( /^==.*==$/gm ) && !line.match( /<ref/ );
			if ( !isVeryLongHeading ) {
				return line;
			}
			return line.replace( /^={1,}\s*(.*?)\s*={1,}$/m, '$1' );
		} );
	}

	// ' word ' in single quotes => " word " in double quotes (currently not called by cleanDraft)
	fixWordEmphasizedWithSingleQuotes( wikicode ) {
		return wikicode.replace( / '(\w+)' /g, ' "$1" ' );
	}

	// ".." after a letter or ] and before <ref, space, or newline => "."
	fixDoublePeriod( wikicode ) {
		return wikicode.replace( /(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.' );
	}

	/**
	 * Repair wikilinks whose target is a URL.
	 * The target pattern excludes "]" as well as "|": without that, a greedy
	 * match could run past the link's own "]]" into a later link, turning
	 * "[[http://x]] and [[Foo]]" into "[http://x]] and [[Foo]".
	 */
	fixWikilinksContainingURL( wikicode ) {
		// non-piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|\]]*)\]\]/g, '[[$1]]' );
		// piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|\]]*)\|([^\]]*)\]\]/g, '[[$1|$2]]' );
		// non-piped external link
		wikicode = wikicode.replace( /\[\[(http[^|\]]*)\]\]/g, '[$1]' );
		// piped external link
		wikicode = wikicode.replace( /\[\[(http[^|\]]*)\|([^\]]*)\]\]/g, '[$1 $2]' );
		return wikicode;
	}

	/**
	 * Turn external links to en.wikipedia.org articles into wikilinks. Any
	 * label after the URL is discarded.
	 */
	fixExternalLinksToWikipediaArticles( wikicode ) {
		// [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
		return wikicode.replace( /(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)( [^\]]*)?\]/gs, ( match, encodedTitle ) => {
			let title;
			try {
				title = decodeURIComponent( encodedTitle );
			} catch ( error ) {
				if ( !( error instanceof URIError ) ) {
					throw error;
				}
				// Malformed percent sequence (e.g. "100%_Pure"): keep the raw
				// text instead of aborting the whole cleanup run.
				title = encodedTitle;
			}
			return `[[${ title.replace( /_/g, ' ' ) }]]`;
		} );
	}

	// drop an empty line when both the line before and the line after start with *
	deleteBlankLinesBetweenBullets( wikicode ) {
		const lines = wikicode.split( '\n' );
		const lastIndex = lines.length - 1;
		const kept = lines.filter( ( current, index ) => {
			const hasNeighbours = index > 0 && index < lastIndex;
			const isBlankBetweenBullets = hasNeighbours &&
				current === '' &&
				lines[ index - 1 ].startsWith( '*' ) &&
				lines[ index + 1 ].startsWith( '*' );
			return !isBlankBetweenBullets;
		} );
		return kept.join( '\n' );
	}

	// NOTE(review): the character class below appears empty in this copy of the
	// file, and an empty class never matches, so as written this is a no-op.
	// Presumably it held invisible/private-use characters that were lost in
	// transit -- confirm against the original before relying on it.
	deleteWeirdUnicodeCharacters( wikicode ) {
		return wikicode.replace( /[]/g, '' );
	}

	// delete opening and closing <p>, <strong>, <em>, <nowiki>, <u>, and <big> tags (contents are kept)
	deleteSomeHTMLTags( wikicode ) {
		for ( const tag of [ 'p', 'strong', 'em', 'nowiki', 'u' ] ) {
			const regEx = new RegExp( '<\\/?' + tag + '( [^>]*)?\\/?>', 'g' );
			wikicode = wikicode.replace( regEx, '' );
		}
		wikicode = wikicode.replace( /(?:<big>|<\/big>)/g, '' );
		return wikicode;
	}

	// delete {{Preloaddraft submit}}, {{Draft}}, and the Wikidata reminder comment
	deleteNonAFCDraftTags( wikicode ) {
		wikicode = wikicode.replace( /{{Preloaddraft submit}}\n{0,2}/gi, '' );
		wikicode = wikicode.replace( /<!-- When you move this draft into article space, please link it to the Wikidata entry and remove the QID in the infobox code\. -->\n{0,2}/gi, '' );
		wikicode = wikicode.replace( /{{Draft}}\n{0,2}/gi, '' );
		return wikicode;
	}

	// in mainspace (namespace 0), delete {{AfC ...}} and {{Draft topics...}} templates
	deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber ) {
		// loose equality kept on purpose so a numeric string still matches
		const isMainspace = namespaceNumber == 0;
		if ( isMainspace ) {
			// {{AfC submission}}, {{AfC topic}}, {{AfC comment}}, etc.
			wikicode = wikicode.replace( /{{AfC [^}]*}}\n?/g, '' );
			wikicode = wikicode.replace( /{{Draft topics[^}]*}}\n?/g, '' );
		}
		return wikicode;
	}

	/**
	 * If the text of every heading is upper case, rewrite each heading in
	 * sentence case. The run of "=" on each side is captured and written back,
	 * so "=== EARLY LIFE ===" stays a level 3 heading instead of having its
	 * extra "=" treated as part of the heading text.
	 */
	fixHeadingsInAllCaps( wikicode ) {
		const headingRegEx = /(={2,}) {0,}(.+?) {0,}(={2,})/g;

		// create a concatenated string with the text from every heading
		const headingString = [ ...wikicode.matchAll( headingRegEx ) ]
			.map( ( match ) => match[ 2 ] )
			.join( '' );

		// if string only contains caps
		if ( !this._isUpperCase( headingString ) ) {
			return wikicode;
		}

		// convert all headings to sentence case; a replacer function keeps "$"
		// in heading text from being read as a replacement pattern
		return wikicode.replace( headingRegEx, ( match, opening, text, closing ) => {
			return opening + ' ' + this._toSentenceCase( text.trim() ) + ' ' + closing;
		} );
	}

	// delete a See also / External links heading that is the last thing on the page
	deleteEmptySections( wikicode ) {
		return wikicode.replace( /\n*== ?(?:See also|External links) ?==\n*$/, '' );
	}

	// if there is more than one References heading, try removing the boilerplate references section
	deleteDuplicateReferencesSection( wikicode ) {
		const referencesHeading = /==\s*References\s*==/gi;
		const matches = wikicode.match( referencesHeading );
		if ( matches === null || matches.length <= 1 ) {
			return wikicode;
		}

		// run regexes that are likely to delete the extra section
		const boilerplateSection = [
			'== References ==',
			'<!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->',
			'{{reflist}}'
		].join( '\n' );
		// string pattern: only the first occurrence is removed
		const attempt = wikicode.replace( boilerplateSection, '' );

		// only accept the attempt if exactly one References heading remains
		const matches2 = attempt.match( referencesHeading );
		if ( matches2 !== null && matches2.length === 1 ) {
			wikicode = attempt.trim();
			wikicode = wikicode.replace( referencesHeading, '== References ==' );
		}
		return wikicode;
	}

	// [[Some_page]] => [[Some page]]
	removeUnderscoresFromWikilinks( wikicode ) {
		const sf = new StringFilter();
		return sf.surgicalReplaceInsideTags( /_/g, ' ', wikicode, [ '[[' ], [ ']]' ] );
	}

	// [[Spotify|Spotify]] => [[Spotify]]
	fixPipedWikilinksWithIdenticalParameters( wikicode ) {
		const matches = wikicode.matchAll( /\[\[([^|\]]+)\|([^\]]+)\]\]/g );
		for ( const match of matches ) {
			const target = match[ 1 ];
			const label = match[ 2 ];
			if ( target === label ) {
				wikicode = this._replaceAll( wikicode, `[[${ target }|${ target }]]`, `[[${ target }]]` );
			}
		}
		return wikicode;
	}

	// | logo = [[File:Example.png|200px]] => | logo = Example.png (same for | cover =)
	removeBorderFromImagesInInfoboxes( wikicode ) {
		wikicode = wikicode.replace( /(\|\s*logo\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		wikicode = wikicode.replace( /(\|\s*cover\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		return wikicode;
	}

	/** These often hide towards the bottom of a draft. When the draft is submitted, unsubmitted templates (t) detect this and show up as blank, creating a weird extra line break. So this basically fixes the line break. */
	removeExtraAFCSubmissionTemplates( wikicode ) {
		const hasSubmittedTemplate = wikicode.match( /{{AfC submission\|\|/ );
		const hasUnsubmittedTemplate = wikicode.match( /{{AfC submission\|t\|/ );
		if ( hasSubmittedTemplate && hasUnsubmittedTemplate ) {
			wikicode = wikicode.replace( /{{AfC submission\|t\|[^}}]*\}\}\n?/gm, '' );
		}
		return wikicode;
	}

	// if the page ends with an AfC submission template, put it at the top instead, followed by ----
	// NOTE(review): every {{AfC submission ...}} template is deleted, but only
	// the bottom one is re-inserted; a different submission template elsewhere
	// on the page would be lost. Confirm this is intended.
	moveAFCSubmissionTemplatesToTop( wikicode ) {
		const hasTemplateAtBottom = wikicode.match( /\n[^\n]+\n*({{AfC submission[^}]*}})\s*$/i );
		if ( hasTemplateAtBottom ) {
			// delete all submission templates
			wikicode = wikicode.replace( /{{AfC submission[^}}]*\}\}\n?/gm, '' );
			// insert template at top
			wikicode = hasTemplateAtBottom[ 1 ] + '\n----\n\n' + wikicode;
		}
		return wikicode;
	}

	// if the page has both {{Reflist}} and <references />, delete the <references /> tags
	deleteMultipleReferenceTags( wikicode ) {
		const hasReflist = wikicode.match( /{{Reflist}}/i );
		const hasReferencesTag = wikicode.match( /<references ?\/>/i );
		if ( hasReflist && hasReferencesTag ) {
			// delete all references tags
			wikicode = wikicode.replace( /<references ?\/>\n?/gi, '' );
		}
		return wikicode;
	}

	// true when upper-casing the string changes nothing (so '' and digit-only strings count)
	_isUpperCase( str ) {
		return str === str.toUpperCase();
	}

	// first character upper case, everything after it lower case
	_toSentenceCase( string ) {
		return string.charAt( 0 ).toUpperCase() + string.slice( 1 ).toLowerCase();
	}

	/**
	 * Replace every literal occurrence of needle with replacement.
	 * The replacement goes through a replacer function so that "$$", "$&" and
	 * similar sequences in it are inserted verbatim, not read as patterns.
	 */
	_replaceAll( haystack, needle, replacement ) {
		const regex = new RegExp( this._escapeRegEx( needle ), 'g' );
		return haystack.replace( regex, () => replacement );
	}

	// backslash-escape every regex metacharacter so the string can be used inside a RegExp
	_escapeRegEx( string ) {
		return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
	}

	/**
	 * Apply transform to each line and join the results with "\n" again.
	 *
	 * @param {string} wikicode
	 * @param {function(string): string} transform
	 * @return {string}
	 */
	_mapLines( wikicode, transform ) {
		return wikicode.split( '\n' ).map( ( line ) => transform( line ) ).join( '\n' );
	}
}
// === modules/StringFilter.js ======================================================
/**
 * Cuts a long string into chunks at a set of opening/closing tag strings, so
 * that a regex search/replace can be limited to the chunks that are (or are
 * not) introduced by an opening tag.
 */
class StringFilter {
	/**
	 * Run the replacement on every chunk EXCEPT those that begin with one of the
	 * opening tags; those chunks are treated as off limits and copied verbatim.
	 *
	 * @param {RegExp} regex
	 * @param {string} replacement
	 * @param {string} haystack
	 * @param {string[]} openingTags
	 * @param {string[]} closingTags
	 * @return {string}
	 */
	surgicalReplaceOutsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const chunks = this._splitStringUsingMultiplePatterns( haystack, openingTags.concat( closingTags ) );
		return chunks
			.map( ( chunk ) => {
				const isOffLimits = openingTags.some( ( tag ) => chunk.startsWith( tag ) );
				return isOffLimits ? chunk : chunk.replace( regex, replacement );
			} )
			.join( '' );
	}

	/**
	 * Run the replacement ONLY on chunks that begin with one of the opening
	 * tags; every other chunk is copied verbatim. The replacement is applied
	 * once per opening tag that the chunk (as it stands at that point) starts with.
	 *
	 * @param {RegExp} regex
	 * @param {string} replacement
	 * @param {string} haystack
	 * @param {string[]} openingTags
	 * @param {string[]} closingTags
	 * @return {string}
	 */
	surgicalReplaceInsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const chunks = this._splitStringUsingMultiplePatterns( haystack, openingTags.concat( closingTags ) );
		return chunks
			.map( ( chunk ) => openingTags.reduce(
				( text, tag ) => ( text.startsWith( tag ) ? text.replace( regex, replacement ) : text ),
				chunk
			) )
			.join( '' );
	}

	/**
	 * Split the string in front of every occurrence of any pattern. Unlike
	 * String.prototype.split, the pattern stays in the result, at the start of
	 * the chunk it introduces. Algorithm isn't perfect, will fail with this
	 * pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	 *
	 * @param {string} string
	 * @param {string[]} patterns
	 * @return {string[]}
	 */
	_splitStringUsingMultiplePatterns( string, patterns ) {
		// Offsets at which a chunk starts. Offset 0 always starts the first
		// chunk, so a pattern match there must not create an empty chunk.
		const chunkStarts = [ 0 ];
		for ( let index = 1; index < string.length; index++ ) {
			const patternStartsHere = patterns.some( ( pattern ) => string.startsWith( pattern, index ) );
			if ( patternStartsHere ) {
				chunkStarts.push( index );
			}
		}

		// Each chunk runs up to the next chunk start; the last runs to the end of the string.
		return chunkStarts.map( ( start, position ) => {
			const isLast = position + 1 === chunkStarts.length;
			return isLast ? string.substring( start ) : string.slice( start, chunkStarts[ position + 1 ] );
		} );
	}
}
});
// </nowiki>