[go: up one dir, main page]

Jump to content

User:Novem Linguae/Scripts/DraftCleaner.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>

// === Compiled with Novem Linguae's publish.php script ======================

$(async function() {

// === main.js ======================================================

/* THIS SCRIPT IS BUGGY ABOUT 10% OF THE TIME. Be sure to check the diff that pops up before submitting.

- Adds "Run DraftCleaner" link to the left sidebar

- Top uses:
	- remove extra line breaks (for example, 3 enters in a row)
	- in the first sentence, bold the title
	- convert curly quotes to regular quotes
	- put <ref>s after periods
	- clean external links out of the main article area (turn them into references)
	- add ==References== section
	- remove bold from headings

- Other uses:
	- converts [inline external links] to <ref>s
	- removes spaces in front of <ref>s
	- get rid of any level 2 heading that contains the article's title
	- converts =TitleHeading= to ==H2Heading==
	- replaces Covid-19 with COVID-19
	- removes enter characters between <ref>s
	- trims whitespace at beginning and end
	- remove self wikilinks to the article title
	- convert ==Reference== to ==References==
	- turn bare URLs into references
	- fix errant spaces at beginning of lines, which makes a blockquote looking thing
	- delete whitespace at the end of lines
	- convert double spaces to single spaces
	- remove blank heading
	- in refs, turn short links into long links, so you can see the domain
	- change year range dash to ndash
	- if in draftspace, and draft in categories, disable the categories
	- delete <br>. in drafts, these are usually poorly placed
	- fix empty references section
	- right align images
	- remove whitespace if that is the only character on a line
	- correct capitalization of see also, references, further reading, external links
	- if article has headings but no lead, remove first heading
	- replace unicode bullets with asterisks

Add one of the following to your User:yourName/common.js (at the top) to change the position where DraftCleaner puts its link:
    window.draftCleanerPutInToolsMenu = true;
	window.draftCleanerPutInMoreMenu = true;

This page was assembled from 3 files using my publish.php script. I have an offline test suite with around 100 unit tests for the DraftCleaner and StringFilter classes.
*/

( function () {
	async function getWikicode( title ) {
		const pageIsDeleted = !mw.config.get( 'wgCurRevisionId' );
		if ( pageIsDeleted ) {
			return '';
		}

		let wikicode = '';
		title = encodeURIComponent( title );
		await $.ajax( {
			url: 'https://en.wikipedia.org/w/api.php?action=parse&page=' + title + '&prop=wikitext&formatversion=2&format=json',
			success: function ( result ) {
				wikicode = result.parse.wikitext;
			},
			dataType: 'json'
		} );
		return wikicode;
	}

	function goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, editSummary ) {
		const titleEncoded = encodeURIComponent( titleWithNamespaceAndUnderscores );
		const wgServer = mw.config.get( 'wgServer' );
		const wgScriptPath = mw.config.get( 'wgScriptPath' );
		const baseURL = wgServer + wgScriptPath + '/';
		// https://stackoverflow.com/a/12464290/3480193
		$( `<form action="${ baseURL }index.php?title=${ titleEncoded }&action=submit" method="POST"/>` )
			.append( $( '<input type="hidden" name="wpTextbox1">' ).val( wikicode ) )
			.append( $( '<input type="hidden" name="wpSummary">' ).val( editSummary ) )
			.append( $( '<input type="hidden" name="mode">' ).val( 'preview' ) )
			.append( $( '<input type="hidden" name="wpDiff">' ).val( 'Show changes' ) )
			.append( $( '<input type="hidden" name="wpUltimateParam">' ).val( '1' ) )
			.appendTo( $( document.body ) ) // it has to be added somewhere into the <body>
			.trigger( 'submit' );
	}

	/** returns the pagename, including the namespace name, but with spaces replaced by underscores */
	function getArticleName() {
		return mw.config.get( 'wgPageName' );
	}

	// don't run when not viewing articles
	const action = mw.config.get( 'wgAction' );
	const isNotViewing = action != 'view';
	if ( isNotViewing ) {
		return;
	}

	// don't run when viewing diffs
	const isDiff = mw.config.get( 'wgDiffNewId' );
	if ( isDiff ) {
		return;
	}

	// Don't run in virtual namespaces
	const isVirtualNamespace = mw.config.get( 'wgNamespaceNumber' ) < 0;
	if ( isVirtualNamespace ) {
		return;
	}

	let menuID = 'p-navigation';
	// @ts-ignore
	if ( window.draftCleanerPutInToolsMenu ) {
		menuID = 'p-tb';
	// @ts-ignore
	} else if ( window.draftCleanerPutInMoreMenu ) {
		menuID = 'p-cactions';
	}

	const titleWithNamespaceAndUnderscores = getArticleName();
	const namespaceNumber = mw.config.get( 'wgNamespaceNumber' );

	let running = false;

	// Add DraftCleaner to the toolbar
	mw.loader.using( [ 'mediawiki.util' ], () => {
		mw.util.addPortletLink( menuID, '#', 'Run DraftCleaner', 'DraftCleanerLink' );
		$( '#DraftCleanerLink' ).on( 'click', async () => {
			// prevent running the script while script is already in progress
			if ( running ) {
				return;
			}
			running = true;

			mw.notify( 'Parsing page content...' );

			// get page wikicode
			const titleWithNamespaceAndSpaces = titleWithNamespaceAndUnderscores.replace( /_/g, ' ' );
			const originalWikicode = await getWikicode( titleWithNamespaceAndUnderscores );
			let wikicode = originalWikicode;

			const dc = new DraftCleaner();
			wikicode = dc.cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces );

			const needsChanges = wikicode != originalWikicode;
			if ( needsChanges ) {
				const summary = 'clean up ([[User:Novem Linguae/Scripts/DraftCleaner.js|DraftCleaner]])';
				await goToShowChangesScreen( titleWithNamespaceAndUnderscores, wikicode, summary );
			} else {
				mw.notify( 'No changes needed!' );
			}
		} );
	} );
}() );


// === modules/DraftCleaner.js ======================================================


class DraftCleaner {
	cleanDraft( wikicode, namespaceNumber, titleWithNamespaceAndSpaces ) {
		// run before other stuff
		wikicode = this.deleteSomeHTMLTags( wikicode );
		wikicode = this.deleteNonAFCDraftTags( wikicode );
		wikicode = this.deleteAFCDraftTagsIfMainspace( wikicode, mw.config.get( 'wgNamespaceNumber' ) );

		wikicode = this.fixWikilinksContainingURL( wikicode );
		wikicode = this.fixExternalLinksToWikipediaArticles( wikicode );
		wikicode = this.deleteWeirdUnicodeCharacters( wikicode );
		wikicode = this.trimEveryLine( wikicode );
		wikicode = this.convertH1ToH2( wikicode );
		wikicode = this.convertVeryLongHeadingToParagraph( wikicode );
		wikicode = this.deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.capitalizeCOVID19( wikicode );
		wikicode = this.removeBoldFromHeadings( wikicode );
		wikicode = this.convertReferenceToReferences( wikicode );
		wikicode = this.deleteMultipleReferenceTags( wikicode );
		wikicode = this.addReferencesSectionIfMissing( wikicode );
		wikicode = this.fixEmptyReferencesSection( wikicode );
		wikicode = this.deleteWhitespaceAtEndOfLines( wikicode );
		wikicode = this.convertSmartQuotesToRegularQuotes( wikicode );
		// wikicode = this.fixWordEmphasizedWithSingleQuotes(wikicode); // most of these appear in citations as names of newspaper articles, arguably should keep these single quotes
		wikicode = this.convertDoubleSpacesToSingleSpaces( wikicode );
		wikicode = this.deleteBlankHeadings( wikicode );
		wikicode = this.changeYearRangeDashToNDash( wikicode );
		wikicode = this.disableCategoriesInDraftspace( wikicode, namespaceNumber );
		// wikicode = this.deleteBRTagsOutsideInfoboxes(wikicode, namespaceNumber); // edge case in image captions, and probably other places
		// wikicode = this.rightAlignImages(wikicode); // commenting out, too many false positives in featured articles
		wikicode = this.correctCapitalizationOfEndMatterHeaders( wikicode );
		wikicode = this.ifNoLeadSectionDeleteFirstHeading( wikicode );
		wikicode = this.deleteCopyPastedEditAndEditSource( wikicode );
		wikicode = this.replaceUnicodeBulletsWithAsterisks( wikicode );
		wikicode = this.deleteEmptySections( wikicode );
		wikicode = this.fixHeadingsInAllCaps( wikicode );
		wikicode = this.deleteDuplicateReferencesSection( wikicode );
		wikicode = this.deleteBlankLinesBetweenBullets( wikicode );
		wikicode = this.removeUnderscoresFromWikilinks( wikicode );
		wikicode = this.fixPipedWikilinksWithIdenticalParameters( wikicode );
		wikicode = this.removeBorderFromImagesInInfoboxes( wikicode );
		wikicode = this.removeExtraAFCSubmissionTemplates( wikicode );
		wikicode = this.moveAFCSubmissionTemplatesToTop( wikicode );

		// all ==sections== should start with a capital letter
		// after swap, if citation has no spaces on either side, and is not touching two other citations, add a space on the right
		// strip [[File: from infobox's image field
			// example 1: | image               = [[File:SAMIR 1626.png|thumb|Samir Mohanty]]
			// example 2: | image               = [[File:SAMIR 1626.png]]
		// trim whitespace inside refs, e.g. <ref> abc </ref>
		// replace unreliable sources with {{cn}}.
			// if adjacent to other sources, just delete
			// if ref is used multiple times, account for that
		// duplicate citation fixer
		// move refs that are below {{Reflist}}, to above {{Reflist}}
		// move refs out of headings
		// delete AFC submission templates located mid-article, they end up self-hiding then appear as inexplicable whitespace. example: {{AfC submission|t||ts=20211212134609|u=Doezdemir|ns=118|demo=}}<!-- Important, do not remove this line before article has been created. -->
		// fix redundant wikilinks, e.g. [[Spotify|Spotify]]
		// change youtu.be to youtube.com, to avoid the blacklist. test: https://youtu.be/bnWHeRNIPiA
		// delete ©®™
		// convert all <references /> to {{Reflist}}. <references /> doesn't use two column format and looks weird with a large # of references
		// remove px from images, should use default

		// convert refs toward the end. we want deleteSomeHTMLTags() to run first, to get rid of  tags around URLs
		wikicode = this.bareURLToRef( wikicode );
		wikicode = this.refShortLinkToLongLink( wikicode );
		wikicode = this.inlineExternalLinksToRefs( wikicode );
		wikicode = this.moveRefsOutsideOfItalics( wikicode );
		wikicode = this.deleteSpacesInFrontOfRefs( wikicode );
		wikicode = this.deleteNewLinesBetweenRefs( wikicode );
		wikicode = this.swapRefPeriodWithPeriodRef( wikicode );
		wikicode = this.swapRefCommaWithCommaRef( wikicode );

		// stuff we want to run at the end
		wikicode = this.fixDoublePeriod( wikicode ); // need test cases. I've seen this one not work.
		wikicode = this.boldArticleTitle( wikicode, titleWithNamespaceAndSpaces );
		wikicode = this.trimEmptyLines( wikicode );
		wikicode = this.deleteMoreThanTwoEntersInARow( wikicode );
		return wikicode;
	}

	// surround bare URL's with <ref></ref>
	// Useful for seeing all URL's in the reflist section, and for CiteHighlighter ref quality highlighting
	bareURLToRef( wikicode ) {
		return wikicode.replace( /^(http[^\n ]*) {0,}$/gm, '<ref>$1</ref>' );
	}

	// in refs, turn [short links] into long links, so you can see the domain
	// also fixes <ref> link </ref> issues with extra spaces in the ref
	refShortLinkToLongLink( wikicode ) {
		// <ref>[https://test.com''Test'']</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^'\]]*)(''[^\]]*)\] {0,}(<\/ref>)/gm, '$1$2 $3$4' );
		// <ref>[https://test.com Test]</ref>
		wikicode = wikicode.replace( /(<ref[^>]*>) {0,}\[ {0,}([^\]]*) {0,}\] {0,}(<\/ref>)/gm, '$1$2$3' );
		return wikicode;
	}

	// convert inline external links to references
	inlineExternalLinksToRefs( wikicode ) {
		const sectionsToSkip = [ 'External link', 'Further reading', 'Links' ];
		let regExString = '== ?(?:';
		for ( const sectionToSkip of sectionsToSkip ) {
			regExString += sectionToSkip + '|';
		}
		regExString = regExString.slice( 0, -1 ) + ')';
		const hasSectionToSkip = wikicode.match( new RegExp( regExString, 'i' ) );

		const sf = new StringFilter();

		if ( hasSectionToSkip ) {
			const regExToSplitArticle = new RegExp( '((' + regExString + ').*$)', 'is' );
			const topHalf = wikicode.replace( regExToSplitArticle, '' );
			const bottomHalf = wikicode.match( regExToSplitArticle )[ 1 ];
			const buffer = sf.surgicalReplaceOutsideTags(
				/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
				'$2<ref>$1</ref>',
				topHalf,
				[ '<ref', '{{' ],
				[ '</ref>', '/>', '}}' ]
			);
			wikicode = buffer + bottomHalf;
		} else {
			wikicode = sf.surgicalReplaceOutsideTags(
				/(?<!>|> )\[(http[^ \]]+) ?(.*?)\](?!<\/ref>| <\/ref>)/gm,
				'$2<ref>$1</ref>',
				wikicode,
				[ '<ref', '{{' ],
				[ '</ref>', '/>', '}}' ]
			);
		}

		return wikicode;
	}

	/**
	 * ''Test<ref></ref>'' => ''Test''<ref></ref>
	 */
	moveRefsOutsideOfItalics( wikicode ) {
		wikicode = wikicode.replace( /''([^']+)(<ref>[^<]+<\/ref>)''/gm, '\'\'$1\'\'$2' );
		return wikicode;
	}

	// get rid of spaces in front of <refs>
	deleteSpacesInFrontOfRefs( wikicode ) {
		return wikicode.replace( /(?<!(?:\||=)) {1,}<ref/gm, '<ref' );
	}

	// get rid of any level 2 heading that contains the article's title
	// this takes care of 2 common cases: heading at the bottom next to the {{AFC Submission}} template, and heading at the top above the lead
	deleteHeadingsWithTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let headingNameToLookFor = titleWithNamespaceAndSpaces;
		headingNameToLookFor = headingNameToLookFor.replace( /^Draft:/, '' );
		headingNameToLookFor = this._escapeRegEx( headingNameToLookFor );
		let regEx = new RegExp( '^== ?' + headingNameToLookFor + ' ?==\n', 'gmi' );
		wikicode = wikicode.replace( regEx, '' );
		// now look for titles that contain Draft: at the beginning, too
		headingNameToLookFor = titleWithNamespaceAndSpaces;
		headingNameToLookFor = this._escapeRegEx( headingNameToLookFor );
		regEx = new RegExp( '^== ?' + headingNameToLookFor + ' ?==\n', 'gmi' );
		wikicode = wikicode.replace( regEx, '' );
		return wikicode;
	}

	// remove wikilinks to article name
	// Example: if title is Draft:Menna Shahin, change [[Menna Shahin]] to Menna Shahin
	unlinkWikilinksToThisTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let wikilinkToLookFor = titleWithNamespaceAndSpaces;
		wikilinkToLookFor = wikilinkToLookFor.replace( /^Draft:/, '' );
		wikilinkToLookFor = this._escapeRegEx( wikilinkToLookFor );
		const regEx = new RegExp( '\\[\\[(' + wikilinkToLookFor + ')\\]\\]', 'gm' );
		wikicode = wikicode.replace( regEx, '$1' );
		return wikicode;
	}

	// if located in the first paragraph, bold the article title
	boldArticleTitle( wikicode, titleWithNamespaceAndSpaces ) {
		let titleToLookFor = titleWithNamespaceAndSpaces;
		titleToLookFor = titleToLookFor.replace( /^Draft:/, '' );
		titleToLookFor = titleToLookFor.replace( / \(.*?\)$/, '' );
		titleToLookFor = this._escapeRegEx( titleToLookFor );
		// Don't bold the title if it's already bolded. Fixes a "bold twice" bug.
		const hasBoldedTitle = wikicode.match( new RegExp( `'''${ titleToLookFor }'''`, 'i' ) );
		if ( hasBoldedTitle ) {
			return wikicode;
		}
		// Be pretty strict, to avoid adding ''' to image paths and infoboxes, which messes up the image. Also, only replace first match.
		const regEx = new RegExp( '^(The )?(' + titleToLookFor + ')([ <,])', 'mi' );
		wikicode = wikicode.replace( regEx, "$1'''$2'''$3" );
		return wikicode;
	}

	// /covid-19/i -> COVID-19
	// Careful of this string in URLs.
	capitalizeCOVID19( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceOutsideTags( / covid-19/gmi, ' COVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /\ncovid-19/gmi, '\nCOVID-19', wikicode, [ '{{', '[[' ], [ '}}', ']]' ] );
		return wikicode;
	}

	// remove bold from headings
	removeBoldFromHeadings( wikicode ) {
		return wikicode.replace( /^(=.*)'''(.*)'''(.*=)$/gm, '$1$2$3' );
	}

	// remove enter characters between <ref>s
	deleteNewLinesBetweenRefs( wikicode ) {
		return wikicode.replace( /<\/ref>\n{1,}<ref>/gm, '</ref><ref>' );
	}

	// convert ==Reference== to ==References==
	convertReferenceToReferences( wikicode ) {
		return wikicode.replace( /^== ?Reference ?==$/gmi, '== References ==' );
	}

	// TOOL - swap ref period with period ref
	swapRefPeriodWithPeriodRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\. /gm, '.$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,})\.\n/gm, '.$1\n' );
		return wikicode;
	}

	swapRefCommaWithCommaRef( wikicode ) {
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}), /gm, ',$1 ' );
		wikicode = wikicode.replace( /((?:<ref[^>]*?>[^>]*?<\/ref>){1,}),\n/gm, ',$1\n' );
		return wikicode;
	}

	// fix errant spaces at beginning of lines, which makes a blockquote looking thing (AFCH does it)
	trimEveryLine( wikicode ) {
		let output = '';
		const lines = wikicode.split( '\n' );
		const lineCount = lines.length;
		let i = 0;
		for ( const line of lines ) {
			i++;
			const trimmed = line.trim();
			if ( trimmed.startsWith( '|' ) || trimmed.startsWith( '}' ) ) { // don't trim lines that start with | or }. It is common in FAs to indent these a bit.
				output += line;
			} else {
				output += line.trim();
			}
			if ( i !== lineCount ) {
				output += '\n';
			}
		}
		return output;
	}

	// add references section if missing
	addReferencesSectionIfMissing( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
		if ( !hasRefSection && !hasReflist ) {
			const hasBottomAFCTemplate = wikicode.match( /(\n{{AfC submission[^}]*}}\s*)$/ );
			if ( hasBottomAFCTemplate ) {
				wikicode = wikicode.replace( /(\n{{AfC submission[^}]*}}\s*)$/, '\n\n== References ==\n{{Reflist}}$1' );
			} else {
				wikicode = wikicode.replace( /$/, '\n\n== References ==\n{{Reflist}}' );
			}
		}
		return wikicode;
	}

	// fix empty references section
	fixEmptyReferencesSection( wikicode ) {
		const hasRefSection = wikicode.match( /^== ?References ?==$/mi );
		const hasReflist = wikicode.match( /(?:{{Reflist|<references)/mi );
		if ( !hasReflist && hasRefSection ) {
			wikicode = wikicode.replace( /(?<=== ?References ?==)/gmi, '\n{{Reflist}}' );
		}
		return wikicode;
	}

	// delete whitespace at the end of lines
	// (?!\|)(?!\}\}) is to stop this from deleting spaces after = in infoboxes
	deleteWhitespaceAtEndOfLines( wikicode ) {
		return wikicode.replace( /[ \t]+\n(?!\|)(?!\}\})/g, '\n' );
	}

	// convert smart quotes to regular quotes
	convertSmartQuotesToRegularQuotes( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceOutsideTags( /”/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /“/g, '"', wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /‘/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /’/g, "'", wikicode, [ '[[File:' ], [ ']]' ] );
		wikicode = sf.surgicalReplaceOutsideTags( /…/g, '...', wikicode, [ '[[File:' ], [ ']]' ] );
		return wikicode;
	}

	// convert double spaces to single spaces
	convertDoubleSpacesToSingleSpaces( wikicode ) {
		return wikicode.replace( /\. {2,}/g, '. ' );
	}

	// remove blank heading
	deleteBlankHeadings( wikicode ) {
		return wikicode.replace( /\n={2,} {0,}={2,}\n/g, '\n' );
	}

	// Change year range dash to ndash. Skip text inside of [[File:
	changeYearRangeDashToNDash( wikicode ) {
		const sf = new StringFilter();
		// (1111-1111)
		wikicode = sf.surgicalReplaceOutsideTags( /(\(\d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		//  1839 - 1926)
		wikicode = sf.surgicalReplaceOutsideTags( /( \d{4}) ?- ?(\d{4}\))/gm, '$1–$2', wikicode, [ '[[File:' ], [ ']]' ] );
		return wikicode;
	}

	// if in draftspace, and draft has categories, disable the categories
	disableCategoriesInDraftspace( wikicode, namespace ) {
		const draft = ( namespace == 118 );
		if ( draft ) {
			wikicode = wikicode.replace( /:?(\[\[)(Category:[^\]]*\]\])/gm, '$1:$2' );
		}
		wikicode = wikicode.replace( /\[\[:Category:Created via preloaddraft\]\]/gi, '[[Category:Created via preloaddraft]]' );
		return wikicode;
	}

	// delete <br> in drafts, these are usually poorly placed
	deleteBRTagsOutsideInfoboxes( wikicode ) {
		let output = '';
		const lines = wikicode.split( '\n' );
		const lineCount = lines.length;
		let i = 0;
		for ( const line of lines ) {
			i++;
			// Skip lines that start with { or |. This is the easiest way to detect infoboxes
			if ( line.startsWith( '{' ) || line.startsWith( '|' ) ) {
				output += line;
			} else {
				output += line.replace( /<br ?\/?>/gm, '' );
			}
			if ( i !== lineCount ) {
				output += '\n';
			}
		}
		return output;
	}

	// right align images
	rightAlignImages( wikicode ) {
		return wikicode.replace( /(\[\[File:[^\]]*\|)left(\|[^\]]*\]\])/gm, '$1right$2' );
	}

	// correct capitalization of see also, references, further reading, external links
	correctCapitalizationOfEndMatterHeaders( wikicode ) {
		wikicode = wikicode.replace( /^(== ?)References( ?==)$/gmi, '$1References$2' );
		wikicode = wikicode.replace( /^(== ?)External links( ?==)$/gmi, '$1External links$2' );
		wikicode = wikicode.replace( /^(== ?)Further reading( ?==)$/gmi, '$1Further reading$2' );
		wikicode = wikicode.replace( /^(== ?)See also( ?==)$/gmi, '$1See also$2' );
		return wikicode;
	}

	// if article has headings but no lead, remove first heading
	ifNoLeadSectionDeleteFirstHeading( wikicode ) {
		let output = '';
		const lines = wikicode.split( '\n' );
		const lineCount = lines.length;
		let i = 0;
		let textCount = 0;
		for ( const line of lines ) {
			i++;
			// scan for first heading.
			// empty lines, lines with templates, or lines with images do not count.
			if ( line.startsWith( '{' ) || line.length === 0 || line.startsWith( '[[File:' ) ) {
				output += line;
			} else if ( line.startsWith( '==' ) && !textCount ) {
				continue; // delete this line by not putting it in the output string
			} else {
				textCount++;
				output += line;
			}
			if ( i !== lineCount ) {
				output += '\n';
			}
		}
		return output;
	}

	// delete [edit], [edit source], and [editar] from headings
	deleteCopyPastedEditAndEditSource( wikicode ) {
		wikicode = wikicode.replace( /\[edit\]( ?={2,})$/gm, '$1' );
		wikicode = wikicode.replace( /\[edit source\]( ?={2,})$/gm, '$1' );
		wikicode = wikicode.replace( /\[editar\]( ?={2,})$/gm, '$1' );
		return wikicode;
	}

	// at beginning of lines, replace unicode bullets with asterisks
	replaceUnicodeBulletsWithAsterisks( wikicode ) {
		return wikicode.replace( /^\s{0,}[·•●]\s{0,}/gm, '* ' );
	}

	// remove whitespace if that is the only character on a line
	trimEmptyLines( wikicode ) {
		return wikicode.replace( /^\s*$/gm, '' );
	}

	// no more than 2 newlines (1 blank line) in a row. except stubs, which get 3 newlines (2 blank lines)
	// Note: AFCH does this too
	deleteMoreThanTwoEntersInARow( wikicode ) {
		wikicode = wikicode.replace( /\n{3,}/gm, '\n\n' );
		wikicode = wikicode.replace( /\n{2}(\{\{[^}]*stub\}\})/gi, '\n\n\n$1' );
		return wikicode;
	}

	// convert =TitleHeading= to ==H2Heading==
	convertH1ToH2( wikicode ) {
		return wikicode.replace( /^= ?([^=]*?) ?=$/gm, '== $1 ==' );
	}

	convertVeryLongHeadingToParagraph( wikicode ) {
		let output = '';
		const lines = wikicode.split( '\n' );
		const lineCount = lines.length;
		let i = 0;
		for ( const line of lines ) {
			i++;
			if ( line.length > 150 && line.match( /^==.*==$/gm ) && !line.match( /<ref/ ) ) {
				output += line.replace( /^={1,}\s*(.*?)\s*={1,}$/m, '$1' );
			} else {
				output += line;
			}
			if ( i !== lineCount ) {
				output += '\n';
			}
		}
		return output;
	}

	fixWordEmphasizedWithSingleQuotes( wikicode ) {
		return wikicode.replace( / '(\w+)' /g, ' "$1" ' );
	}

	fixDoublePeriod( wikicode ) {
		return wikicode.replace( /(?<=[A-Za-z\]])\.\.(?=<ref| |\n)/g, '.' );
	}

	fixWikilinksContainingURL( wikicode ) {
		// non-piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\]\]/g, '[[$1]]' );
		// piped wikilink
		wikicode = wikicode.replace( /\[\[https?:\/\/en\.(?:m\.)?wikipedia\.org\/wiki\/([^|]*)\|([^\]]*)\]\]/g, '[[$1|$2]]' );
		// non-piped external link
		wikicode = wikicode.replace( /\[\[(http[^|]*)\]\]/g, '[$1]' );
		// piped external link
		wikicode = wikicode.replace( /\[\[(http[^|]*)\|([^\]]*)\]\]/g, '[$1 $2]' );
		return wikicode;
	}

	fixExternalLinksToWikipediaArticles( wikicode ) {
		// [https://en.wikipedia.org/wiki/Article] and [https://en.wikipedia.org/wiki/Article Article name]
		return wikicode.replace( /(?<!\[)\[https?:\/\/en\.wikipedia\.org\/wiki\/([^ \]]*)( [^\]]*)?\]/gs, ( match, p1 ) => {
			p1 = decodeURIComponent( p1 );
			p1 = p1.replace( /_/g, ' ' );
			return `[[${ p1 }]]`;
		} );
	}

	deleteBlankLinesBetweenBullets( wikicode ) {
		const lines = wikicode.split( '\n' );
		const buffer = [];
		const length = lines.length;
		for ( let i = 0; i < length; i++ ) {
			const previous = lines[ i - 1 ];
			const current = lines[ i ];
			const next = lines[ i + 1 ];
			if (
				typeof previous !== 'undefined' &&
				typeof next !== 'undefined' &&
				previous.startsWith( '*' ) &&
				current === '' &&
				next.startsWith( '*' )
			) {
				continue;
			}
			buffer.push( current );
		}
		return buffer.join( '\n' );
	}

	deleteWeirdUnicodeCharacters( wikicode ) {
		return wikicode.replace( /[–]/g, '' );
	}

	deleteSomeHTMLTags( wikicode ) {
		wikicode = wikicode.replace( /<\/?p( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?strong( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?em( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?nowiki( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /<\/?u( [^>]*)?\/?>/g, '' );
		wikicode = wikicode.replace( /(?:<big>|<\/big>)/g, '' );
		return wikicode;
	}

	deleteNonAFCDraftTags( wikicode ) {
		wikicode = wikicode.replace( /{{Preloaddraft submit}}\n{0,2}/gi, '' );
		wikicode = wikicode.replace( /<!-- When you move this draft into article space, please link it to the Wikidata entry and remove the QID in the infobox code\. -->\n{0,2}/gi, '' );
		wikicode = wikicode.replace( /{{Draft}}\n{0,2}/gi, '' );
		return wikicode;
	}

	deleteAFCDraftTagsIfMainspace( wikicode, namespaceNumber ) {
		const isMainspace = namespaceNumber == 0;
		if ( isMainspace ) {
			// {{AfC submission}}, {{AfC topic}}, {{AfC comment}}, etc.
			wikicode = wikicode.replace( /{{AfC [^}]*}}\n?/g, '' );
			wikicode = wikicode.replace( /{{Draft topics[^}]*}}\n?/g, '' );
		}
		return wikicode;
	}

	fixHeadingsInAllCaps( wikicode ) {
		// create a concatenated string with the text from every heading
		const matches = wikicode.matchAll( /== {0,}(.+) {0,}==/g );
		let headingString = '';
		for ( const match of matches ) {
			headingString += match[ 1 ];
		}

		// if string only contains caps
		if ( this._isUpperCase( headingString ) ) {
			// convert all headings to sentence case
			const matches = wikicode.matchAll( /== {0,}(.+) {0,}==/g );
			for ( const match of matches ) {
				let matchRegex = this._escapeRegEx( match[ 1 ].trim() );
				matchRegex = new RegExp( '== {0,}' + matchRegex + ' {0,}==', 'g' );
				const sentenceCase = this._toSentenceCase( match[ 1 ].trim() );
				wikicode = wikicode.replace( matchRegex, '== ' + sentenceCase + ' ==' );
			}
		}

		return wikicode;
	}

	deleteEmptySections( wikicode ) {
		return wikicode.replace( /\n*== ?(?:See also|External links) ?==\n*$/, '' );
	}

	deleteDuplicateReferencesSection( wikicode ) {
		const matches = wikicode.match( /==\s*References\s*==/gi );
		if ( matches !== null && matches.length > 1 ) {
			// run regexes that are likely to delete the extra section
			const attempt = wikicode.replace(
`== References ==
<!-- Inline citations added to your article will automatically display here. See en.wikipedia.org/wiki/WP:REFB for instructions on how to add citations. -->
{{reflist}}`
			, '' );
			const matches2 = attempt.match( /==\s*References\s*==/gi );
			if ( matches2.length === 1 ) {
				wikicode = attempt.trim();
				wikicode = wikicode.replace( /==\s*References\s*==/gi, '== References ==' );
			}
		}
		return wikicode;
	}

	removeUnderscoresFromWikilinks( wikicode ) {
		const sf = new StringFilter();
		wikicode = sf.surgicalReplaceInsideTags( /_/g, ' ', wikicode, [ '[[' ], [ ']]' ] );
		return wikicode;
	}

	fixPipedWikilinksWithIdenticalParameters( wikicode ) {
		const matches = wikicode.matchAll( /\[\[([^|\]]+)\|([^\]]+)\]\]/g );
		for ( const match of matches ) {
			if ( match[ 1 ] === match[ 2 ] ) {
				wikicode = this._replaceAll( wikicode, `[[${ match[ 1 ] }|${ match[ 1 ] }]]`, `[[${ match[ 1 ] }]]` );
			}
		}
		return wikicode;
	}

	removeBorderFromImagesInInfoboxes( wikicode ) {
		wikicode = wikicode.replace( /(\|\s*logo\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		wikicode = wikicode.replace( /(\|\s*cover\s*=\s*)\[\[File:([^\]|]*)[^\]\]]*\]\]/g, '$1$2' );
		return wikicode;
	}

	/** These often hide towards the bottom of a draft. When the draft is submitted, unsubmitted templates (t) detect this and show up as blank, creating a weird extra line break. So this basically fixes the line break. */
	removeExtraAFCSubmissionTemplates( wikicode ) {
		const hasSubmittedTemplate = wikicode.match( /{{AfC submission\|\|/ );
		const hasUnsubmittedTemplate = wikicode.match( /{{AfC submission\|t\|/ );
		if ( hasSubmittedTemplate && hasUnsubmittedTemplate ) {
			wikicode = wikicode.replace( /{{AfC submission\|t\|[^}}]*\}\}\n?/gm, '' );
		}
		return wikicode;
	}

	moveAFCSubmissionTemplatesToTop( wikicode ) {
		const hasTemplateAtBottom = wikicode.match( /\n[^\n]+\n*({{AfC submission[^}]*}})\s*$/i );
		if ( hasTemplateAtBottom ) {
			// delete all submission templates
			wikicode = wikicode.replace( /{{AfC submission[^}}]*\}\}\n?/gm, '' );

			// insert template at top
			wikicode = hasTemplateAtBottom[ 1 ] + '\n----\n\n' + wikicode;
		}
		return wikicode;
	}

	deleteMultipleReferenceTags( wikicode ) {
		const hasReflist = wikicode.match( /{{Reflist}}/i );
		const hasReferencesTag = wikicode.match( /<references ?\/>/i );
		if ( hasReflist && hasReferencesTag ) {
			// delete all references tags
			wikicode = wikicode.replace( /<references ?\/>\n?/gi, '' );
		}
		return wikicode;
	}

	_isUpperCase( str ) {
		return str === str.toUpperCase();
	}

	_toSentenceCase( string ) {
		return string.charAt( 0 ).toUpperCase() + string.slice( 1 ).toLowerCase();
	}

	_replaceAll( haystack, needle, replacement ) {
		const regex = new RegExp( this._escapeRegEx( needle ), 'g' );
		haystack = haystack.replace( regex, replacement );
		return haystack;
	}

	_escapeRegEx( string ) {
		return string.replace( /[.*+?^${}()|[\]\\]/g, '\\$&' ); // $& means the whole matched string
	}
}


// === modules/StringFilter.js ======================================================

/**
 * Lets you use regex to specify what parts of a very long string you want to specify as "off limits", then you can do additional regex's and search/replace to the remaining parts of the string.
 */
class StringFilter {
	/**
	 * Does a replace, but specifies areas of the file that should NOT be replaced. Those areas are specified by providing an openingTag and a closingTag, and those areas are marked as off limits.
	 */
	surgicalReplaceOutsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const allTags = [ ...openingTags, ...closingTags ];
		const parts = this._splitStringUsingMultiplePatterns( haystack, allTags );
		const resultArray = [];
		for ( let part of parts ) {
			let openingTagMatch = false;
			for ( const tag of openingTags ) {
				if ( part.startsWith( tag ) ) {
					openingTagMatch = true;
					break;
				}
			}
			if ( !openingTagMatch ) {
				part = part.replace( regex, replacement );
			}
			resultArray.push( part );
		}
		return resultArray.join( '' );
	}

	/**
	 * Does a replace, but specifies areas of the file that SHOULD be replaced, then skips the rest of the file. The area that should be replaced is specified by providing an openingTag and a closingTag.
	 */
	surgicalReplaceInsideTags( regex, replacement, haystack, openingTags, closingTags ) {
		const allTags = [ ...openingTags, ...closingTags ];
		const parts = this._splitStringUsingMultiplePatterns( haystack, allTags );
		const resultArray = [];
		for ( let part of parts ) {
			for ( const tag of openingTags ) {
				if ( part.startsWith( tag ) ) {
					part = part.replace( regex, replacement );
				}
			}
			resultArray.push( part );
		}
		return resultArray.join( '' );
	}

	/**
	 * Also keeps the pattern in the result, unlike string.prototype.split. Algorithm isn't perfect, will fail with this pattern: <ref>Test/>Test</ref>. But should be good enough for DraftCleaner stuff.
	 *
	 * @param {string} string
	 * @param {string[]} patterns
	 * @return {string[]}
	 */
	_splitStringUsingMultiplePatterns( string, patterns ) {
		const length = string.length;
		const result = [];
		let positionOfLastMatch = 0;
		for ( let i = 0; i < length; i++ ) {
			const lookAhead = string.substring( i ); // the rest of the string after current position
			let patternMatch = false;
			for ( const pattern of patterns ) {
				if ( lookAhead.startsWith( pattern ) ) {
					patternMatch = true;
					break;
				}
			}
			if ( patternMatch ) {
				const chunk = string.slice( positionOfLastMatch, i );
				// if blank (happens if i=0 matches), continue instead of putting an empty "" into the array
				if ( !chunk ) {
					continue;
				}
				result.push( chunk );
				positionOfLastMatch = i;
			}
		}
		// Don't forget the last chunk.
		result.push( string.substring( positionOfLastMatch ) );
		return result;
	}
}


});

// </nowiki>