[go: up one dir, main page]

Bikarhêner:Balyozxane/updateW.py

#<nowiki>
import pywikibot
import re
from pywikibot import pagegenerators
from pywikibot.bot import AutomaticTWSummaryBot, ConfigParserBot, SingleSiteBot
from pywikibot.exceptions import NoPageError
from pywikibot.data import api
import requests
import re

def escapeRegExp(text):
    r"""Backslash-escape regular-expression metacharacters in *text*.

    Every occurrence of one of ``. * + ? ^ $ { } ( ) | [ ] \`` is prefixed
    with a backslash, so the result can be embedded in a pattern and will
    match *text* literally.

    Args:
        text: The string to escape.

    Returns:
        A copy of *text* with each listed metacharacter escaped.
    """
    # ``\g<0>`` re-inserts the matched character.  The JavaScript-style ``&``
    # placeholder has no meaning in Python replacement templates and would be
    # written out literally, destroying the escaped character.
    return re.sub(r'[.*+?^${}()|[\]\\]', r'\\\g<0>', text)

# A line that opens a language entry, e.g. "* {{Z|en}}: ...".
_LANG_ENTRY_RE = re.compile(r'\* \{\{Z\|([a-zA-Z-]+)\}\}:')


def _group_translation_lines(lines):
    """Group the lines of one translation section into sortable items.

    Args:
        lines: The section's lines, in their original order.

    Returns:
        A list of dicts.  A ``'mainLang'`` item holds the entry line, its
        lower-cased language code and the follow-up lines (``'subsets'``)
        that belong to it; an ``'unknown'`` item holds a single line that
        appeared before the first language entry.
    """
    items = []
    current = None
    for line in lines:
        match = _LANG_ENTRY_RE.match(line)
        if match:
            # A new entry starts: the previous one is complete.
            if current is not None:
                items.append(current)
            current = {
                'type': 'mainLang',
                'line': line,
                'langCode': match.group(1).lower(),
                'subsets': [],
            }
        elif current is not None:
            # Anything after an entry line travels with that entry.
            current['subsets'].append(line)
        else:
            # Marked 'unknown' to distinguish it from subset lines.
            items.append({'type': 'unknown', 'line': line})
    if current is not None:
        items.append(current)
    return items


def sortAlphabetically(content):
    """Sort the language entries of every translation section in *content*.

    The sections are the strings returned by ``extractWergerSections``.  In
    each one, a line of the form ``* {{Z|<code>}}: ...`` opens a language
    entry and the lines following it stay attached to it.  The grouped items
    are ordered by ``sort_by_kurdish_alphabet`` and the re-assembled section
    text replaces the original section text inside *content*.

    Args:
        content: Wikitext of the page.

    Returns:
        The wikitext with each section re-ordered.  *content* is returned
        unchanged when it contains no translation section, and a section
        whose sorting raises is reported and left as it was.
    """
    for wergerSection in extractWergerSections(content):
        langSet = _group_translation_lines(wergerSection.strip().split("\n"))

        try:
            # Language names are fetched and the sorting is done in here.
            langSet = sort_by_kurdish_alphabet(langSet)
        except Exception as e:
            # Best effort: a failed lookup/sort must not abort the page.
            print(f"An error occurred: {str(e)}")
            continue
        pywikibot.output("langSet hat rêzkirin")

        sortedLines = []
        for item in langSet:
            sortedLines.append(item['line'])
            if item['type'] == 'mainLang':
                sortedLines.extend(item['subsets'])

        content = content.replace(wergerSection, "\n".join(sortedLines))

    # Always hand back a string so callers can keep processing the text.
    return content

def extractWergerSections(content):
    """Collect the body text of each translation section in *content*.

    A section opens with ``{{werger-ser}}`` (matched case-insensitively,
    optionally carrying ``|arguments``) and closes at the next literal
    ``{{werger-bin}}``.  An opener with no closer after it is skipped.

    Args:
        content: Wikitext to scan.

    Returns:
        A list with the whitespace-stripped text found between each opener
        and its closer, in order of appearance.
    """
    opener = re.compile(r'\{\{werger-ser(?:\|[^\}]+)?}}', re.IGNORECASE)
    closer = "{{werger-bin}}"

    bodies = []
    for opening in opener.finditer(content):
        # The search for the closer starts at the opener itself.
        close_at = content.find(closer, opening.start())
        if close_at == -1:
            continue
        bodies.append(content[opening.end():close_at].strip())
    return bodies

def sort_by_kurdish_alphabet(langSet):
    """Sort *langSet* in place by language name, in Kurdish alphabet order.

    The name of each item is looked up through ``fetch_language_names``; when
    no name is available the language code itself is used.  Names are
    lower-cased and compared letter by letter by their position in the
    alphabet string below.  A character that is not in that string gets
    position ``-1`` and therefore sorts before every listed letter.

    Items without a ``'langCode'`` key (the ``'unknown'`` lines that precede
    the first language entry of a section) are kept in front, in their
    original relative order, instead of raising ``KeyError``.

    Args:
        langSet: List of item dicts; language entries carry ``'langCode'``.

    Returns:
        The same list object, now sorted.
    """
    # Each letter appears once; ``str.find`` only ever reports the first
    # occurrence, so repeated letters would add nothing.
    kurdish_alphabet = "ABCÇDEÊFGHIÎJKLMNOPQRSŞTUÛVWXYZabcçdeêfghiîjklmnopqrsştuûvwxyzǃǀǁǂ"
    pywikibot.output("langSet tê rêzkirin")
    lang_codes = [item['langCode'] for item in langSet if 'langCode' in item]
    lang_names = fetch_language_names(lang_codes)

    def kurdish_key(lang_item):
        lang_code = lang_item.get('langCode')
        if lang_code is None:
            # Rank 0 keeps code-less lines ahead of all language entries;
            # the sort is stable, so their order among themselves is kept.
            return (0, [])
        lang_name = lang_names.get(lang_code, lang_code).lower()
        return (1, [kurdish_alphabet.find(char) for char in lang_name])

    langSet.sort(key=kurdish_key)

    return langSet


def fetch_language_names(lang_codes):
    """Look up the names of *lang_codes* in the wiki's language table.

    The table is downloaded as JSON from the ``ziman.json`` page of
    ku.wiktionary on every call.

    Args:
        lang_codes: Iterable of language codes to look up.

    Returns:
        A dict mapping each requested code that is present in the table to
        its table value.  An empty dict is returned when the download fails,
        the server answers with an error status, or the payload is not a
        JSON object.
    """
    pywikibot.output(f"lang_codes: {lang_codes}")
    language_data_url = "https://ku.wiktionary.org/w/index.php?title=MediaWiki:Gadget-translation editor.js/ziman.json&action=raw"

    try:
        # Without a timeout an unresponsive server would hang the bot.
        response = requests.get(language_data_url, timeout=30)
        # An HTTP error page must not be parsed as language data.
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching language names: {e}")
        return {}

    if not isinstance(data, dict):
        print(f"Error fetching language names: unexpected payload {type(data).__name__}")
        return {}

    language_names = {
        lang_code: data[lang_code]
        for lang_code in lang_codes
        if lang_code in data
    }
    pywikibot.output("fetched language_names")
    return language_names

def sanitize_page_title(page_title):
    """Return *page_title* with the illegal title characters removed.

    The characters dropped are ``#``, ``<``, ``>``, ``[``, ``]``, ``|``,
    ``{`` and ``}``; everything else is kept as is.
    """
    return re.compile(r'[#<>[\]|{}]').sub('', page_title)
    
# Language codes whose Wiktionary is hosted under a different site code.
_WIKTIONARY_CODE_OVERRIDES = {
    'cmn': 'zh',
    'fra-nor': 'nrm',
    'ko-Hani': 'ko',
    'lzh': 'zh-classical',
    'nan': 'zh-min-nan',
    'nb': 'no',
    'rup': 'roa-rup',
    'yue': 'zh-yue',
    'zza': 'diq',
}

# Language codes that are used directly as the Wiktionary site code.
_WIKTIONARY_CODES = frozenset([
    'af', 'am', 'an', 'ang', 'ar', 'ast', 'ay', 'az', 'be', 'bg',
    'bn', 'br', 'bs', 'ca', 'chr', 'co', 'cs', 'csb', 'cy', 'da',
    'de', 'dv', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi',
    'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv',
    'ha', 'he', 'hi', 'hr', 'hsb', 'hu', 'hy', 'ia', 'id', 'ie',
    'io', 'is', 'it', 'iu', 'ja', 'jbo', 'jv', 'ka', 'kk', 'kl',
    'km', 'kn', 'ko', 'ks', 'ku', 'kw', 'ky', 'la', 'lb', 'li',
    'ln', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr',
    'ms', 'mt', 'my', 'na', 'nah', 'nds', 'ne', 'nl', 'nn', 'no',
    'oc', 'om', 'or', 'pa', 'pl', 'pnb', 'ps', 'pt', 'qu', 'ro',
    'roa-rup', 'ru', 'rw', 'sa', 'scn', 'sd', 'sg', 'sh', 'si',
    'simple', 'sk', 'sl', 'sm', 'so', 'sq', 'sr', 'ss', 'st', 'su',
    'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'tpi',
    'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vec', 'vi', 'vo',
    'wa', 'wo', 'yi', 'za', 'zh', 'zh-min-nan', 'zu',
])


def page_exists(lang_code, page_title):
    """Tell whether *page_title* exists on the Wiktionary for *lang_code*.

    The language code is mapped to a Wiktionary site code first; codes that
    map to no site are answered with ``False`` without any network access.
    A leading ``^`` is dropped from the title, the title is passed through
    ``sanitize_page_title`` and ``remove_diacritics``, and the resulting
    page is looked up on the target site.

    Args:
        lang_code: Language code taken from the template.
        page_title: Page title taken from the template.

    Returns:
        ``True`` when the page exists (``Page.exists()`` is documented to
        report redirects as existing too), ``False`` when it does not, when
        *lang_code* is empty or has no Wiktionary, or when the site is
        unknown to pywikibot.
    """
    # Check if the language code is valid
    if not lang_code:
        return False

    # Resolve the site code before doing any work on the title, so that an
    # unsupported language costs no API request.
    domain = _WIKTIONARY_CODE_OVERRIDES.get(lang_code)
    if domain is None:
        if lang_code not in _WIKTIONARY_CODES:
            return False
        domain = lang_code

    # Check if page_title starts with "^" and remove it
    if page_title.startswith("^"):
        page_title = page_title[1:]

    page_title = sanitize_page_title(page_title)

    # Call remove_diacritics to process the page title
    processed_title = remove_diacritics(lang_code, page_title)

    try:
        site = pywikibot.Site(code=domain, fam="wiktionary")

        # Without a usable site there is no page to look up.
        if site.sitename() == 'RemovedSite':
            return False

        page = pywikibot.Page(site, processed_title)

        # Debug output
        print(f"Checking page existence: {site}:{page}")

        # Only the existence is needed, so the page text is not downloaded.
        return page.exists()

    except pywikibot.exceptions.UnknownSiteError:
        return False

def remove_diacritics(lang_code, text):
    """Convert *text* to its entry name for *lang_code* via the wiki.

    The default pywikibot site is asked to expand
    ``{{#invoke:ziman/şablon|makeEntryName|<lang_code>|<text>}}`` and the
    expanded wikitext is returned.  The conversion rules live in that wiki
    module, not in this script.

    Args:
        lang_code: Language code handed to the module.
        text: Title to convert.

    Returns:
        The expanded wikitext, or *text* unchanged when the request fails
        for any reason (the failure is printed).
    """
    # This is a plain module-level function: a ``staticmethod`` wrapper here
    # would make the name uncallable on Python versions before 3.10.
    try:
        # Initialize a MediaWiki API session
        site = pywikibot.Site()

        # Define the parameters for the API request
        params = {
            'action': 'expandtemplates',
            'format': 'json',
            'text': f'{{{{#invoke:ziman/şablon|makeEntryName|{lang_code}|{text}}}}}',
            'prop': 'wikitext'
        }

        # Make the API request using the Request class
        request = api.Request(site=site, parameters=params)
        response = request.submit()

        # Extract and return the expanded wikitext
        return response['expandtemplates']['wikitext']

    except Exception as e:
        # Deliberate best effort: fall back to the unprocessed title.
        print(f"Error expanding template: {e}")
        return text

class AppendTextBot(
    SingleSiteBot,
    ConfigParserBot,
    AutomaticTWSummaryBot,
):
    """Bot that sorts translation sections and refreshes ``{{W}}`` templates.

    For each page the translation sections are re-ordered with
    ``sortAlphabetically`` and every ``{{W|..}}``, ``{{W+|..}}``,
    ``{{W-|..}}`` or ``{{W--|..}}`` template is rewritten to ``{{W+|..}}``
    when ``page_exists`` reports the target page as existing and to
    ``{{W-|..}}`` otherwise.
    """

    # Settings read by the pywikibot bot base classes.
    summary_key = 'basic-changing'
    use_redirects = False
    update_options = {
        'summary': None,
        'text': '',
        'top': False,
    }

    def treat_page(self) -> None:
        """Sort and update the current page, then save it."""
        page = self.current_page
        pywikibot.output(f"Processing page: {page.title()}")

        # Get the page content
        page_text = page.text

        # Sort alphabetically; keep the unsorted text if nothing comes back.
        sorted_text = sortAlphabetically(page_text)
        if sorted_text is not None:
            page_text = sorted_text
        pywikibot.output("page_text hat rêzkirin")

        # Define a regex pattern to match W templates
        w_template_pattern = r"\{\{W(\+|-{1,2})?\|([^\|]+)\|([^\|\}]+)"
        matches = list(re.finditer(w_template_pattern, page_text))

        pywikibot.output("+/- dest pê kir")
        total_matches = len(matches)

        # One lookup per distinct (language, title) pair.
        existence = {}
        # The text is rebuilt around the exact match positions.  A plain
        # ``str.replace`` on the template prefix would also rewrite longer
        # titles that merely start with the same characters.
        pieces = []
        cursor = 0

        for current_match, match in enumerate(matches, start=1):
            lang_code = match.group(2)
            page_title = match.group(3)
            pywikibot.output(f"Processing match {current_match}/{total_matches} of {page.title()} +{lang_code}:{page_title}")

            # Keep the template untouched if the lookup fails.
            replacement = match.group(0)
            try:
                key = (lang_code, page_title)
                if key not in existence:
                    existence[key] = page_exists(lang_code, page_title)

                # Determine the new outcome based on page existence
                new_outcome = '+' if existence[key] else '-'

                # Construct the new template with the updated outcome
                replacement = f"{{{{W{new_outcome}|{lang_code}|{page_title}"
            except Exception as e:
                # Report and carry on with the remaining templates.
                print(f"Error processing match {current_match}: {e}")

            pieces.append(page_text[cursor:match.start()])
            pieces.append(replacement)
            cursor = match.end()

        pieces.append(page_text[cursor:])
        page_text = "".join(pieces)

        # Save the updated page content
        self.put_current(page_text, summary="Sererastkirin/rêzkirina şablonên {{[[Şablon:W-|W-]]}}, {{[[Şablon:W+|W+]]}} ([[User:Balyozxane/updateW.py|updateW]])")

def main(*args: str) -> None:
    """Parse the command line and run ``AppendTextBot``.

    Global pywikibot arguments and page-generator arguments are consumed
    first.  Of the remaining arguments, ``-summary:<value>`` and
    ``-text:<value>`` become string options (the user is prompted when the
    value is missing); any other argument becomes a boolean option set to
    ``True``.

    Args:
        args: Command-line arguments; passed on to ``pywikibot.handle_args``.
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = gen_factory.handle_args(local_args)

    options = {'text': ''}

    for arg in local_args:
        option, _, value = arg.partition(':')
        # Arguments arrive as "-name"; option names carry no dash.
        if option.startswith('-'):
            option = option[1:]
        if option in ('summary', 'text'):
            if not value:
                # Use what the user types instead of discarding it.
                value = pywikibot.input(f'Please enter a value for {option}')
            options[option] = value
        else:
            options[option] = True

    gen = gen_factory.getCombinedGenerator(preload=True)

    if not pywikibot.bot.suggest_help(missing_generator=not gen):
        bot = AppendTextBot(generator=gen, **options)
        bot.run()


if __name__ == '__main__':
    main()

#</nowiki>