/home/alex/dev/sync2mw.py (1)

From RaySoft
#!/usr/local/bin/python3
# ------------------------------------------------------------------------------
# sync2mw.py
# ==========
#
# Scope     Native
# Copyright (C) 2024 by RaySoft, Zurich, Switzerland
# License   GNU General Public License (GPL) 2.0
#           https://www.gnu.org/licenses/gpl2.txt
#
# ------------------------------------------------------------------------------

# Default path of the JSON configuration file; it can be overridden on the
# command line with -c/--config.
CONFIG_FILE = '/home/alex/dev/sync2mw.json'

# Host name of the MediaWiki site; the connection is opened with HTTPS.
WIKI_URL = 'www.raysoft.ch'
# Credentials used to log in to the wiki.
# NOTE(review): the password lives in the source file; consider reading it
# from the environment or from a protected file instead.
WIKI_USER = 'alex'
WIKI_PASSWD = '******************************'

# ------------------------------------------------------------------------------

# Program name and version as reported by -V/--version.
PROGRAM_NAME = 'sync2mw'
PROGRAM_VERSION = '0.2'

# ------------------------------------------------------------------------------

from argparse import ArgumentParser, FileType
from json import load
# from logging import basicConfig, DEBUG
from mwclient import LoginError, MwClientError, Site
from os.path import isfile
from re import compile, escape, search, sub, IGNORECASE
from sys import exit, stderr

# ------------------------------------------------------------------------------

def check_unique_json_names(pairs):
    """Build a dict from JSON key/value pairs and reject duplicate keys.

    Intended as ``object_pairs_hook`` for ``json.load``: the decoder calls it
    once per JSON object with the list of ``(key, value)`` pairs in document
    order.

    Args:
        pairs: Iterable of ``(key, value)`` tuples of one JSON object.

    Returns:
        A dict holding all pairs.

    Raises:
        ValueError: If the same key appears more than once in *pairs*.
    """
    # Do not call this 'dict': that would hide the builtin inside the function.
    unique = {}

    for key, value in pairs:
        if key in unique:
            raise ValueError(f'Duplicate key {key} in json document')

        unique[key] = value

    return unique

# ------------------------------------------------------------------------------

# Top-level JSON sections which carry settings instead of describing a file.
_RESERVED_SECTIONS = ('self', 'global')

# ------------------------------------------------------------------------------

def _select_tags(json_data, patterns):
    """Return the tags whose local or remote name contains a pattern.

    Every tag is returned at most once, even if several patterns match it.
    A warning is written to stderr for each pattern without any match.
    """
    selected = []

    for pattern in patterns:
        match = False

        for tag in json_data:
            if tag in _RESERVED_SECTIONS:
                continue

            # Patterns are literal text, so a substring test is sufficient.
            if pattern in json_data[tag]['local'] \
               or pattern in json_data[tag]['remote']:
                match = True

                if tag not in selected:
                    selected.append(tag)

        if not match:
            print(f"Warning: Pattern '{pattern}' doesn't match any file!",
                  file=stderr)

    return selected

# ------------------------------------------------------------------------------

def _read_local_content(entry):
    """Read the local file of a config entry, limited to its 'lines' range."""
    with open(entry['local']) as file:
        lines = file.readlines()

    if 'lines' in entry:
        start_line, end_line = entry['lines'].split(':')

        # 'FROM:TO' is 1-based and inclusive, slices are 0-based and exclusive
        lines = lines[int(start_line) - 1:int(end_line)]

    return ''.join(lines)

# ------------------------------------------------------------------------------

def _apply_filters(content, json_data, tag):
    """Apply the global filters and then the filters of *tag* to *content*."""
    for section in ('global', tag):
        # The 'global' section is optional, a missing one means "no filters"
        for rule in json_data.get(section, {}).get('filters', []):
            flags = 0

            # The optional third element holds the flags, 'i' = ignore case
            if len(rule) == 3 and 'i' in rule[2]:
                flags = IGNORECASE

            content = sub(rule[0], rule[1], content, flags=flags)

    return content

# ------------------------------------------------------------------------------

def _merge_page(remote_text, tag, local_content):
    """Replace the text between the two ID tags of a page by *local_content*.

    The line directly after the opening tag and the line directly before the
    closing tag are kept, everything between them is replaced.

    Raises:
        ValueError: If the opening or the closing ID tag can't be found. The
            page must not be saved in this case, otherwise everything after
            the opening tag would be lost.
    """
    marker = '<!-- sync2mw id="' + tag + '" -->'
    lines = remote_text.split('\n')

    start = next(
        (index for index, line in enumerate(lines) if marker in line), None,
    )

    # The opening tag must be followed by one kept line plus further content
    if start is None or start + 2 >= len(lines):
        raise ValueError("Can't find ID tag in remote file!")

    body_start = start + 2

    end = next(
        (index for index in range(body_start, len(lines))
         if marker in lines[index]),
        None,
    )

    if end is None:
        raise ValueError("Can't find closing ID tag in remote file!")

    # Line in front of the closing tag, empty if there is none to keep
    last_line = lines[end - 1] if end > body_start else ''

    header = ''.join(line + '\n' for line in lines[:body_start])
    footer = last_line + '\n' + lines[end] + '\n' + '\n'.join(lines[end + 1:])

    return header + local_content.strip() + '\n' + footer

# ------------------------------------------------------------------------------

def main():
    """Sync the configured local files to their MediaWiki pages.

    Parses the command line, loads the JSON configuration, selects the tags to
    process (all of them or those matching the given patterns), logs in to the
    wiki and replaces the text between the ID tags of every remote page by the
    filtered content of the local file. With --dry-run the resulting page text
    is printed instead of being saved.

    Returns:
        1 if the configuration can't be parsed or the connection / login
        fails, otherwise 0. Errors of single tags are reported on stderr and
        the tag is skipped.
    """
    # basicConfig(level=DEBUG)

    parser = ArgumentParser(
        description='Sync a local file to a MediaWiki page.', prog=PROGRAM_NAME,
    )
    parser.add_argument(
        '-V', '--version', action='version',
        version=f'%(prog)s {PROGRAM_VERSION}',
    )
    parser.add_argument(
        '-c', '--config', metavar='FILE', dest='config', action='store',
        type=FileType(mode='r'), default=CONFIG_FILE,
        help=f"use alternative config file (default is '{CONFIG_FILE}')",
    )
    parser.add_argument(
        '-d', '--dry-run', dest='dry_run', action='store_true',
        help='print the page to stdout instead to the wiki page',
    )

    group1 = parser.add_mutually_exclusive_group(required=True)
    group1.add_argument(
        '-a', '--all', dest='all', action='store_true',
        help='process all files which are listed in the JSON file',
    )
    group1.add_argument(
        '-p', '--pattern', metavar='PATTERN', nargs='+', dest='pattern',
        action='append', help='process all files which matches a pattern',
    )

    args = parser.parse_args()

    try:
        # argparse has already opened the file, close it after parsing
        with args.config:
            json_data = load(args.config,
                             object_pairs_hook=check_unique_json_names)
    except (TypeError, ValueError, OverflowError) as error:
        print("Error: Can't parse JSON file!", file=stderr)
        print(error, file=stderr)
        return 1

    if args.all:
        # Reserved sections describe no file and must not be processed
        tag_list = [tag for tag in json_data if tag not in _RESERVED_SECTIONS]
    else:
        # Transform a two dimensional array into an one dimensional
        patterns = [item for sublist in args.pattern for item in sublist]

        tag_list = _select_tags(json_data, patterns)

    try:
        site = Site(WIKI_URL, scheme='https')
    except MwClientError as error:
        print("Error: Can't connect to the remote site!", file=stderr)
        print(error, file=stderr)
        return 1

    try:
        site.login(WIKI_USER, WIKI_PASSWD)
    except LoginError as error:
        print("Error: Can't log in to the remote site", file=stderr)
        print(error, file=stderr)
        return 1

    for tag in tag_list:
        entry = json_data[tag]

        print()
        print(f"Process tag '{tag}':")
        print(f"Local:  {entry['local']}")
        print(f"Remote: {entry['remote']}")

        if not isfile(entry['local']):
            print("Error: Can't find the local file!", file=stderr)
            print()
            continue

        page = site.Pages[entry['remote']]

        if not page.exists:
            print("Error: Can't find the remote file!", file=stderr)
            print()
            continue

        if 'process' in entry and entry['process'] == 'never':
            print('The file is set to be never processed!')
            continue

        local_content = _read_local_content(entry)

        # The warnings refer to the unfiltered content of the local file
        if search(r' +\n', local_content):
            print("Warning: The local file contains spaces at the end of line!",
                  file=stderr)

        if search(r'\t', local_content):
            print("Warning: The local file contains tabs!", file=stderr)

        local_content = _apply_filters(local_content, json_data, tag)

        remote_text = page.text(cache=False)

        try:
            wiki_text = _merge_page(remote_text, tag, local_content)
        except ValueError as error:
            print(f'Error: {error}', file=stderr)
            print()
            continue

        if args.dry_run:
            print(wiki_text)
        else:
            status = page.save(wiki_text, summary='Updated by sync2mw.py',
                               bot=False)

            if 'result' in status and status['result'] != 'Success':
                print("Error: Can't upload content!", file=stderr)
                print()
                continue
            elif 'nochange' in status:
                print('No changes uploaded')
            else:
                # Drop the server side cache so the new content gets rendered
                page.purge()

                print('Page changed successfully')

    return 0

# ------------------------------------------------------------------------------

if __name__ == '__main__':
    # Run the script and hand the status code of main() to the caller.
    exit(main())

Usage

Create a configuration file using the following template
{
  "TAG": {
    "local": "PATH",
    "remote": "PAGE NAME",
    "lines": "FROM:TO",
    "filters": [
      ["SEARCH", "REPLACE", "FLAGS"]
    ],
    "process": "FREQUENCY"
  }
}
  • TAG: Unique identifier (Mandatory)
  • local: Path to the local file (Mandatory)
  • remote: Name of the MediaWiki page (Mandatory)
  • lines: Range of lines of the local file to upload, written as FROM:TO (1-based, both lines included); without it the whole file is used
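  • filters: List of [SEARCH, REPLACE, FLAGS] regular-expression substitutions applied to the local content before upload, e.g. to remove confidential information. FLAGS is optional; "i" makes the search case-insensitive. A reserved top-level "global" entry may carry filters that are applied to every file.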
  • process: Frequency to update the remote page:
    • never
    • always (Default)
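Include the tag line twice in the remote page, once as start tag and once as end tag. The line directly after the start tag and the line directly before the end tag (e.g. the opening and closing markup of a code block) are kept; everything between them is replaced by the content of the local file.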
<!-- sync2mw id="TAG" -->
Run the script
~/dev/sync2mw.py --pattern='sync2mw.py'

Output:

Process tag 'gI*X5u':
Local:  /home/alex/dev/sync2mw.py
Remote: /home/alex/dev/sync2mw.py (1)
Page changed successfully