201309/26

serializer.py

Written by me this afternoon, this script traverses a directory structure and strings all of the supported files together into a single plaintext file – essentially, a compressionless archive. Under normal circumstances, no one should ever need this…

Source

import datetime
import os
import re
import sys

# ----------------------------------- CONSTANTS -----------------------------------
SUPPORTED_EXTENSIONS=(
    "css", "js", "htm", "html", "xhtm", "xhtml",
    "php", "py",
    "txt", "xml",
    "htaccess", "config", "ini"
    )
# ---------------------------------------------------------------------------------


"""
Recurses through a directory tree and serializes all supported files contained
within

@param string path
"""
def read_directory(path):

    # Keep some metrics
    files_processed = 0
    bytes_read      = 0

    # Header
    path = os.path.abspath(path)
    print("$ serializer {0:s} {1:%Y-%m-%d %X} $".format(path, datetime.datetime.now()))

    # Iterate over all directories below this
    for current_directory, subdirectories, files in os.walk(path):

        # Skip hidden directories
        if re.match("^\..+$", os.path.basename(current_directory)):
            continue

        # Iterate over all files at the current directory
        for filename in files:
            # Gather information about the current file
            absolute_path_to_file = os.path.abspath("{0}/{1}".format(current_directory, filename))
            path_to_file          = os.path.relpath(absolute_path_to_file)
            file_extension        = filename.split(".")[-1]

            # Clean up the relative path
            path_to_file = path_to_file.replace("\\", "/")
            path_to_file = re.sub("^\.+/", "", path_to_file)

            # Only process files with the proper extensions
            if file_extension in SUPPORTED_EXTENSIONS:
                fp = open(absolute_path_to_file, "r")
                file_contents = fp.read().strip()
                fp.close()

                bytes_read += len(file_contents)
                files_processed += 1
                print("#### START {0}\n{1}\n#### END {0}".format(path_to_file, file_contents))

    # Print metrics
    print()
    print("{:8d} files processed".format(files_processed))
    print("{:8d} bytes read".format(bytes_read))



"""
Main Routine
"""
def main():
    target = "."

    # Attempt to resolve any directory given as an argument
    if len(sys.argv) > 1:
        target = os.path.relpath( sys.argv[1] )

    if os.path.isdir(target):
        read_directory(target)
    else:
        print("{} is not a valid directory".format(target))
        exit(1)


# Execute Main Routine
if "__main__" == __name__:
    main()

serializer.py

Source

Tagged