Source code for yael.util

#!/usr/bin/env python
# coding=utf-8

"""
Common utility (static) functions.
"""

import hashlib
import os
import re

from yael.obfuscation import Obfuscation

__author__ = "Alberto Pettarin"
__copyright__ = "Copyright 2015, Alberto Pettarin (www.albertopettarin.it)"
__license__ = "MIT"
__version__ = "0.0.6"
__email__ = "alberto@albertopettarin.it"
__status__ = "Development"

#: pattern to match a whole viewport value of the form `width=W, height=H`;
#: group 1 captures W and group 2 captures H. Each captured value is a
#: (possibly empty) run of digits and dots, and may be followed by any run
#: of spaces and the letters `p`/`x` (so that a `px` unit is accepted).
VP_PATTERN_WH = re.compile(r"^width[ ]*=[ ]*([0-9\.]*)[ px]*,[ ]*height[ ]*=[ ]*([0-9\.]*)[ px]*$")

#: pattern to match a whole viewport value of the form `height=H, width=W`;
#: same as above with the two fields swapped, hence
#: group 1 captures H and group 2 captures W.
VP_PATTERN_HW = re.compile(r"^height[ ]*=[ ]*([0-9\.]*)[ px]*,[ ]*width[ ]*=[ ]*([0-9\.]*)[ px]*$")

def directory_size(path):
    """
    Compute the total size, in bytes,
    of all the files in the filesystem tree
    rooted at the given directory.

    Entries whose size cannot be read (e.g., dangling symbolic links,
    or files removed while the tree is being walked) are skipped
    instead of aborting the whole computation.

    :param path: the path of the root directory
    :type  path: str
    :returns:    the total size in bytes of the subtree rooted at `path`
    :rtype:      int

    """

    total = 0
    for dir_path, unused_dir_names, file_names in os.walk(path):
        for file_name in file_names:
            try:
                total += os.path.getsize(os.path.join(dir_path, file_name))
            except OSError:
                # os.walk lists dangling symlinks among the files,
                # but their size cannot be read: ignore them
                continue
    return total

def list_all_files(path):
    """
    List all files in the filesystem tree
    rooted at the given directory.

    :param path: the path to the root directory
    :type  path: str
    :returns:    the list of files in the subtree rooted at `path`
                 (each one prefixed by `path`), or an empty list
                 if `path` is None or it is not an existing directory
    :rtype:      list of str
    """

    # isdir() is False for non-existing paths, so no separate exists() check
    if (path is None) or (not os.path.isdir(path)):
        return []

    accumulator = []
    for dir_path, unused_dir_names, file_names in os.walk(path):
        accumulator.extend(
            os.path.join(dir_path, file_name) for file_name in file_names
        )
    return accumulator

def norm_join(path1, path2):
    """
    Join the two given paths and normalize the result.

    :param path1: prefix path
    :type  path1: str
    :param path2: suffix path
    :type  path2: str
    :returns:     the join of the two paths, normalized
                  (e.g., "a/b" and "../c" => "a/c"),
                  or None if either argument is None
    :rtype:       str

    """

    if (path1 is None) or (path2 is None):
        return None
    return os.path.normpath(os.path.join(path1, path2))

def norm_join_parent(path1, path2):
    """
    Join the parent directory of path1 with path2 and normalize the result.

    :param path1: prefix path
    :type  path1: str
    :param path2: suffix path
    :type  path2: str
    :returns:     the join of the parent directory of path1 with path2,
                  normalized (e.g., "a/../b/c" => "b/c"),
                  or None if either argument is None
    :rtype:       str

    """

    if (path1 is None) or (path2 is None):
        return None
    # both arguments are known not to be None here,
    # so join and normalize directly
    return os.path.normpath(os.path.join(os.path.dirname(path1), path2))


def safe_strip(string):
    """
    Strip the given string, dealing safely with None arguments.

    :param string: the string to be stripped
    :type  string: str
    :returns:      the stripped string (or None if string is None)
    :rtype:        str

    """

    if string is None:
        return None
    return string.strip()


def safe_first(lis):
    """
    Return the first element of the list,
    dealing safely with None or empty arguments.

    :param lis: a list of objects or values
    :type  lis: list of object
    :returns:   the first element of the list (or None if lis is None or empty)
    :rtype:     object or value

    """

    # identity test: `== None` would invoke the __eq__ of arbitrary objects
    if (lis is None) or (len(lis) < 1):
        return None
    return lis[0]


def safe_len(lis):
    """
    Return the number of elements of the list,
    dealing safely with non-list arguments.

    :param lis: a list of objects or values
    :type  lis: list of object
    :returns:   the length of the list
                (or -1 if the length of lis cannot be computed)
    :rtype:     int
    """

    try:
        return len(lis)
    except Exception:
        # best effort: any failure of len() maps to -1, while
        # KeyboardInterrupt and SystemExit are allowed to propagate
        return -1


def query_xpath(obj, query, args, nsp, required=None, formatted_query=None):
    """
    Perform an xpath query on an XML (`lxml`) node `obj`.

    The `query` template will be formatted using `args`
    (with `str.format`, hence this works only in Python 2.6+),
    and the resulting query is run with the namespaces `nsp`.

    If `required` is not None and the result is empty,
    raise an exception.

    If `formatted_query` is passed,
    use it instead of formatting `query` with `args`.
    (Useful if working in Python <2.6.)

    :param obj:             the XML (`lxml`) node object
    :type  obj:             object
    :param query:           a string template to be formatted with args
    :type  query:           str
    :param args:            a list of arguments to format the query
                            (None is treated as an empty list)
    :type  args:            list of str
    :param nsp:             namespace dictionary,
                            mapping prefixes to namespace strings
    :type  nsp:             dict
    :param required:        name of the required element,
                            used only in the error message
    :type  required:        str
    :param formatted_query: a pre-formatted query
    :type  formatted_query: str
    :returns:               the matched XML node objects
    :rtype:                 list of object
    :raises Exception:      if `required` is not None and nothing matched

    """

    if formatted_query is None:
        # tolerate args=None for templates without placeholders
        xpath_query = query.format(*(args or []))
    else:
        xpath_query = formatted_query
    result = obj.xpath(xpath_query, namespaces=nsp)

    if (required is not None) and (len(result) < 1):
        raise Exception("Cannot find '%s' element" % required)

    return result


def parse_viewport_string(string):
    """
    Parse the given viewport value and return
    the corresponding dictionary {"width": W, "height": H}.

    Both orders (`width=W, height=H` and `height=H, width=W`)
    are accepted. W and H are returned as the matched substrings,
    without converting them to numbers.

    :param string: a viewport value
    :type  string: str
    :returns:      {"width": W, "height": H} or None if `string` is not valid
    :rtype:        dict

    """

    if string is None:
        return None

    match = VP_PATTERN_WH.match(string)
    if match is not None:
        return {"width": match.group(1), "height": match.group(2)}

    # in the height-first pattern the two groups are swapped
    match = VP_PATTERN_HW.match(string)
    if match is not None:
        return {"width": match.group(2), "height": match.group(1)}

    return None


def split_reference(string):
    """
    Split the given reference (BASE#F) and return
    a dictionary {"base": BASE, "fragment": F}.

    If there is no fragment, return {"base": BASE, "fragment": None}.

    If the string is None, or it contains more than one "#",
    return {}.

    :param string: a reference
    :type  string: str
    :returns:      a dictionary containing base and fragment id
    :rtype:        dict

    """

    # TODO improve this: references with more than one "#" are rejected
    if string is not None:
        parts = string.split("#")
        if len(parts) == 2:
            return {"base": parts[0], "fragment": parts[1]}
        if len(parts) == 1:
            return {"base": parts[0], "fragment": None}
    return {}


def is_valid(obj, allowed_class, single=True):
    """
    If `single` is True, return True if
    `obj` is an instance of `allowed_class` or None.

    If `single` is False, return True if
    `obj` is a list (possibly, empty) of instances of `allowed_class`.

    :param obj:           the object to be checked
    :type  obj:           object or list
    :param allowed_class: the allowed class
    :type  allowed_class: class
    :param single:        if True, obj must be a single object
    :type  single:        bool
    :returns:             whether the given object is valid
    :rtype:               bool

    """

    if single:
        # identity test: an object whose __eq__ claims equality with None
        # must not be accepted as if it were None
        return (obj is None) or isinstance(obj, allowed_class)

    if not isinstance(obj, list):
        return False
    # all() is True for an empty list
    return all(isinstance(element, allowed_class) for element in obj)


def obfuscate_data(data, key, algorithm):
    """
    Obfuscate/deobfuscate data with the given key and algorithm.

    The first `outer_max * inner_max` bytes of `data`
    (64 * 16 = 1024 for "adobe", 52 * 20 = 1040 for "idpf")
    are XORed with the key bytes; the remaining bytes are copied unchanged.
    Since XOR is its own inverse, the same call obfuscates and deobfuscates.

    Key bytes:

    * "adobe": the characters of `key`, after removing
      "urn:uuid:", "-" and ":";
    * "idpf": the SHA-1 digest of `key`, after removing
      space, tab, carriage return, and line feed characters.

    :param data:      the data to be obfuscated/deobfuscated
    :type  data:      bytes
    :param key:       the string to be used as the obfuscation key
    :type  key:       str
    :param algorithm: the algorithm to be used ("adobe" or "idpf")
    :type  algorithm: str
    :returns:         the obfuscated/deobfuscated data,
                      or None if `algorithm` is not recognized
    :rtype:           bytes
    :raises ZeroDivisionError: if the cleaned "adobe" key is empty
                               and `data` is not empty
    """

    if algorithm == Obfuscation.ADOBE:
        outer_max = 64
        inner_max = 16
        clean_key = key
        clean_key = clean_key.replace(u"urn:uuid:", "") # TODO check this
        clean_key = clean_key.replace(u"-", "")
        clean_key = clean_key.replace(u":", "")
        # NOTE(review): the characters of the cleaned key are used as
        # key bytes; the Adobe scheme presumably expects the hex-decoded
        # UUID bytes instead -- confirm against the specification
        digest = clean_key
        if not isinstance(digest, bytes):
            # text key (Python 3 str, Python 2 unicode):
            # bytearray() below needs bytes
            digest = digest.encode("utf-8")
    elif algorithm == Obfuscation.IDPF:
        outer_max = 52
        inner_max = 20
        clean_key = key
        clean_key = clean_key.replace(u"\u0020", "")
        clean_key = clean_key.replace(u"\u0009", "")
        clean_key = clean_key.replace(u"\u000d", "")
        clean_key = clean_key.replace(u"\u000a", "")
        if not isinstance(clean_key, bytes):
            # hashlib only accepts bytes
            clean_key = clean_key.encode("utf-8")
        digest = hashlib.sha1(clean_key).digest()
    else:
        return None

    # bytearray() yields integers when indexed on both Python 2 and 3,
    # and the copy of data can be modified in place
    result = bytearray(data)
    key_data = bytearray(digest)
    key_size = len(key_data)

    # only the head of the data is obfuscated; the key restarts
    # from its first byte every inner_max bytes
    obfuscated_size = min(len(result), outer_max * inner_max)
    for i in range(obfuscated_size):
        result[i] ^= key_data[(i % inner_max) % key_size]
    return bytes(result)


def clip_time_seconds(string):
    """
    Convert the given clip time string in seconds
    (possibly with decimal digits).

    Accepted forms are a number followed by a unit
    ("ms", "s", "h", or "min", e.g. "500ms" or "5.5s"),
    or a clock value "[[H:]M:]S[.D]" (e.g. "00:01:02.500").

    :param string: the clip time string to be converted
    :type  string: str
    :returns:      the clip time in seconds
                   (0 if string is None or empty)
    :rtype:        float
    :raises ValueError: if a numeric field cannot be parsed
    """

    if (string is None) or (len(string) < 1):
        return 0

    # units are tested in this order: "ms" must be tried before "s",
    # since a value in milliseconds also contains an "s"
    for unit, factor in (("ms", 0.001), ("s", 1), ("h", 3600), ("min", 60)):
        if unit in string:
            return float(string.replace(unit, "")) * factor

    # clock value: optional decimal digits after the first "."
    str_hms, unused_separator, str_d = string.partition(".")
    v_d = 0
    if len(str_d) > 0:
        v_d = 1.0 * int(str_d) / (10 ** len(str_d))

    # only the last three ":"-separated fields are meaningful (H, M, S);
    # missing leading fields count as zero
    fields = [int(field) for field in str_hms.split(":")[-3:]]
    v_h, v_m, v_s = [0] * (3 - len(fields)) + fields
    return v_h * 3600 + v_m * 60 + v_s + v_d