#!/usr/bin/env python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""
Parses a JSON file listing the known Certificate Transparency logs
(log_list.json) and generates a C++ header file to be included in Firefox.

The current log_list.json file available under security/manager/tools
was originally downloaded from
https://www.certificate-transparency.org/known-logs
and edited to include the disqualification time for the disqualified logs using
https://cs.chromium.org/chromium/src/net/cert/ct_known_logs_static-inc.h
"""

from __future__ import print_function
from string import Template
import argparse
import base64
import datetime
import json
import os.path
import sys
import textwrap
import urllib2


# string.Template source for the generated C++ header. Placeholders:
#   $prog          - name of the generating script,
#   $include_guard - preprocessor include-guard macro,
#   $logs          - newline-joined CTLogInfo initializers,
#   $operators     - newline-joined CTLogOperatorInfo initializers.
OUTPUT_TEMPLATE = """\
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This file was automatically generated by $prog. */

#ifndef $include_guard
#define $include_guard

#include "CTLog.h"

#include <stddef.h>

struct CTLogInfo
{
  // See bug 1338873 about making these fields const.
  const char* name;
  mozilla::ct::CTLogStatus status;
  // 0 for qualified logs, disqualification time for disqualified logs
  // (in milliseconds, measured since the epoch, ignoring leap seconds).
  uint64_t disqualificationTime;
  // Index within kCTLogOperatorList.
  size_t operatorIndex;
  const char* key;
  size_t keyLength;
};

struct CTLogOperatorInfo
{
  // See bug 1338873 about making these fields const.
  const char* name;
  mozilla::ct::CTLogOperatorId id;
};

const CTLogInfo kCTLogList[] = {
$logs
};

const CTLogOperatorInfo kCTLogOperatorList[] = {
$operators
};

#endif // $include_guard
"""


def get_disqualification_time(time_str):
    """
    Parse a timestamp of the form "2017-01-01T00:00:00Z" and return the
    number of milliseconds elapsed between the Unix epoch and that instant,
    as an integer.
    The literal trailing "Z" is required; any other timezone designator does
    not match the format and makes strptime raise ValueError.
    """
    parsed = datetime.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%SZ")
    # Both values are naive datetimes, so the difference is a plain timedelta.
    elapsed = parsed - datetime.datetime(1970, 1, 1)
    return int(elapsed.total_seconds() * 1000)


def get_hex_lines(blob, width):
    """
    Convert a binary string to a list of lines of C escape sequences.

    blob may be a Python 2 str, a Python 3 bytes object or a bytearray.
    width is the maximum line length; it is rounded down to a multiple of 4
    so that an escaped byte is never split across two lines.
    Returns a list of strings (empty for an empty blob).
    Raises ValueError if width is smaller than 4.
    """
    # When escaped, a single byte takes 4 chars (e.g. "\x00").
    line_length = width - width % 4
    if line_length <= 0:
        raise ValueError(
            "width must be at least 4 to fit one escaped byte "
            "(got {0})".format(width))
    # Iterating bytes/bytearray yields ints on Python 3 but 1-char strings
    # on Python 2, so only call ord() on items that are not ints already.
    text = "".join(
        "\\x{:02x}".format(item if isinstance(item, int) else ord(item))
        for item in blob)
    return [text[start:start + line_length]
            for start in range(0, len(text), line_length)]


def get_operator_and_index(json_data, operator_id):
    """
    Look up the operator whose "id" equals operator_id in
    json_data["operators"] and return it as an (operator, index) tuple,
    where index is the operator's position in that array.
    Asserts that exactly one operator carries the requested id.
    """
    found = []
    for position, candidate in enumerate(json_data["operators"]):
        if candidate["id"] == operator_id:
            found.append((candidate, position))
    assert len(found) != 0, "No operators with id {0} defined.".format(
        operator_id)
    assert len(found) == 1, "Found multiple operators with id {0}.".format(
        operator_id)
    return found[0]


def get_log_info_structs(json_data):
    """
    Return a list of C++ CTLogInfo initializer strings, one for each entry
    of json_data["logs"], in input order.

    Every log must list exactly one operator in "operated_by", and that
    operator must be defined in json_data["operators"]. Logs that carry a
    "disqualification_time" are emitted with the Disqualified status and
    that time in milliseconds since the epoch; all other logs are emitted
    as Included with a disqualification time of 0. Logs whose operator is
    flagged "test_only" are wrapped in #ifdef DEBUG / #endif.
    """
    tmpl = Template(textwrap.dedent("""\
          { $description,
            $status,
            $disqualification_time, // $disqualification_time_comment
            $operator_index, // $operator_comment
        $indented_log_key,
            $log_key_len }"""))
    initializers = []
    for log in json_data["logs"]:
        # base64.decodestring is deprecated and was removed in Python 3.9;
        # b64decode is available on Python 2 and 3 and likewise ignores the
        # whitespace embedded in multi-line keys.
        log_key = base64.b64decode(log["key"])
        # "operated_by" is a list, we assume here it always contains one item.
        operated_by = log["operated_by"]
        assert len(operated_by) == 1, "operated_by must contain one item."
        operator, operator_index = get_operator_and_index(json_data,
                                                          operated_by[0])
        if "disqualification_time" in log:
            status = "mozilla::ct::CTLogStatus::Disqualified"
            disqualification_time = get_disqualification_time(
                log["disqualification_time"])
            disqualification_time_comment = 'Date.parse("{0}")'.format(
                log["disqualification_time"])
        else:
            status = "mozilla::ct::CTLogStatus::Included"
            disqualification_time = 0
            disqualification_time_comment = "no disqualification time"
        if operator.get("test_only"):
            # Test logs must only be compiled into debug builds.
            prefix = "#ifdef DEBUG\n"
            suffix = ",\n#endif // DEBUG"
        else:
            prefix = ""
            suffix = ","
        # The comment must not contain "/".
        operator_comment = "operated by {0}".format(
            operator["name"]).replace("/", "|")
        # Maximum line width is 80.
        indented_log_key = "\n".join(
            '    "{0}"'.format(line) for line in get_hex_lines(log_key, 74))
        toappend = tmpl.substitute(
            # Use json.dumps for C-escaping strings.
            # Not perfect but close enough.
            description=json.dumps(log["description"]),
            operator_index=operator_index,
            operator_comment=operator_comment,
            status=status,
            disqualification_time=disqualification_time,
            disqualification_time_comment=disqualification_time_comment,
            indented_log_key=indented_log_key,
            log_key_len=len(log_key))
        initializers.append(prefix + toappend + suffix)
    return initializers


def get_log_operator_structs(json_data):
    """
    Build one CTLogOperatorInfo initializer string per entry of
    json_data["operators"] and return them as a list, in input order.
    Operators flagged "test_only" are wrapped in #ifdef DEBUG / #endif.
    """
    entry_template = Template("  { $name, $id }")

    def render(operator_entry):
        # json.dumps doubles as a C string-literal escaper for the name.
        body = entry_template.substitute(
            name=json.dumps(operator_entry["name"]),
            id=operator_entry["id"])
        if "test_only" in operator_entry and operator_entry["test_only"]:
            return "#ifdef DEBUG\n" + body + ",\n#endif // DEBUG"
        return body + ","

    return [render(entry) for entry in json_data["operators"]]


def generate_cpp_header_file(json_data, out_file):
    """
    Render the C++ header for the known logs in json_data and write it to
    out_file. The include guard is derived from the base name of
    out_file.name.
    """
    header_name = os.path.basename(out_file.name)
    guard_macro = header_name.replace(".", "_").replace("/", "_")
    logs_text = "\n".join(get_log_info_structs(json_data))
    operators_text = "\n".join(get_log_operator_structs(json_data))
    header_template = Template(OUTPUT_TEMPLATE)
    substitutions = {
        "prog": os.path.basename(sys.argv[0]),
        "include_guard": guard_macro,
        "logs": logs_text,
        "operators": operators_text,
    }
    out_file.write(header_template.substitute(substitutions))

def patch_in_test_logs(json_data):
    """
    Append the Mozilla-specific test operators and test logs to json_data,
    modifying it in place. The two test operators get ids directly above the
    largest id already present (never below 1) and are marked "test_only".
    """
    highest_id = max(
        [0] + [operator["id"] for operator in json_data["operators"]])
    first_test_id = highest_id + 1
    second_test_id = highest_id + 2
    json_data["operators"].extend([
        {"name": "Mozilla Test Org 1", "id": first_test_id,
         "test_only": True},
        {"name": "Mozilla Test Org 2", "id": second_test_id,
         "test_only": True},
    ])

    # The easiest way to get this is
    # `openssl x509 -noout -pubkey -in <path/to/default-ee.pem>`
    rsa_key_1 = """
            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAuohRqESOFtZB/W62iAY2
            ED08E9nq5DVKtOz1aFdsJHvBxyWo4NgfvbGcBptuGobya+KvWnVramRxCHqlWqdF
            h/cc1SScAn7NQ/weadA4ICmTqyDDSeTbuUzCa2wO7RWCD/F+rWkasdMCOosqQe6n
            cOAPDY39ZgsrsCSSpH25iGF5kLFXkD3SO8XguEgfqDfTiEPvJxbYVbdmWqp+ApAv
            OnsQgAYkzBxsl62WYVu34pYSwHUxowyR3bTK9/ytHSXTCe+5Fw6naOGzey8ib2nj
            tIqVYR3uJtYlnauRCE42yxwkBCy/Fosv5fGPmRcxuLP+SSP6clHEMdUDrNoYCjXt
            jQIDAQAB
        """
    # Similarly,
    # `openssl x509 -noout -pubkey -in <path/to/other-test-ca.pem>`
    rsa_key_2 = """
            MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAwXXGUmYJn3cIKmeR8bh2
            w39c5TiwbErNIrHL1G+mWtoq3UHIwkmKxKOzwfYUh/QbaYlBvYClHDwSAkTFhKTE
            SDMF5ROMAQbPCL6ahidguuai6PNvI8XZgxO53683g0XazlHU1tzSpss8xwbrzTBw
            7JjM5AqlkdcpWn9xxb5maR0rLf7ISURZC8Wj6kn9k7HXU0BfF3N2mZWGZiVHl+1C
            aQiICBFCIGmYikP+5Izmh4HdIramnNKDdRMfkysSjOKG+n0lHAYq0n7wFvGHzdVO
            gys1uJMPdLqQqovHYWckKrH9bWIUDRjEwLjGj8N0hFcyStfehuZVLx0eGR1xIWjT
            uwIDAQAB
        """
    # `openssl x509 -noout -pubkey -in <path/to/root_secp256r1_256.pem>`
    ec_key = """
            MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAET7+7u2Hg+PmxpgpZrIcE4uwFC0I+
            PPcukj8sT3lLRVwqadIzRWw2xBGdBwbgDu3I0ZOQ15kbey0HowTqoEqmwA==
        """

    test_logs = (
        ("Mozilla Test RSA Log 1", rsa_key_1, first_test_id),
        ("Mozilla Test RSA Log 2", rsa_key_2, second_test_id),
        ("Mozilla Test EC Log", ec_key, first_test_id),
    )
    for description, key, operator_id in test_logs:
        json_data["logs"].append({"description": description,
                                  "key": key,
                                  "operated_by": [operator_id]})

def run(args):
    """
    Load the input JSON file and generate the C++ header according to the
    command line arguments.

    args must provide the attributes "file" (path of a local JSON file),
    "url" (URL to download the JSON from) and "out" (path of the header to
    write). "file" takes precedence when both are set.
    Raises ValueError if neither "file" nor "url" is set to a non-empty
    value.
    """
    if args.file:
        print("Reading file: ", args.file)
        with open(args.file, "rb") as json_file:
            json_text = json_file.read()
    elif args.url:
        print("Fetching URL: ", args.url)
        json_request = urllib2.urlopen(args.url)
        try:
            json_text = json_request.read()
        finally:
            json_request.close()
    else:
        # Without this guard an empty --file/--url value would surface as an
        # obscure UnboundLocalError on json_text below.
        raise ValueError("Either an input file or a URL must be specified.")

    json_data = json.loads(json_text)

    print("Writing output: ", args.out)

    # Add the Mozilla test operators/logs before rendering the header.
    patch_in_test_logs(json_data)

    with open(args.out, "w") as out_file:
        generate_cpp_header_file(json_data, out_file)

    print("Done.")


def parse_arguments_and_run():
    """
    Parse the command line arguments and run the program.

    Exactly one of --file or --url must be given; --file may be passed
    without a value, in which case log_list.json is read. --out selects the
    header file to generate.
    """
    arg_parser = argparse.ArgumentParser(
        description="Parses a JSON file listing the known "
        "Certificate Transparency logs and generates "
        "a C++ header file to be included in Firefox.",
        # --url requires a value, so the example uses --file, which is a
        # complete, valid invocation.
        epilog="Example: python %s --file log_list.json"
        % os.path.basename(sys.argv[0]))

    source_group = arg_parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--file", nargs="?",
                              const="log_list.json",
                              help="Read the known CT logs JSON data from the "
                              "specified local file (%(const)s by default).")
    source_group.add_argument("--url",
                              help="Download the known CT logs JSON file "
                              "from the specified URL.")

    arg_parser.add_argument("--out",
                            default="../../certverifier/CTKnownLogs.h",
                            help="Path and filename of the header file "
                            "to be generated. Defaults to %(default)s")

    run(arg_parser.parse_args())


# Run the generator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    parse_arguments_and_run()
