/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "public/web/WebFrameSerializer.h"

#include "core/HTMLNames.h"
#include "core/dom/Document.h"
#include "core/dom/Element.h"
#include "core/frame/Frame.h"
#include "core/frame/FrameSerializer.h"
#include "core/frame/LocalFrame.h"
#include "core/frame/RemoteFrame.h"
#include "core/html/HTMLAllCollection.h"
#include "core/html/HTMLFrameElementBase.h"
#include "core/html/HTMLFrameOwnerElement.h"
#include "core/html/HTMLInputElement.h"
#include "core/html/HTMLTableElement.h"
#include "core/loader/DocumentLoader.h"
#include "platform/Histogram.h"
#include "platform/SerializedResource.h"
#include "platform/SharedBuffer.h"
#include "platform/mhtml/MHTMLArchive.h"
#include "platform/mhtml/MHTMLParser.h"
#include "platform/network/ResourceRequest.h"
#include "platform/network/ResourceResponse.h"
#include "platform/tracing/TraceEvent.h"
#include "platform/weborigin/KURL.h"
#include "public/platform/WebString.h"
#include "public/platform/WebURL.h"
#include "public/platform/WebURLResponse.h"
#include "public/platform/WebVector.h"
#include "public/web/WebDataSource.h"
#include "public/web/WebDocument.h"
#include "public/web/WebFrame.h"
#include "public/web/WebFrameSerializerCacheControlPolicy.h"
#include "public/web/WebFrameSerializerClient.h"
#include "web/WebFrameSerializerImpl.h"
#include "web/WebLocalFrameImpl.h"
#include "web/WebRemoteFrameImpl.h"
#include "wtf/Assertions.h"
#include "wtf/HashMap.h"
#include "wtf/HashSet.h"
#include "wtf/Noncopyable.h"
#include "wtf/Vector.h"
#include "wtf/text/StringConcatenate.h"

namespace blink {

namespace {

// Adapter that lets FrameSerializer (core) consult an embedder-supplied
// WebFrameSerializer::MHTMLPartsGenerationDelegate (public API) while a frame
// is being serialized for MHTML generation. Every override below answers the
// serializer's question using |m_webDelegate| and/or the queried object.
class MHTMLFrameSerializerDelegate final : public FrameSerializer::Delegate {
  WTF_MAKE_NONCOPYABLE(MHTMLFrameSerializerDelegate);

 public:
  // Keeps a reference to the given web delegate; no copy is made.
  explicit MHTMLFrameSerializerDelegate(
      WebFrameSerializer::MHTMLPartsGenerationDelegate&);
  // Returns true for attributes that must be left out of the output.
  bool shouldIgnoreAttribute(const Attribute&) override;
  // Returns true and fills |rewrittenLink| when the element's link should be
  // replaced; returns false when the link should be left as it is.
  bool rewriteLink(const Element&, String& rewrittenLink) override;
  // Returns true when the web delegate asks to skip the resource at this URL.
  bool shouldSkipResourceWithURL(const KURL&) override;
  // Returns true when the resource must be skipped because of the cache
  // control policy reported by the web delegate.
  bool shouldSkipResource(const Resource&) override;

 private:
  // Held by reference (not owned by this object).
  WebFrameSerializer::MHTMLPartsGenerationDelegate& m_webDelegate;
};

// Binds this adapter to |webDelegate|. Only the reference is stored, so the
// referenced delegate has to remain valid for as long as this object is used.
MHTMLFrameSerializerDelegate::MHTMLFrameSerializerDelegate(
    WebFrameSerializer::MHTMLPartsGenerationDelegate& webDelegate)
    : m_webDelegate(webDelegate) {}

// Tells the serializer which attributes to drop from the saved markup.
// Currently only |srcset| is dropped; every other attribute is kept.
bool MHTMLFrameSerializerDelegate::shouldIgnoreAttribute(
    const Attribute& attribute) {
  // TODO(fgorski): A srcset attribute prevents MHTML from displaying images,
  // because only the value of src is pulled into the archive. Discarding
  // srcset avoids the problem. Long term we should make sure MHTML plays
  // nicely with srcset.
  const bool isSrcset = attribute.localName() == HTMLNames::srcsetAttr;
  return isSrcset;
}

// Rewrites the link of a frame-owning element so that it points at the MHTML
// part of the frame it hosts (a URI derived from that frame's Content-ID).
//
// Returns true and assigns |rewrittenLink| when:
//   * |element| is a frame owner element with a content frame,
//   * the web delegate supplies a non-null Content-ID for that frame, and
//   * |element| is either an HTMLFrameElementBase, or an <object> element
//     whose content document is an HTML, XHTML or image document.
// Returns false in every other case and leaves |rewrittenLink| untouched.
bool MHTMLFrameSerializerDelegate::rewriteLink(const Element& element,
                                               String& rewrittenLink) {
  if (!element.isFrameOwnerElement())
    return false;

  auto* frameOwnerElement = toHTMLFrameOwnerElement(&element);
  Frame* frame = frameOwnerElement->contentFrame();
  if (!frame)
    return false;

  // A null Content-ID means the delegate has no MHTML part for this frame.
  WebString contentID = m_webDelegate.getContentID(WebFrame::fromFrame(frame));
  if (contentID.isNull())
    return false;

  KURL cidURI = MHTMLParser::convertContentIDToURI(contentID);
  DCHECK(cidURI.isValid());

  if (isHTMLFrameElementBase(&element)) {
    rewrittenLink = cidURI.getString();
    return true;
  }

  if (isHTMLObjectElement(&element)) {
    // Having a content frame does not guarantee a document reachable from
    // here: contentDocument() returns a pointer that may be null (presumably
    // when the content frame is not a local frame -- confirm against
    // HTMLFrameOwnerElement). Treat a missing document as "not handled"
    // rather than dereferencing null.
    Document* doc = frameOwnerElement->contentDocument();
    bool isHandledBySerializer =
        doc && (doc->isHTMLDocument() || doc->isXHTMLDocument() ||
                doc->isImageDocument());
    if (isHandledBySerializer) {
      rewrittenLink = cidURI.getString();
      return true;
    }
  }

  return false;
}

// Forwards the URL-based skip decision to the web delegate unchanged.
bool MHTMLFrameSerializerDelegate::shouldSkipResourceWithURL(const KURL& url) {
  const bool skip = m_webDelegate.shouldSkipResource(url);
  return skip;
}

// A resource is skipped only when the delegate's cache control policy is
// SkipAnyFrameOrResourceMarkedNoStore and the resource itself reports a
// "Cache-Control: no-store" header.
bool MHTMLFrameSerializerDelegate::shouldSkipResource(
    const Resource& resource) {
  // Under any other policy the resource's headers are not consulted at all.
  if (m_webDelegate.cacheControlPolicy() !=
      WebFrameSerializerCacheControlPolicy::
          SkipAnyFrameOrResourceMarkedNoStore) {
    return false;
  }
  return resource.hasCacheControlNoStoreHeader();
}

bool cacheControlNoStoreHeaderPresent(
    const WebLocalFrameImpl& webLocalFrameImpl) {
  const ResourceResponse& response =
      webLocalFrameImpl.dataSource()->response().toResourceResponse();
  if (response.cacheControlContainsNoStore())
    return true;

  const ResourceRequest& request =
      webLocalFrameImpl.dataSource()->request().toResourceRequest();
  return request.cacheControlContainsNoStore();
}

// Decides whether |frame| may be written into the MHTML output under
// |cacheControlPolicy|:
//   * None: always serialize.
//   * SkipAnyFrameOrResourceMarkedNoStore: serialize unless the frame is
//     marked no-store.
//   * FailForNoStoreMainFrame: same no-store check, but only for a frame
//     without a parent; frames that have a parent are always serialized.
//   * Any other value: always serialize.
bool frameShouldBeSerializedAsMHTML(
    WebLocalFrame* frame,
    WebFrameSerializerCacheControlPolicy cacheControlPolicy) {
  using Policy = WebFrameSerializerCacheControlPolicy;

  WebLocalFrameImpl* localFrameImpl = toWebLocalFrameImpl(frame);
  DCHECK(localFrameImpl);

  if (cacheControlPolicy == Policy::None)
    return true;

  if (cacheControlPolicy != Policy::SkipAnyFrameOrResourceMarkedNoStore) {
    // Outside the "skip any" policy, the no-store check applies solely to a
    // parentless frame under FailForNoStoreMainFrame.
    const bool hasNoParent = !frame->parent();
    if (!hasNoParent || cacheControlPolicy != Policy::FailForNoStoreMainFrame)
      return true;
  }

  return !cacheControlNoStoreHeaderPresent(*localFrameImpl);
}

}  // namespace

// Produces the MHTML header for |frame|, built from |boundary| and the title
// and suggested MIME type of the frame's document. Returns a
// default-constructed WebThreadSafeData when the delegate's cache control
// policy says the frame should not be serialized.
WebThreadSafeData WebFrameSerializer::generateMHTMLHeader(
    const WebString& boundary,
    WebLocalFrame* frame,
    MHTMLPartsGenerationDelegate* delegate) {
  TRACE_EVENT0("page-serialization", "WebFrameSerializer::generateMHTMLHeader");
  DCHECK(frame);
  DCHECK(delegate);

  const bool shouldSerialize =
      frameShouldBeSerializedAsMHTML(frame, delegate->cacheControlPolicy());
  if (!shouldSerialize)
    return WebThreadSafeData();

  WebLocalFrameImpl* localFrameImpl = toWebLocalFrameImpl(frame);
  DCHECK(localFrameImpl);
  Document* frameDocument = localFrameImpl->frame()->document();

  // The header bytes are written straight into the freshly created buffer.
  RefPtr<RawData> header = RawData::create();
  MHTMLArchive::generateMHTMLHeader(boundary, frameDocument->title(),
                                    frameDocument->suggestedMIMEType(),
                                    *header->mutableData());
  return header.release();
}

// Serializes |webFrame| and encodes every resource produced by the serializer
// as an MHTML part delimited by |boundary|. Returns a default-constructed
// WebThreadSafeData when the delegate's cache control policy says the frame
// should not be serialized. Both phases (serialization and encoding) are
// covered by trace events and UMA timers.
WebThreadSafeData WebFrameSerializer::generateMHTMLParts(
    const WebString& boundary,
    WebLocalFrame* webFrame,
    MHTMLPartsGenerationDelegate* webDelegate) {
  TRACE_EVENT0("page-serialization", "WebFrameSerializer::generateMHTMLParts");
  DCHECK(webFrame);
  DCHECK(webDelegate);

  const bool shouldSerialize = frameShouldBeSerializedAsMHTML(
      webFrame, webDelegate->cacheControlPolicy());
  if (!shouldSerialize)
    return WebThreadSafeData();

  // Map the public API arguments onto their core counterparts.
  LocalFrame* coreFrame = toWebLocalFrameImpl(webFrame)->frame();
  MHTMLArchive::EncodingPolicy partEncoding =
      MHTMLArchive::EncodingPolicy::UseDefaultEncoding;
  if (webDelegate->useBinaryEncoding())
    partEncoding = MHTMLArchive::EncodingPolicy::UseBinaryEncoding;

  // Phase 1: let FrameSerializer collect the frame and its resources.
  TRACE_EVENT_BEGIN0("page-serialization",
                     "WebFrameSerializer::generateMHTMLParts serializing");
  Vector<SerializedResource> resources;
  {
    SCOPED_BLINK_UMA_HISTOGRAM_TIMER(
        "PageSerialization.MhtmlGeneration.SerializationTime.SingleFrame");
    MHTMLFrameSerializerDelegate coreDelegate(*webDelegate);
    FrameSerializer serializer(resources, coreDelegate);
    serializer.serializeFrame(*coreFrame);
  }
  TRACE_EVENT_END1("page-serialization",
                   "WebFrameSerializer::generateMHTMLParts serializing",
                   "resource count",
                   static_cast<unsigned long long>(resources.size()));

  // Content-ID of the frame that was just serialized.
  String frameContentID = webDelegate->getContentID(webFrame);

  // Phase 2: turn each serialized resource into an MHTML part.
  RefPtr<RawData> output = RawData::create();
  {
    SCOPED_BLINK_UMA_HISTOGRAM_TIMER(
        "PageSerialization.MhtmlGeneration.EncodingTime.SingleFrame");
    for (size_t index = 0; index < resources.size(); ++index) {
      TRACE_EVENT0("page-serialization",
                   "WebFrameSerializer::generateMHTMLParts encoding");
      const SerializedResource& resource = resources[index];

      // The frame itself is the first resource (see the doc comment of
      // FrameSerializer::serializeFrame), and only the frame gets a
      // Content-ID header; all other parts get a null string.
      String contentID = index == 0 ? frameContentID : String();

      MHTMLArchive::generateMHTMLPart(boundary, contentID, partEncoding,
                                      resource, *output->mutableData());
    }
  }
  return output.release();
}

// Produces the MHTML footer for |boundary| by delegating to
// MHTMLArchive::generateMHTMLFooter and returns the resulting bytes.
WebThreadSafeData WebFrameSerializer::generateMHTMLFooter(
    const WebString& boundary) {
  TRACE_EVENT0("page-serialization", "WebFrameSerializer::generateMHTMLFooter");
  RefPtr<RawData> footer = RawData::create();
  MHTMLArchive::generateMHTMLFooter(boundary, *footer->mutableData());
  return footer.release();
}

// Runs a WebFrameSerializerImpl over |frame| with the given client and link
// rewriting delegate, and returns whatever that serializer reports.
bool WebFrameSerializer::serialize(
    WebLocalFrame* frame,
    WebFrameSerializerClient* client,
    WebFrameSerializer::LinkRewritingDelegate* delegate) {
  WebFrameSerializerImpl impl(frame, client, delegate);
  const bool result = impl.serialize();
  return result;
}

// Builds a <meta http-equiv="Content-Type"> tag declaring |charset|. The
// charset text is inserted verbatim, without any escaping.
WebString WebFrameSerializer::generateMetaCharsetDeclaration(
    const WebString& charset) {
  // TODO(yosin) We should call |FrameSerializer::metaCharsetDeclarationOf()|.
  const String charsetName = static_cast<const String&>(charset);
  String declaration =
      "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=" +
      charsetName + "\">";
  return declaration;
}

// Wraps the text returned by FrameSerializer::markOfTheWebDeclaration(url) in
// an HTML comment, with a newline before and after the comment.
WebString WebFrameSerializer::generateMarkOfTheWebDeclaration(
    const WebURL& url) {
  const String mark = FrameSerializer::markOfTheWebDeclaration(url);

  StringBuilder comment;
  comment.append("\n<!-- ");
  comment.append(mark);
  comment.append(" -->\n");
  return comment.toString();
}

// Builds a <base href="."> tag. A target attribute holding |baseTarget|
// (inserted verbatim) is added only when |baseTarget| is not empty.
WebString WebFrameSerializer::generateBaseTagDeclaration(
    const WebString& baseTarget) {
  // TODO(yosin) We should call |FrameSerializer::baseTagDeclarationOf()|.
  if (!baseTarget.isEmpty()) {
    String tagWithTarget = "<base href=\".\" target=\"" +
                           static_cast<const String&>(baseTarget) + "\">";
    return tagWithTarget;
  }
  return String("<base href=\".\">");
}

}  // namespace blink
