// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
#define COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_

// A class that encapsulates the detailed threat reports sent when
// users opt-in to do so from the safe browsing warning page.

// An instance of this class is generated when a safe browsing warning page
// is shown (SafeBrowsingBlockingPage).

#include <memory>
#include <string>
#include <unordered_set>
#include <vector>

#include "base/containers/hash_tables.h"
#include "base/gtest_prod_util.h"
#include "base/macros.h"
#include "base/memory/ref_counted.h"
#include "components/safe_browsing/common/safe_browsing.mojom.h"
#include "components/safe_browsing/proto/csd.pb.h"
#include "components/security_interstitials/content/unsafe_resource.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/web_contents_observer.h"
#include "net/base/completion_callback.h"

namespace history {
class HistoryService;
}  // namespace history

namespace network {
class SharedURLLoaderFactory;
}  // namespace network

namespace safe_browsing {

class BaseUIManager;
class ReferrerChainProvider;

// Maps a URL to its Resource.
class ThreatDetailsCacheCollector;
class ThreatDetailsRedirectsCollector;
class ThreatDetailsFactory;

using ResourceMap =
    base::hash_map<std::string,
                   std::unique_ptr<ClientSafeBrowsingReportRequest::Resource>>;

// Maps a key of an HTML element to its corresponding HTMLElement proto message.
// HTML Element keys have the form "<frame_id>-<node_id>", where |frame_id| is
// the FrameTreeNode ID of the frame containing the element, and
// |node_id| is a sequential ID for the element generated by the renderer.
using ElementMap = base::hash_map<std::string, std::unique_ptr<HTMLElement>>;

// Maps the key of an iframe element to the FrameTreeNode ID of the frame that
// rendered the contents of the iframe.
using KeyToFrameTreeIdMap = base::hash_map<std::string, int>;

// Maps a FrameTreeNode ID of a frame to a set of child IDs. The child IDs are
// the Element IDs of the top-level HTML Elements in this frame.
using FrameTreeIdToChildIdsMap = base::hash_map<int, std::unordered_set<int>>;

// Callback used to notify a caller that ThreatDetails has finished creating and
// sending a report.
using ThreatDetailsDoneCallback = base::Callback<void(content::WebContents*)>;

class ThreatDetails : public base::RefCounted<ThreatDetails>,
                      public content::WebContentsObserver {
 public:
  typedef security_interstitials::UnsafeResource UnsafeResource;

  // Constructs a new ThreatDetails instance, using the factory.
  static ThreatDetails* NewThreatDetails(
      BaseUIManager* ui_manager,
      content::WebContents* web_contents,
      const UnsafeResource& resource,
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
      history::HistoryService* history_service,
      ReferrerChainProvider* referrer_chain_provider,
      bool trim_to_ad_tags,
      ThreatDetailsDoneCallback done_callback);

  // Makes the passed |factory| the factory used to instantiate
  // SafeBrowsingBlockingPage objects. Useful for tests.
  static void RegisterFactory(ThreatDetailsFactory* factory) {
    factory_ = factory;
  }

  // The SafeBrowsingBlockingPage calls this from the IO thread when
  // the user is leaving the blocking page and has opted-in to sending
  // the report. We start the redirection urls collection from history service
  // in UI thread; then do cache collection back in IO thread. We also record
  // if the user did proceed with the warning page, and how many times user
  // visited this page before. When we are done, we send the report.
  virtual void FinishCollection(bool did_proceed, int num_visits);

  void OnCacheCollectionReady();

  void OnRedirectionCollectionReady();

 protected:
  friend class ThreatDetailsFactoryImpl;
  friend class TestThreatDetailsFactory;

  ThreatDetails(
      BaseUIManager* ui_manager,
      content::WebContents* web_contents,
      const UnsafeResource& resource,
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
      history::HistoryService* history_service,
      ReferrerChainProvider* referrer_chain_provider,
      bool trim_to_ad_tags,
      ThreatDetailsDoneCallback done_callback);

  // Default constructor for testing only.
  ThreatDetails();

  ~ThreatDetails() override;

  virtual void AddDOMDetails(const int frame_tree_node_id,
                             std::vector<mojom::ThreatDOMDetailsNodePtr> params,
                             const KeyToFrameTreeIdMap& child_frame_tree_map);

  // The report protocol buffer.
  std::unique_ptr<ClientSafeBrowsingReportRequest> report_;

  // Used to get a pointer to the HTTP cache.
  scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory_;

 private:
  friend class base::RefCounted<ThreatDetails>;

  // Starts the collection of the report.
  void StartCollection();

  // Whether the url is "public" so we can add it to the report.
  bool IsReportableUrl(const GURL& url) const;

  // Finds an existing Resource for the given url, or creates a new one if not
  // found, and adds it to |resources_|. Returns the found/created resource.
  ClientSafeBrowsingReportRequest::Resource* FindOrCreateResource(
      const GURL& url);

  // Finds an existing HTMLElement for a given key, or creates a new one if not
  // found and adds it to |elements_|. Returns the found/created element.
  HTMLElement* FindOrCreateElement(const std::string& element_key);

  // Adds a Resource to resources_ with the given parent-child
  // relationship. |parent| and |tagname| can be empty, |children| can be NULL.
  // Returns the Resource that was affected, or null if no work was done.
  ClientSafeBrowsingReportRequest::Resource* AddUrl(
      const GURL& url,
      const GURL& parent,
      const std::string& tagname,
      const std::vector<GURL>* children);

  void RequestThreatDOMDetails(content::RenderFrameHost* frame);

  void OnReceivedThreatDOMDetails(
      mojom::ThreatReporterPtr threat_reporter,
      content::RenderFrameHost* sender,
      std::vector<mojom::ThreatDOMDetailsNodePtr> params);

  void AddRedirectUrlList(const std::vector<GURL>& urls);

  // Adds an HTML Element to the DOM structure. |frame_tree_node_id| is the
  // unique ID of the frame the element came from. |element_node_id| is a unique
  // ID of the element within the frame. |tag_name| is the tag of the element.
  // |parent_element_node_id| is the unique ID of the parent element within the
  // frame. |attributes| contains the names and values of the element's
  // attributes. |resource| is set if this element is a resource.
  void AddDomElement(const int frame_tree_node_id,
                     const int element_node_id,
                     const std::string& tag_name,
                     const int parent_element_node_id,
                     const std::vector<mojom::AttributeNameValuePtr> attributes,
                     const ClientSafeBrowsingReportRequest::Resource* resource);

  // Populates the referrer chain data in |report_|. This may be skipped if the
  // referrer chain provider isn't available, or the type of report doesn't
  // include the referrer chain.
  void MaybeFillReferrerChain();

  // Called when the report is complete. Runs |done_callback_|.
  void AllDone();

  scoped_refptr<BaseUIManager> ui_manager_;

  const UnsafeResource resource_;

  ReferrerChainProvider* referrer_chain_provider_;

  // For every Url we collect we create a Resource message. We keep
  // them in a map so we can avoid duplicates.
  ResourceMap resources_;

  // Store all HTML elements collected, keep them in a map for easy lookup.
  ElementMap elements_;

  // For each iframe element encountered we map the key of the iframe to the
  // FrameTreeNode ID of the frame containing the contents of that iframe.
  // We populate this map when receiving results from ThreatDomDetails, and use
  // it in a second pass (after FinishCollection) to attach children to iframe
  // elements.
  // Should only be accessed on the IO thread.
  KeyToFrameTreeIdMap iframe_key_to_frame_tree_id_map_;

  // When getting a set of elements from a frame, we store the frame's
  // FrameTreeNode ID and a collection of all top-level elements in that frame.
  // It is populated as we receive sets of nodes from different renderers.
  // It is used together with |iframe_key_to_frame_tree_id_map_| in a second
  // pass to insert child elements under their parent iframe elements.
  FrameTreeIdToChildIdsMap frame_tree_id_to_children_map_;

  // Result from the cache extractor.
  bool cache_result_;

  // Whether user did proceed with the safe browsing blocking page or
  // not.
  bool did_proceed_;

  // How many times this user has visited this page before.
  int num_visits_;

  // Keeps track of whether we have an ambiguous DOM in this report. This can
  // happen when the HTML Elements returned by a renderer can't be
  // associated with a parent Element in the parent frame.
  bool ambiguous_dom_;

  // Whether this report should be trimmed down to only ad tags, not the entire
  // page contents. Used for sampling ads.
  bool trim_to_ad_tags_;

  // A vector containing the IDs of the DOM Elements to trim to. If an element
  // ID is in this list, then its siblings and its children should be included
  // in the report. Only populated if this report will be trimmed.
  std::set<int> trimmed_dom_element_ids_;

  // The factory used to instantiate SafeBrowsingBlockingPage objects.
  // Useful for tests, so they can provide their own implementation of
  // SafeBrowsingBlockingPage.
  static ThreatDetailsFactory* factory_;

  // Used to collect details from the HTTP Cache.
  scoped_refptr<ThreatDetailsCacheCollector> cache_collector_;

  // Used to collect redirect urls from the history service
  scoped_refptr<ThreatDetailsRedirectsCollector> redirects_collector_;

  // Callback to run when the report is finished.
  ThreatDetailsDoneCallback done_callback_;

  // Whether this ThreatDetails has begun finalizing the report and is expected
  // to invoke |done_callback_| when it finishes.
  bool all_done_expected_;

  // Whether the |done_callback_| has been invoked.
  bool is_all_done_;

  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HistoryServiceUrls);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HttpsResourceSanitization);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HTTPCacheNoEntries);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HTTPCache);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, ThreatDOMDetails_AmbiguousDOM);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest,
                           ThreatDOMDetails_EmptyReportNotSent);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, ThreatDOMDetails_MultipleFrames);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, ThreatDOMDetails_TrimToAdTags);
  FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, ThreatDOMDetails);

  DISALLOW_COPY_AND_ASSIGN(ThreatDetails);
};

// Factory for creating ThreatDetails.  Useful for tests.
class ThreatDetailsFactory {
 public:
  virtual ~ThreatDetailsFactory() {}

  virtual ThreatDetails* CreateThreatDetails(
      BaseUIManager* ui_manager,
      content::WebContents* web_contents,
      const security_interstitials::UnsafeResource& unsafe_resource,
      scoped_refptr<network::SharedURLLoaderFactory> url_loader_factory,
      history::HistoryService* history_service,
      ReferrerChainProvider* referrer_chain_provider,
      bool trim_to_ad_tags,
      ThreatDetailsDoneCallback done_callback) = 0;
};

}  // namespace safe_browsing

#endif  // COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
