#include "stdafx.h"

// Prints summary statistics of graph G to stdout, one "name<TAB>value" line per
// statistic: node/edge counts, size of the largest weakly and strongly connected
// components (absolute and as a fraction of G), average clustering coefficient,
// triangle counts, and full / effective diameter.
//
// The diameter figures come from TSnap::GetBfsEffDiam(G, 1000, false, ...);
// NOTE(review): 1000 is presumably the number of BFS start nodes and `false`
// presumably selects the undirected treatment -- confirm against the SNAP API.
void PrintGraphStat(const PNGraph& G) {
  PNGraph WCC = TSnap::GetMxWcc(G);
  PNGraph SCC = TSnap::GetMxScc(G);
  TFltPrV DegCCfV;
  // Out-parameters of the SNAP calls below; initialized so they never hold
  // indeterminate values.
  int ClosedTriads = 0, OpenTriads = 0, FullDiam = 0;
  double EffDiam = 0;
  const double Nodes = double(G->GetNodes());
  const double Edges = double(G->GetEdges());
  printf("Nodes\t%d\n", G->GetNodes());
  printf("Edges\t%d\n", G->GetEdges());
  printf("Nodes in largest WCC\t%d (%.3f)\n", WCC->GetNodes(), WCC->GetNodes()/Nodes);
  printf("Edges in largest WCC\t%d (%.3f)\n", WCC->GetEdges(), WCC->GetEdges()/Edges);
  printf("Nodes in largest SCC\t%d (%.3f)\n", SCC->GetNodes(), SCC->GetNodes()/Nodes);
  printf("Edges in largest SCC\t%d (%.3f)\n", SCC->GetEdges(), SCC->GetEdges()/Edges);
  const double CCF = TSnap::GetClustCf(G, DegCCfV, ClosedTriads, OpenTriads);
  // Sum the triad counts in double: adding the two ints first can overflow
  // (undefined behaviour) on large graphs.
  const double AllTriads = double(ClosedTriads) + double(OpenTriads);
  printf("Average clustering coefficient\t%.4f\n", CCF);
  printf("Number of triangles\t%d\n", ClosedTriads);
  printf("Fraction of closed triangles\t%.4g\n", ClosedTriads/AllTriads);
  TSnap::GetBfsEffDiam(G, 1000, false, EffDiam, FullDiam);
  printf("Diameter (longest shortest path)\t%d\n", FullDiam);
  printf("90-percentile effective diameter\t%.2g\n", EffDiam);
}

void PrintGraphStatTable(const PNGraph& G, TStr OutFNm, TStr Desc="") {
  TFltPrV DegCCfV;
  int ClosedTriads, OpenTriads, FullDiam;
  double EffDiam;
  TSnap::PrintInfo(G, OutFNm);
  TExeTm ExeTm; printf("C");
  const double CCF = TSnap::GetClustCf(G, DegCCfV, ClosedTriads, OpenTriads);
  printf("[%s]D", ExeTm.GetStr());
  TSnap::GetBfsEffDiam(G, 1000, false, EffDiam, FullDiam);
  printf("[%s]CC", ExeTm.GetStr());
  PNGraph WCC = TSnap::GetMxWcc(G);
  PNGraph SCC = TSnap::GetMxScc(G);
  printf("[%s]\n", ExeTm.GetStr());
  FILE* F = stdout;
  if (! OutFNm.Empty()) {
    F = fopen(TStr::Fmt("%s.html", OutFNm.CStr()).CStr(), "wt"); }
  fprintf(F, "\n");
  fprintf(F, "<table id=\"datatab\" summary=\"Dataset statistics\">\n");
  fprintf(F, "  <tr> <th colspan=\"2\">Dataset statistics</th> </tr>\n");
  fprintf(F, "  <tr><td>Nodes</td> <td>%d</td></tr>\n", G->GetNodes());
  fprintf(F, "  <tr><td>Edges</td> <td>%d</td></tr>\n", G->GetEdges());
  fprintf(F, "  <tr><td>Nodes in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetNodes(), WCC->GetNodes()/double(G->GetNodes()));
  fprintf(F, "  <tr><td>Edges in largest WCC</td> <td>%d (%.3f)</td></tr>\n", WCC->GetEdges(), WCC->GetEdges()/double(G->GetEdges()));
  fprintf(F, "  <tr><td>Nodes in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetNodes(), SCC->GetNodes()/double(G->GetNodes()));
  fprintf(F, "  <tr><td>Edges in largest SCC</td> <td>%d (%.3f)</td></tr>\n", SCC->GetEdges(), SCC->GetEdges()/double(G->GetEdges()));
  fprintf(F, "  <tr><td>Average clustering coefficient</td> <td>%.4f</td></tr>\n", CCF);
  fprintf(F, "  <tr><td>Number of triangles</td> <td>%d</td></tr>\n", ClosedTriads);
  fprintf(F, "  <tr><td>Fraction of closed triangles</td> <td>%.4g</td></tr>\n", ClosedTriads/double(ClosedTriads+OpenTriads));
  fprintf(F, "  <tr><td>Diameter (longest shortest path)</td> <td>%d</td></tr>\n", FullDiam);
  fprintf(F, "  <tr><td>90-percentile effective diameter</td> <td>%.2g</td></tr>\n", EffDiam);
  fprintf(F, "</table>\n");
  fprintf(F, "<br>\n");
  if (! OutFNm.Empty()) {
    fprintf(F, "\n<table id=\"datatab\" summary=\"Table of datasets\">\n");
    fprintf(F, "<tr>\n");
	  fprintf(F, "  <th>File</th>\n");
	  fprintf(F, "  <th>Description</th>\n");
    fprintf(F, "</tr>\n");
    fprintf(F, "<tr>\n");
	  fprintf(F, "  <td><a href=\"%s.txt.gz\">%s.txt.gz</a></td>\n", OutFNm.CStr(), OutFNm.CStr());
	  fprintf(F, "  <td>%s</td>\n", Desc.CStr());
    fprintf(F, "</tr>\n");
    fprintf(F, "</table>\n");
    fclose(F);
    TSnap::SaveEdgeList(G, OutFNm+".txt", Desc);
  }
}

// LiveJournal network from Matt Richardson, ISWC '03
void MakeEpinions() {
  PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\Epinions\\epinions.txt", 0, 1);
  PrintGraphStatTable(G, "soc-Epinions1", "Directed Epinions social network");
}

// LiveJournal network from Lars Backstrom, KDD '06
void MakeLiveJournal1() {
  PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\LiveJournal-Lars\\friends.gz", 0, 1);
  PrintGraphStatTable(G, "soc-LiveJournal1", "Directed LiveJournal friednship social network");
}

// Gnutella network from M. Ripeanu, IEEE Internet Computing Journal 2002
void MakeGnutella() {
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.04.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella04", "Directed Gnutella P2P network from August 4 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.05.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella05", "Directed Gnutella P2P network from August 5 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.06.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella06", "Directed Gnutella P2P network from August 6 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.08.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella08", "Directed Gnutella P2P network from August 8 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.09.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella09", "Directed Gnutella P2P network from August 9 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.24.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella24", "Directed Gnutella P2P network from August 24 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.25.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella25", "Directed Gnutella P2P network from August 25 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.30.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella30", "Directed Gnutella P2P network from August 30 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Gnutella\\g.31.id.gz", 0, 1);
  PrintGraphStatTable(G, "p2p-Gnutella31", "Directed Gnutella P2P network from August 31 2002"); }
}

// Webgraphs
void MakeWebGraphs() {
  //{ PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Google\\google_edges.gz", 0, 1);
  //PrintGraphStatTable(G, "web-Google", "Webgraph from the Google programming contest, 2002"); }
  //{ PNGraph G = TSnap::LoadConnList<PNGraph>("W:\\Data\\_graphData\\WWW-Stanford\\StanfordBerkeleyWeb.net");
  //PrintGraphStatTable(G, "web-BerkStan", "Berkely-Stanford web graph from 2002"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\WWW-Stanford\\StanfordWeb.net", 0, 1);
  PrintGraphStatTable(G, "web-Stanford", "Stanford web graph from 2002"); }
  //{ PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\WWW-Barabasi\\www_edges.gz", 0, 1);
  //PrintGraphStatTable(G, "web-NotreDame", "University of Notre Dame web graph from 1999 by Albert, Jeong and Barabasi"); }
}

// road networks of California, Texas and Pennsylvania
void MakeRoadNets() {
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Roards\\CA_edges.gz", 0, 1);
  PrintGraphStatTable(G, "roadNet-CA", "California road network"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Roards\\PA_edges.gz", 0, 1);
  PrintGraphStatTable(G, "roadNet-PA", "Pennsylvania road network"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Roards\\TX_edges.gz", 0, 1);
  PrintGraphStatTable(G, "roadNet-TX", "Texas road network"); }
}

// Arxiv collaboration networks
void MakeCollaborationNets() {
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\Arxiv-CoAuth\\astro-ph.coauth", 0, 1);
  PrintGraphStatTable(G, "CA-AstroPh", "Collaboration network of Arxiv Astro Physics category (there is an edge if authors coauthored at least one paper)"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\Arxiv-CoAuth\\cond-mat.coauth", 0, 1);
  PrintGraphStatTable(G, "CA-CondMat", "Collaboration network of Arxiv Condensed Matter category (there is an edge if authors coauthored at least one paper)"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\Arxiv-CoAuth\\gr-qc.coauth", 0, 1);
  PrintGraphStatTable(G, "CA-GrQc", "Collaboration network of Arxiv General Relativity category (there is an edge if authors coauthored at least one paper)"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\Arxiv-CoAuth\\hep-ph.coauth", 0, 1);
  PrintGraphStatTable(G, "CA-HepPh", "Collaboration network of Arxiv High Energy Physics category (there is an edge if authors coauthored at least one paper)"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\Arxiv-CoAuth\\hep-th.coauth", 0, 1);
  PrintGraphStatTable(G, "CA-HepTh", "Collaboration network of Arxiv High Energy Physics Theory category (there is an edge if authors coauthored at least one paper)"); }
}

// Arxiv citation networks
void MakeArxivCitationNets() {
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\hep-ph-citations", 0, 1);
  PrintGraphStatTable(G, "Cit-HepPh", "Paper citation network of Arxiv High Energy Physics category"); }
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Arxiv\\hep-th-citations", 0, 1);
  PrintGraphStatTable(G, "Cit-HepTh", "Paper citation network of Arxiv High Energy Physics Theory category"); }
}

// US Patent citation network
void MakePatentsCitationNet() {
  { PNGraph G = TSnap::LoadEdgeList<PNGraph>("W:\\Data\\_graphData\\Patents\\cite75_99.zip", 0, 1, ',');
  PrintGraphStatTable(G, "Cit-Patents", "US Patent citation network 1975-1999"); }
}

// Amazon product copurchasing networks
void MakeAmazonCoPurchNets() {
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\AmazonAsinNet\\2003_03_02_asin01.out", 0, 1);
  PrintGraphStatTable(G, "Amazon0302", "Amazon product co-purchaisng network from March 02 2003"); }
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\AmazonAsinNet\\2003_03_12_asin01.out", 0, 1);
  PrintGraphStatTable(G, "Amazon0312", "Amazon product co-purchaisng network from March 12 2003"); }
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\AmazonAsinNet\\2003_05_05_asin01.out", 0, 1);
  PrintGraphStatTable(G, "Amazon0505", "Amazon product co-purchaisng network from May 05 2003"); }
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\AmazonAsinNet\\2003_06_01_asin01.out", 0, 1);
  PrintGraphStatTable(G, "Amazon0601", "Amazon product co-purchaisng network from June 01 2003"); }
}

// Email networks
void MakeEmailNets() {
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\EmailGraph\\EMailTsactAnon.Txt", 1, 3);
  PrintGraphStatTable(G, "Email-EuAll", "Email network of a large European Research Institution (directed edge means at least one email was sent between October 2003 and March 2005)"); }
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("W:\\Data\\_graphData\\Enron\\Graph\\enron.directed.edges", 0, 1);
  PrintGraphStatTable(G, "Email-Enron", "Enron email network (edge indicated that email was exchanged, undirected edges)"); }
}

// Slashdot network.
// Reads the tab-separated file InFNm, where field 0 of every line names the
// source node and fields 2 and onward name its destination nodes (field 1 is
// not used). Names are mapped to integer node ids in order of first appearance
// via a string hash. The resulting directed graph is passed to
// PrintGraphStatTable together with OutFNm and Desc.
void MakeSlashdotNet(TStr InFNm, TStr OutFNm, TStr Desc) {
  TSsParser Parser(InFNm, ssfTabSep);
  PNGraph Net = TNGraph::New();
  TStrHash<TInt> NameIdH(Mega(1), true);
  while (Parser.Next()) {
    // Register the source node of this line.
    const int FromNId = NameIdH.AddKey(Parser[0]);
    if (! Net->IsNode(FromNId)) {
      Net->AddNode(FromNId);
    }
    // Every field from index 2 on is a destination of the source node.
    const int Fields = Parser.Len();
    for (int FldN = 2; FldN < Fields; FldN++) {
      const int ToNId = NameIdH.AddKey(Parser[FldN]);
      if (! Net->IsNode(ToNId)) {
        Net->AddNode(ToNId);
      }
      Net->AddEdge(FromNId, ToNId);
    }
  }
  PrintGraphStatTable(Net, OutFNm, Desc);
}

void MakeWikipediaNets() {
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("Wiki-Vote.txt.gz", 0, 1);
  PrintGraphStatTable(G, "wiki-Vote", ""); }
  { PNGraph G = TSnap::LoadEdgeListStr<PNGraph>("Wiki-Talk.txt.gz", 0, 1);
  PrintGraphStatTable(G, "wiki-Talk", ""); }
}

// Program entry point. Each Make*() call generates the statistics table(s) and
// edge-list file(s) for one family of datasets. Only MakeWikipediaNets() is
// currently enabled; the other generators are commented out and can be
// re-enabled individually. Command-line arguments are not used.
int main(int argc, char* argv[]) {
  //MakeEpinions();
  //MakeLiveJournal1();
  //MakeGnutella();
  //MakeRoadNets();
  //MakeWebGraphs();
  //MakeCollaborationNets();
  //MakeArxivCitationNets();
  //MakeAmazonCoPurchNets();
  //MakePatentsCitationNet();
  //MakeEmailNets();
  //MakeSlashdotNet("W:\\Data\\SlashdotZoo\\slashdot-08nov06.txt", "soc-Slashdot0811", "Slashdot Zoo social network from Noveber 6 2008");
  //MakeSlashdotNet("W:\\Data\\SlashdotZoo\\slashdot-09feb21.txt", "soc-Slashdot0902", "Slashdot Zoo social network from February 0 2009");
  MakeWikipediaNets();

  return 0;
}

