fork download
  1. #include <iostream>
  2. #include <sstream>
  3. #include <vector>
  4. #include <fstream>
  5. #include <igraph.h>
  6. #include <map>
  7. #include <unordered_map>
  8. #include <unordered_set>
  9. #include <queue>
  10. #include <chrono>
  11. #include <climits>
  12. #include "gurobi_c++.h"
  13.  
  14. #define VERBOSE false
  15. #define WRITE_ILPS_TO_FILE false
  16. #define PRINT_COMPONENT_SIZES false
  17. #define PRINT_FACTOR 100
  18. #define ILP_TIME_LIMIT 3600 // in seconds
  19.  
  20. using namespace std;
  21.  
  22. typedef struct edge_attributes_struct{
  23. igraph_integer_t edge_idx;
  24. igraph_real_t label;
  25. igraph_real_t variant;
  26. }edge_attributes;
  27.  
  28.  
  29. void print_variation_graph(const igraph_t *graph){
  30. igraph_vs_t vs;
  31. igraph_vit_t vit;
  32.  
  33. igraph_vs_all(&vs);
  34. igraph_vit_create(graph, vs, &vit);
  35. while (!IGRAPH_VIT_END(vit)) {
  36. cout << IGRAPH_VIT_GET(vit) << ": " << endl;
  37.  
  38. igraph_vector_int_t edges;
  39. igraph_vector_int_init(&edges, 0);
  40. igraph_incident(graph, &edges, IGRAPH_VIT_GET(vit), IGRAPH_OUT);
  41. for(long int i = 0; i < igraph_vector_int_size(&edges); i++){
  42. igraph_integer_t start, end;
  43. igraph_edge(graph, igraph_vector_int_get(&edges, i), &start, &end);
  44. cout << "\t(" << start << ", " << end << ", "
  45. << "label: " << igraph_cattribute_EAN(graph, "label", igraph_vector_int_get(&edges, i)) << ", "
  46. << "variant: " << igraph_cattribute_EAN(graph, "variant" , igraph_vector_int_get(&edges, i)) << ")" << endl;
  47. ;
  48. }
  49. igraph_vector_int_destroy(&edges);
  50. IGRAPH_VIT_NEXT(vit);
  51. cout << endl;
  52. }
  53. cout<< endl;
  54. }
  55.  
  56.  
  57. void print_alignment_graph(igraph_t *graph){
  58.  
  59. cout << "\nPrinting alignment graph:" << endl;
  60. cout << "pos = " << igraph_cattribute_GAN(graph, "position")
  61. << ", substring = " << igraph_cattribute_GAS(graph, "substring") << endl;
  62.  
  63. igraph_vs_t vs;
  64. igraph_vit_t vit;
  65.  
  66. igraph_vs_all(&vs);
  67. igraph_vit_create(graph, vs, &vit);
  68. while (!IGRAPH_VIT_END(vit)) {
  69. cout << IGRAPH_VIT_GET(vit)
  70. << " startend: " << igraph_cattribute_VAN(graph, "startend", IGRAPH_VIT_GET(vit))
  71. << endl;
  72.  
  73. igraph_vector_int_t edges;
  74. igraph_vector_int_init(&edges, 0);
  75. igraph_incident(graph, &edges, IGRAPH_VIT_GET(vit), IGRAPH_OUT);
  76. for(long int i = 0; i < igraph_vector_int_size(&edges); i++){
  77. igraph_integer_t start, end;
  78. igraph_edge(graph, igraph_vector_int_get(&edges, i), &start, &end);
  79. cout << "\t(" << start << ", " << end << ", "
  80. << "weight: " << igraph_cattribute_EAN(graph, "weight", igraph_vector_int_get(&edges, i)) << ", "
  81. << "variant: " << igraph_cattribute_EAN(graph, "variant" , igraph_vector_int_get(&edges, i)) << ", "
  82. //<< "initsol: " << igraph_cattribute_EAN(graph, "initsol" , igraph_vector_int_get(&edges, i))
  83. << ")" << endl;
  84. }
  85. IGRAPH_VIT_NEXT(vit);
  86. cout << endl;
  87. }
  88. cout<< endl;
  89. }
  90.  
  91.  
  92. void read_edge_file(string edge_file_name, igraph_t* graph, int* num_variants){
  93.  
  94. ifstream infile(edge_file_name);
  95.  
  96. int num_vertices = -1;
  97.  
  98. igraph_vector_int_t edges;
  99. igraph_vector_int_init(&edges, 0);
  100.  
  101. int start_vertex, end_vertex;
  102. char edge_label;
  103. string variant_number;
  104.  
  105. vector<edge_attributes> attributes_vector;
  106. int i = 0;
  107. while (infile >> start_vertex >> end_vertex >> edge_label >> variant_number)
  108. {
  109. if(start_vertex > num_vertices-1){
  110. num_vertices = start_vertex + 1;
  111. }
  112. if(end_vertex > num_vertices-1){
  113. num_vertices= end_vertex + 1;
  114. }
  115.  
  116. igraph_real_t label;
  117. if(edge_label == 'A'){
  118. label = 0;
  119. }else if(edge_label == 'T'){
  120. label = 1;
  121. }else if(edge_label == 'C'){
  122. label = 2;
  123. }else if(edge_label == 'G'){
  124. label = 3;
  125. }else if(edge_label == 'N'){
  126. label = 4;
  127. }
  128.  
  129. igraph_real_t variant;
  130.  
  131. if(variant_number != "-"){
  132. attributes_vector.push_back({i, label, stod(variant_number.c_str())});
  133. }else{
  134. attributes_vector.push_back({i, label, -1});
  135. }
  136.  
  137. //add edge
  138. igraph_vector_int_push_back(&edges, start_vertex);
  139. igraph_vector_int_push_back(&edges, end_vertex);
  140. i++;
  141. }
  142.  
  143. bool directed = true;
  144. igraph_empty(graph, num_vertices, directed);
  145. igraph_add_edges(graph, &edges, NULL);
  146. igraph_vector_int_destroy(&edges);
  147.  
  148. int max_variant = -1;
  149. for(edge_attributes a: attributes_vector){
  150. igraph_cattribute_EAN_set(graph, "label", a.edge_idx, a.label);
  151. igraph_cattribute_EAN_set(graph, "variant", a.edge_idx, a.variant);
  152. if(a.variant > max_variant){
  153. max_variant = a.variant;
  154. }
  155. }
  156. *num_variants = max_variant + 1;
  157.  
  158. }
  159.  
  160.  
  161. void write_alignment_graph_to_file(igraph_t *graph, string file_name){
  162.  
  163. string output = to_string(igraph_ecount(graph)) + "\n";
  164.  
  165. igraph_es_t es;
  166. igraph_eit_t eit;
  167.  
  168. igraph_es_all(&es, IGRAPH_EDGEORDER_ID);
  169. igraph_eit_create(graph, es, &eit);
  170.  
  171. int source, sink;
  172.  
  173. while (!IGRAPH_EIT_END(eit)) {
  174. igraph_integer_t start, end;
  175. igraph_edge(graph, IGRAPH_EIT_GET(eit), &start, &end);
  176. int weight = igraph_cattribute_EAN(graph, "weight", IGRAPH_EIT_GET(eit));
  177. int variant = igraph_cattribute_EAN(graph, "variant", IGRAPH_EIT_GET(eit));
  178. int initsol = igraph_cattribute_EAN(graph, "initsol", IGRAPH_EIT_GET(eit));
  179. output += to_string(start) + " " + to_string(end) + " " + to_string(weight) + " " + to_string(variant) + " " + to_string(initsol) + "\n";
  180.  
  181. if(igraph_cattribute_VAN(graph, "startend", start) == 1){
  182. source = start;
  183. }
  184. if(igraph_cattribute_VAN(graph, "startend", end) == 2){
  185. sink = end;
  186. }
  187.  
  188. IGRAPH_EIT_NEXT(eit);
  189. }
  190.  
  191. igraph_es_destroy(&es);
  192. igraph_eit_destroy(&eit);
  193.  
  194. output += to_string(source) + " " + to_string(sink);
  195.  
  196. ofstream out_file(file_name);
  197. out_file << output;
  198. out_file.close();
  199.  
  200.  
  201. }
  202.  
  203.  
  204. void read_alignment_graph_from_file(igraph_t *graph, string file_name){
  205.  
  206. ifstream in_file(file_name);
  207.  
  208. int E;
  209. in_file >> E;
  210.  
  211. if(E == 0){
  212. bool directed = true;
  213. igraph_empty(graph, 0, directed);
  214. return;
  215. }
  216.  
  217. igraph_vector_int_t edges;
  218. igraph_vector_t weights;
  219. igraph_vector_t variants;
  220. igraph_vector_t initsols;
  221.  
  222. igraph_vector_int_init(&edges, 2*E);
  223. igraph_vector_init(&weights, E);
  224. igraph_vector_init(&variants, E);
  225. igraph_vector_init(&initsols, E);
  226.  
  227. int num_vertices = 0;
  228. int start, end;
  229. float weight, variant, initsol;
  230.  
  231. int i = 0;
  232. int j = 0;
  233. while (in_file >> start >> end >> weight >> variant >> initsol){
  234. //cout << start << " " << end << " " << weight << " " << variant << endl;
  235.  
  236. //edges[i] = start;
  237. igraph_vector_int_set(&edges, i, start);
  238.  
  239. //edges[i+1] = end;
  240. igraph_vector_int_set(&edges, i+1, end);
  241.  
  242. i += 2;
  243.  
  244. //weights[j] = weight;
  245. igraph_vector_set(&weights, j, weight);
  246.  
  247. //variants[j] = variant;
  248. igraph_vector_set(&variants, j, variant);
  249.  
  250. igraph_vector_set(&initsols, j, initsol);
  251.  
  252. j++;
  253.  
  254. if(start > num_vertices-1){
  255. num_vertices = start + 1;
  256. }
  257. if(end > num_vertices-1){
  258. num_vertices= end + 1;
  259. }
  260. }
  261.  
  262. in_file.close();
  263.  
  264. bool directed = true;
  265. igraph_empty(graph, num_vertices, directed);
  266. igraph_add_edges(graph, &edges, NULL);
  267.  
  268. igraph_cattribute_EAN_setv(graph, "weight", &weights);
  269. igraph_cattribute_EAN_setv(graph, "variant", &variants);
  270. igraph_cattribute_EAN_setv(graph, "initsol", &initsols);
  271. igraph_cattribute_VAN_set(graph, "startend", start, 1);
  272. igraph_cattribute_VAN_set(graph, "startend", end, 2);
  273.  
  274. igraph_vector_int_destroy(&edges);
  275. igraph_vector_destroy(&weights);
  276. igraph_vector_destroy(&variants);
  277. igraph_vector_destroy(&initsols);
  278.  
  279. }
  280.  
  281.  
  282. void read_pos_substring_file(string pos_string_file_name, vector<int>& positions, vector<string>& substrings){
  283. ifstream infile(pos_string_file_name);
  284. string line;
  285.  
  286. while (std::getline(infile, line)){
  287.  
  288. int pos;
  289. string substring;
  290.  
  291. istringstream iss(line); // separates line over spaces
  292. iss >> pos;
  293.  
  294. if (line.size() > 0){
  295. while (iss >> substring){
  296. positions.push_back(pos);
  297. substrings.push_back(substring);
  298. }
  299. }
  300. }
  301. }
  302.  
  303.  
  304. void get_reachable_vertex(igraph_t* graph, igraph_vs_t vertex_selector, int dist, igraph_vector_int_list_t* reachable_list){
  305.  
  306. igraph_integer_t order = dist;
  307. igraph_integer_t mindist = 0;
  308.  
  309. igraph_neighborhood(graph, reachable_list, vertex_selector, order, IGRAPH_OUT, mindist);
  310. }
  311.  
  312.  
  313. // obtains induced graph in time polylogarithmic in the size of the induced graph
  314. // this is written because the native igraph library requires size proportional to the size of original graph
  315. void get_induced_variation_graph(igraph_t* graph, igraph_vector_int_t* vertex_set, igraph_t* result){
  316. unordered_map<int, int> umap;
  317. int n = igraph_vector_int_size(vertex_set);
  318. for(int i = 0; i < n; i++){
  319. // unordered map records the existence of a vertex and gives it a new index
  320. umap[igraph_vector_int_get(vertex_set, i)] = i;
  321. }
  322.  
  323. // tempory vector to incident edges for each vertex
  324. igraph_vector_int_t incident_edges;
  325. igraph_vector_int_init(&incident_edges, 0);
  326.  
  327. igraph_vector_int_t induced_edges;
  328. igraph_vector_int_init(&induced_edges, 0);
  329.  
  330. vector<edge_attributes> attributes_vector;
  331.  
  332. int new_eid = 0;
  333. for(int i = 0; i < n; i++){
  334. igraph_incident(graph, &incident_edges, igraph_vector_int_get(vertex_set, i), IGRAPH_OUT);
  335.  
  336. //cout << igraph_vector_int_get(vertex_set, i) << "~: ";
  337.  
  338. int m_v = igraph_vector_int_size(&incident_edges);
  339. for (int j = 0; j < m_v; j++){
  340. igraph_integer_t start, end;
  341. int old_eid = igraph_vector_int_get(&incident_edges, j);
  342. igraph_edge(graph, old_eid, &start, &end);
  343.  
  344. //cout << "(" << start << ", " << end << ") ";
  345.  
  346. // check if end is in the vertex_set
  347. if(umap.find(end) != umap.end()){
  348. //add edge
  349. igraph_vector_int_push_back(&induced_edges, umap[start]);
  350. igraph_vector_int_push_back(&induced_edges, umap[end]);
  351. attributes_vector.push_back({new_eid,
  352. igraph_cattribute_EAN(graph, "label", old_eid),
  353. igraph_cattribute_EAN(graph, "variant", old_eid)});
  354. new_eid++;
  355. }
  356. }
  357. //cout << endl;
  358. }
  359. bool directed = true;
  360. igraph_empty(result, n, directed);
  361. igraph_add_edges(result, &induced_edges, NULL);
  362. igraph_vector_int_destroy(&induced_edges);
  363. igraph_vector_int_destroy(&incident_edges);
  364.  
  365. for(edge_attributes a: attributes_vector){
  366. igraph_cattribute_EAN_set(result, "label", a.edge_idx, a.label);
  367. igraph_cattribute_EAN_set(result, "variant", a.edge_idx, a.variant);
  368. }
  369.  
  370. //print_variation_graph(result);
  371. }
  372.  
  373.  
  374. void reverse_weighted_reachable(igraph_t* graph, igraph_integer_t start_vid, igraph_vector_int_t* result, int delta){
  375.  
  376. // topologically sort the vertices of the reversed graph
  377. igraph_vector_int_t sorted_vertex;
  378. igraph_vector_int_init(&sorted_vertex, igraph_vcount(graph));
  379. igraph_topological_sorting(graph, &sorted_vertex, IGRAPH_IN);
  380. int n = igraph_vector_int_size(&sorted_vertex);
  381.  
  382. //cout << "size = " << n << endl;
  383.  
  384. // get start index
  385. int start_idx = -1;
  386.  
  387. //cout << "reverse top order: ";
  388. for(int i = 0; i < n; i++){
  389. //cout << i << ":" << igraph_vector_int_get(&sorted_vertex, i) << ", ";
  390. if(start_vid == igraph_vector_int_get(&sorted_vertex, i)){
  391. start_idx = i;
  392. break;
  393. }
  394. }
  395. //cout << endl;
  396.  
  397. vector<int> distance(n, INT_MAX-1);
  398. //cout << "index of start: " << igraph_vector_int_get(&sorted_vertex, start_idx) << endl;
  399. distance[igraph_vector_int_get(&sorted_vertex, start_idx)] = 0;
  400.  
  401. igraph_vector_int_t incident_edges;
  402. igraph_vector_int_init(&incident_edges, 0);
  403.  
  404. for(int i = start_idx; i < n; i++){
  405. igraph_integer_t vid = igraph_vector_int_get(&sorted_vertex, i);
  406. // update neightbors
  407. igraph_incident(graph, &incident_edges, vid, IGRAPH_IN);
  408. int m = igraph_vector_int_size(&incident_edges);
  409. //cout << "vid = " << vid << endl;
  410. for(int i = 0; i < m; i++){
  411.  
  412. igraph_integer_t start, end;
  413. igraph_edge(graph, igraph_vector_int_get(&incident_edges, i), &start, &end);
  414. int weight = igraph_cattribute_EAN(graph, "weight", igraph_vector_int_get(&incident_edges, i));
  415. //cout << "start = " << start << " end = " << end << " weight = " << weight << endl;
  416.  
  417. if(distance[start] > distance[end] + weight){
  418. distance[start] = distance[end] + weight;
  419. }
  420. }
  421. }
  422.  
  423. igraph_vector_int_destroy(&incident_edges);
  424. igraph_vector_int_destroy(&sorted_vertex);
  425.  
  426. for(int i = 0; i < n; i++){
  427. //cout << i << ":" << distance[i] << ", ";
  428. if(distance[i] <= delta){
  429. igraph_vector_int_push_back(result, i);
  430. }
  431. }
  432.  
  433. }
  434.  
  435.  
  436. /*
  437. The main idea is that we consider the vertices of the alignment graph as laided out in |P||V| grid.
  438. Each row of length |V| is sorted in the same topological order.
  439. We hash x,y coordinates to track if a vertex as already been encountered and store as value the min dist
  440. result graph should not be initialized
  441. */
  442. void create_pruned_alignment_graph(igraph_t* graph, string pattern, int delta, igraph_t* result){
  443.  
  444. // topologically sort the vertices, this will be used to get the delta_x for every edge considered
  445. igraph_vector_int_t sorted_vertex;
  446. igraph_vector_int_init(&sorted_vertex, igraph_vcount(graph));
  447. igraph_topological_sorting(graph, &sorted_vertex, IGRAPH_OUT);
  448. int n = igraph_vector_int_size(&sorted_vertex);
  449. vector<int> inv_sorted_vertex(n);
  450. for(int i = 0; i < n; i++){
  451. inv_sorted_vertex[igraph_vector_int_get(&sorted_vertex, i)] = i;
  452. }
  453.  
  454. /*
  455.   for(int i = 0; i < n; i++){
  456.   cout << igraph_vector_int_get(&sorted_vertex, i) << ", ";
  457.   }
  458.   cout << endl;
  459.   for(int i = 0; i < n; i++){
  460.   cout << inv_sorted_vertex[i] << ", ";
  461.   }
  462.   cout << endl;
  463.   */
  464.  
  465. typedef struct vertex_struct{
  466. int id;
  467. int x;
  468. int y;
  469. int dist;
  470. }vertex;
  471.  
  472. // hash to track seen coordinates, distances are stored here, not in queue
  473. unordered_map<int, vertex> seen_coordinates;
  474.  
  475. igraph_vector_int_t alignment_graph_edges; // start, end, start, end,
  476. vector <int> weights;
  477. vector <int> variants;
  478.  
  479. igraph_vector_int_init(&alignment_graph_edges, 0);
  480.  
  481. int source_vertex_id = 0;
  482. int sink_vertex_id = 0;
  483.  
  484. queue<vertex> q;
  485. vertex u = {0, inv_sorted_vertex[0], 0, 0};
  486. q.push(u);
  487.  
  488. int new_vid = 1;
  489. igraph_vector_int_t incident_edges;
  490. igraph_vector_int_init(&incident_edges, 0);
  491.  
  492. while(!q.empty()){
  493.  
  494. vertex u = q.front();
  495. q.pop();
  496. igraph_integer_t old_uid = igraph_vector_int_get(&sorted_vertex, u.x);
  497.  
  498.  
  499. int u_coord = u.y * n + u.x;
  500. vertex u_ = seen_coordinates[u_coord];
  501. int u_dist = u_.dist;
  502.  
  503. //cout << u.x << ", " << u.y << ": " << u_dist << endl;
  504.  
  505. // vertices on same level (insertion)
  506. if(u_dist + 1 <= delta){
  507.  
  508. igraph_incident(graph, &incident_edges, old_uid, IGRAPH_OUT);
  509. int m = igraph_vector_int_size(&incident_edges);
  510. //cout << "m = " << m << endl;
  511. for(int i = 0; i < m; i++){
  512.  
  513. igraph_integer_t start, end;
  514. igraph_edge(graph, igraph_vector_int_get(&incident_edges, i), &start, &end);
  515.  
  516. int v_x = inv_sorted_vertex[end];
  517. int v_y = u.y;
  518. int v_coordinate = v_y * n + v_x;
  519. int vid;
  520.  
  521. if(seen_coordinates.find(v_coordinate) == seen_coordinates.end()){
  522. // never encounter coordinate
  523. // add vertex with new vertex id and edge
  524.  
  525. vid = new_vid;
  526. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  527. new_vid++;
  528.  
  529.  
  530. // enque vertex
  531. q.push({vid, v_x, v_y, -1});
  532.  
  533. }else{
  534. // coordinate seen before
  535. vertex v = seen_coordinates[v_coordinate];
  536. vid = v.id;
  537.  
  538. // update distance if needed
  539. if(u_dist + 1 < v.dist){
  540. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  541. }
  542. }
  543. igraph_vector_int_push_back(&alignment_graph_edges, u.id);
  544. igraph_vector_int_push_back(&alignment_graph_edges, vid);
  545. weights.push_back(1);
  546. variants.push_back(igraph_cattribute_EAN(graph, "variant", igraph_vector_int_get(&incident_edges, i)));
  547. }
  548. }
  549.  
  550. // deletion edges
  551. if(u.y < pattern.length() && u_dist + 1 <= delta){
  552. int v_x = u.x;
  553. int v_y = u.y + 1;
  554. int v_coordinate = v_y * n + v_x;
  555.  
  556. int vid;
  557. // never seen before
  558. if(seen_coordinates.find(v_coordinate) == seen_coordinates.end()){
  559.  
  560. vid = new_vid;
  561. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  562. new_vid++;
  563.  
  564. // enque vertex
  565. q.push({vid, v_x, v_y, -1});
  566.  
  567. }else{
  568. // coordinate seen before
  569. vertex v = seen_coordinates[v_coordinate];
  570. vid = v.id;
  571.  
  572. // update distance if needed
  573. if(u_dist + 1 < v.dist){
  574. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  575. }
  576. }
  577. igraph_vector_int_push_back(&alignment_graph_edges, u.id);
  578. igraph_vector_int_push_back(&alignment_graph_edges, vid);
  579. weights.push_back(1);
  580. variants.push_back(-2);
  581. }
  582.  
  583. // sub/match edges
  584.  
  585. if(u.y < pattern.length()){
  586. igraph_incident(graph, &incident_edges, old_uid, IGRAPH_OUT);
  587. int m = igraph_vector_int_size(&incident_edges);
  588.  
  589. for(int i = 0; i < m; i++){
  590.  
  591. // check if symbol matches
  592. int sym = (int) igraph_cattribute_EAN(graph, "label", igraph_vector_int_get(&incident_edges, i));
  593.  
  594. //cout << "(" << u.x << ", " << u.y << ") sym: " << sym << endl;
  595. if( (pattern[u.y] == 'A' && sym == 0)
  596. || (pattern[u.y] == 'T' && sym == 1)
  597. || (pattern[u.y] == 'C' && sym == 2)
  598. || (pattern[u.y] == 'G' && sym == 3)
  599. || (pattern[u.y] == 'N' || sym == 4)
  600. ){
  601.  
  602. igraph_integer_t start, end;
  603. igraph_edge(graph, igraph_vector_int_get(&incident_edges, i), &start, &end);
  604.  
  605. int v_x = inv_sorted_vertex[end];
  606. int v_y = u.y + 1;
  607. int v_coordinate = v_y * n + v_x;
  608. int vid;
  609.  
  610. if(seen_coordinates.find(v_coordinate) == seen_coordinates.end()){
  611. // never encounter coordinate
  612. // add vertex with new vertex id and edge
  613.  
  614. // Adds 0 to the distance from u
  615. vid = new_vid;
  616. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist};
  617. new_vid++;
  618.  
  619. // enque vertex
  620. q.push({vid, v_x, v_y, -1});
  621.  
  622. }else{
  623. // coordinate seen before
  624. vertex v = seen_coordinates[v_coordinate];
  625. vid = v.id;
  626.  
  627. // update distance if needed
  628. if(u_dist < v.dist){
  629. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist};
  630. }
  631. }
  632. igraph_vector_int_push_back(&alignment_graph_edges, u.id);
  633. igraph_vector_int_push_back(&alignment_graph_edges, vid);
  634. weights.push_back(0);
  635. variants.push_back(igraph_cattribute_EAN(graph, "variant", igraph_vector_int_get(&incident_edges, i)));
  636.  
  637. }else if (u_dist + 1 <= delta){
  638.  
  639. igraph_integer_t start, end;
  640. igraph_edge(graph, igraph_vector_int_get(&incident_edges, i), &start, &end);
  641.  
  642. int v_x = inv_sorted_vertex[end];
  643. int v_y = u.y + 1;
  644. int v_coordinate = v_y * n + v_x;
  645. int vid;
  646.  
  647. if(seen_coordinates.find(v_coordinate) == seen_coordinates.end()){
  648. // never encounter coordinate
  649. // add vertex with new vertex id and edge
  650.  
  651. // Adds 1 to the distance from u
  652. vid = new_vid;
  653. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  654. new_vid++;
  655.  
  656. // enque vertex
  657. q.push({vid, v_x, v_y, -1});
  658.  
  659. }else{
  660. // coordinate seen before
  661. vertex v = seen_coordinates[v_coordinate];
  662. vid = v.id;
  663.  
  664. // update distance if needed
  665. if(u_dist + 1 < v.dist){
  666. seen_coordinates[v_coordinate] = {vid, v_x, v_y, u_dist + 1};
  667. }
  668. }
  669. igraph_vector_int_push_back(&alignment_graph_edges, u.id);
  670. igraph_vector_int_push_back(&alignment_graph_edges, vid);
  671. weights.push_back(1);
  672. variants.push_back(igraph_cattribute_EAN(graph, "variant", igraph_vector_int_get(&incident_edges, i)));
  673. }
  674. }
  675. }
  676.  
  677.  
  678. // edges to sink vertex
  679.  
  680. if(u.y == pattern.length()){
  681.  
  682. int v_y = u.y+1;
  683. int v_coordinate = n * v_y;
  684.  
  685. int vid;
  686.  
  687. if(seen_coordinates.find(v_coordinate) == seen_coordinates.end()){
  688. vid = new_vid;
  689. seen_coordinates[v_coordinate] = {vid, 0, v_y, u_dist};
  690. sink_vertex_id = vid;
  691. new_vid++;
  692.  
  693. //q.push({vid, 0, n*(pattern.length()+1), -1});
  694.  
  695. }else{
  696. // coordinate seen before
  697. vertex v = seen_coordinates[v_coordinate];
  698. vid = v.id;
  699.  
  700. // update distance if needed
  701. if(u_dist < v.dist){
  702. seen_coordinates[v_coordinate] = {vid, 0, v_y, u_dist};
  703. }
  704. }
  705.  
  706. igraph_vector_int_push_back(&alignment_graph_edges, u.id);
  707. igraph_vector_int_push_back(&alignment_graph_edges, vid);
  708. weights.push_back(0);
  709. variants.push_back(-3);
  710.  
  711. }
  712. }
  713. // construct temporary forward-pruned only alignment graph
  714. bool directed = true;
  715. igraph_t forward_pruned_only_graph;
  716. igraph_empty(&forward_pruned_only_graph, new_vid, directed);
  717. igraph_add_edges(&forward_pruned_only_graph, &alignment_graph_edges, NULL);
  718.  
  719. for(int i = 0; i < weights.size(); i++){
  720. igraph_cattribute_EAN_set(&forward_pruned_only_graph, "weight", i, weights.at(i));
  721. igraph_cattribute_EAN_set(&forward_pruned_only_graph, "variant", i, variants.at(i));
  722. }
  723.  
  724. igraph_vector_int_destroy(&alignment_graph_edges);
  725. igraph_vector_int_destroy(&sorted_vertex);
  726.  
  727. igraph_vector_t zeros;
  728. igraph_vector_init(&zeros, igraph_vcount(&forward_pruned_only_graph));
  729. igraph_cattribute_VAN_setv(&forward_pruned_only_graph, "startend", &zeros);
  730. igraph_cattribute_VAN_set(&forward_pruned_only_graph, "startend", source_vertex_id, 1);
  731. igraph_cattribute_VAN_set(&forward_pruned_only_graph, "startend", sink_vertex_id, 2);
  732.  
  733.  
  734. //cout << "============= Forward Prunned Alignment Graph Constructed =============" << endl;
  735. //print_alignment_graph(&forward_pruned_only_graph);
  736. //cout << "Forward pruned alignment graph size: " << igraph_vcount(&forward_pruned_only_graph) << endl;
  737. // reverse pruning
  738.  
  739. // get vertices reachable from the last vertex with distance at most delta
  740. igraph_vector_int_t reverse_reachable_vertex;
  741. igraph_vector_int_init(&reverse_reachable_vertex, 0);
  742.  
  743. reverse_weighted_reachable(&forward_pruned_only_graph, sink_vertex_id, &reverse_reachable_vertex, delta);
  744.  
  745. igraph_vs_t vs;
  746. igraph_vs_vector(&vs, &reverse_reachable_vertex);
  747.  
  748. igraph_induced_subgraph(&forward_pruned_only_graph, result, vs, IGRAPH_SUBGRAPH_AUTO);
  749. igraph_destroy(&forward_pruned_only_graph);
  750. igraph_vector_int_destroy(&reverse_reachable_vertex);
  751.  
  752. }
  753.  
  754.  
  755. void add_initial_solution(igraph_t* alignment_graph){
  756.  
  757. // get source and sink
  758. int start_vertex = 0;
  759. int end_vertex = 0;
  760. igraph_vs_t vs;
  761. igraph_vit_t vit;
  762. igraph_vs_all(&vs);
  763. igraph_vit_create(alignment_graph, vs, &vit);
  764. while (!IGRAPH_VIT_END(vit)) {
  765. int vid = IGRAPH_VIT_GET(vit);
  766. int startend = (int)igraph_cattribute_VAN(alignment_graph, "startend", vid);
  767. if(startend == 1){
  768. start_vertex = vid;
  769. }else if (startend == 2){
  770. end_vertex = vid;
  771. }
  772. IGRAPH_VIT_NEXT(vit);
  773. }
  774. igraph_vs_destroy(&vs);
  775. igraph_vit_destroy(&vit);
  776.  
  777.  
  778. // get edge weights
  779. igraph_es_t es;
  780. igraph_es_all(&es, IGRAPH_EDGEORDER_ID);
  781. igraph_vector_t weights;
  782. igraph_vector_init(&weights, 0);
  783. igraph_cattribute_EANV(alignment_graph, "weight", es, &weights);
  784. igraph_es_destroy(&es);
  785.  
  786. // get path with minimum weight
  787. igraph_vector_int_t path_edges;
  788. igraph_vector_int_init(&path_edges, 0);
  789.  
  790. //cout << "num vertex in graph: " << igraph_vcount(alignment_graph) << endl;
  791. //cout << "start:" << start_vertex << " end vertex: " << end_vertex << endl;
  792.  
  793. igraph_get_shortest_path_dijkstra(alignment_graph, NULL, &path_edges, start_vertex, end_vertex, &weights, IGRAPH_OUT);
  794. igraph_vector_destroy(&weights);
  795.  
  796. // put it in attribute vector
  797. igraph_vector_t attribute_vector;
  798. igraph_vector_init(&attribute_vector, igraph_ecount(alignment_graph));
  799. int m = igraph_vector_int_size(&path_edges);
  800. for(int i = 0; i < m; i++){
  801. int eid = igraph_vector_int_get(&path_edges, i);
  802. //int weight = igraph_cattribute_EAN(alignment_graph, "weight", eid);
  803. //cout << "edge id: " << eid << " weight: " << weight << endl;
  804. igraph_vector_set(&attribute_vector, eid, 1);
  805. }
  806. igraph_vector_int_destroy(&path_edges);
  807.  
  808. // set edge attrbitue
  809. igraph_cattribute_EAN_setv(alignment_graph, "initsol", &attribute_vector);
  810. igraph_vector_destroy(&attribute_vector);
  811.  
  812. }
  813.  
  814. void create_alignment_graph(igraph_t* variation_graph, int pos, string s, int alpha, int delta, igraph_t* alignment_graph){
  815.  
  816. igraph_vs_t start_vertex_selector;
  817. igraph_vs_1(&start_vertex_selector, pos);
  818.  
  819. igraph_vector_int_list_t reachable_vertex_list;
  820. igraph_vector_int_list_init(&reachable_vertex_list, 0);
  821.  
  822. get_reachable_vertex(variation_graph, start_vertex_selector, alpha + delta, &reachable_vertex_list);
  823. igraph_vs_destroy(&start_vertex_selector);
  824.  
  825. // since vertex selector is only for one vertex, we only need the first element
  826. igraph_vector_int_t* reachable_vertex = igraph_vector_int_list_get_ptr(&reachable_vertex_list, 0);
  827.  
  828. igraph_t reachable_graph;
  829. get_induced_variation_graph(variation_graph, reachable_vertex, &reachable_graph);
  830.  
  831.  
  832. //cout << "induced variation graph size: " << igraph_vcount(&reachable_graph) << endl;
  833.  
  834. int start_vertex, end_vertex;
  835. create_pruned_alignment_graph(&reachable_graph, s, delta, alignment_graph);
  836.  
  837. //cout << "pruned alignment graph size: " << igraph_vcount(alignment_graph) << endl;
  838.  
  839. add_initial_solution(alignment_graph);
  840.  
  841. igraph_vector_int_list_destroy(&reachable_vertex_list);
  842. igraph_destroy(&reachable_graph);
  843. }
  844.  
  845.  
  846. void get_variants(igraph_t* g, unordered_set<int>& variants){
  847.  
  848. igraph_es_t es;
  849. igraph_es_all(&es, IGRAPH_EDGEORDER_ID);
  850. igraph_eit_t eit;
  851. igraph_eit_create(g, es, &eit);
  852.  
  853. while (!IGRAPH_EIT_END(eit)) {
  854. int variant = igraph_cattribute_EAN(g, "variant" ,IGRAPH_EIT_GET(eit));
  855. if(variant >= 0){
  856. variants.insert(variant);
  857. }
  858.  
  859. IGRAPH_EIT_NEXT(eit);
  860. }
  861. igraph_es_destroy(&es);
  862. igraph_eit_destroy(&eit);
  863. }
  864.  
  865.  
  866. // returns a vector that assigns each alignment graph to a set using an integer
  867. // also prints the number of sets and size of each corresponding ILP
  868. void analyze_alignment_graph_set(int num_variants,
  869. vector<vector<int>*>& alignment_graph_to_variants,
  870. vector<int>& global_variable_to_component,
  871. vector<int>& subILP_to_component,
  872. vector<int>& global_variable_to_local_idx,
  873. vector<int>& component_to_global_variable_count,
  874. vector<vector<int>>& component_to_subILPs,
  875. int* num_components
  876. ){
  877.  
  878.  
  879. igraph_vector_int_t edges;
  880. igraph_vector_int_init(&edges, 0);
  881.  
  882. int ilp_idx = num_variants;
  883.  
  884. for(int i = 0; i < alignment_graph_to_variants.size(); i++){
  885. //print_alignment_graph(&g);
  886.  
  887. // obtain set of variants
  888. //unordered_set<int> variants;
  889. //get_variants(&g, variants);
  890.  
  891. // add edges for bipartite graph
  892. //cout << i << ": ";
  893. for(int v: *alignment_graph_to_variants[i]){
  894. //cout << v << ", ";
  895. igraph_vector_int_push_back(&edges, ilp_idx);
  896. igraph_vector_int_push_back(&edges, v);
  897. }
  898. //cout << endl;
  899. ilp_idx++;
  900.  
  901. }
  902. int n = num_variants + alignment_graph_to_variants.size();
  903.  
  904. bool directed = false;
  905. igraph_t bipartite;
  906. igraph_empty(&bipartite, n, directed);
  907. igraph_add_edges(&bipartite, &edges, NULL);
  908.  
  909. igraph_vector_int_destroy(&edges);
  910.  
  911. igraph_vector_int_t membership;
  912. igraph_vector_int_init(&membership, n);
  913. igraph_connected_components(&bipartite, &membership, NULL, NULL, IGRAPH_STRONG);
  914.  
  915.  
  916. global_variable_to_component = vector<int>(num_variants);
  917. subILP_to_component = vector<int>(alignment_graph_to_variants.size());
  918. global_variable_to_local_idx = vector<int>(num_variants);
  919.  
  920. unordered_map<int, int> sizes;
  921.  
  922. *num_components = 0;
  923. for(int i = 0; i < n; i++){
  924.  
  925. int component = igraph_vector_int_get(&membership, i);
  926. if(component + 1 > *num_components){
  927. *num_components = component + 1;
  928. }
  929.  
  930. if(i < num_variants){
  931.  
  932. global_variable_to_component[i] = component;
  933.  
  934. if(sizes.find(component) == sizes.end()){
  935. sizes[component] = 1;
  936. }else{
  937. sizes[component] = sizes[component] + 1;
  938. }
  939. global_variable_to_local_idx[i] = sizes[component] - 1;
  940.  
  941. }else{
  942. subILP_to_component[i - num_variants] = component;
  943. }
  944. }
  945.  
  946.  
  947. component_to_global_variable_count = vector<int>(*num_components, 0);
  948. component_to_subILPs = vector<vector<int>>(*num_components);
  949. for (auto size : sizes){
  950. component_to_global_variable_count[size.first] = size.second;
  951. component_to_subILPs[size.first] = vector<int>();
  952. }
  953.  
  954. // clean up
  955. igraph_vector_int_destroy(&membership);
  956.  
  957.  
  958. // print statements for debugging
  959. if(VERBOSE){
  960. cout << "Global variable to component" << endl;
  961. for(int i = 0; i < global_variable_to_component.size(); i++){
  962. cout << "x_" << i << ": " << global_variable_to_component[i] << ", local:" << global_variable_to_local_idx[i] << endl;
  963. }
  964.  
  965. cout << "\nSubILP to component" << endl;
  966. for(int i = 0; i < subILP_to_component.size(); i++){
  967. cout << "ILP_" << i << ": " << subILP_to_component[i] << endl;
  968. }
  969. }
  970.  
  971. /*
  972.   cout << "\nComponent to global variant count:\n" << endl;
  973.   cout << "[";
  974.   for(int i = 0; i < component_to_global_variable_count.size(); i++){
  975.   //cout << "component_" << i << " has variants: " << component_to_global_variable_count[i] << endl;
  976.   cout << component_to_global_variable_count[i] << ", ";
  977.   }
  978.   cout << "]" << endl;
  979.   */
  980.  
  981. unordered_map<int, int> component_to_num_edges;
  982. for(int i = 0; i < alignment_graph_to_variants.size(); i++){
  983.  
  984. //igraph_t g = alignment_graphs.at(i);
  985. int component = subILP_to_component[i];
  986. component_to_subILPs[component].push_back(i);
  987. /*
  988.   if(component_to_num_edges.find(component) == component_to_num_edges.end()){
  989.   component_to_num_edges[component] = igraph_ecount(&g);
  990.   }else{
  991.   component_to_num_edges[component] = component_to_num_edges[component] + igraph_ecount(&g);
  992.   }
  993.   */
  994. }
  995. /*
  996.   for(int i = 0; i < component_to_global_variable_count.size(); i++){
  997.   cout << "component_" << i << " has total variables: " << component_to_global_variable_count[i] + component_to_num_edges[i] << endl;
  998.   }
  999.   */
  1000. }
  1001.  
  1002.  
  1003. void construct_ILPs(int component,
  1004. vector<vector<int>>& component_to_subILPs,
  1005. vector<int>& global_variable_to_local_idx,
  1006. vector<int>& component_to_global_variable_count,
  1007. GRBModel& model,
  1008. int delta,
  1009. string graph_directory,
  1010. vector<igraph_t*>& alignment_graphs,
  1011. bool use_files
  1012. ){
  1013.  
  1014.  
  1015. GRBVar* global_var = model.addVars(component_to_global_variable_count[component]);
  1016.  
  1017. GRBLinExpr obj = GRBLinExpr();
  1018. for(int i = 0; i < component_to_global_variable_count[component]; i++){
  1019. obj += global_var[i];
  1020.  
  1021. global_var[i].set(GRB_DoubleAttr_Start, 0);
  1022. }
  1023. model.setObjective(obj, GRB_MAXIMIZE);
  1024.  
  1025.  
  1026. if(PRINT_COMPONENT_SIZES){
  1027. cout << "\nComponent "<< component << " has size (number of alignment graphs): " << component_to_subILPs[component].size() << endl;
  1028. }
  1029.  
  1030. for(int i = 0; i < component_to_subILPs[component].size(); i++){
  1031.  
  1032. auto start = chrono::steady_clock::now();
  1033.  
  1034. int alignment_graph_idx = component_to_subILPs[component][i];
  1035.  
  1036. igraph_t alignment_graph;
  1037.  
  1038. if(use_files == 1){
  1039. string file_name = graph_directory + "g_" + to_string(alignment_graph_idx);
  1040. //cout << file_name << endl;
  1041. read_alignment_graph_from_file(&alignment_graph, file_name);
  1042. }else{
  1043. alignment_graph = *alignment_graphs[alignment_graph_idx];
  1044. }
  1045.  
  1046. if(igraph_ecount(&alignment_graph) == 0){
  1047. cout << "no edges in alignment graph." << endl;
  1048. continue;
  1049. }
  1050.  
  1051. /*
  1052.   if(VERBOSE){
  1053.   cout << "Alignment_graph_" << i << " ILP construction in component (model): " << component << endl;
  1054.   }else if(!VERBOSE && i % 1 == 0){
  1055.   cout << "Constructing ILP for alignment_graph: " << i << " out of " << component_to_subILPs[component].size() << " (increments of 1)" << endl;
  1056.   }
  1057.   */
  1058.  
  1059. GRBVar* local_var = model.addVars(igraph_ecount(&alignment_graph), GRB_BINARY);
  1060.  
  1061. igraph_vs_t vs;
  1062. igraph_vit_t vit;
  1063. igraph_bool_t loops = true; // there are no loops, this is to get const. query time for degrees
  1064.  
  1065. igraph_vs_all(&vs);
  1066. igraph_vit_create(&alignment_graph, vs, &vit);
  1067.  
  1068.  
  1069. while (!IGRAPH_VIT_END(vit)) {
  1070.  
  1071. int vid = IGRAPH_VIT_GET(vit);
  1072.  
  1073. igraph_integer_t in_degree, out_degree;
  1074. igraph_degree_1(&alignment_graph, &in_degree, vid, IGRAPH_IN, loops);
  1075. igraph_degree_1(&alignment_graph, &out_degree, vid, IGRAPH_OUT, loops);
  1076.  
  1077. /*
  1078.   cout << "graph: " << i << " vertex: " << vid
  1079.   << " in-degree: " << in_degree
  1080.   << " out-degree: " << out_degree
  1081.   << " startend: " << igraph_cattribute_VAN(&alignment_graphs[i], "startend", vid)
  1082.   << endl;
  1083.   */
  1084.  
  1085. GRBLinExpr lhs, rhs;
  1086. if(igraph_cattribute_VAN(&alignment_graph, "startend", vid) == 1){
  1087. lhs = GRBLinExpr(1);
  1088. rhs = GRBLinExpr();
  1089.  
  1090. // iterate through outbound edges
  1091. igraph_vector_int_t edges;
  1092. igraph_vector_int_init(&edges, 0);
  1093. igraph_incident(&alignment_graph, &edges, vid, IGRAPH_OUT);
  1094. int m = igraph_vector_int_size(&edges);
  1095. for(int j = 0; j < m; j++){
  1096. rhs += local_var[igraph_vector_int_get(&edges, j)];
  1097. }
  1098. igraph_vector_int_destroy(&edges);
  1099.  
  1100. }else if(igraph_cattribute_VAN(&alignment_graph, "startend", vid) == 2){
  1101. lhs = GRBLinExpr();
  1102. rhs = GRBLinExpr(1);
  1103.  
  1104. // iterate through inbound edges
  1105. igraph_vector_int_t edges;
  1106. igraph_vector_int_init(&edges, 0);
  1107. igraph_incident(&alignment_graph, &edges, vid, IGRAPH_IN);
  1108. int m = igraph_vector_int_size(&edges);
  1109. for(int j = 0; j < m; j++){
  1110. lhs += local_var[igraph_vector_int_get(&edges, j)];
  1111. }
  1112. igraph_vector_int_destroy(&edges);
  1113.  
  1114. }else{
  1115. lhs = GRBLinExpr();
  1116. rhs = GRBLinExpr();
  1117.  
  1118. igraph_vector_int_t edges;
  1119. igraph_vector_int_init(&edges, 0);
  1120.  
  1121. // iterate through in bound edges
  1122. igraph_incident(&alignment_graph, &edges, vid, IGRAPH_IN);
  1123. int m = igraph_vector_int_size(&edges);
  1124. for(int j = 0; j < m; j++){
  1125. lhs += local_var[igraph_vector_int_get(&edges, j)];
  1126. }
  1127.  
  1128. // iterate through outbound edges
  1129. igraph_incident(&alignment_graph, &edges, vid, IGRAPH_OUT);
  1130. m = igraph_vector_int_size(&edges);
  1131. for(int j = 0; j < m; j++){
  1132. rhs += local_var[igraph_vector_int_get(&edges, j)];
  1133. }
  1134.  
  1135. igraph_vector_int_destroy(&edges);
  1136.  
  1137. }
  1138.  
  1139. model.addConstr(lhs, GRB_EQUAL, rhs);
  1140. IGRAPH_VIT_NEXT(vit);
  1141. }
  1142.  
  1143. igraph_vs_destroy(&vs);
  1144. igraph_vit_destroy(&vit);
  1145.  
  1146. // add binding constraints and delta constraint
  1147. igraph_es_t es;
  1148. igraph_eit_t eit;
  1149. igraph_es_all(&es, IGRAPH_EDGEORDER_ID);
  1150. igraph_eit_create(&alignment_graph, es, &eit);
  1151.  
  1152. GRBLinExpr delta_constraint_rhs = GRBLinExpr(delta);
  1153. GRBLinExpr delta_constraint_lhs = GRBLinExpr();
  1154.  
  1155. while(!IGRAPH_EIT_END(eit)){
  1156. int eid = IGRAPH_EIT_GET(eit);
  1157. int variant = igraph_cattribute_EAN(&alignment_graph, "variant", eid);
  1158. if(variant >= 0){
  1159. int idx = global_variable_to_local_idx[variant];
  1160.  
  1161. GRBLinExpr lhs = GRBLinExpr();
  1162. lhs += global_var[idx];
  1163. lhs += local_var[eid];
  1164. GRBLinExpr rhs = GRBLinExpr(1);
  1165. model.addConstr(lhs, GRB_LESS_EQUAL, rhs);
  1166. }
  1167.  
  1168. delta_constraint_lhs += igraph_cattribute_EAN(&alignment_graph, "weight", eid)*local_var[eid];
  1169.  
  1170.  
  1171. // added for feasibility check of initsol, delete afterward
  1172. /*
  1173.   GRBLinExpr lhs = GRBLinExpr();
  1174.   lhs += local_var[eid];
  1175.   GRBLinExpr rhs = GRBLinExpr((int)igraph_cattribute_EAN(&alignment_graph, "initsol", eid));
  1176.   model.addConstr(lhs, GRB_EQUAL, rhs);
  1177.   */
  1178.  
  1179. local_var[eid].set(GRB_DoubleAttr_Start , (int)igraph_cattribute_EAN(&alignment_graph, "initsol", eid));
  1180.  
  1181.  
  1182. IGRAPH_EIT_NEXT(eit);
  1183. }
  1184.  
  1185. model.addConstr(delta_constraint_lhs, GRB_LESS_EQUAL, delta_constraint_rhs);
  1186.  
  1187. igraph_es_destroy(&es);
  1188. igraph_eit_destroy(&eit);
  1189.  
  1190. /*
  1191.   if(VERBOSE){
  1192.   auto stop = chrono::steady_clock::now();
  1193.   auto duration = duration_cast<chrono::milliseconds>(stop - start);
  1194.   cout << "\tTime for alignment_graph_" << alignment_graph_idx << " ILP construction: " << duration.count() << " milliseconds"
  1195.   " component (model): " << component << endl;
  1196.   }
  1197.   */
  1198. igraph_destroy(&alignment_graph);
  1199. }
  1200.  
  1201. // update and view models for checking
  1202. model.update();
  1203. if(WRITE_ILPS_TO_FILE){
  1204. string file_name = "model_" + to_string(component) + ".lp";
  1205. model.write(file_name);
  1206. }
  1207.  
  1208. }
  1209.  
  1210.  
  1211. int main(int argc, char** argv){
  1212.  
  1213. GRBEnv env = GRBEnv();
  1214. env.set("OutputFlag", "0");
  1215. env.start();
  1216.  
  1217. if(argc < 8){
  1218. cout << "Wrong number of arguments provided" << endl;
  1219. return 0;
  1220. }
  1221. string edge_file_name = argv[1];
  1222. string pos_substring_file_name = argv[2];
  1223. string scratch_directory = argv[3];
  1224. string sol_directory = argv[4];
  1225. int alpha = atoi(argv[5]);
  1226. int delta = atoi(argv[6]);
  1227. int use_files = atoi(argv[7]);
  1228.  
  1229. igraph_set_attribute_table(&igraph_cattribute_table);
  1230.  
  1231. // construct variation graph
  1232. igraph_t variation_graph;
  1233. int num_variants;
  1234.  
  1235. read_edge_file(edge_file_name, &variation_graph, &num_variants);
  1236.  
  1237. cout << "\n============= Variation Graph Constructed =============\n" << endl;
  1238. //cout << "Construction time: " << duration.count() << " milliseconds\n" <<endl;
  1239. cout << "Number of variants: " << num_variants << endl;
  1240. cout << "Number of vertices: " << igraph_vcount(&variation_graph) << endl;
  1241. cout << "Number of edges: " << igraph_ecount(&variation_graph) << endl;
  1242.  
  1243. cout << "\nReading pos_substring file...\n" << endl;
  1244. // read locations and strings
  1245. vector<int> positions;
  1246. vector<string> substrings;
  1247.  
  1248. read_pos_substring_file(pos_substring_file_name, positions, substrings);
  1249.  
  1250. int N = positions.size();
  1251.  
  1252. cout << "\nConstructing alignment graphs...\n" << endl;
  1253. vector<vector<int>*> alignment_graph_to_variants = vector<vector<int>*>(N);
  1254.  
  1255.  
  1256. vector<igraph_t*> alignment_graphs;
  1257. for(int i = 0; i < N; i++){
  1258. alignment_graphs.push_back(new igraph_t);
  1259. }
  1260.  
  1261. #pragma omp parallel for
  1262. for(int i = 0; i < N; i++){
  1263. if(VERBOSE){
  1264. cout << "Constructing alignment graphs for position: " << positions[i] << endl;
  1265. }else if (!VERBOSE && i % PRINT_FACTOR == 0){
  1266. cout << "Constructing alignment graph: " << i << " (prints mod " << PRINT_FACTOR << ")" << endl;
  1267. }
  1268.  
  1269. igraph_t alignment_graph;
  1270. create_alignment_graph(&variation_graph, positions[i], substrings[i], alpha, delta, &alignment_graph);
  1271.  
  1272. // obtain set of variants
  1273. alignment_graph_to_variants[i] = new vector<int>;
  1274. unordered_set<int> variants;
  1275. get_variants(&alignment_graph, variants);
  1276.  
  1277. for(int v: variants){
  1278. alignment_graph_to_variants[i]->push_back(v);
  1279. }
  1280.  
  1281. igraph_cattribute_GAN_set(&alignment_graph, "position", positions[i]);
  1282. igraph_cattribute_GAS_set(&alignment_graph, "substring", substrings[i].c_str());
  1283.  
  1284. if(use_files == 1){
  1285. string file_name = scratch_directory + "g_" + to_string(i);
  1286. write_alignment_graph_to_file(&alignment_graph, file_name);
  1287. }else{
  1288. igraph_copy(alignment_graphs[i], &alignment_graph);
  1289. }
  1290.  
  1291.  
  1292. //sum_of_sizes += igraph_ecount(&alignment_graph);
  1293.  
  1294. igraph_destroy(&alignment_graph);
  1295. }
  1296.  
  1297. //cout << "average alignment graph size: " << (float)sum_of_sizes/(float)N << endl;
  1298.  
  1299.  
  1300. positions.clear();
  1301. positions.shrink_to_fit();
  1302. substrings.clear();
  1303. substrings.shrink_to_fit();
  1304.  
  1305. igraph_destroy(&variation_graph);
  1306. cout << "\n============= Alignment Graphs Constructed =============\n" << endl;
  1307.  
  1308. cout << "Analyzing alignment graph set..." << endl;
  1309.  
  1310. vector<int> global_variable_to_component;
  1311. vector<int> subILP_to_component;
  1312. vector<int> global_variable_to_local_idx;
  1313. vector<int> component_to_variant_count;
  1314. vector<vector<int>> component_to_subILPs;
  1315. int num_components;
  1316. analyze_alignment_graph_set(num_variants,
  1317. alignment_graph_to_variants,
  1318. global_variable_to_component,
  1319. subILP_to_component,
  1320. global_variable_to_local_idx,
  1321. component_to_variant_count,
  1322. component_to_subILPs,
  1323. &num_components);
  1324.  
  1325. alignment_graph_to_variants.clear();
  1326. alignment_graph_to_variants.shrink_to_fit();
  1327.  
  1328. cout << "\n============= Alignment Graphs Analyzed =============\n" << endl;
  1329. cout << "number of components: " << num_components << endl;
  1330.  
  1331. // we need to invert the map from separate ILPs
  1332. map<pair<int, int>, int> component_index_pair_to_variant;
  1333. for(int i = 0; i < num_variants; i++){
  1334.  
  1335. auto p = make_pair(global_variable_to_component[i], global_variable_to_local_idx[i]);
  1336.  
  1337. if(component_index_pair_to_variant.find(p) == component_index_pair_to_variant.end()){
  1338. component_index_pair_to_variant[p] = i;
  1339. }
  1340. }
  1341.  
  1342.  
  1343. cout << "\nConstructing ILP models from alignment graphs...\n" << endl;
  1344.  
  1345. vector<int> solution = vector<int>(num_variants, 1);
  1346.  
  1347. bool some_ILP_timed_out = false;
  1348.  
  1349. #pragma omp parallel for
  1350. for(int component = 0; component < num_components; component++){
  1351.  
  1352. if(component % PRINT_FACTOR == 0){
  1353. cout << "Solving ILP_" << component << " out of " << num_components << " (prints mod " << PRINT_FACTOR << ")" <<endl;
  1354. }
  1355.  
  1356. //cout << "Solving ILP_" << component << " out of " << num_components << endl;
  1357.  
  1358.  
  1359. //cout << "component: " << component << endl;
  1360. if(component_to_subILPs[component].size() > 0){
  1361.  
  1362. GRBModel model = GRBModel(env);
  1363.  
  1364. construct_ILPs(component,
  1365. component_to_subILPs,
  1366. global_variable_to_local_idx,
  1367. component_to_variant_count,
  1368. model,
  1369. delta,
  1370. scratch_directory,
  1371. alignment_graphs,
  1372. use_files
  1373. );
  1374. //cout << "\n============= ILP model "<< component << " constructed =============\n" << endl;
  1375.  
  1376.  
  1377. model.getEnv().set(GRB_DoubleParam_TimeLimit, ILP_TIME_LIMIT);
  1378. auto start = chrono::steady_clock::now();
  1379. model.optimize();
  1380. auto stop = chrono::steady_clock::now();
  1381. auto duration = duration_cast<chrono::seconds>(stop - start);
  1382.  
  1383. if(duration.count() > .95 * ILP_TIME_LIMIT){
  1384. cout << "ILP was timed out" << endl;
  1385. some_ILP_timed_out = true;
  1386. }
  1387. //if(VERBOSE){
  1388. // cout << "time: " << duration.count() << " milliseconds\n" <<endl;
  1389. //}
  1390.  
  1391. //cout << "\n============= ILP models solved =============\n" << endl;
  1392.  
  1393. GRBVar* vars = NULL;
  1394. vars = model.getVars();
  1395. //cout << "ILP_" << component << " local assignments: " << endl;
  1396. //cout << "variable count: " << component_to_variant_count[component] << endl;
  1397. for (int j = 0; j < component_to_variant_count[component]; j++) {
  1398. //cout << vars[j].get(GRB_StringAttr_VarName) << " = " << vars[j].get(GRB_DoubleAttr_X) << endl;
  1399. solution[component_index_pair_to_variant[make_pair(component, j)]] = vars[j].get(GRB_DoubleAttr_X);
  1400. }
  1401. }
  1402.  
  1403. }
  1404.  
  1405. int sum = 0;
  1406. cout << "\nFinal assignment: " << endl;
  1407.  
  1408.  
  1409. string sol_file_name = sol_directory + "ILP_sol_" + to_string(alpha) + "_" + to_string(delta) + "_" + to_string(N);
  1410. ofstream out_file(sol_file_name);
  1411. for(int i = 0; i < solution.size(); i++){
  1412. cout << "x_" << i << " = " << solution[i] << endl;
  1413. out_file << solution[i] << endl;
  1414. sum += solution[i];
  1415. }
  1416. out_file.close();
  1417. cout << "\nNumber of variants deleted: " << sum << " out of " << num_variants <<endl;
  1418. if(some_ILP_timed_out){
  1419. cout << "Some ILP was timed-out, solution may be sub-optimal" << endl;
  1420. }else{
  1421. cout << "No ILPs were timed-out, solution is optimal" << endl;
  1422. }
  1423.  
  1424.  
  1425. }
Success #stdin #stdout #stderr 0.3s 40552KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Error: unexpected symbol in "using namespace"
Execution halted