From a83516fa63df2e6e366abe01d9a90c4248419357 Mon Sep 17 00:00:00 2001
From: Birgitta Paeuker <5paeuker@informatik.uni-hamburg.de>
Date: Wed, 11 Sep 2019 20:09:44 +0200
Subject: [PATCH] cut genome names after 10 characters, replace nan with 1.0 in
 distance matrices

---
 src/nejo_idx.py | 3 +++
 src/parser.py   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/nejo_idx.py b/src/nejo_idx.py
index 94b43e3..ddc57fb 100755
--- a/src/nejo_idx.py
+++ b/src/nejo_idx.py
@@ -43,6 +43,9 @@ class NeighborJoining:
       for j in range(0,i):
         if i < self._num_of_taxa:
           dist = dm.distance(i,j)
+          # for andi: its distance matrix may contain nan; use 1.0 instead
+          if str(dist) in ["nan"]:
+            dist = 1.0
           assert dist >= 0
         else:
           dist = None
diff --git a/src/parser.py b/src/parser.py
index 0724f24..ed90591 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -31,6 +31,9 @@ def parse_command_line():
                        "NJ-tree (derived from the distances delivered by\n"
                        "the corresponding tool) will be compared to the\n"
                        "reference tree.\n"
+                       "The genome names will be cut to 10 characters. \n"
+                       "The reference tree must contain the \n"
+                       "exact genome names resulting\n"
                        "For datafiles without reference and if at least\n"
                        "two tools have been specified, the resulting\n"
                        "NJ-trees are compared against each other."
-- 
GitLab