Clean up autoeps handling

e1337ee6 · Corey Koval · a42c2d50 · e1337ee6 · e1337ee6
Commit e1337ee6 authored May 28, 2021 by Corey Koval
Show whitespace changes
Inline Side-by-side

Showing with 22 additions and 12 deletions

df-aggregator.py df-aggregator.py +20 -10

cesium.tpl views/cesium.tpl +2 -2

No files found.
--- a/df-aggregator.py
+++ b/df-aggregator.py
@@ -232,11 +232,13 @@ def do_dbscan(X, epsilon, minsamp):
 # Autocalculate the best eps value.
 ####################################
 def autoeps_calc(X):
+    # only use a sample of the data to speed up calculation.
    X = X[:min(2000, len(X)):2]
    min_distances = []
    for x in X:
        distances = []
        for y in X:
+            # calculate Euclidean distance
            distance = math.sqrt(sum([(a - b) ** 2 for a, b in zip(x, y)]))
            if distance > 0:
                distances.append(distance)
@@ -247,7 +249,10 @@ def autoeps_calc(X):
        for x1, y1 in enumerate(sorted_distances):
            x2 = x1 + 1
            y2 = sorted_distances[x2]
+            # calculate slope
            m = (y2 - y1) / (x2 - x1)
+            # once the slope starts getting steeper, use that as the eps value
            if m > 0.003:
                # print(f"Slope: {round(m, 3)}, eps: {y1}")
                return y1
@@ -278,21 +283,26 @@ def process_data(database_name, epsilon, min_samp):
            WHERE aoi_id=? ORDER BY confidence DESC LIMIT 25000''', [aoi])
        intersect_array = np.array(curs.fetchall())
        if intersect_array.size != 0:
-            if epsilon > 0:
+            if ((epsilon.isnumeric() and float(epsilon) > 0)
+              or epsilon == "auto"):
                X = StandardScaler().fit_transform(intersect_array[:,0:2])
                n_points = len(X)
-                autoeps = autoeps_calc(X)
                min_samp = max(3, round(0.05 * n_points, 0))
-                print(f"min_samp: {min_samp}, eps: {autoeps}")
+                if epsilon == "auto":
-                size_x = sys.getsizeof(X)/1024
+                    epsilon = autoeps_calc(X)
-                print(f"The dataset is {size_x} kilobytes")
+                    print(f"min_samp: {min_samp}, eps: {epsilon}")
+                elif epsilon.isnumeric():
+                    epsilon = float(epsilon)
+                else:
+                    epsilon = ms.eps
+                # size_x = sys.getsizeof(X)/1024
+                # print(f"The dataset is {size_x} kilobytes")
                print(f"Computing Clusters from {n_points} intersections.")
                while not DBSCAN_WAIT_Q.empty():
                    print("Waiting for my turn...")
                    time.sleep(1)
                starttime = time.time()
-                db = Process(target=do_dbscan,args=(X,autoeps,min_samp))
+                db = Process(target=do_dbscan,args=(X,epsilon,min_samp))
-                # db = Process(target=do_dbscan,args=(X,epsilon,min_samp))
                db.daemon = True
                db.start()
                try:
@@ -773,7 +783,7 @@ def rx_params():
 ###############################################
 @get('/output.czml')
 def tx_czml_out():
-    eps = float(request.query.eps) if request.query.eps else ms.eps
+    eps = request.query.eps if request.query.eps else str(ms.eps)
    min_samp = float(request.query.minpts) if request.query.minpts else ms.min_samp
    if request.query.plotpts == "true":
        plotallintersects = True
@@ -1223,8 +1233,8 @@ if __name__ == '__main__':
    parser.add_option("-d", "--database", dest="database_name", help="REQUIRED Database File", metavar="FILE")
    parser.add_option("-r", "--receivers", dest="rx_file", help="List of receiver URLs", metavar="FILE")
    parser.add_option("-g", "--geofile", dest="geofile", help="GeoJSON Output File", metavar="FILE")
-    parser.add_option("-e", "--epsilon", dest="eps", help="Max Clustering Distance, Default 0.2. 0 to disable clustering.",
+    parser.add_option("-e", "--epsilon", dest="eps", help="Max Clustering Distance, Default \"auto\". 0 to disable clustering.",
-    metavar="NUMBER", type="float", default=0.2)
+    metavar="NUMBER or \"auto\"", default="auto")
    parser.add_option("-c", "--confidence", dest="conf", help="Minimum confidence value, default 10",
    metavar="NUMBER", type="int", default=10)
    parser.add_option("-p", "--power", dest="pwr", help="Minimum power value, default 10",

--- a/views/cesium.tpl
+++ b/views/cesium.tpl
@@ -254,7 +254,7 @@
      }
      if (clustering_en !== null) {
        if (clustering_en.checked) {
-          parameter += "eps=1&";
+          parameter += "eps=auto&";
        } else {
          parameter += "eps=0&";
        }
@@ -421,7 +421,7 @@
        <span class="slidetitle"><h4>Clustering:</h4></span>
        <span class="slidespan" style="text-align:left; width: 100px;margin: 5px;">
        <label class="switch">
-          <input id="clustering_en" name="clustering_en" {{"checked" if epsilon > 0 else ""}} type="checkbox" onchange="updateParams()">
+          <input id="clustering_en" name="clustering_en" {{"checked" if epsilon == "auto" else ""}} type="checkbox" onchange="updateParams()">
          <span class="switchslider round"></span>
        </label></span>
        <span class="tooltiptext">Turns clustering on or off. Clustering On will draw ellipses.</span>