From d00e84b9922bf2f201bdda9a51746fa2c9973170 Mon Sep 17 00:00:00 2001 From: Nazim Mir Date: Thu, 2 Jun 2022 14:23:25 +0530 Subject: [PATCH 1/3] Fixing handling of Junk Text in triage scoring. --- consensus_and_scoring/TriagerScoring.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/consensus_and_scoring/TriagerScoring.py b/consensus_and_scoring/TriagerScoring.py index b2f1cdc..e738520 100644 --- a/consensus_and_scoring/TriagerScoring.py +++ b/consensus_and_scoring/TriagerScoring.py @@ -225,6 +225,11 @@ def determinePassingIndices(starts, ends, numUsers, users, length, category): 'passingFunc': evalThresholdMatrix, 'scale': 1.8 }, + 'Junk Text': + { + 'passingFunc': ignoreThis, + 'scale': 0 + } } passFunc = actionDeterminant[category]['passingFunc'] scale = actionDeterminant[category]['scale'] @@ -245,6 +250,9 @@ def findPassingIndices(starts, ends, numUsers, users, length, passingFunc = eval passersArray[i] = 1 return passersArray +def ignoreThis(percent, TotalNumUsers, scale): + return 'X' + def minPercent(percent, totalNumUsers, scale): if percent>=scale: return 'H' @@ -351,8 +359,12 @@ def addToSourceText(starts, ends, texts, sourceText): for i in range(len(starts)): pointer = 0 for c in range(starts[i], ends[i]): - sourceText[c] = texts[i][pointer] - pointer +=1 + #print(f"{texts=}\n {i=}\n {c=}\n {pointer=}") + try: + sourceText[c] = texts[i][pointer] + pointer +=1 + except IndexError: + pass return sourceText def makeList(size): out = [] From 671909b26d206c510f3dc28caa802a4c83fb96e7 Mon Sep 17 00:00:00 2001 From: ericwimsatt Date: Thu, 2 Jun 2022 19:36:51 -0700 Subject: [PATCH 2/3] partial fix --- consensus_and_scoring/TriagerScoring.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/consensus_and_scoring/TriagerScoring.py b/consensus_and_scoring/TriagerScoring.py index e738520..502ba55 100644 --- a/consensus_and_scoring/TriagerScoring.py +++ b/consensus_and_scoring/TriagerScoring.py @@ -359,13 +359,11 @@ def addToSourceText(starts, ends, texts, sourceText): for i in range(len(starts)): pointer = 0 for c in range(starts[i], ends[i]): - #print(f"{texts=}\n {i=}\n {c=}\n {pointer=}") - try: + if pointer Date: Thu, 2 Jun 2022 19:47:34 -0700 Subject: [PATCH 3/3] forces int to a string --- consensus_and_scoring/TriagerScoring.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/consensus_and_scoring/TriagerScoring.py b/consensus_and_scoring/TriagerScoring.py index 502ba55..908d767 100644 --- a/consensus_and_scoring/TriagerScoring.py +++ b/consensus_and_scoring/TriagerScoring.py @@ -43,7 +43,7 @@ def importData(path, out_path): #redundancy = art_data[''] length = art_data['article_text_length'].iloc[0] #print(length) - source_text = makeList(length) + source_text = makeList(length + 1) #flagExclusions = exclusionList(users, flags, cats) flagExclusions = [] #print(flagExclusions) @@ -359,9 +359,8 @@ def addToSourceText(starts, ends, texts, sourceText): for i in range(len(starts)): pointer = 0 for c in range(starts[i], ends[i]): - if pointer