-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathactor_replacement_json.py
More file actions
65 lines (42 loc) · 1.64 KB
/
actor_replacement_json.py
File metadata and controls
65 lines (42 loc) · 1.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
from jsonlite import jsonlite2json
from replacement_factory import replacement_factory
import json
import sys
import os
DEBUG = False

# Exactly two positional arguments are required.  Checking for "!= 3" (rather
# than "< 3") makes surplus arguments show the usage text too, instead of
# failing with a ValueError in the tuple unpacking below.
if len(sys.argv) != 3:
    print("Usage: actor_replacement.py parsed_text.jsonlite actor_dict.json")
    print()
    print("Actor dict can be generated from CSV using actor_parser.py")
    sys.exit(1)

parsed_jsonl, actor_json = sys.argv[1:]

# Load the parsed entries.  NOTE(review): jsonlite2json is assumed to return a
# list of dicts with string values (it is used with len(), iterated, and every
# value is lower-cased below) -- confirm against the jsonlite module.
with open(parsed_jsonl) as f:
    data = jsonlite2json(f)

# Load the actor records; each one is read via its "name", "role" and "class"
# keys below.
with open(actor_json) as f:
    repl = json.load(f)

# Set up replacement actor dictionary: first "<name>" -> "<class>", then
# "<role> <name>" -> "<class>".  The two-step build is kept on purpose so that
# a "<role> <name>" key overrides a colliding plain-name key and the insertion
# order handed to replacement_factory stays the same.
repl_dict_named_entities = {
    entity["name"].lower(): entity["class"].lower() for entity in repl
}
repl_dict_named_entities.update({
    "{} {}".format(entity["role"].lower(), entity["name"].lower()):
        entity["class"].lower()
    for entity in repl
})
multiple_replace_named_entities = replacement_factory(repl_dict_named_entities)

# Lower-case every value so it can match the lower-cased dictionary keys.
for entry in data:
    for key in entry:
        entry[key] = entry[key].lower()

# Here we specify where the dictionary has to be applied: only these fields,
# and only when they are present in the entry.
replaced_fields = ('S', 'O', 'tua')

# Denominator for the progress percentage.  Clamped to 1 so that a data set
# with a single entry does not divide by zero; unchanged for longer inputs.
progress_denominator = max(len(data) - 1, 1)

for i, entry in enumerate(data):
    for field in replaced_fields:
        if field in entry:
            entry[field] = multiple_replace_named_entities(entry[field])
    # Refresh the in-place progress line every 500 entries.
    if i % 500 == 0:
        print('Progress: %.1f%%\r' % (i / progress_denominator * 100), end='')
print("Progress: 100%")

# The result is written to the current working directory, named after the
# input file with an "output_" prefix, one JSON document per line.
fn = 'output_' + os.path.split(parsed_jsonl)[-1]
with open(fn, 'w') as f:
    print('Saving ouput to', fn)
    for line in data:
        f.writelines([json.dumps(line), '\n'])