-
Notifications
You must be signed in to change notification settings - Fork 0
/
correction.py
65 lines (49 loc) · 1.65 KB
/
correction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import xml.etree.cElementTree as ET
from collections import defaultdict
import re
# Abbreviated street-type token -> spelled-out street type. replace_name looks
# tokens up here with an exact, case-sensitive dict match, so every accepted
# spelling of an abbreviation has to be listed explicitly.
mapping_street = {
    short_form: full_name
    for full_name, short_forms in (
        ('Avenue', ('Ave', 'AVE', 'Ave.')),
        ('Boulevard', ('Blvd', 'Blvd.')),
        ('Court', ('Ct',)),
        ('Drive', ('Dr', 'Dr.')),
        ('Road', ('Rd', 'Rd.')),
        ('Street', ('St', 'St.')),
        ('Trail', ('Tr',)),
    )
    for short_form in short_forms
}
# Spelled-out or dotted direction token -> compact direction abbreviation.
# replace_name looks tokens up here with an exact, case-sensitive dict match,
# so each spelling is listed explicitly. Every direction is covered in the same
# set of variants (word, hyphenated word, dotted with and without the final
# dot) so that e.g. "West" or "S.E." is normalized just like "East" or "N.E.".
mapping_dir = {
    'East': 'E',
    'N.E.': 'NE',
    'N.E': 'NE',
    'N.W.': 'NW',
    'N.W': 'NW',
    'North': 'N',
    'North-east': 'NE',
    'North-west': 'NW',
    'Northeast': 'NE',
    'Northwest': 'NW',
    'S.E.': 'SE',
    'S.E': 'SE',
    'S.W.': 'SW',
    'S.W': 'SW',
    'South': 'S',
    'South-east': 'SE',
    'South-west': 'SW',
    'Southeast': 'SE',
    'Southwest': 'SW',
    'West': 'W',
}
# Captures the last two whitespace-separated tokens of a string. update_addr
# feeds these to replace_name, which treats them as (street type, direction),
# e.g. ("St", "NW") for "4 St NW".
street_format = re.compile(r'\b(\S+)\s(\S+)$')
# Postal code shaped like "A1A 1A1": letter-digit-letter, exactly one
# whitespace character, digit-letter-digit. The letter class is spelled
# [A-Za-z] on purpose: a range written as [A-z] also covers the ASCII
# punctuation that sits between "Z" and "a" ([ \ ] ^ _ `), which would let
# strings such as "_2_ 1_4" pass as postal codes.
postal_format = re.compile(r'[A-Za-z]\d[A-Za-z]\s\d[A-Za-z]\d')
def replace_name(match):
    """Build the replacement text for a single street_format match.

    match must expose exactly two captured groups; they are read as the
    street-type token followed by the direction token. Each token is replaced
    by its entry in mapping_street / mapping_dir when one exists and is kept
    unchanged otherwise.

    Returns the two resulting tokens joined by a single space.
    """
    street_type, direction = match.groups()
    # dict.get with the token itself as default == "translate if known".
    full_street_type = mapping_street.get(street_type, street_type)
    short_direction = mapping_dir.get(direction, direction)
    return full_street_type + " " + short_direction
def update_addr(street_name):
    """Return street_name with its final two tokens normalized.

    Wherever street_format matches (it is anchored to the end of the string),
    the matched text is rewritten by replace_name. A street_name in which the
    pattern does not match is returned unchanged.
    """
    return street_format.sub(replace_name, street_name)
# Postal code shaped like "A1A 1A1" with the separator optional, so a single
# pattern recognizes both the spaced and the unspaced spelling. Compiled once
# at import time rather than on every call. The letter class is [A-Za-z] on
# purpose: a range written as [A-z] also accepts the ASCII punctuation between
# "Z" and "a" ([ \ ] ^ _ `), which would turn "_2_1_4" into "_2_ 1_4".
_postal_code_format = re.compile(r'([A-Za-z]\d[A-Za-z])(\s?)(\d[A-Za-z]\d)')


def update_postal(postcode):
    """Return postcode upper-cased, inserting the separator space if missing.

    The pattern is matched at the start of postcode only:

    * "a1a1a1..." (no separator) is rebuilt as the two three-character halves
      joined by one space; anything following the sixth character is dropped.
    * "a1a 1a1..." (one whitespace separator) is left as it is.
    * anything else is left as it is.

    In every case the result is upper-cased before it is returned.
    """
    m = _postal_code_format.match(postcode)
    # group(2) is the optional separator; empty means the two halves touch.
    if m and not m.group(2):
        postcode = m.group(1) + " " + m.group(3)
    return postcode.upper()