import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Switch on seaborn's default theme for every figure drawn below
sns.set()
Read the CSV file into memory. This assumes the file containing all addresses in Belgium is saved at the following path:
# Load the full Belgian address table; the path is relative to the working directory
belgium = pd.read_csv(filepath_or_buffer='../data/belgium_addresses.csv')
We can count the number of times a street name occurs across all municipalities. Many of the French and Dutch names are similar, as they are simply translations of each other.
# Dutch street names: keep one row per distinct (street name, postcode) pair
streets = belgium.loc[:, ['streetname_nl', 'postcode']].drop_duplicates()
# Count the postcodes each name appears in and keep the ten most frequent names
result = (
    streets
    .groupby('streetname_nl')['postcode']
    .count()
    .to_frame(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)
result
# Bar chart of the ten most common Dutch street names
plt.figure(figsize=(18, 6))
sns.barplot(data=result.reset_index(), x='streetname_nl', y='count')
# French street names: keep one row per distinct (street name, postcode) pair
streets = belgium.loc[:, ['streetname_fr', 'postcode']].drop_duplicates()
# Count the postcodes each name appears in and keep the ten most frequent names
result = (
    streets
    .groupby('streetname_fr')['postcode']
    .count()
    .to_frame(name='count')
    .sort_values(by='count', ascending=False)
    .head(10)
)
result
# Bar chart of the ten most common French street names
plt.figure(figsize=(18, 6))
sns.barplot(data=result.reset_index(), x='streetname_fr', y='count')
As we lack the geo data to compute the actual length of a street, we resort to calculating the length of the street names instead. Among the French names there are some odd results, where comments seem to have been added to the street name.
# Pair every distinct Dutch street name with its character count and order
# the pairs longest first (ties fall back to the name, also descending)
long = sorted(
    ((len(street), street) for street in belgium['streetname_nl'].dropna().unique()),
    reverse=True,
)
# Keep the ten longest names as a small table
result = pd.DataFrame(data=long[:10], columns=['length', 'streetname_nl'])
# Horizontal bar chart; the frame already has a default index, so it is
# passed to seaborn as-is
plt.figure(figsize=(16, 10))
sns.barplot(data=result, x='length', y='streetname_nl', orient='h')
# Pair every distinct French street name with its character count and order
# the pairs longest first (ties fall back to the name, also descending)
long = sorted(
    ((len(street), street) for street in belgium['streetname_fr'].dropna().unique()),
    reverse=True,
)
# Keep the ten longest names as a small table
result = pd.DataFrame(data=long[:10], columns=['length', 'streetname_fr'])
# Horizontal bar chart; the frame already has a default index, so it is
# passed to seaborn as-is
plt.figure(figsize=(16, 10))
sns.barplot(data=result, x='length', y='streetname_fr', orient='h')
For both the Dutch and the French street names we can count the number of houses on each street and see which streets have the most houses.
# Keep one row per distinct (Dutch street name, postcode, house number)
streets = belgium[['streetname_nl', 'postcode', 'house_number']].drop_duplicates()
# Count house numbers per (street name, postcode); grouping on the postcode
# as well keeps same-named streets in different postcodes apart
result = (
    streets
    .groupby(['streetname_nl', 'postcode'])
    .count()
    .rename(columns={'house_number': 'count'})
    .sort_values(by='count', ascending=False)
    .head(10)
)
result
# Plot the results
plot_data = result.reset_index()
# Label every bar with name AND postcode. Plotting the bare name would make
# seaborn collapse same-named streets from different postcodes into a single
# averaged bar with an error bar, so the chart would no longer match the table.
plot_data['streetname_nl'] = (
    plot_data['streetname_nl'].astype(str)
    + ' (' + plot_data['postcode'].astype(str) + ')'
)
plt.figure(figsize=(16, 10))
sns.barplot(y='streetname_nl', x='count', data=plot_data, orient='h')
# Keep one row per distinct (French street name, postcode, house number)
streets = belgium[['streetname_fr', 'postcode', 'house_number']].drop_duplicates()
# Count house numbers per (street name, postcode); grouping on the postcode
# as well keeps same-named streets in different postcodes apart
result = (
    streets
    .groupby(['streetname_fr', 'postcode'])
    .count()
    .rename(columns={'house_number': 'count'})
    .sort_values(by='count', ascending=False)
    .head(10)
)
result
# Plot the results
plot_data = result.reset_index()
# Label every bar with name AND postcode. Plotting the bare name would make
# seaborn collapse same-named streets from different postcodes into a single
# averaged bar with an error bar, so the chart would no longer match the table.
plot_data['streetname_fr'] = (
    plot_data['streetname_fr'].astype(str)
    + ' (' + plot_data['postcode'].astype(str) + ')'
)
plt.figure(figsize=(16, 10))
sns.barplot(y='streetname_fr', x='count', data=plot_data, orient='h')