In this post, I demonstrate how to deploy an interactive web application in just a little over two hundred lines of Python. A Python module called Dash is used - Dash was developed by Plotly by building on top of Flask, a popular Python web framework, to create an easy way to develop interactive web data visualizations using Python.
For this example, I have built a Dash app that performs one-way analysis of variance on a user-uploaded dataset. Here’s a silent video of the Dash app in action:
The GitHub repository for this project can be found here.
The full code for anova-dash.py from the video above can be found below. The anova-dash.py Python script allows us to run some calculations in Python, define some HTML whose contents come from the aforementioned Python calculations, and serve this HTML over localhost (or http://0.0.0.0) at our desired port (in this case, 8080) as if it were a webpage.
######################
## Import Libraries ##
######################
# Libraries for Dash front-end
import base64
import datetime
import io
import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
from dash_table import DataTable
import plotly.express as px
# Libraries for back-end analysis
import pandas as pd
# Libraries for ANOVA
import statsmodels.api as sm
from statsmodels.formula.api import ols
######################
## Define Functions ##
######################
def parse_contents(contents, filename):
    """Parse the contents of an uploaded file to a dataframe in Python

    contents: The contents of an uploaded file, formatted as
        "<content type>,<base64-encoded payload>"
    filename: The filename of an uploaded file; it decides which parser is
        used ('csv' -> pd.read_csv, 'xls' -> pd.read_excel)

    Returns the parsed dataframe. An empty dataframe is returned when the
    file type is not recognised or when the upload cannot be decoded or
    parsed, so callers always receive a DataFrame.
    """
    try:
        # Only the part after the first comma is the base64 payload
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        # Compare in lower case so that e.g. 'DATA.CSV' is recognised too
        lowered_name = filename.lower()
        if 'csv' in lowered_name:
            # Assuming that the user uploaded a CSV file
            return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        if 'xls' in lowered_name:
            # Assuming that the user uploaded an excel file
            return pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        # Best effort: report the problem and fall through to the empty frame
        print(e)
    # Unsupported file type or failed parse
    return pd.DataFrame()
def render_table(df, id):
    """Build a Dash DataTable component that displays a dataframe

    df: The dataframe to display
    id: The element ID assigned to the DataTable
    """
    # One column spec per dataframe column; the column name doubles as its ID
    column_specs = []
    for column_name in df.columns:
        column_specs.append({'name': column_name, 'id': column_name})
    # One dict per dataframe row
    records = df.to_dict('records')
    # The export options (export_format='xlsx', export_headers='display')
    # are currently not passed to the table
    return DataTable(id=id, columns=column_specs, data=records)
def render_bar(df, x_col, y_col):
    """Build a Plotly Express bar chart from a dataframe

    df: The dataframe holding the data to plot
    x_col: The name of the column passed to px.bar as x
    y_col: The name of the column passed to px.bar as y
    """
    figure = px.bar(df, x=x_col, y=y_col)
    return figure
################
## App Layout ##
################
# Formatting template file
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Title shown in the browser tab or the window's top bar
app.title = 'One-Way ANOVA'

# The whole page is a single <div> holding the components below, top to bottom
app.layout = html.Div([
    # Hidden element caching the uploaded data while the user is on the page
    html.Div(id='input-data', style={'display': 'none'}),
    # Page header and short description
    html.H1('One-Way ANOVA'),
    html.P('This web app performs a one-way analysis of variance on user-uploaded data.'),
    # File upload: label, then the upload component itself
    html.H6('File Upload'),
    dcc.Upload(
        id='upload-data',
        className='upload',
        children=[html.P('Drag and Drop or '), html.A('Select Files')],
    ),
    # Dropdowns start empty; their options are filled by the load_data callback
    html.H6('Input column (X)'),
    dcc.Dropdown(id='x-dropdown', options=[]),
    html.H6('Response column (Y)'),
    dcc.Dropdown(id='y-dropdown', options=[]),
    # Where the output of the analysis is shown
    html.Div(id='output-section'),
    # A chart component (dcc.Graph with id='output-chart') is currently disabled
])
###################
## App Callbacks ##
###################
@app.callback([Output('x-dropdown', 'options'),
               Output('y-dropdown', 'options'),
               Output('input-data', 'children')],
              [Input('upload-data', 'contents')],
              [State('upload-data', 'filename')])
def load_data(contents, filename):
    """Read the uploaded file, fill both dropdown menus, and cache the data

    contents: Contents of the user-uploaded file (None before any upload)
    filename: Name of the user-uploaded file

    Returns the options for the X dropdown, the options for the Y dropdown,
    and the data serialised as JSON for the hidden 'input-data' element.
    """
    # Nothing uploaded yet: empty dropdowns and an empty cache
    if contents is None:
        return [], [], ''

    uploaded = parse_contents(contents, filename)
    # Every column of the upload becomes a choice in both dropdowns
    dropdown_options = []
    for column_name in uploaded.columns:
        dropdown_options.append({'label': column_name, 'value': column_name})
    # Serialise the data so it can be kept in the hidden element
    cached_json = uploaded.to_json(date_format='iso', orient='split')
    return dropdown_options, dropdown_options, cached_json
#@app.callback([Output('output-section', 'children'),
# Output('output-chart', 'figure')],
@app.callback(Output('output-section', 'children'),
              [Input('input-data', 'children'),
               Input('x-dropdown', 'value'),
               Input('y-dropdown', 'value')])
def run_anova(df, x_col, y_col):
    """Run ANOVA and display results

    df: The user-uploaded data, as the JSON string cached in the hidden
        'input-data' element (empty or None when nothing has been uploaded)
    x_col: The name of the input column for the ANOVA analysis
    y_col: The name of the response column for the ANOVA analysis

    Returns a one-element list holding the ANOVA results table when the
    analysis succeeds, otherwise a message telling the user what to do next.
    """
    # Truthiness covers both '' and None; len(None) would raise a TypeError
    if not df:
        return 'Please upload data for analysis first.'
    # Load cached data from the hidden element. Wrapping the string in
    # StringIO avoids the deprecated "literal JSON string" form of read_json.
    data = pd.read_json(io.StringIO(df), orient='split')
    # No formula can be built until both dropdowns have a selection
    if not x_col or not y_col:
        return 'Please select valid input and response columns.'
    try:
        # Run ANOVA: response ~ categorical input, type-2 sums of squares
        model = ols(y_col + ' ~ C(' + x_col + ')', data=data).fit()
        aov_table = sm.stats.anova_lm(model, typ=2)
        # Turn the row labels into a regular column so they show in the table
        aov_table = aov_table.reset_index()
        # Show ANOVA results as a table
        return [render_table(aov_table, id='anova-table')]
    except Exception as e:
        # Any failure here is reported to the user as an invalid selection;
        # unlike a bare except, KeyboardInterrupt/SystemExit still propagate
        print(e)
        return 'Please select valid input and response columns.'
################
## Run Server ##
################
if __name__ == '__main__':
    # Only start the server when this file is run as a script:
    # host 0.0.0.0, port 8080, debug mode off
    app.run_server(host='0.0.0.0', port=8080, debug=False)
Let’s go through the code section by section. I’ve divided the code into five parts:
- Import Libraries
- Define Functions
- App Layout
- App Callbacks
- Run Server
Part one, Import Libraries, is self-explanatory.
Part two, Define Functions, is also mostly self-explanatory but I’d like to point out how I’ve defined the functions render_table() and render_bar() for use later when we define the app callbacks in part four. The code within render_table() and render_bar() calls Dash and Plotly functions for rendering a table and a bar chart, respectively.
def render_table(df, id):
    """Build a Dash DataTable component that displays a dataframe

    df: The dataframe to display
    id: The element ID assigned to the DataTable
    """
    # One column spec per dataframe column; the column name doubles as its ID
    column_specs = []
    for column_name in df.columns:
        column_specs.append({'name': column_name, 'id': column_name})
    # One dict per dataframe row
    records = df.to_dict('records')
    # The export options (export_format='xlsx', export_headers='display')
    # are currently not passed to the table
    return DataTable(id=id, columns=column_specs, data=records)
def render_bar(df, x_col, y_col):
    """Build a Plotly Express bar chart from a dataframe

    df: The dataframe holding the data to plot
    x_col: The name of the column passed to px.bar as x
    y_col: The name of the column passed to px.bar as y
    """
    figure = px.bar(df, x=x_col, y=y_col)
    return figure
Lastly, the parse_contents() function in part two uses the base64 module to convert a user-uploaded file (either an Excel file or a CSV file) into a Pandas DataFrame.
def parse_contents(contents, filename):
    """Parse the contents of an uploaded file to a dataframe in Python

    contents: The contents of an uploaded file, formatted as
        "<content type>,<base64-encoded payload>"
    filename: The filename of an uploaded file; it decides which parser is
        used ('csv' -> pd.read_csv, 'xls' -> pd.read_excel)

    Returns the parsed dataframe. An empty dataframe is returned when the
    file type is not recognised or when the upload cannot be decoded or
    parsed, so callers always receive a DataFrame.
    """
    try:
        # Only the part after the first comma is the base64 payload
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        # Compare in lower case so that e.g. 'DATA.CSV' is recognised too
        lowered_name = filename.lower()
        if 'csv' in lowered_name:
            # Assuming that the user uploaded a CSV file
            return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        if 'xls' in lowered_name:
            # Assuming that the user uploaded an excel file
            return pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        # Best effort: report the problem and fall through to the empty frame
        print(e)
    # Unsupported file type or failed parse
    return pd.DataFrame()
Part three, App Layout, defines the web app layout using Dash functions. While a more in-depth discussion can be found in the Dash layout documentation, I’d like to emphasize a few key takeaways. Firstly, this section defines a Dash app object named app. After specifying the app title, the bulk of the code in this section defines the app layout in app.layout using Dash functions and classes from the dash_html_components (html) and dash_core_components (dcc) libraries. (Note: dash_html_components and dash_core_components have been renamed html and dcc, respectively, in the latest versions of Dash.)
The dash_html_components (html) functions and classes each correspond to a set of HTML tags which are added to the web app layout. For example, the html.Div() function generates a pair of <div> tags. Each dash_html_components (html) function takes children as its first positional argument, which defines what appears between the opening and closing HTML tags generated by the function. For example, the code html.Div(html.H1('Hello World!')) tells us that “Hello World!” is contained inside <h1> tags which are, in turn, contained inside <div> tags.
The dash_core_components (dcc) functions and classes have been built by the Dash development team to quickly define complex and often interactive components such as file upload menus, dropdown filters, and tables. Most of these components have been created in React.js.
It is also important to note that some of these functions have the id argument – this will be useful later on when defining callbacks in part four.
################
## App Layout ##
################
# Formatting template file
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Title shown in the browser tab or the window's top bar
app.title = 'One-Way ANOVA'

# The whole page is a single <div> holding the components below, top to bottom
app.layout = html.Div([
    # Hidden element caching the uploaded data while the user is on the page
    html.Div(id='input-data', style={'display': 'none'}),
    # Page header and short description
    html.H1('One-Way ANOVA'),
    html.P('This web app performs a one-way analysis of variance on user-uploaded data.'),
    # File upload: label, then the upload component itself
    html.H6('File Upload'),
    dcc.Upload(
        id='upload-data',
        className='upload',
        children=[html.P('Drag and Drop or '), html.A('Select Files')],
    ),
    # Dropdowns start empty; their options are filled by the load_data callback
    html.H6('Input column (X)'),
    dcc.Dropdown(id='x-dropdown', options=[]),
    html.H6('Response column (Y)'),
    dcc.Dropdown(id='y-dropdown', options=[]),
    # Where the output of the analysis is shown
    html.Div(id='output-section'),
    # A chart component (dcc.Graph with id='output-chart') is currently disabled
])
Part four, App Callbacks, defines functions which determine the interactivity of the Dash web app with respect to the Python calculations being done in the backend. These are different from the functions defined previously in part two as the app callbacks require function decorators to determine when they will be called. In Python, decorators allow us to take a function and wrap additional functionality around it. Decorators allow us to perform “metaprogramming” by allowing a part of our program to modify another part of itself at the moment the decorated function is defined.
In Dash, function decorators are used to define what inputs are read into Python from the HTML frontend and where the values returned by the Python function are displayed in the HTML frontend.
For example, the load_data() function below returns three values, and each of these values maps to a property of one of the html or dcc components from the app layout defined in part three, identified by its id argument. For load_data(), Output('x-dropdown', 'options') tells Dash to send the first return value to the options property of the app layout component where id='x-dropdown', which is the dropdown menu defined in part three.
For inputs, it is similar yet slightly more complex. The load_data() function has two arguments, contents and filename. As defined by [Input('upload-data', 'contents')], contents is read from the contents value of the app layout component where id='upload-data', which is the file upload menu defined in part three. Furthermore, any change applied to the value of contents in the app layout will trigger running the load_data() function and update the outputs defined by the decorator. Meanwhile, the filename argument of the load_data() function is taken from the filename value of the app layout component where id='upload-data', as defined by [State('upload-data', 'filename')]. However, unlike changing the contents value, changing the filename value will not trigger running the load_data() function, since inputs defined using State() do not trigger running the function whereas those defined using Input() do. For the load_data() function, this decorator design decision was made such that the function isn’t needlessly run twice when both the contents and filename values of the data upload component change (which is the usual case when a file with both a filename and contents is uploaded).
@app.callback([Output('x-dropdown', 'options'),
               Output('y-dropdown', 'options'),
               Output('input-data', 'children')],
              [Input('upload-data', 'contents')],
              [State('upload-data', 'filename')])
def load_data(contents, filename):
    """Read the uploaded file, fill both dropdown menus, and cache the data

    contents: Contents of the user-uploaded file (None before any upload)
    filename: Name of the user-uploaded file

    Returns the options for the X dropdown, the options for the Y dropdown,
    and the data serialised as JSON for the hidden 'input-data' element.
    """
    # Nothing uploaded yet: empty dropdowns and an empty cache
    if contents is None:
        return [], [], ''

    uploaded = parse_contents(contents, filename)
    # Every column of the upload becomes a choice in both dropdowns
    dropdown_options = []
    for column_name in uploaded.columns:
        dropdown_options.append({'label': column_name, 'value': column_name})
    # Serialise the data so it can be kept in the hidden element
    cached_json = uploaded.to_json(date_format='iso', orient='split')
    return dropdown_options, dropdown_options, cached_json
#@app.callback([Output('output-section', 'children'),
# Output('output-chart', 'figure')],
@app.callback(Output('output-section', 'children'),
              [Input('input-data', 'children'),
               Input('x-dropdown', 'value'),
               Input('y-dropdown', 'value')])
def run_anova(df, x_col, y_col):
    """Run ANOVA and display results

    df: The user-uploaded data, as the JSON string cached in the hidden
        'input-data' element (empty or None when nothing has been uploaded)
    x_col: The name of the input column for the ANOVA analysis
    y_col: The name of the response column for the ANOVA analysis

    Returns a one-element list holding the ANOVA results table when the
    analysis succeeds, otherwise a message telling the user what to do next.
    """
    # Truthiness covers both '' and None; len(None) would raise a TypeError
    if not df:
        return 'Please upload data for analysis first.'
    # Load cached data from the hidden element. Wrapping the string in
    # StringIO avoids the deprecated "literal JSON string" form of read_json.
    data = pd.read_json(io.StringIO(df), orient='split')
    # No formula can be built until both dropdowns have a selection
    if not x_col or not y_col:
        return 'Please select valid input and response columns.'
    try:
        # Run ANOVA: response ~ categorical input, type-2 sums of squares
        model = ols(y_col + ' ~ C(' + x_col + ')', data=data).fit()
        aov_table = sm.stats.anova_lm(model, typ=2)
        # Turn the row labels into a regular column so they show in the table
        aov_table = aov_table.reset_index()
        # Show ANOVA results as a table
        return [render_table(aov_table, id='anova-table')]
    except Exception as e:
        # Any failure here is reported to the user as an invalid selection;
        # unlike a bare except, KeyboardInterrupt/SystemExit still propagate
        print(e)
        return 'Please select valid input and response columns.'
Lastly, part five runs the Flask server and specifies the port and the host.
if __name__ == '__main__':
    # Only start the server when this file is run as a script:
    # host 0.0.0.0, port 8080, debug mode off
    app.run_server(host='0.0.0.0', port=8080, debug=False)