Interactive Web Apps Using Python

June 22, 2021

scroll for more

Share on:

Interactive Web Apps Using Python

In this post, I demonstrate how to deploy an interactive web application in just a little over two hundred lines of Python. A Python module called Dash is used - Dash was developed by Plotly by building on top of Flask, a popular Python web framework, to create an easy way to develop interactive web data visualizations using Python.

For this example, I have built a Dash app that performs one-way analysis of variance on a user-uploaded dataset. Here’s a silent video of the Dash app in action:

The GitHub repository for this project can be found here.

The full code for anova-dash.py from the video above can be found below. The anova-dash.py Python script allows us to run some calculations in Python, define some HTML whose contents come from the aforementioned Python calculations, and serve this HTML over localhost (or http://0.0.0.0) at our desired port (in this case, 8080) as if it were a webpage.

######################
## Import Libraries ##
######################

# Libraries for Dash front-end
import base64
import datetime
import io

import dash
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
from dash_table import DataTable

import plotly.express as px

# Libraries for back-end analysis
import pandas as pd

# Libraries for ANOVA
import statsmodels.api as sm
from statsmodels.formula.api import ols

######################
## Define Functions ##
######################

def parse_contents(contents, filename):
    """Parse the contents of an uploaded file to a dataframe in Python
    contents: The contents of an uploaded file, as a string of the form
        '<content type>,<base64-encoded data>'
    filename: The filename of an uploaded file; it decides which parser is used
    Returns the parsed dataframe, or an empty dataframe when the file is
    neither a CSV nor an Excel file or when it cannot be parsed
    """
    # Split on the first comma only: everything after it is the encoded data
    _, content_string = contents.split(',', 1)
    try:
        decoded = base64.b64decode(content_string)
        # Compare in lower case so that e.g. 'DATA.CSV' is recognised too
        name = filename.lower()
        if 'csv' in name:
            # Assuming that the user uploaded a CSV file
            return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        if 'xls' in name:
            # Assuming that the user uploaded an excel file
            return pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)
        return pd.DataFrame()
    # Unsupported file type: return "no data" rather than failing on a
    # dataframe that was never assigned
    return pd.DataFrame()

def render_table(df, id):
    """Build a Dash DataTable that displays a dataframe
    df: The dataframe to display
    id: The element ID given to the table
    """
    # One column definition per dataframe column; the column name is used as
    # both the displayed header and the column ID
    column_defs = [{'name': col, 'id': col} for col in df.columns]
    # One dict per dataframe row, keyed by column name
    rows = df.to_dict('records')
    # Disabled options: export_format='xlsx', export_headers='display'
    return DataTable(id=id, columns=column_defs, data=rows)

def render_bar(df, x_col, y_col):
    """Draw a Plotly Express bar chart from two columns of a dataframe
    df: The dataframe holding the data to plot
    x_col: The name of the column passed as the chart's x values
    y_col: The name of the column passed as the chart's y values
    """
    figure = px.bar(df, x=x_col, y=y_col)
    return figure

################
## App Layout ##
################

# Stylesheet that provides the page formatting
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Text shown in the browser tab title or window's top bar
app.title = 'One-Way ANOVA'

# Page contents, listed from top to bottom
app.layout = html.Div(children=[
    # Hidden container where the user-uploaded data is temporarily cached
    # while the user is accessing the web app
    html.Div(id='input-data', style={'display': 'none'}),
    # Page heading and a short description
    html.H1('One-Way ANOVA'),
    html.P('This web app performs a one-way analysis of variance on user-uploaded data.'),
    # File upload component and its label
    html.H6('File Upload'),
    dcc.Upload(
        id='upload-data',
        className='upload',
        children=[html.P('Drag and Drop or '), html.A('Select Files')],
    ),
    # Input column dropdown; its options come from the columns of the
    # user-uploaded data
    html.H6('Input column (X)'),
    dcc.Dropdown(id='x-dropdown', options=[]),
    # Response column dropdown; its options come from the columns of the
    # user-uploaded data
    html.H6('Response column (Y)'),
    dcc.Dropdown(id='y-dropdown', options=[]),
    # Container where the output of the analysis is shown
    html.Div(id='output-section'),
    # Chart display (currently disabled)
    # dcc.Graph(id='output-chart'),
])

###################
## App Callbacks ##
###################

@app.callback(
    [
        Output('x-dropdown', 'options'),
        Output('y-dropdown', 'options'),
        Output('input-data', 'children'),
    ],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')],
)
def load_data(contents, filename):
    """Read an uploaded file, fill both dropdown menus, and cache the data
    contents: Contents of the user-uploaded file (None if nothing was uploaded)
    filename: Name of the user-uploaded file
    Returns the options for the X dropdown, the options for the Y dropdown,
    and the data serialised as JSON for the hidden 'input-data' element
    """
    # Nothing uploaded yet: leave the dropdowns and the cache empty
    if contents is None:
        return [], [], ''

    uploaded = parse_contents(contents, filename)
    # Each column of the uploaded data becomes one dropdown entry
    dropdown_options = [{'label': name, 'value': name}
                        for name in uploaded.columns]
    # Serialise the data so it can be kept in the hidden element while the
    # user is accessing the web app
    cached_json = uploaded.to_json(date_format='iso', orient='split')
    return dropdown_options, dropdown_options, cached_json

#@app.callback([Output('output-section', 'children'),
#               Output('output-chart', 'figure')],
@app.callback(Output('output-section', 'children'),
              [Input('input-data', 'children'),
               Input('x-dropdown', 'value'),
               Input('y-dropdown', 'value')])
def run_anova(df, x_col, y_col):
    """Run ANOVA and display results
    df: The user-uploaded data, as the JSON string cached in the hidden
        'input-data' element (empty or None if nothing has been uploaded)
    x_col: The name of the input column for the ANOVA analysis
    y_col: The name of the response column for the ANOVA analysis
    Returns a list holding the ANOVA results table, or a message for the user
    when the analysis cannot be run
    """
    # Tell the user to upload data first if they have not yet uploaded data
    # (a truthiness check also copes with None, which len() would not)
    if not df:
        return 'Please upload data for analysis first.'

    try:
        # Load cached data from hidden table; the JSON text is wrapped in a
        # file-like object because newer pandas versions deprecate passing a
        # literal JSON string
        data = pd.read_json(io.StringIO(df), orient='split')

        # Only accept columns that really exist in the uploaded data
        if x_col not in data.columns or y_col not in data.columns:
            return 'Please select valid input and response columns.'

        # Run ANOVA on a copy with fixed variable names. The formula is
        # evaluated as Python code, so client-supplied column names must not
        # be pasted into it; fixed names also make names with spaces or
        # symbols work
        frame = pd.DataFrame({'response': data[y_col],
                              'factor': data[x_col]})
        model = ols('response ~ C(factor)', data=frame).fit()
        aov_table = sm.stats.anova_lm(model, typ=2)

        # Put the chosen input column's name back into the row label
        aov_table = aov_table.rename(
            index={'C(factor)': 'C(' + str(x_col) + ')'})
        aov_table = aov_table.reset_index()

        # Show the sum of squares as a bar chart (currently disabled)
        #chart_output = render_bar(aov_table, x_col='index', y_col='sum_sq')

        # Show ANOVA results as a table
        return [render_table(aov_table, id='anova-table')]#, chart_output

    except Exception as e:
        # The data could not be read or analysed with the selected columns
        print(e)
        # Tell user to select input and response columns
        return 'Please select valid input and response columns.'

################
## Run Server ##
################

if __name__ == '__main__':
    # Start the server only when this file is run as a script
    app.run_server(host='0.0.0.0', port=8080, debug=False)

Let’s go through the code section by section. I’ve divided the code into five parts:

  1. Import Libraries
  2. Define Functions
  3. App Layout
  4. App Callbacks
  5. Run Server

Part one, Import Libraries, is self-explanatory.

Part two, Define Functions, is also mostly self-explanatory, but I’d like to point out how I’ve defined the functions render_table() and render_bar() for use later when we define the app callbacks in part four. render_table() and render_bar() wrap the Dash and Plotly functions for rendering a table and a bar chart, respectively.

def render_table(df, id):
    """Build a Dash DataTable that displays a dataframe
    df: The dataframe to display
    id: The element ID given to the table
    """
    # One column definition per dataframe column; the column name is used as
    # both the displayed header and the column ID
    column_defs = [{'name': col, 'id': col} for col in df.columns]
    # One dict per dataframe row, keyed by column name
    rows = df.to_dict('records')
    # Disabled options: export_format='xlsx', export_headers='display'
    return DataTable(id=id, columns=column_defs, data=rows)

def render_bar(df, x_col, y_col):
    """Draw a Plotly Express bar chart from two columns of a dataframe
    df: The dataframe holding the data to plot
    x_col: The name of the column passed as the chart's x values
    y_col: The name of the column passed as the chart's y values
    """
    figure = px.bar(df, x=x_col, y=y_col)
    return figure

Lastly, the parse_contents() function in part two uses the base64 module to decode a user-uploaded file (either an Excel file or a CSV file) and converts it into a Pandas DataFrame.

def parse_contents(contents, filename):
    """Parse the contents of an uploaded file to a dataframe in Python
    contents: The contents of an uploaded file, as a string of the form
        '<content type>,<base64-encoded data>'
    filename: The filename of an uploaded file; it decides which parser is used
    Returns the parsed dataframe, or an empty dataframe when the file is
    neither a CSV nor an Excel file or when it cannot be parsed
    """
    # Split on the first comma only: everything after it is the encoded data
    _, content_string = contents.split(',', 1)
    try:
        decoded = base64.b64decode(content_string)
        # Compare in lower case so that e.g. 'DATA.CSV' is recognised too
        name = filename.lower()
        if 'csv' in name:
            # Assuming that the user uploaded a CSV file
            return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
        if 'xls' in name:
            # Assuming that the user uploaded an excel file
            return pd.read_excel(io.BytesIO(decoded))
    except Exception as e:
        print(e)
        return pd.DataFrame()
    # Unsupported file type: return "no data" rather than failing on a
    # dataframe that was never assigned
    return pd.DataFrame()

Part three, App Layout, defines the web app layout using Dash functions. While a more in-depth discussion can be found in the Dash layout documentation, I’d like to emphasize a few key takeaways. Firstly, this section defines a Dash app object named app. After specifying the app title, the bulk of the code in this section defines the app layout in app.layout using Dash functions and classes from the dash_html_components (html) and dash_core_components (dcc) libraries. (Note: in the latest versions of Dash, dash_html_components and dash_core_components have been moved into the dash package itself as dash.html and dash.dcc, respectively.)

The dash_html_components (html) functions and classes each correspond to a set of HTML tags which are added to the web app layout. For example, the html.Div() function generates a pair of <div> tags. Each dash_html_components (html) function takes children as its first positional argument, which defines what appears between the opening and closing HTML tags of the function. For example, the code html.Div(html.H1('Hello World!')) tells us that “Hello World!” is contained inside <h1> tags which are, in turn, contained inside <div> tags.

The dash_core_components (dcc) functions and classes have been built by the Dash development team to quickly define complex and often interactive components such as file upload menus, dropdown filters, and tables. Most of these components have been created in React.js.

It is also important to note that some of these functions have the id argument – this will be useful later on when defining callbacks in part four.

################
## App Layout ##
################

# Stylesheet that provides the page formatting
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Text shown in the browser tab title or window's top bar
app.title = 'One-Way ANOVA'

# Page contents, listed from top to bottom
app.layout = html.Div(children=[
    # Hidden container where the user-uploaded data is temporarily cached
    # while the user is accessing the web app
    html.Div(id='input-data', style={'display': 'none'}),
    # Page heading and a short description
    html.H1('One-Way ANOVA'),
    html.P('This web app performs a one-way analysis of variance on user-uploaded data.'),
    # File upload component and its label
    html.H6('File Upload'),
    dcc.Upload(
        id='upload-data',
        className='upload',
        children=[html.P('Drag and Drop or '), html.A('Select Files')],
    ),
    # Input column dropdown; its options come from the columns of the
    # user-uploaded data
    html.H6('Input column (X)'),
    dcc.Dropdown(id='x-dropdown', options=[]),
    # Response column dropdown; its options come from the columns of the
    # user-uploaded data
    html.H6('Response column (Y)'),
    dcc.Dropdown(id='y-dropdown', options=[]),
    # Container where the output of the analysis is shown
    html.Div(id='output-section'),
    # Chart display (currently disabled)
    # dcc.Graph(id='output-chart'),
])

Part four, App Callbacks, defines functions which determine the interactivity of the Dash web app with respect to the Python calculations being done in the backend. These are different from the functions defined previously in part two as the app callbacks require function decorators to determine when they will be called. In Python, decorators allow us to take a function and add some functionality around it without changing the function’s own code. Decorators allow us to perform “metaprogramming” by allowing a part of our program to modify another part of itself at the time the decorated function is defined.

In Dash, function decorators are used to define what inputs are read into Python from the HTML frontend and where the values returned by the Python function are displayed in the HTML frontend.

For example, the load_data() function below returns three values, each of which maps to a property of one of the html or dcc components from the app layout defined in part three, identified by its id argument. For load_data(), Output('x-dropdown', 'options') tells Dash to send the first value returned by the function to the options property of the app layout component where id="x-dropdown", which is the first dropdown menu defined in part three.

For inputs, it is similar yet slightly more complex. The load_data() function has two arguments, contents and filename. As defined by [Input('upload-data', 'contents')], contents is read from the contents value of the app layout component where id="upload-data", which is the file upload menu defined in part three. Furthermore, any change applied to the value of contents in the app layout will trigger running the load_data() function and update the outputs defined by the decorator. Meanwhile, the filename argument of the load_data() function is taken from the filename value of the app layout component where id="upload-data", as defined by [State('upload-data', 'filename')]. However, changing the filename value will not trigger running the load_data() function, unlike changing the contents value, since inputs defined using State() do not trigger running the function while those defined using Input() do. For the load_data() function, this decorator design decision was made such that the function isn’t needlessly run twice when both the contents and filename values of the data upload component change (which is the usual case when a file with both a filename and contents is uploaded).

@app.callback(
    [
        Output('x-dropdown', 'options'),
        Output('y-dropdown', 'options'),
        Output('input-data', 'children'),
    ],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')],
)
def load_data(contents, filename):
    """Read an uploaded file, fill both dropdown menus, and cache the data
    contents: Contents of the user-uploaded file (None if nothing was uploaded)
    filename: Name of the user-uploaded file
    Returns the options for the X dropdown, the options for the Y dropdown,
    and the data serialised as JSON for the hidden 'input-data' element
    """
    # Nothing uploaded yet: leave the dropdowns and the cache empty
    if contents is None:
        return [], [], ''

    uploaded = parse_contents(contents, filename)
    # Each column of the uploaded data becomes one dropdown entry
    dropdown_options = [{'label': name, 'value': name}
                        for name in uploaded.columns]
    # Serialise the data so it can be kept in the hidden element while the
    # user is accessing the web app
    cached_json = uploaded.to_json(date_format='iso', orient='split')
    return dropdown_options, dropdown_options, cached_json

#@app.callback([Output('output-section', 'children'),
#               Output('output-chart', 'figure')],
@app.callback(Output('output-section', 'children'),
              [Input('input-data', 'children'),
               Input('x-dropdown', 'value'),
               Input('y-dropdown', 'value')])
def run_anova(df, x_col, y_col):
    """Run ANOVA and display results
    df: The user-uploaded data, as the JSON string cached in the hidden
        'input-data' element (empty or None if nothing has been uploaded)
    x_col: The name of the input column for the ANOVA analysis
    y_col: The name of the response column for the ANOVA analysis
    Returns a list holding the ANOVA results table, or a message for the user
    when the analysis cannot be run
    """
    # Tell the user to upload data first if they have not yet uploaded data
    # (a truthiness check also copes with None, which len() would not)
    if not df:
        return 'Please upload data for analysis first.'

    try:
        # Load cached data from hidden table; the JSON text is wrapped in a
        # file-like object because newer pandas versions deprecate passing a
        # literal JSON string
        data = pd.read_json(io.StringIO(df), orient='split')

        # Only accept columns that really exist in the uploaded data
        if x_col not in data.columns or y_col not in data.columns:
            return 'Please select valid input and response columns.'

        # Run ANOVA on a copy with fixed variable names. The formula is
        # evaluated as Python code, so client-supplied column names must not
        # be pasted into it; fixed names also make names with spaces or
        # symbols work
        frame = pd.DataFrame({'response': data[y_col],
                              'factor': data[x_col]})
        model = ols('response ~ C(factor)', data=frame).fit()
        aov_table = sm.stats.anova_lm(model, typ=2)

        # Put the chosen input column's name back into the row label
        aov_table = aov_table.rename(
            index={'C(factor)': 'C(' + str(x_col) + ')'})
        aov_table = aov_table.reset_index()

        # Show the sum of squares as a bar chart (currently disabled)
        #chart_output = render_bar(aov_table, x_col='index', y_col='sum_sq')

        # Show ANOVA results as a table
        return [render_table(aov_table, id='anova-table')]#, chart_output

    except Exception as e:
        # The data could not be read or analysed with the selected columns
        print(e)
        # Tell user to select input and response columns
        return 'Please select valid input and response columns.'

Lastly, part five runs the Flask server and specifies the port and the host.

if __name__ == '__main__':
    # Start the server only when this file is run as a script
    app.run_server(host='0.0.0.0', port=8080, debug=False)
Share on:

Read more posts about...

See all posts