{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean=50.303 stdv=4.426\n" ] } ], "source": [ "# From: https://machinelearningmastery.com/a-gentle-introduction-to-normality-tests-in-python/\n", "# generate gaussian data\n", "from numpy.random import seed\n", "from numpy.random import randn\n", "from numpy import mean\n", "from numpy import std\n", "# seed the random number generator\n", "seed(1)\n", "# generate univariate observations\n", "data = 5 * randn(100) + 50\n", "# summarize\n", "print('mean=%.3f stdv=%.3f' % (mean(data), std(data)))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD8CAYAAABw1c+bAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAEZBJREFUeJzt3X+MZWV9x/H3p/wwKdICMiAC61pLsGjKaqarhtTwoyAsRLSxLcRaajGrRhtNTCu2iRhtE0yj9AdGssIKWkRbFSVlBTZogib+YBYXBYFC6VrG3bKLIEi1Navf/jFn4+xw7+54z925A8/7ldzcc57znPt85+TMZ07OnHtOqgpJUjt+ZdIFSJKWlsEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5Jasz+ky5gkMMPP7xWrlw56TIk6Slj06ZND1fV1GL6LsvgX7lyJTMzM5MuQ5KeMpJ8b7F9PdUjSY0x+CWpMQa/JDXG4Jekxhj8ktSYvQZ/kmOTfDnJ3UnuSvL2rv2wJBuT3Ne9Hzpk/Qu6PvcluWDcP4Ak6ZezmCP+ncA7q+q3gJcBb01yAnARcEtVHQfc0s3vJslhwMXAS4HVwMXD/kBIkpbGXoO/qrZV1e3d9I+Au4GjgXOBq7tuVwOvHrD6K4GNVfVIVT0KbATOHEfhkqTR/FLn+JOsBF4MfAM4sqq2wdwfB+CIAascDTw4b362a5MkTciiv7mb5JnAZ4F3VNXjSRa12oC2gU93T7IWWAuwYsWKxZYlLamVF90wkXG3XHL2RMbV09OijviTHMBc6F9TVZ/rmh9KclS3/Chg+4BVZ4Fj580fA2wdNEZVrauq6aqanppa1O0mJEkjWMxVPQGuBO6uqg/NW3Q9sOsqnQuALwxY/SbgjCSHdv/UPaNrkyRNyGKO+E8CXg+cmmRz91oDXAKcnuQ+4PRuniTTSa4AqKpHgPcDt3Wv93VtkqQJ2es5/qr6KoPP1QOcNqD/DPDGefPrgfWjFihJGi+/uStJjTH4JakxBr8kNcbgl6TGGPyS1Jhl+cxdaW8m9Q1a6enAI35JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4Jakxe71XT5L1wDnA9qp6Udf2aeD4rsshwA+ratWAdbcAPwJ+Buysqukx1S1JGtFibtJ2FXAZ8PFdDVX1R7umk3wQeGwP659SVQ+P
WqAkabwW88zdW5OsHLQsSYA/BE4db1mSpH2l7zn+3wUeqqr7hiwv4OYkm5Ks7TmWJGkM+t6P/3zg2j0sP6mqtiY5AtiY5J6qunVQx+4Pw1qAFStW9CxLkjTMyEf8SfYHfh/49LA+VbW1e98OXAes3kPfdVU1XVXTU1NTo5YlSdqLPqd6fg+4p6pmBy1MclCSg3dNA2cAd/YYT5I0BnsN/iTXAl8Djk8ym+TCbtF5LDjNk+Q5STZ0s0cCX01yB/BN4IaqunF8pUuSRrGYq3rOH9L+pwPatgJruukHgBN71idJGjMftq6R+cBz6anJWzZIUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhqzmEcvrk+yPcmd89rem+T7STZ3rzVD1j0zyb1J7k9y0TgLlySNZjFH/FcBZw5ov7SqVnWvDQsXJtkP+DBwFnACcH6SE/oUK0nqb6/BX1W3Ao+M8Nmrgfur6oGq+inwKeDcET5HkjRGfc7xvy3Jt7tTQYcOWH408OC8+dmubaAka5PMJJnZsWNHj7IkSXsyavB/BHg+sArYBnxwQJ8MaKthH1hV66pquqqmp6amRixLkrQ3IwV/VT1UVT+rqp8DH2XutM5Cs8Cx8+aPAbaOMp4kaXxGCv4kR82bfQ1w54ButwHHJXlekgOB84DrRxlPkjQ++++tQ5JrgZOBw5PMAhcDJydZxdypmy3Am7q+zwGuqKo1VbUzyduAm4D9gPVVddc++SkkSYu21+CvqvMHNF85pO9WYM28+Q3Aky71lCRNjt/claTGGPyS1BiDX5IaY/BLUmMMfklqzF6v6pE0eSsvumFiY2+55OyJja19wyN+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMXsN/iTrk2xPcue8tr9Lck+Sbye5LskhQ9bdkuQ7STYnmRln4ZKk0SzmiP8q4MwFbRuBF1XVbwP/Drx7D+ufUlWrqmp6tBIlSeO01+CvqluBRxa03VxVO7vZrwPH7IPaJEn7wDjO8f8Z8MUhywq4OcmmJGv39CFJ1iaZSTKzY8eOMZQlSRqkV/An+WtgJ3DNkC4nVdVLgLOAtyZ5xbDPqqp1VTVdVdNTU1N9ypIk7cHIwZ/kAuAc4HVVVYP6VNXW7n07cB2wetTxJEnjMVLwJzkTeBfwqqr68ZA+ByU5eNc0cAZw56C+kqSls5jLOa8FvgYcn2Q2yYXAZcDBwMbuUs3Lu77PSbKhW/VI4KtJ7gC+CdxQVTfuk59CkrRoe33mblWdP6D5yiF9twJruukHgBN7VSdJGjsfti5pjyb1oHcf8r7veMsGSWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjFhX8SdYn2Z7kznlthyXZmOS+7v3QIete0PW5r3tAuyRpghZ7xH8VcOaCtouAW6rqOOCWbn43SQ4DLgZeCqwGLh72B0KStDQWFfxVdSvwyILmc4Gru+mrgVcPWPWVwMaqeqSqHgU28uQ/IJKkJdTnHP+RVbUNoHs/YkCfo4EH583Pdm1PkmRtkpkkMzt27OhRliRpT/b1P3czoK0GdayqdVU1XVXTU1NT+7gsSWpXn+B/KMlRAN379gF9ZoFj580fA2ztMaYkqac+wX89sOsqnQuALwzocxNwRpJDu3/qntG1SZImZLGXc14LfA04PslskguBS4DTk9wHnN7Nk2Q6yRUAVfUI8H7gtu71vq5NkjQh+y+mU1WdP2TRaQP6zgBvnDe/Hlg/UnWSpLHzm7uS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMaM
HPxJjk+yed7r8STvWNDn5CSPzevznv4lS5L6WNQTuAapqnuBVQBJ9gO+D1w3oOtXquqcUceRJI3XuE71nAb8R1V9b0yfJ0naR8YV/OcB1w5Z9vIkdyT5YpIXjmk8SdKIegd/kgOBVwH/OmDx7cBzq+pE4J+Az+/hc9YmmUkys2PHjr5lSZKGGMcR/1nA7VX10MIFVfV4VT3RTW8ADkhy+KAPqap1VTVdVdNTU1NjKEuSNMg4gv98hpzmSfLsJOmmV3fj/WAMY0qSRjTyVT0ASX4VOB1407y2NwNU1eXAa4G3JNkJ/AQ4r6qqz5iSpH56BX9V/Rh41oK2y+dNXwZc1mcMSdJ4+c1dSWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMb3u1aPlYeVFN0y6BElPIR7xS1JjDH5JaozBL0mNMfglqTEGvyQ1pnfwJ9mS5DtJNieZGbA8Sf4xyf1Jvp3kJX3HlCSNblyXc55SVQ8PWXYWcFz3einwke5dkjQBS3Gq51zg4zXn68AhSY5agnElSQOMI/gLuDnJpiRrByw/Gnhw3vxs1yZJmoBxnOo5qaq2JjkC2Jjknqq6dd7yDFinFjZ0fzTWAqxYsWIMZUl6KpvkN9K3XHL2xMZeCr2P+Ktqa/e+HbgOWL2gyyxw7Lz5Y4CtAz5nXVVNV9X01NRU37IkSUP0Cv4kByU5eNc0cAZw54Ju1wN/0l3d8zLgsara1mdcSdLo+p7qORK4Lsmuz/pkVd2Y5M0AVXU5sAFYA9wP/Bh4Q88xJUk99Ar+qnoAOHFA++Xzpgt4a59xJEnj4zd3JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNGcfD1peVST2g+en+cGapJU/3HBn5iD/JsUm+nOTuJHclefuAPicneSzJ5u71nn7lSpL66nPEvxN4Z1Xd3j1wfVOSjVX13QX9vlJV5/QYR5I0RiMf8VfVtqq6vZv+EXA3cPS4CpMk7Rtj+edukpXAi4FvDFj88iR3JPlikheOYzxJ0uh6/3M3yTOBzwLvqKrHFyy+HXhuVT2RZA3weeC4IZ+zFlgLsGLFir5lSZKG6HXEn+QA5kL/mqr63MLlVfV4VT3RTW8ADkhy+KDPqqp1VTVdVdNTU1N9ypIk7UGfq3oCXAncXVUfGtLn2V0/kqzuxvvBqGNKkvrrc6rnJOD1wHeSbO7a/gpYAVBVlwOvBd6SZCfwE+C8qqoeY0qSeho5+Kvqq0D20ucy4LJRx5AkjZ+3bJCkxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmKfdM3cnZVLP6JSkX5ZH/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxfR+2fmaSe5Pcn+SiAcufkeTT3fJvJFnZZzxJUn99Hra+H/Bh4CzgBOD8JCcs6HYh8GhV/SZwKfCBUceTJI1HnyP+1cD9VfVAVf0U+BRw7oI+5wJXd9OfAU5Lssfn9EqS9q0+wX808OC8+dmubWCfqtoJPAY8q8eYkqSe+tyyYdCRe43QZ65jshZY280+keTeHrWN0+HAw5MuYhlxe+zO7bE7t8fufqntkX4nw5+72I59gn8WOHbe/DHA1iF9ZpPsD/w68MigD6uqdcC6HvXsE0lmqmp60nUsF26P3bk9duf22N1y3R59TvXcBhyX5HlJDgTOA65f0Od64IJu+rXAl6pq4BG/JGlpjHzEX1U7k7wNuAnYD1hfVXcleR8wU1XXA1cCn0hyP3NH+ueNo2hJ0uh63Za5qjYAGxa0vWfe9P8Cf9BnjGVg2Z1+mjC3x+7cHrtze+xuWW6PeOZFktriLRskqTEG/wJJ9kvyrST/1s0/r7vdxH3d7ScOnHSNS2nA9rgqyX8m2dy9Vk26xqWSZEuS
73Q/90zXdliSjd3+sTHJoZOuc6kM2R7vTfL9efvHmknXuVSSHJLkM0nuSXJ3kpcv1/3D4H+ytwN3z5v/AHBpVR0HPMrcbShasnB7APxFVa3qXpsnUdQEndL93Lsu0bsIuKXbP27p5luycHvA3O/Lrv1jw9A1n37+Abixql4AnMjc782y3D8M/nmSHAOcDVzRzQc4lbnbTcDc7SdePZnqlt7C7aGB5t+WpKn9Q7+Q5NeAVzB3JSNV9dOq+iHLdP8w+Hf398BfAj/v5p8F/LC73QQMvi3F09nC7bHL3yb5dpJLkzxjAnVNSgE3J9nUfdMc4Miq2gbQvR8xseqW3qDtAfC2bv9Yv1xObSyB3wB2AB/rTo1ekeQglun+YfB3kpwDbK+qTfObB3Rt4jKoIdsD4N3AC4DfAQ4D3rXUtU3QSVX1EubuSPvWJK+YdEETNmh7fAR4PrAK2AZ8cIL1LaX9gZcAH6mqFwP/wzI5rTOIwf8LJwGvSrKFuTuNnsrcEe8h3e0mYPBtKZ6unrQ9kvxzVW2rOf8HfIy5u7Q2oaq2du/bgeuY+9kfSnIUQPe+fXIVLq1B26OqHqqqn1XVz4GP0s7+MQvMVtU3uvnPMPeHYFnuHwZ/p6reXVXHVNVK5r5h/KWqeh3wZeZuNwFzt5/4woRKXFJDtscfz9uJw9z5yjsnWOaSSXJQkoN3TQNnMPezz78tSTP7x7DtsWv/6LyGRvaPqvpv4MEkx3dNpwHfZZnuH72+uduIdwGfSvI3wLfo/nnTsGuSTDF3Gmwz8OYJ17NUjgSu6x4nsT/wyaq6McltwL8kuRD4L57631RfrGHb4xPdJb4FbAHeNLkSl9yfM/f7cSDwAPAG5g6ul93+4Td3JakxnuqRpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNeb/AewD/UIL3FA3AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from matplotlib import pyplot\n", "# Histogram of the sample held in `data` (created in an earlier cell), default binning.\n", "figure, axes = pyplot.subplots()\n", "axes.hist(data)\n", "pyplot.show()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from statsmodels.graphics.gofplots import qqplot\n", "# q-q plot\n", "qqplot(data, line='s')\n", "pyplot.show()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Statistics=0.992, p=0.822\n", "Sample looks Gaussian (fail to reject H0)\n" ] } ], "source": [ "# Shapiro-Wilk Test\n", "from scipy.stats import shapiro\n", "\n", "\n", "def report_normality(stat, p, alpha=0.05):\n", "    '''Print a test statistic and p-value, then the normality verdict at level alpha.\n", "\n", "    H0 is that the sample is Gaussian: p > alpha fails to reject H0,\n", "    anything else rejects it. Shared by the normality-test cells so the\n", "    interpretation logic lives in one place.\n", "    '''\n", "    print('Statistics=%.3f, p=%.3f' % (stat, p))\n", "    if p > alpha:\n", "        print('Sample looks Gaussian (fail to reject H0)')\n", "    else:\n", "        print('Sample does not look Gaussian (reject H0)')\n", "\n", "\n", "# normality test on `data` (created in the first cell)\n", "stat, p = shapiro(data)\n", "# interpret at the 5% significance level\n", "alpha = 0.05\n", "report_normality(stat, p, alpha)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Statistics=0.102, p=0.950\n", "Sample looks Gaussian (fail to reject H0)\n" ] } ], "source": [ "# D'Agostino and Pearson's Test\n", "from scipy.stats import normaltest\n", "# normality test on `data` (created in the first cell)\n", "stat, p = normaltest(data)\n", "# interpret with report_normality(), defined in the Shapiro-Wilk cell above\n", "alpha = 0.05\n", "report_normality(stat, p, alpha)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(0.9756130576133728, 0.937462329864502)\n", "(0.9822366833686829, 0.6493431925773621)\n", "(0.9902752041816711, 0.6868489384651184)\n", "(0.9980342984199524, 0.297299861907959)\n" ] } ], "source": [ "# From: https://medium.com/@rrfd/testing-for-normality-applications-with-python-6bf06ed646a9\n", "# Shapiro-Wilk Test\n", "from scipy.stats import shapiro\n", "from scipy.stats import norm\n", "# Create the random variables with mean 5, and sd 3\n", "x_10 = norm.rvs(loc=5, scale=3, size=10)\n", "x_50 = norm.rvs(loc=5, 
scale=3, size=50)\n", "x_100 = norm.rvs(loc=5, scale=3, size=100)\n", "x_1000 = norm.rvs(loc=5, scale=3, size=1000)\n", "# Print the Shapiro-Wilk result, (test statistic, p-value), for each sample size\n", "for sample in (x_10, x_50, x_100, x_1000):\n", "    print(shapiro(sample))\n", "# Reference (test statistic, p-value) pairs, one per sample in the order above.\n", "# NOTE(review): these values differ from this notebook's recorded output; presumably\n", "# they were copied from the source article's own run -- confirm.\n", "# (0.9122101664543152, 0.2965205907821655) # Fail to reject H0\n", "# (0.9418673515319824, 0.015981076285243034) # Reject H0\n", "# (0.9918511509895325, 0.810341477394104) # Fail to reject H0\n", "# (0.9987027645111084, 0.6903988718986511) # Fail to reject H0" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(0.9467288255691528, 7.196542352923968e-14)\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "from scipy.stats import shapiro\n", "# make_blobs is exported by sklearn.datasets itself; the old\n", "# sklearn.datasets.samples_generator module path was deprecated and later removed.\n", "from sklearn.datasets import make_blobs\n", "\n", "# Synthetic data: 300 points drawn around 4 cluster centres.\n", "X, y_true = make_blobs(n_samples=300, centers=4,\n", "                       cluster_std=0.60, random_state=0)\n", "plt.scatter(X[:, 0], X[:, 1], s=50);\n", "\n", "# NOTE(review): X has more than one column, while Shapiro-Wilk is a univariate test;\n", "# presumably all values of X are pooled into a single sample here -- confirm intent.\n", "# The test is run once and the same result is both unpacked and printed.\n", "result = shapiro(X)\n", "stat, p = result\n", "print(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }