Dear all, Please be informed that, due to an important technical maintenance, the Gitlab server (git.list.lu) will not be available on Thursday April 22nd, from 9 A.M. to 1 P.M. (Luxembourg Time Zone). Thank you for your understanding.

including evolution example

parent 9fc2351e
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Calculating evolved annotations"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from __future__ import print_function\n",
"from shapely.geometry import LineString\n",
"import pandas as pd\n",
"from IPython.display import display, HTML\n",
"\n",
"#load initial dataset\n",
"annotations ='https://git.list.lu/ELISA/AnnotationDataset/raw/master/Direct%20Maintenance/DataSet_2010/snomedct_original_annot_in_2009.csv'\n",
"original=pd.read_csv(annotations,delimiter=\",\")\n",
"\n",
"\n",
"#load evolved annotations\n",
"evolved = 'https://git.list.lu/ELISA/AnnotationDataset/raw/master/Direct%20Maintenance/DataSet_2010/snomedct_evolved_in_2010.csv'\n",
"evolvedAnnot=pd.read_csv(evolved,delimiter=\",\")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>kos</th>\n",
" <th>document</th>\n",
" <th>class</th>\n",
" <th>sentence</th>\n",
" <th>startOffset</th>\n",
" <th>endOffset</th>\n",
" <th>prefix</th>\n",
" <th>sufix</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>SNOMED2009</td>\n",
" <td>2630847.nxml</td>\n",
" <td>C0085672</td>\n",
" <td>Microbiology</td>\n",
" <td>43328</td>\n",
" <td>43340</td>\n",
" <td>applied and environmental</td>\n",
" <td>2004 70 7520 7529 15574955</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SNOMED2009</td>\n",
" <td>2630849.nxml</td>\n",
" <td>C0001687</td>\n",
" <td>adverse effects</td>\n",
" <td>2874</td>\n",
" <td>2889</td>\n",
" <td>and humans at risk for</td>\n",
" <td>during and subsequent to</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>SNOMED2009</td>\n",
" <td>2630851.nxml</td>\n",
" <td>C0023470</td>\n",
" <td>myeloid leukemia</td>\n",
" <td>27071</td>\n",
" <td>27087</td>\n",
" <td>clavularia viridis on human</td>\n",
" <td>(hl-60) cells biochem. biophys.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>SNOMED2009</td>\n",
" <td>2630920.nxml</td>\n",
" <td>C0422392</td>\n",
" <td>screening method</td>\n",
" <td>6574</td>\n",
" <td>6590</td>\n",
" <td>cost-effective multiplex genetic</td>\n",
" <td>in order to determine</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>SNOMED2009</td>\n",
" <td>2630927.nxml</td>\n",
" <td>C0565514</td>\n",
" <td>compression</td>\n",
" <td>12649</td>\n",
" <td>12660</td>\n",
" <td>ring-reinforced ptfe to prevent</td>\n",
" <td>, short-term anticoagulation,</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HTML(original.head(5).to_html())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>document</th>\n",
" <th>concept</th>\n",
" <th>annotation</th>\n",
" <th>start</th>\n",
" <th>end</th>\n",
" <th>kosAtt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2630847.nxml</td>\n",
" <td>C0085672</td>\n",
" <td>Microbiology</td>\n",
" <td>43328</td>\n",
" <td>43340</td>\n",
" <td>Microbiology</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2630851.nxml</td>\n",
" <td>C0023470</td>\n",
" <td>Myeloid leukemia</td>\n",
" <td>27071</td>\n",
" <td>27087</td>\n",
" <td>Myeloid leukemia</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2630920.nxml</td>\n",
" <td>C0422392</td>\n",
" <td>screening method</td>\n",
" <td>6574</td>\n",
" <td>6590</td>\n",
" <td>screening method</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2630927.nxml</td>\n",
" <td>C0332459</td>\n",
" <td>Compression</td>\n",
" <td>12649</td>\n",
" <td>12660</td>\n",
" <td>Compression</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2630928.nxml</td>\n",
" <td>C1303150</td>\n",
" <td>disease management programs</td>\n",
" <td>34297</td>\n",
" <td>34324</td>\n",
" <td>Disease management program</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"HTML(evolvedAnnot.head(5).to_html())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compute the intersection between the offsets\n",
"\n",
"## Using the offset we can group the annotations that occurs in the same part of text"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Microbiology:C0085672 evolvedTo Microbiology:C0085672\n",
"myeloid leukemia:C0023470 evolvedTo Myeloid leukemia:C0023470\n",
"screening method:C0422392 evolvedTo screening method:C0422392\n",
"compression:C0565514 evolvedTo Compression:C0332459\n",
"management:C1273870 evolvedTo disease management programs:C1303150\n",
"stroke:C0038454 evolvedTo stroke:C0038454\n",
"cholelithiasis:C0008350 evolvedTo cholelithiasis:C0008350\n",
"osteoporosis:C0029456 evolvedTo osteoporosis:C0029456\n",
"schizophrenia:C0036341 evolvedTo schizophrenia:C0036341\n",
"vibration:C0455941 evolvedTo vibration:C0459800\n",
"Parkinson's disease:C0030567 evolvedTo Parkinson's disease:C0030567\n",
"nerve injury:C0161479 evolvedTo Nerve injury:C0161479\n",
"myoclonus:C0027066 evolvedTo myoclonus:C0027066\n",
"cardiac arrest:C0018790 evolvedTo cardiac arrest:C0018790\n",
"lower urinary tract:C0447570 evolvedTo problems of the lower urinary tract:C0268818\n",
"occupational therapy:C1318464 evolvedTo occupational therapy:C1318464\n",
"Armed Forces:C0402580 evolvedTo Armed Forces:C0402580\n",
"hysteria:C0020701 evolvedTo hysteria:C0020701\n",
"Optic atrophy:C0029124 evolvedTo Optic atrophy:C0029124\n",
"hydronephrosis:C0020295 evolvedTo Hydronephrosis:C0020295\n",
"small bowel obstruction:C0235329 evolvedTo small bowel obstruction:C0235329\n",
"vertebra:C0447984 evolvedTo vertebra c7:C1306475\n",
"Tympanoplasty:C0585590 evolvedTo Tympanoplasty:C0041447\n",
"attempted suicide:C0038663 evolvedTo attempted suicide:C0038663\n",
"spontaneous abortion:C0000786 evolvedTo Spontaneous abortion:C0000786\n",
"Trichomonas:C0040921 evolvedTo Trichomonas:C0040919\n",
"developmental disorder:C0008073 evolvedTo Developmental disorder:C0008073\n",
"inpatient stay:C0420512 evolvedTo Inpatient stay:C0420512\n",
"Spondylitis:C0038012 evolvedTo Spondylitis:C0038012\n",
"chronic sinusitis:C0149516 evolvedTo Chronic sinusitis:C0149516\n",
"diverticulum:C0012817 evolvedTo Meckel diverticulum:C0025037\n",
"Hydrotherapy:C0020311 evolvedTo Hydrotherapy:C0020311\n",
"Partial colectomy:C0400031 evolvedTo Partial colectomy:C0149750\n",
"postoperative complication:C0032787 evolvedTo Postoperative complication:C0032787\n",
"lymphadenitis:C0024205 evolvedTo Lymphadenitis:C0024205\n",
"Infectious Diseases:C0009450 evolvedTo division of Infectious Diseases:C0587472\n",
"volvulus:C0042961 evolvedTo volvulus:C0042961\n",
"solution:C0525069 evolvedTo Ringer solution:C0073386\n",
"exotropia:C0015310 evolvedTo exotropia:C0015310\n",
"enophthalmos:C0014306 evolvedTo enophthalmos:C0014306\n",
"posterior uveitis:C0042167 evolvedTo posterior uveitis:C0042167\n",
"scarring:C2004491 evolvedTo scarring:C2004491\n",
"neck pain:C0007859 evolvedTo neck pain:C0007859\n",
"hypopituitarism:C0020635 evolvedTo hypopituitarism:C0020635\n",
"synovectomy:C1532181 evolvedTo synovectomy:C0185304\n",
"diverticulosis:C1510475 evolvedTo colonic diverticulosis:C0012819\n",
"punch biopsy of skin:C0191321 evolvedTo punch biopsy of skin:C0191321\n",
"Enthesopathy:C0242490 evolvedTo Enthesopathy:C0242490\n",
"gouty arthritis:C0003868 evolvedTo gouty arthritis:C0003868\n",
"postoperative myocardial infarction:C0589368 evolvedTo postoperative myocardial infarction:C0589368\n",
"Tomography:C0040395 evolvedTo single photon emission computed tomography:C0040399\n",
"Computed Tomography:C0040405 evolvedTo single photon emission computed tomography:C0040399\n",
"depression:C0344315 evolvedTo depression:C1269683\n"
]
}
],
"source": [
"count=0\n",
"for index1, anno1 in original.iterrows():\n",
" for index2, anno2 in evolvedAnnot.iterrows():\n",
" if anno1.document == anno2.document:\n",
" line1 = LineString([(int(anno1.startOffset),int(anno1.startOffset)), (int(anno1.endOffset),int(anno1.endOffset))])\n",
" line2 = LineString([(int(anno2.start),int(anno2.start)), (int(anno2.end),int(anno2.end))])\n",
" if line1.intersection(line2):\n",
" print(str(anno1['sentence'])+':'+str(anno1['class'])+' evolvedTo '+str(anno2['annotation'])+':'+str(anno2['concept']))\n",
" count = count+1"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"53"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"count"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
...@@ -3,3 +3,19 @@ This Dataset is under the license: Attribution-NonCommercial 3.0 Unported (CC BY ...@@ -3,3 +3,19 @@ This Dataset is under the license: Attribution-NonCommercial 3.0 Unported (CC BY
For more information, see: https://creativecommons.org/licenses/by-nc/3.0/ For more information, see: https://creativecommons.org/licenses/by-nc/3.0/
![cc-by-nc](https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png) ![cc-by-nc](https://mirrors.creativecommons.org/presskit/buttons/88x31/png/by-nc.png)
The files inside the folder: **Direct Maintenance** correspond to the papers:
Combining rules, background knowledge and change patterns to maintain semantic annotations
https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5977713/
Evolving semantic annotations through multiple versions of controlled medical terminologies
https://link.springer.com/article/10.1007%2Fs12553-018-0261-3
Access the file **Find_Evolved_Annotations** to see how to compute the evolved annotations.
For further questions open an issue.
New annotations and evolution cases are welcome!
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment