% Conference paper from the PAKDD 2005 proceedings, published in Springer's
% Lecture Notes in Computer Science series.
% Acronyms in the title are brace-protected so sentence-casing styles keep them
% upper-case; every author is given in the unambiguous "Last, First" form.
% NOTE(review): "address" holds a country rather than the publisher's city, and
% no series "volume" number is recorded -- confirm both against the publisher
% record before relying on them.
@inproceedings{f442331fe77447c1bda7143a2f7eca3e,
  author    = {Kim, {Woo Cheol} and Park, Sanghyun and Won, {Jung Im} and Kim, {Sang Wook} and Yoon, {Jee Hee}},
  title     = {A {DNA} index structure using frequency and position information of genetic alphabet},
  booktitle = {Advances in Knowledge Discovery and Data Mining - 9th Pacific-Asia Conference, PAKDD 2005, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2005},
  pages     = {162--172},
  isbn      = {3540260765},
  doi       = {10.1007/11430919_21},
  language  = {English},
  note      = {9th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, PAKDD 2005 ; Conference date: 18-05-2005 Through 20-05-2005},
  abstract  = {Exact match queries, wildcard match queries, and k-mismatch queries are widely used in lots of molecular biology applications including the searching of ESTs (Expressed Sequence Tag) and DNA transcription factors. In this paper, we suggest an efficient indexing and processing mechanism for such queries. Our indexing method places a sliding window at every possible location of a DNA sequence and extracts its signature by considering the occurrence frequency of each nucleotide. It then stores a set of signatures using a multi-dimensional index, such as the R*-tree. Also, by assigning a weight to each position of a window, it prevents signatures from being concentrated around a few spots in indexing space. Our query processing method converts a query sequence into a multi-dimensional rectangle and searches the index for the signatures overlapped with the rectangle.},
}