% esel98.bib %%%%% 1998 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Bibliography entries for publications dated 1998.
%
% Conventions used in this file:
%   * One field per line; entry types and field names are lowercase.
%   * Author/editor names are separated by " and ".
%   * Months use the predefined three-letter macros (jan ... dec), unquoted.
%   * Abstract paragraphs are separated by a plain line break only.  A
%     percent sign is NOT a comment inside an entry, so none may appear
%     inside a field value.
%
% NOTE(review): the macros IEEECS, COMP and LIBCOMP are used below but are
% not defined in this file; presumably a companion file of string
% definitions is loaded before this one -- confirm.

@inproceedings{EKCA98:GP,
  author       = {Matthew P. Evett and Taghi M. Khoshgoftaar and
                  Pei-Der Chien and Edward B. Allen},
  title        = {{GP}-Based Software Quality Prediction},
  booktitle    = {Genetic Programming 1998: {Proceedings} of the Third
                  Annual Conference},
  year         = 1998,
  editor       = {John R. Koza and Wolfgang Banzhaf and Kumar Chellapilla
                  and Kalyanmoy Deb and Marco Dorigo and David B. Fogel
                  and Max H. Garzon and David E. Goldberg and Hitoshi Iba
                  and Rick Riolo},
  organization = {AAAI},
  month        = jul,
  address      = {Madison, WI USA},
  publisher    = {Morgan Kaufmann},
  pages        = {60--65},
  isbn         = {1-55860-548-7},
}

@inproceedings{KA98:ASSET,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  title        = {Tutorial: {Building} a Corporate Metrics Program for
                  High Quality Software},
  booktitle    = {1998 IEEE Workshop on Application-Specific Software
                  Engineering and Technology},
  year         = 1998,
  organization = IEEECS,
  month        = mar,
  address      = {Richardson, TX USA},
  keywords     = {software metric, software quality metric, software
                  testing, software life cycle, coupling},
  isbn         = {0-8186-8582-4},
}

@incollection{KA98:CISE,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  editor       = {Witold Pedrycz and James F. Peters},
  year         = 1998,
  title        = {Neural Networks for Software Quality Prediction},
  booktitle    = {Computational Intelligence in Software Engineering},
  series       = {Advances in Fuzzy Systems --- Applications and Theory},
  volume       = 16,
  pages        = {33--63},
  publisher    = {World Scientific},
  address      = {Singapore},
  isbn         = {981-02-3503-8},
  inlibrary    = {Khoshgoftaar},
  abstract     = {Society's reliance on large complex computer systems
                  mandates high reliability. Reliable software is a
                  necessary component. Controlling faults in software
                  requires that one can predict problems early enough to
                  take preventive action. Software metrics are a basis for
                  such predictions.
                  This study systematically presents a methodology for
                  developing models that predict software quality factors
                  with special emphasis on artificial neural network
                  models. The individual details of this methodology may
                  be familiar, but the whole modeling process must be
                  integrated to produce successful predictions of software
                  quality. We use two case studies to illustrate each
                  step. One case study predicted the number of faults to
                  be discovered in each module, and the other predicted
                  whether each module would be considered
                  {\em fault-prone}.
                  The first case study was based on a sample of modules
                  from a military command, control and communications
                  system. We developed a quantitative neural network
                  model, and for comparison, we developed a multiple
                  linear regression model on the same data. The neural
                  network approach produced a better quantitative model.
                  The second case study was based on very large
                  telecommunications system. We modeled modules reused
                  with changes from the previous release, representing
                  over seven million lines of code. We compared a
                  classification neural network model with a nonparametric
                  discriminant model, and found the classification neural
                  network model was more accurate.},
  keywords     = {software quality models, software metrics, principal
                  components analysis, multiple linear regression,
                  fault-prone software, discriminant analysis,
                  classification, neural network, backpropagation
                  algorithm, regression analysis, faults, average relative
                  error},
}

@article{KA98:ESE,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  title        = {Classification of Fault-Prone Software Modules: {Prior}
                  Probabilities, Costs, and Model Evaluation},
  journal      = {Empirical Software Engineering: {An} International
                  Journal},
  year         = 1998,
  month        = sep,
  volume       = 3,
  number       = 3,
  pages        = {275--298},
  abstract     = {Software quality models can give timely predictions of
                  reliability indicators, for targeting software
                  improvement efforts. In some cases, classification
                  techniques are sufficient for useful software quality
                  models. The software engineering community has not
                  applied informed prior probabilities widely to software
                  quality classification modeling studies. Moreover, even
                  though costs are of paramount concern to software
                  managers, costs of misclassification have received
                  little attention in the software engineering literature.
                  This paper applies informed prior probabilities and
                  costs of misclassification to software quality
                  classification. We also discuss the advantages and
                  limitations of several statistical methods for
                  evaluating the accuracy of software quality
                  classification models. We conducted two full-scale
                  industrial case studies which integrated these concepts
                  with nonparametric discriminant analysis to illustrate
                  how they can be used by a classification technique. The
                  case studies supported our hypothesis that
                  classification models of software quality can benefit by
                  considering informed prior probabilities and by
                  minimizing the expected cost of misclassifications. The
                  case studies also illustrated the advantages and
                  limitations of resubstitution, cross-validation, and
                  data splitting for model evaluation.},
}

@article{KA98:RQSE,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  month        = sep,
  year         = 1998,
  title        = {An Information Theoretic Approach to Predicting Software
                  Faults},
  journal      = {International Journal of Reliability, Quality and Safety
                  Engineering},
  volume       = 5,
  number       = 3,
  pages        = {227--248},
  abstract     = {Software measurement and modeling is intended to improve
                  quality by predicting quality factors, such as
                  reliability, early in the life cycle. The field of
                  software measurement generally assumes that attributes
                  of software products early in the life cycle are somehow
                  related to the amount of information in those products,
                  and thus, are related to the quality that eventually
                  results from the development process. Kolmogorov
                  complexity and information theory offer a way to
                  quantify the amount of information in a finite object,
                  such as a program, in a unifying framework. Based on
                  these principles, we propose a new synthetic measure of
                  information composed from a set of conventional
                  primitive metrics in a module. Since not all information
                  is equally relevant to fault-insertion, we also consider
                  components of the overall information content. We
                  present a model for fault-insertion based on a
                  nonhomogeneous Poisson process and Poisson regression.
                  This approach is attractive, because the underlying
                  assumptions are appropriate for software quality data.
                  This approach also gives insight into design attributes
                  that affect fault insertion. A validation case study of
                  a large sample of modules from a very large
                  telecommunications system provides empirical evidence
                  that the components of synthetic module complexity can
                  be useful in software quality modeling. A large
                  telecommunications system is an example of a computer
                  system with rigorous software quality requirements.},
  keywords     = {Kolmogorov complexity, information theory, Poisson
                  regression, software metrics, software reliability,
                  principal components analysis},
}

@inproceedings{KA98:ICSM,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  title        = {Can a Software Quality Model Hit a Moving Target?},
  booktitle    = {Proceedings of the International Conference on Software
                  Maintenance},
  year         = 1998,
  organization = IEEECS,
  month        = nov,
  address      = {Bethesda, MD USA},
  pages        = {68--70},
  note         = {Position paper.},
  isbn         = {0-8186-8779-7},
  abstract     = {\ldots If the future is not similar to the past, then
                  predictions are impossible. \ldots Measurement of one
                  past release is not enough. \ldots Threats to model
                  accuracy can be detected early. \ldots There is hope for
                  the future.},
}

@inproceedings{KA98:ISSRE,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  title        = {Predicting the Order of Fault-Prone Modules in Legacy
                  Software},
  booktitle    = {Proceedings of the Ninth International Symposium on
                  Software Reliability Engineering},
  year         = 1998,
  organization = IEEECS,
  month        = nov,
  address      = {Paderborn, Germany},
  pages        = {344--353},
  keywords     = {software reliability, fault-prone modules, software
                  quality models, module-order models, software reuse,
                  classification, multiple linear regression, principal
                  components analysis},
  isbn         = {0-8186-8991-9},
  abstract     = {A goal of software quality modeling is to recommend
                  modules for reliability enhancement early enough to
                  prevent poor quality. Reliability improvement techniques
                  include more rigorous design and code reviews and
                  automatic test case generation to support more extensive
                  testing. This paper introduces the concept of
                  module-order models for guiding software reliability
                  enhancement and provides an empirical case study that
                  shows how such models can be used. A
                  {\em module-order model\/} predicts the rank-order of
                  modules according to a quantitative quality factor.
                  The case study examined two releases of a large legacy
                  telecommunications system. One release was used to build
                  models and the subsequent release was used to evaluate
                  the models. This was a realistic simulation of how such
                  models can be used. Three models with differing sets of
                  independent variables were compared to assess the
                  importance of the amount of development in module-order
                  models. We found that the amount of new and changed code
                  due to development of a release can be a better
                  predictor of the amount of new and changed code due to
                  subsequent bug fixes, compared to software product
                  metrics alone. In such projects, process-related
                  measures derived from configuration management data may
                  be adequate for software quality modeling, without
                  resorting to software product measurement tools and
                  expertise.},
}

@inproceedings{KA98:WESS,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen},
  month        = nov,
  year         = 1998,
  title        = {Predicting Software Modules That Will Need Rework},
  booktitle    = {Proceedings: Third Annual Workshop on Empirical Studies
                  of Software Maintenance},
  pages        = {49--50},
  note         = {Position paper. Available at
                  \verb+www.cs.umd.edu/~sharip/wess+.},
  address      = {Bethesda, Maryland USA},
  organization = IEEECS,
  keywords     = {Software Maintenance},
}

@article{KAHTF98:Comp,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen and
                  Robert Halstead and Gary P. Trio and Ronald Flass},
  title        = {Process Measures for Predicting Software Quality},
  journal      = COMP,
  year         = 1998,
  month        = apr,
  volume       = 31,
  number       = 4,
  pages        = {66--72},
  keywords     = {software process, software reuse, spiral life cycle,
                  software quality modeling, logistic regression},
  inlibrary    = LIBCOMP,
  abstract     = {Many systems require high assurance of software
                  reliability. A software-quality model can make timely
                  predictions of reliability indicators. These enable one
                  to improve software-development processes by targeting
                  reliability-improvement techniques more effectively and
                  efficiently. Software reliability is essential for
                  tactical military systems, such as the Joint
                  Surveillance Target Attack Radar System,
                  \textsc{jstars}. It is an embedded application, which
                  performs real time detection, location, classification,
                  and tracking of moving and fixed objects on the ground.
                  This paper presents a case study of a large subsystem of
                  \textsc{jstars} to improve integration and testing. The
                  dependent variable of a logistic-regression model was
                  the class of a module: {\em not fault-prone} or
                  {\em fault-prone}. Measures of the process history of
                  each module were independent variables. The case study
                  supports our hypothesis that the likelihood of
                  discovering additional faults during integration and
                  test can be usefully modeled as a function of the module
                  history prior to integration. This history is readily
                  available by combining data from the project's
                  configuration-management system and problem-reporting
                  system.},
}

@inproceedings{KAJH98:ASSET,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen and
                  Wendell D. Jones and John P. Hudepohl},
  title        = {Return on Investment of Software Quality Models},
  booktitle    = {Proceedings 1998 IEEE Workshop on Application-Specific
                  Software Engineering and Technology},
  year         = 1998,
  organization = IEEECS,
  month        = mar,
  address      = {Richardson, TX USA},
  pages        = {145--150},
  isbn         = {0-8186-8582-4},
  abstract     = {Software quality classification models can be used to
                  target reliability enhancement efforts toward high risk
                  modules. We summarize a generalized classification rule
                  which we have proposed. Cost aspects of a software
                  quality classification model are discussed. The
                  contribution of this paper is a demonstration of how to
                  assess the return on investment of model accuracy, in
                  the context of a software quality classification model.
                  An industrial case study of a very large
                  telecommunications system illustrates the method. The
                  dependent variable of the model was the probability that
                  a module will have faults discovered by customers. The
                  independent variables were software product and process
                  metrics. The model is compared to random selection of
                  modules for reliability enhancement. Calculation of
                  return on investment can guide selection of the
                  generalized-classification rule's parameter so that the
                  model is well-suited to the project.},
}

@inproceedings{KANJH98:RQD,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen and
                  Archana Naik and Wendell D. Jones and John P. Hudepohl},
  title        = {Modeling Software Quality with Classification Trees},
  booktitle    = {Proceedings of the Fourth ISSAT International Conference
                  on Reliability and Quality in Design},
  year         = 1998,
  editor       = {Hoang Pham and Ming-Wei Lu},
  organization = {International Society of Science and Applied
                  Technologies},
  month        = aug,
  address      = {Seattle, WA USA},
  pages        = {178--182},
  inlibrary    = {Khoshgoftaar},
  isbn         = {0-9639998-3-4},
  abstract     = {This paper introduces the Classification And Regression
                  Trees, \textsc{cart}, modeling technique as a method for
                  identifying fault-prone software modules. In contrast to
                  other classification-tree algorithms, the \textsc{cart}
                  algorithm first builds as large a tree as possible,
                  which typically overfits the data, and then prunes the
                  tree to an appropriate size for good predictive
                  accuracy. A classification tree also facilitates
                  interpretation of software product metrics to
                  characterize the fault-prone class. A case study applied
                  \textsc{cart} to a very large legacy telecommunications
                  system. We developed a classification-tree model using
                  software product metrics to predict whether or not each
                  module in a similar project or a subsequent release
                  would be considered fault-prone. Model results could be
                  used to identify those modules that would probably
                  benefit from extra reviews and testing, and thus, reduce
                  the risk of discovering faults after release.
                  Identifying fault-prone modules early in the development
                  can lead to better reliability.},
}

@inproceedings{KANJH98:HASE,
  author       = {Taghi M. Khoshgoftaar and Edward B. Allen and
                  Archana Naik and Wendell D. Jones and John P. Hudepohl},
  title        = {Using Classification Trees for Software Quality Models:
                  {Lessons} Learned},
  booktitle    = {Proceedings of the Third IEEE International
                  High-Assurance Systems Engineering Symposium},
  year         = 1998,
  organization = IEEECS,
  month        = nov,
  address      = {Bethesda, MD USA},
  pages        = {82--89},
  isbn         = {0-8186-9221-9},
  abstract     = {High software reliability is an important attribute of
                  high-assurance systems. We focus on the absence of
                  faults discovered by users as an indicator of
                  reliability. Software quality models are tools for
                  focusing efforts to find faults early in development.
                  Such models yield timely predictions of reliability
                  indicators on a module-by-module basis, enabling one to
                  target reliability enhancement techniques. For example,
                  Enhanced Measurement for Early Risk Assessment of Latent
                  Defects (\textsc{emerald}) is a system at Nortel which
                  provides software quality models that predict which
                  modules are likely to be fault-prone. This paper
                  introduces the Classification And Regression Trees
                  (\textsc{cart}) algorithm to practitioners in
                  high-assurance systems engineering. The contribution of
                  this paper is practical lessons-learned on building
                  classification trees for software quality modeling,
                  including an innovative way to control the balance
                  between misclassification rates. A case study of a very
                  large telecommunications system used \textsc{cart} to
                  build software quality models. The models predicted
                  whether or not modules were fault-prone, based on
                  various sets of software product and process metrics as
                  independent variables. We found that a model based on
                  two software product metrics had comparable accuracy to
                  a model based on forty product and process metrics. The
                  results of this paper are being refined and further
                  evaluated for the \textsc{emerald} system.},
}

@incollection{KEAC98:CISE,
  author       = {Taghi M. Khoshgoftaar and Matthew P. Evett and
                  Edward B. Allen and Pei-Der Chien},
  editor       = {Witold Pedrycz and James F. Peters},
  year         = 1998,
  title        = {An Application of Genetic Programming to Software
                  Quality Prediction},
  booktitle    = {Computational Intelligence in Software Engineering},
  series       = {Advances in Fuzzy Systems --- Applications and Theory},
  volume       = 16,
  pages        = {176--195},
  publisher    = {World Scientific},
  address      = {Singapore},
  isbn         = {981-02-3503-8},
  inlibrary    = {Khoshgoftaar},
  abstract     = {Because highly reliable software is becoming an
                  essential ingredient in many systems, software
                  developers apply various techniques to discover faults
                  early in development, such as more rigorous reviews,
                  more extensive testing, and strategic assignment of key
                  personnel. Our goal is to target reliability enhancement
                  activities to those modules that are most likely to have
                  problems.
                  This paper presents a methodology that incorporates
                  genetic programming for predicting the order of software
                  modules based on the expected number of faults. This is
                  the first application of genetic programming to software
                  engineering that we know of.
                  We found that genetic programming can be used to
                  generate software quality models whose inputs are
                  software metrics collected earlier in development, and
                  whose output is a prediction of the number of faults
                  that will be discovered later in development or during
                  operations.
                  We established ordinal evaluation criteria for models,
                  and conducted an industrial case study of software from
                  a military communications system. Case study results
                  were sufficiently good to be useful to a project for
                  choosing modules for extra reliability enhancement
                  treatment.},
}

@mastersthesis{Naik98:MS,
  author       = {Archana Naik},
  title        = {Prediction of Software Quality Using Classification Tree
                  Modeling},
  school       = {Florida Atlantic University},
  year         = 1998,
  address      = {Boca Raton, FL USA},
  month        = dec,
  note         = {Advised by Taghi M.\ Khoshgoftaar.},
}

@inproceedings{SK98:HASE,
  author       = {Donald F. Schenker and Taghi M. Khoshgoftaar},
  title        = {The Application of Fuzzy-Enhanced Case-Based Reasoning
                  for Identifying Fault-Prone Modules},
  booktitle    = {Proceedings of the Third IEEE International
                  High-Assurance Systems Engineering Symposium},
  year         = 1998,
  organization = IEEECS,
  month        = nov,
  address      = {Bethesda, MD USA},
  pages        = {90--97},
  isbn         = {0-8186-9221-9},
}