Transcriptic robotize

Source File: clone_detection_algorithm.py

import sys
from past.utils import old_div  # division helper from the python-future package

# Cluster, Unifier, StatementSequence, PairSequences, suffix_tree, verbose and
# the global `arguments` are provided by the surrounding project.

def findDuplicateCode(source_files, report):
    statement_sequences = []
    statement_count = 0
    sequences_lengths = []
    for source_file in source_files:
        sequences = source_file.getTree().getAllStatementSequences()
        statement_sequences.extend(sequences)
        sequences_lengths.extend([len(s) for s in sequences])
        statement_count += sum([len(s) for s in sequences])

    if not sequences_lengths:
        print('Input is empty or the size of the input is below the size threshold')
        return []

    n_sequences = len(sequences_lengths)
    avg_seq_length = old_div(sum(sequences_lengths), float(n_sequences))
    max_seq_length = max(sequences_lengths)
    print('average sequence length: %f' % (avg_seq_length,))
    print('maximum sequence length: %d' % (max_seq_length,))

    # Overly long statement sequences are reported and skipped.
    sequences_without_restriction = statement_sequences
    for sequence in sequences_without_restriction:
        if len(sequence) > 1000:
            first_statement = sequence[0]
            print('Warning: sequences of statements, consists of %d elements is too long.' % (len(sequence),))
            print('It starts at %s:%d.' % (first_statement.getSourceFile().getFileName(),
                                           min(first_statement.getCoveredLineNumbers())))
            print('It will be ignored. Use --force to override this restriction.')

    def build_hash_to_statement(dcup_hash=True):
        # Bucket every statement by its depth-limited d-cup hash, or by the
        # full subtree hash when dcup_hash is False.
        hash_to_statement = {}
        for statement_sequence in statement_sequences:
            for statement in statement_sequence:
                if dcup_hash:
                    h = statement.getDCupHash(arguments.hashing_depth)
                else:
                    h = statement.getFullHash()
                hash_to_statement.setdefault(h, []).append(statement)
        return hash_to_statement

    def build_unifiers(hash_to_statement):
        # Greedily cluster the statements of each hash bucket; open a new
        # cluster whenever unification would exceed the clustering threshold.
        processed_statements_count = 0
        clusters_map = {}
        for h, statements in hash_to_statement.items():
            clusters = []
            for statement in statements:
                processed_statements_count += 1
                if verbose and ((processed_statements_count % 1000) == 0):
                    print('%d,' % (processed_statements_count,), end=' ')
                bestcluster, mincost = None, sys.maxsize
                for cluster in clusters:
                    cost = cluster.getAddCost(statement)
                    if cost < mincost:
                        mincost, bestcluster = cost, cluster
                if bestcluster is None or mincost > arguments.clustering_threshold:
                    clusters.append(Cluster(statement))
                else:
                    bestcluster.unify(statement)
            clusters_map[h] = clusters
        return clusters_map

    def clusterize(hash_to_statement, clusters_map):
        # clusters_map contains hash values for statements, not unifiers,
        # therefore it will work correctly even if unifiers are smaller than
        # the hashing depth value.
        for h, statements in hash_to_statement.items():
            for statement in statements:
                best_cluster, mincost = None, sys.maxsize
                for cluster in clusters_map[h]:
                    new_u = Unifier(cluster.getUnifierTree(), statement)
                    # assert(new_u.getSubstitutions().getSize() == 0)
                    if new_u.getSize() < mincost:
                        mincost, best_cluster = new_u.getSize(), cluster
                statement.setMark(best_cluster)

    def filterOutLongEquallyLabeledSequences(statement_sequences):
        # Long runs of equally labeled statements are reported and ignored.
        filtered_sequences = []
        for sequence in statement_sequences:
            ...
            first_statement = sequence[first_statement_index]
            print('Warning: sequence of statements starting at %s:%d'
                  % (first_statement.getSourceFile().getFileName(),
                     min(first_statement.getCoveredLineNumbers())))
            print('consists of many similar statements.')
            for i in range(first_statement_index, current_statement_index):
                ...
        return filtered_sequences

    def findHugeSequences():
        # Candidate clone pairs come from a suffix tree over all sequences.
        def f_elem(x):
            return StatementSequence(x).getCoveredLineNumbersCount()
        suffix_tree_instance = suffix_tree.SuffixTree(fcode)
        for sequence in statement_sequences:
            suffix_tree_instance.add(sequence)
        return [PairSequences([StatementSequence(s1), StatementSequence(s2)])
                for (s1, s2) in suffix_tree_instance.getBestMaxSubstrings(
                    arguments.size_threshold, f, f_elem)]

    def refineDuplicates(pairs_sequences):
        # Shrink each candidate pair to the largest sub-window whose distance
        # is below the threshold; the leftovers on both sides are re-queued.
        r = []
        while pairs_sequences:
            pair_sequences = pairs_sequences.pop()

            def all_pairsubsequences_size_n_threshold(n):
                lr = []
                for first in range(0, pair_sequences.getLength() - n + 1):
                    new_pair_sequences = pair_sequences.subSequence(first, n)
                    size = new_pair_sequences.getMaxCoveredLineNumbersCount()
                    if size >= arguments.size_threshold:
                        lr.append((new_pair_sequences, first))
                return lr

            n = pair_sequences.getLength()
            while n > 0:
                matched = False
                new_pairs_sequences = all_pairsubsequences_size_n_threshold(n)
                for (candidate_sequence, first) in new_pairs_sequences:
                    distance = candidate_sequence.calcDistance()
                    if distance < arguments.distance_threshold:
                        r.append(candidate_sequence)
                        if first > 0:
                            pairs_sequences.append(pair_sequences.subSequence(0, first - 1))
                        if first + n < pair_sequences.getLength():
                            pairs_sequences.append(pair_sequences.subSequence(
                                first + n, pair_sequences.getLength() - first - n))
                        matched = True
                        break
                if matched:
                    break
                n -= 1
        return r

    # return a.getLevel().__cmp__(b.getLevel())

    print('Number of statements: ', statement_count)
    print('Calculating size for each statement.', end=' ')

    print('Building statement hash.', end=' ')
    report.startTimer('Building statement hash')
    if arguments.clusterize_using_hash:
        hash_to_statement = build_hash_to_statement(dcup_hash=False)
    else:
        hash_to_statement = build_hash_to_statement(dcup_hash=True)
    print('Number of different hash values: ', len(hash_to_statement))

    if arguments.clusterize_using_dcup or arguments.clusterize_using_hash:
        print('Marking each statement with its hash value')
    else:
        print('Choosing pattern for each statement.', end=' ')
        clusters_map = build_unifiers(hash_to_statement)
        clusterize(hash_to_statement, clusters_map)
    report.startTimer('Marking similar statements')

    print('Building reverse hash for reporting.')
    print('Finding similar sequences of statements.', end=' ')
    statement_sequences = filterOutLongEquallyLabeledSequences(statement_sequences)
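The bucket-then-cluster step (build_hash_to_statement plus build_unifiers) is easier to see on plain data. Here is a minimal, self-contained sketch of the same idea; the token tuples, the length-based stand-in "hash", the mismatch-count cost function and every name in it are illustrative assumptions, not the tool's real AST hashing or unification.

CLUSTERING_THRESHOLD = 1  # allow cluster members to differ by at most 1 token

def bucket_by_hash(statements):
    # Crude stand-in for the d-cup hash: bucket statements by token count.
    buckets = {}
    for stmt in statements:
        buckets.setdefault(len(stmt), []).append(stmt)
    return buckets

def add_cost(cluster, stmt):
    # Cost of adding stmt to a cluster: positions differing from its representative.
    rep = cluster[0]
    return sum(1 for a, b in zip(rep, stmt) if a != b)

def build_clusters(buckets, threshold=CLUSTERING_THRESHOLD):
    clusters_map = {}
    for h, stmts in buckets.items():
        clusters = []
        for stmt in stmts:
            best, mincost = None, float('inf')
            for cluster in clusters:
                cost = add_cost(cluster, stmt)
                if cost < mincost:
                    mincost, best = cost, cluster
            if best is None or mincost > threshold:
                clusters.append([stmt])   # too expensive: open a new cluster
            else:
                best.append(stmt)         # cheap enough: "unify" into the cluster
        clusters_map[h] = clusters
    return clusters_map

statements = [
    ('x', '=', 'a', '+', 'b'),
    ('y', '=', 'a', '+', 'b'),
    ('print', '(', 'x', ')'),
]
print(build_clusters(bucket_by_hash(statements)))
# The two assignments differ in one token and land in one cluster;
# the call statement sits in its own bucket and opens its own cluster.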






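The refineDuplicates helper above does a shrinking window search: try the largest sub-windows of a candidate pair first, and accept the first one whose distance falls under the threshold. Below is a toy version of that search, with plain aligned lists standing in for PairSequences and a mismatch count standing in for calcDistance; all names and thresholds here are hypothetical.

def calc_distance(seq_a, seq_b, first, n):
    # Distance of the aligned window [first, first+n): count of differing slots.
    return sum(1 for a, b in zip(seq_a[first:first + n], seq_b[first:first + n]) if a != b)

def best_similar_window(seq_a, seq_b, size_threshold, distance_threshold):
    # Scan windows from largest to smallest, mirroring the n-decrementing loop
    # in refineDuplicates, and return the first acceptable (offset, size).
    length = min(len(seq_a), len(seq_b))
    for n in range(length, size_threshold - 1, -1):
        for first in range(0, length - n + 1):
            if calc_distance(seq_a, seq_b, first, n) < distance_threshold:
                return first, n
    return None

a = ['load', 'add', 'store', 'print', 'ret']
b = ['load', 'add', 'store', 'log',   'ret']
print(best_similar_window(a, b, size_threshold=3, distance_threshold=2))
# -> (0, 5): the whole pair differs in only one slot, under the distance threshold

Because the largest window is tried first, a near-identical pair is reported once as a single clone rather than as many overlapping fragments, which is the point of the refinement step.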