Monday, May 4, 2020

HASH searches.

# Search 150K lines(in a file) from 1 million lines (in a file)

BASH (N/A)
N/A
PYTHON (2 seconds)
# Sets are using hash. #!/bin/python with open('150k-lines') as file1, open('one-mil-lines') as file2: a_lines = set(file1.read().splitlines()) b_lines = set(file2.read().splitlines()) for line in a_lines: if not line in b_lines: print "NOT OK %s" %(line) else: print "OK %s" %(line)
GOLANG (2.95 seconds)
package main import ( "bufio" "fmt" "os" "log" ) func main() { readFile1, err := os.Open("150k-lines") readFile2, err := os.Open("one-mil-lines") // readFile1, err := os.Open("testfile1") // readFile2, err := os.Open("testfile2") if err != nil { log.Fatalf("failed to open file: %s", err) } fileScanner1 := bufio.NewScanner(readFile1) fileScanner2 := bufio.NewScanner(readFile2) fileScanner1.Split(bufio.ScanLines) fileScanner2.Split(bufio.ScanLines) var fileTextLines1, fileTextLines2 []string var key string var value int var ok bool hash1 := make(map[string]int) hash2 := make(map[string]int) linenum1 := 1 linenum2 := 1 for fileScanner1.Scan() { fileTextLines1 = append(fileTextLines1, fileScanner1.Text()) hash1[fileScanner1.Text()] = linenum1 linenum1++ } for fileScanner2.Scan() { fileTextLines2 = append(fileTextLines2, fileScanner2.Text()) hash2[fileScanner2.Text()] = linenum2 linenum2++ } readFile1.Close() readFile2.Close() for key, value = range hash1 { if value, ok = hash2[key]; ok { fmt.Println(key, value) } } }
C (3.45 seconds)
/* * You must download hashmap.h and hashmap.c from THIS LINK */ #include #include #include #include #include "hashmap.h" #define KEY_MAX_LENGTH (256) #define KEY_PREFIX ("somekey") typedef struct data_struct_s { char key_string[KEY_MAX_LENGTH]; int number; } data_struct_t; int main() { int error; map_t mymap; data_struct_t* value; mymap = hashmap_new(); char key_string[KEY_MAX_LENGTH]; FILE *file = fopen("150k-lines", "r"); FILE *fileOne = fopen("one-mil-lines", "r"); int KEY_COUNT = 0; char lineinF[256]; while(fgets(lineinF, sizeof lineinF, fileOne)!=NULL) { if(lineinF[0] != '\n') { value = malloc(sizeof(data_struct_t)); lineinF[strlen(lineinF) - 1] = '\0'; snprintf(value->key_string, KEY_MAX_LENGTH, "%s", lineinF); value->number = KEY_COUNT; error = hashmap_put(mymap, value->key_string, value); assert(error==MAP_OK); KEY_COUNT++; } } fclose(fileOne); //printf("%d\n",KEY_COUNT); //fclose(file); //hashmap_free(mymap); //return 0; while(fgets(lineinF, sizeof lineinF, file)!=NULL) { if(lineinF[0] != '\n') { lineinF[strlen(lineinF) - 1] = '\0'; snprintf(key_string, KEY_MAX_LENGTH, "%s", lineinF); error = hashmap_get(mymap, key_string, (void**)(&value)); if(error == 0) printf("OK - %s\n",lineinF); else printf("NOT OK - %s\n",lineinF); } } fclose(file); /* Destroy the map */ hashmap_free(mymap); return 1; }

Saturday, May 2, 2020

Data structure searches

# Search 150K lines(in a file) from 1 million lines (in a file)

BASH (N/A)
N/A
PYTHON (2 seconds)
#!/bin/python with open('150k-lines') as file1, open('one-mil-lines') as file2: a_lines = set(file1.read().splitlines()) b_lines = set(file2.read().splitlines()) for line in a_lines: if not line in b_lines: print "NOT OK %s" %(line) else: print "OK %s" %(line)
GOLANG (N/A)
No code yet.
C (600 milliseconds)
#include <stdio.h> #include <stdlib.h> #include <string.h> // Binary Search function int b_s(char *inArray[], int size, char *searchMe){ // Get start/mid/end of the array. int start = 0; int mid; int end = size - 1; // While start is less than end, set mid and search string. while(start <= end){ mid = (start + end)/2; if (strcmp(inArray[mid], searchMe) == 0){ // Found, return 0 return 0; } else if (strcmp(inArray[mid], searchMe) > 0){ // Not found and based on size reduce one from end to search. end = mid - 1; } else if (strcmp(inArray[mid], searchMe) < 0){ // Not found and based on size add one to start to search. start = mid + 1; } } // no result so return 1 return 1; } // Main. int main() { // Open files FILE *file = fopen("150k-lines", "r"); FILE *fileOne = fopen("one-mil-lines", "r"); // Set static array of 20 Mil. static char *sArray[20000000] = {0}; char line[256]; long int i = 0; // Store each line in one-mil-lines to an array. while(fgets(line, sizeof line, fileOne)!=NULL) { line[strlen(line) - 1] = '\0'; sArray[i]=strdup(line); i++; } // Set exit code to 0 int ext_code = 0; char ln[256]; // For each line in 150k-lines, perform binary search, // And print result. while(fgets(ln, sizeof ln, file) != NULL) { ln[strlen(ln) - 1] = '\0'; int f = b_s(sArray,i,ln); if(f == 0) { printf("OK - %d - %s\n",f,ln); } else { printf("NOT OK - %d, %s\n",f,ln); ++ext_code; } } // Exit. return ext_code; }

Linear Search

# Search 150K lines(in a file) from 1 million lines (in a file)

BASH (236 Minutes)
#!/bin/bash for LINE in $(cat 150k-lines); do grep -x -w $LINE one-mil-lines > /dev/null 2>&1 if [[ $? -eq 0 ]]; then echo "OK -- $LINE" else echo "NOT OK -- $LINE" fi done
PYTHON (32 Minutes)
#!/usr/bin/python file = open("150k-lines", "r") fileline = file.readlines() fileone = open("one-mil-lines", "r") milline = fileone.readlines() found = 0 for line in fileline: for ol in milline: if line == ol: found = 1 break if found == 1: print "OK %s" %(line) else: print "NOT OK %s" %(line) found = 0
GOLANG (Coder :- Paul Tso) (11 Minutes)
package main import ( "bufio" "fmt" "os" "log" ) func main() { readFile1, err := os.Open("150k-lines") readFile2, err := os.Open("one-mil-lines") if err != nil { log.Fatalf("failed to open file: %s", err) } fileScanner1 := bufio.NewScanner(readFile1) fileScanner2 := bufio.NewScanner(readFile2) fileScanner1.Split(bufio.ScanLines) fileScanner2.Split(bufio.ScanLines) var fileTextLines1, fileTextLines2 []string for fileScanner1.Scan() { fileTextLines1 = append(fileTextLines1, fileScanner1.Text()) } for fileScanner2.Scan() { fileTextLines2 = append(fileTextLines2, fileScanner2.Text()) } readFile1.Close() readFile2.Close() for _, eachline1 := range fileTextLines1 { for _, eachline2 := range fileTextLines2 { if (len(eachline1) == len(eachline2) && (eachline1 == eachline2)) { fmt.Println(eachline1) } } } }
C (5 Minutes)
#include <stdio.h> #include <stdlib.h> #include <string.h> #define BUFF_SIZE 1000 int main(int argc, char* argv[]) { char const* const fileName = argv[1]; char const* const fileNameone = argv[2]; FILE *file = fopen(fileName, "r"); FILE *fileone = fopen(fileNameone, "rb"); fseek(fileone, 0, SEEK_END); long fsize = ftell(fileone); fseek(fileone, 0, SEEK_SET); char* fileonecontent = (char*)malloc(fsize + 1); fread(fileonecontent, 1, fsize, fileone); fclose(fileone); char line[256]; while (fgets(line, sizeof(line), file)) { if ((strstr(fileonecontent, line)) != NULL) printf("OK - %s",line); else printf("NOT OK - %s",line); } /* fileonecontent[fsize] = 0; */ free(fileonecontent); }