feat: add initial generator that takes regexes and words and generates a list of answers

This commit is contained in:
Lucas Oskorep
2024-09-02 01:05:20 -04:00
parent 0f8af5e718
commit f9556804d7
11 changed files with 338717 additions and 84 deletions

11
checker/Cargo.toml Normal file
View File

@@ -0,0 +1,11 @@
[package]
name = "regex-checker"
version = "0.1.0"
edition = "2021"
[dependencies]
csv = "1.3.0"
fancy-regex = "0.13.0"
rayon = "1.10.0"
serde = "1.0.209"
serde_derive = "1.0.209"

File diff suppressed because it is too large Load Diff

4
checker/regex.csv Normal file
View File

@@ -0,0 +1,4 @@
^(?!.*(.).*\1)[abcdefghijklmnopqrstuvwxyz]+$
^(?!.*(.).*\1)[hijklmn]+$
^(?!.*(.).*\1)[opqrst]+$
^(?!.*(.).*\1)[uvwxyz]+$
1 ^(?!.*(.).*\1)[abcdefghijklmnopqrstuvwxyz]+$
2 ^(?!.*(.).*\1)[hijklmn]+$
3 ^(?!.*(.).*\1)[opqrst]+$
4 ^(?!.*(.).*\1)[uvwxyz]+$

53
checker/src/main.rs Normal file
View File

@@ -0,0 +1,53 @@
use std::string::String;
use std::error::Error;
use fancy_regex::Regex;
use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator};
use rayon::iter::ParallelIterator;
use crate::utils::{read_csv, read_lines, write_to_csv};
use crate::word::{Answer, Word};
mod word;
mod utils;
/// Collects every word matched by `regex` as an [`Answer`].
///
/// The pattern is compiled once, then tested against each entry of `words`
/// through rayon's parallel iterator. Every match yields an `Answer` whose
/// `question` is the pattern, whose `answer` is the matched word and whose
/// `count` is copied from the word.
///
/// # Panics
///
/// Panics if `regex` cannot be compiled. The panic message names the
/// offending pattern so a bad line in the input can be located.
fn map_to_answers(regex: String, words: &Vec<Word>) -> Vec<Answer> {
    let re = Regex::new(&regex)
        .unwrap_or_else(|err| panic!("invalid regex {regex:?}: {err}"));
    words
        .par_iter()
        // `is_match` is fallible; a matching error is deliberately treated as
        // "no match" so a single problematic word does not abort the run.
        .filter(|w| re.is_match(&w.word).unwrap_or(false))
        .map(|w| Answer {
            count: w.count,
            question: regex.clone(),
            answer: w.word.clone(),
        })
        .collect()
}
/// Builds the combined answer list for all `regexes`.
///
/// Each pattern is handed to `map_to_answers` together with `words`, and the
/// per-pattern results are concatenated into a single vector. Patterns that
/// match nothing contribute no entries.
///
/// # Errors
///
/// The body currently has no failing path and always returns `Ok`.
fn create_answers(regexes: Vec<String>, words: &Vec<Word>) -> Result<Vec<Answer>, Box<dyn Error>> {
    Ok(regexes
        .par_iter()
        .flat_map(|pattern| map_to_answers(pattern.clone(), words))
        .collect())
}
/// Entry point: reads the patterns and the word list, matches them, and
/// writes the resulting answers to `processed_answers.csv`.
///
/// # Errors
///
/// Propagates any error from reading either input file, from building the
/// answers, or from writing the output file.
fn main() -> Result<(), Box<dyn Error>> {
    println!("Hello, world!");

    // One pattern per line of the regex file.
    let regexes = read_lines("./regex.csv")?;

    // Keep only the first 10000 rows of the word list, then drop every word
    // whose byte length is 2 or less.
    let mut words = read_csv("./word_freq.csv")?;
    words.truncate(10000);
    let processed_words: Vec<Word> = words
        .into_par_iter()
        .filter(|entry| entry.word.len() > 2)
        .collect();

    let answers = create_answers(regexes, &processed_words)?;
    println!("Answers: {:?}", answers);

    write_to_csv(answers, "processed_answers.csv")?;
    Ok(())
}

47
checker/src/utils.rs Normal file
View File

@@ -0,0 +1,47 @@
use std::error::Error;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use csv::{QuoteStyle, ReaderBuilder, WriterBuilder};
use crate::word::{Answer, Word};
/// Loads every row of the CSV file at `file_path` as a [`Word`].
///
/// The reader is configured with `has_headers(true)`, so the first row is
/// treated as a header rather than data.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, or on the first row that
/// fails to deserialize into a `Word`.
pub fn read_csv(file_path: &str) -> Result<Vec<Word>, Box<dyn Error>> {
    let mut reader = ReaderBuilder::new()
        .has_headers(true)
        .from_reader(File::open(file_path)?);
    // Collecting into a `Result` stops at the first row that fails.
    let words = reader.deserialize().collect::<Result<Vec<Word>, _>>()?;
    Ok(words)
}
/// Serializes `records` to a CSV file at `path`, quoting every field.
///
/// The file is created (or truncated if it already exists) and each record is
/// written with `serialize`, after which the writer is flushed explicitly.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if any record fails to
/// serialize, or if the final flush fails.
pub fn write_to_csv(records: Vec<Answer>, path: &str) -> Result<(), Box<dyn Error>> {
    let mut writer = WriterBuilder::new()
        .quote_style(QuoteStyle::Always)
        .from_writer(File::create(path)?);

    // Stop at the first record that cannot be written.
    records
        .into_iter()
        .try_for_each(|answer| writer.serialize(answer))?;

    // Flush explicitly so a write failure is reported instead of being lost
    // when the writer is dropped.
    writer.flush()?;
    Ok(())
}
/// Reads the file at `filename` and returns its lines as owned strings.
///
/// Line terminators are not included in the returned strings.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or if any
/// line cannot be read.
pub fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
where
    P: AsRef<Path>,
{
    io::BufReader::new(File::open(filename)?).lines().collect()
}

14
checker/src/word.rs Normal file
View File

@@ -0,0 +1,14 @@
use serde_derive::{Deserialize, Serialize};
/// A word paired with a count, deserialized from one CSV row by `read_csv`.
#[derive(Debug, Deserialize)]
pub struct Word {
/// The word text that the patterns are matched against.
pub(crate) word: String,
/// Count associated with the word; presumably its frequency, since `main`
/// loads these rows from `word_freq.csv` (confirm against the data file).
pub(crate) count: i64,
}
/// One pattern/word match, produced by `map_to_answers` and serialized to CSV
/// by `write_to_csv`.
#[derive(Debug, Deserialize, Serialize)]
pub struct Answer {
/// The regex pattern that matched.
pub(crate) question: String,
/// The word that the pattern matched.
pub(crate) answer: String,
/// The count copied from the matched `Word`.
pub(crate) count: i64,
}

333334
checker/word_freq.csv Normal file

File diff suppressed because it is too large Load Diff