feat: adding in initial generator that takes regexes, words, and generates a list of answers

This commit is contained in:
Lucas Oskorep 2024-09-02 01:05:20 -04:00
parent 0f8af5e718
commit f9556804d7
11 changed files with 338717 additions and 84 deletions

View File

@ -1,3 +1,8 @@
# regexle
# Regexle
Regex-focused crossword puzzle generator
## Data flow
![data-flow.png](docs/data-flow.png)
[(raw-excalidraw-file)](docs/data-flow.excalidraw)
Regex-only crossword generation and site

11
checker/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "regex-checker"
version = "0.1.0"
edition = "2021"
[dependencies]
csv = "1.3.0"
fancy-regex = "0.13.0"
rayon = "1.10.0"
serde = "1.0.209"
serde_derive = "1.0.209"

File diff suppressed because it is too large Load Diff

4
checker/regex.csv Normal file
View File

@ -0,0 +1,4 @@
^(?!.*(.).*\1)[abcdefghijklmnopqrstuvwxyz]+$
^(?!.*(.).*\1)[hijklmn]+$
^(?!.*(.).*\1)[opqrst]+$
^(?!.*(.).*\1)[uvwxyz]+$
1 ^(?!.*(.).*\1)[abcdefghijklmnopqrstuvwxyz]+$
2 ^(?!.*(.).*\1)[hijklmn]+$
3 ^(?!.*(.).*\1)[opqrst]+$
4 ^(?!.*(.).*\1)[uvwxyz]+$

53
checker/src/main.rs Normal file
View File

@ -0,0 +1,53 @@
use std::string::String;
use std::error::Error;
use fancy_regex::Regex;
use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator};
use rayon::iter::ParallelIterator;
use crate::utils::{read_csv, read_lines, write_to_csv};
use crate::word::{Answer, Word};
mod word;
mod utils;
/// Builds one [`Answer`] for every word in `words` that the pattern `regex` matches.
///
/// Each answer carries the pattern text as its `question`, the matching word as its
/// `answer`, and the word's `count`. Words are tested in parallel via rayon.
///
/// A word whose match attempt returns an error is treated as a non-match.
///
/// # Panics
/// Panics if `regex` cannot be compiled.
fn map_to_answers(regex: String, words: &Vec<Word>) -> Vec<Answer> {
    // Compile once; the compiled pattern is shared by every worker.
    let pattern = Regex::new(regex.as_str()).unwrap();
    words
        .par_iter()
        .filter_map(|entry| {
            let matched = pattern.is_match(entry.word.as_str()).unwrap_or(false);
            matched.then(|| Answer {
                count: entry.count,
                question: regex.clone(),
                answer: entry.word.clone(),
            })
        })
        .collect()
}
fn create_answers(regexes: Vec<String>, words: &Vec<Word>) -> Result<Vec<Answer>, Box<dyn Error>> {
let answers = regexes.par_iter()
.map(|regex| { map_to_answers(regex.to_string(), words) })
.filter(|vec| vec.len() > 0)
.flatten()
.collect();
Ok(answers)
}
/// Entry point: loads the patterns and the word list, matches them, prints the
/// answers, and writes them to `processed_answers.csv`.
///
/// Only the first 10000 word records are considered, and of those only words whose
/// byte length is greater than 2.
fn main() -> Result<(), Box<dyn Error>> {
    println!("Hello, world!");

    // One pattern per line.
    let regexes = read_lines("./regex.csv")?;

    let mut words = read_csv("./word_freq.csv")?;
    words.truncate(10000);
    let processed_words: Vec<Word> = words
        .into_par_iter()
        .filter(|candidate| candidate.word.len() > 2)
        .collect();

    let answers = create_answers(regexes, &processed_words)?;
    println!("Answers: {:?}", answers);
    write_to_csv(answers, "processed_answers.csv")
}

47
checker/src/utils.rs Normal file
View File

@ -0,0 +1,47 @@
use std::error::Error;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::Path;
use csv::{QuoteStyle, ReaderBuilder, WriterBuilder};
use crate::word::{Answer, Word};
/// Reads the CSV file at `file_path` (first row treated as a header) and deserializes
/// every remaining row into a [`Word`].
///
/// # Errors
/// Returns an error if the file cannot be opened or if any row fails to deserialize;
/// reading stops at the first bad row.
pub fn read_csv(file_path: &str) -> Result<Vec<Word>, Box<dyn Error>> {
    let source = File::open(file_path)?;
    let mut reader = ReaderBuilder::new().has_headers(true).from_reader(source);
    // Collecting into a `Result` short-circuits on the first failing row.
    let words = reader
        .deserialize()
        .collect::<Result<Vec<Word>, _>>()?;
    Ok(words)
}
/// Serializes `records` as CSV rows into the file at `path`, quoting every field.
///
/// The file is created, or truncated if it already exists.
///
/// # Errors
/// Returns an error if the file cannot be created, a record cannot be serialized,
/// or the final flush fails.
pub fn write_to_csv(records: Vec<Answer>, path: &str) -> Result<(), Box<dyn Error>> {
    let target = File::create(path)?;
    let mut writer = WriterBuilder::new()
        .quote_style(QuoteStyle::Always)
        .from_writer(target);

    // Stop at the first record that fails to serialize.
    records
        .into_iter()
        .try_for_each(|record| writer.serialize(record))?;

    // Flush explicitly so that write errors are reported rather than lost on drop.
    writer.flush()?;
    Ok(())
}
/// Reads the whole file at `filename` and returns its lines in order, without line
/// terminators.
///
/// # Errors
/// Returns the underlying I/O error if the file cannot be opened or a line cannot be
/// read (for example, because it is not valid UTF-8).
pub fn read_lines<P: AsRef<Path>>(filename: P) -> io::Result<Vec<String>> {
    let reader = io::BufReader::new(File::open(filename)?);
    let mut lines = Vec::new();
    for line in reader.lines() {
        lines.push(line?);
    }
    Ok(lines)
}

14
checker/src/word.rs Normal file
View File

@ -0,0 +1,14 @@
use serde_derive::{Deserialize, Serialize};
/// One row of the word list, deserialized from CSV by `read_csv`.
#[derive(Debug, Deserialize)]
pub struct Word {
/// The word text that regex patterns are matched against.
pub(crate) word: String,
/// Count recorded for the word in the input file
/// (presumably its frequency, given the `word_freq.csv` file name — TODO confirm).
pub(crate) count: i64,
}
/// A regex pattern paired with one word it matches; serialized to CSV by `write_to_csv`.
#[derive(Debug, Deserialize, Serialize)]
pub struct Answer {
/// The regex pattern text that produced this answer.
pub(crate) question: String,
/// The word that matched the pattern.
pub(crate) answer: String,
/// The `count` value copied from the matching `Word`.
pub(crate) count: i64,
}

333334
checker/word_freq.csv Normal file

File diff suppressed because it is too large Load Diff

1194
docs/data-flow.excalidraw Normal file

File diff suppressed because it is too large Load Diff

BIN
docs/data-flow.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 183 KiB

View File

@ -1,82 +0,0 @@
{
"type": "excalidraw",
"version": 2,
"source": "https://excalidraw-jetbrains-plugin",
"elements": [
{
"id": "oC8LgfrGVs7Ww4Lpe1PEj",
"type": "rectangle",
"x": 290,
"y": 125,
"width": 262,
"height": 167,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"roundness": {
"type": 3
},
"seed": 31779651,
"version": 19,
"versionNonce": 206623949,
"isDeleted": false,
"boundElements": [
{
"type": "text",
"id": "-e5FUpE-nI0s-KaM54PWT"
}
],
"updated": 1725243919617,
"link": null,
"locked": false
},
{
"id": "-e5FUpE-nI0s-KaM54PWT",
"type": "text",
"x": 336.73009490966797,
"y": 196,
"width": 168.53981018066406,
"height": 25,
"angle": 0,
"strokeColor": "#1e1e1e",
"backgroundColor": "transparent",
"fillStyle": "solid",
"strokeWidth": 2,
"strokeStyle": "solid",
"roughness": 1,
"opacity": 100,
"groupIds": [],
"frameId": null,
"roundness": null,
"seed": 1603354339,
"version": 16,
"versionNonce": 1210522317,
"isDeleted": false,
"boundElements": null,
"updated": 1725243923457,
"link": null,
"locked": false,
"text": "Test Image Here",
"fontSize": 20,
"fontFamily": 1,
"textAlign": "center",
"verticalAlign": "middle",
"baseline": 18,
"containerId": "oC8LgfrGVs7Ww4Lpe1PEj",
"originalText": "Test Image Here",
"lineHeight": 1.25
}
],
"appState": {
"gridSize": null,
"viewBackgroundColor": "#ffffff"
},
"files": {}
}