feat: add initial generator that takes regexes and words and generates a list of answers

2024-09-02 01:05:20 -04:00
parent 0f8af5e718
commit f9556804d7
11 changed files with 338717 additions and 84 deletions
--- a/README.md
+++ b/README.md
@@ -1,3 +1,8 @@
-# regexle
+# Regexle
+
+Regex-focused crossword puzzle generator
+
+## Data flow
+![data-flow.png](docs/data-flow.png)
+[(raw-excalidraw-file)](docs/data-flow.excalidraw)

-Regex-only crossword generation and site
--- a/checker/Cargo.toml
+++ b/checker/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "regex-checker"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+csv = "1.3.0"
+fancy-regex = "0.13.0"
+rayon = "1.10.0"
+serde = "1.0.209"
+serde_derive = "1.0.209"
--- a/checker/processed_answers.csv
+++ b/checker/processed_answers.csv
--- a/checker/regex.csv
+++ b/checker/regex.csv
@@ -0,0 +1,4 @@
+^(?!.*(.).*\1)[abcdefghijklmnopqrstuvwxyz]+$
+^(?!.*(.).*\1)[hijklmn]+$
+^(?!.*(.).*\1)[opqrst]+$
+^(?!.*(.).*\1)[uvwxyz]+$
--- a/checker/src/main.rs
+++ b/checker/src/main.rs
@@ -0,0 +1,53 @@
+use std::string::String;
+use std::error::Error;
+use fancy_regex::Regex;
+use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator};
+use rayon::iter::ParallelIterator;
+use crate::utils::{read_csv, read_lines, write_to_csv};
+use crate::word::{Answer, Word};
+
+mod word;
+mod utils;
+
+
+/// Returns one [`Answer`] for every word in `words` that `regex` matches.
+///
+/// `regex` is compiled once and then tested against each word in parallel.
+/// Every resulting [`Answer`] carries the pattern text as its `question`, the
+/// matched word as its `answer`, and that word's `count`.
+///
+/// A word whose match attempt returns an error is treated as a non-match.
+///
+/// # Panics
+///
+/// Panics if `regex` is not a valid pattern; the panic message names the
+/// offending pattern so a bad line in the pattern file can be located.
+fn map_to_answers(regex: String, words: &[Word]) -> Vec<Answer> {
+    let re = Regex::new(&regex)
+        .unwrap_or_else(|err| panic!("invalid regex {:?}: {}", regex, err));
+
+    words
+        .par_iter()
+        .filter(|w| re.is_match(&w.word).unwrap_or(false))
+        .map(|w| Answer {
+            count: w.count,
+            question: regex.clone(),
+            answer: w.word.clone(),
+        })
+        .collect()
+}
+
+/// Runs every pattern in `regexes` against `words` and gathers all matches.
+///
+/// The patterns are processed in parallel, each through `map_to_answers`, and
+/// the per-pattern results are flattened into a single list. A pattern that
+/// matches nothing contributes no entries.
+///
+/// The body never produces an `Err`; the `Result` return type is kept so
+/// callers that use `?` keep compiling.
+fn create_answers(regexes: Vec<String>, words: &Vec<Word>) -> Result<Vec<Answer>, Box<dyn Error>> {
+    // Flattening an empty Vec yields nothing, so empty per-pattern results
+    // need no separate filtering pass.
+    let answers: Vec<Answer> = regexes
+        .par_iter()
+        .map(|regex| map_to_answers(regex.clone(), words))
+        .flatten()
+        .collect();
+    Ok(answers)
+}
+
+
+/// Entry point: loads the patterns and the word list, matches them, prints
+/// the answers, and writes them to `processed_answers.csv`.
+///
+/// Only the first 10000 rows of the word list are considered, and of those
+/// only words whose byte length exceeds 2 are matched against the patterns.
+fn main() -> Result<(), Box<dyn Error>> {
+    println!("Hello, world!");
+
+    // One pattern per line of the file.
+    let regexes = read_lines("./regex.csv")?;
+
+    // Keep the leading 10000 rows, then drop words of 2 bytes or fewer.
+    let mut all_words = read_csv("./word_freq.csv")?;
+    all_words.truncate(10000);
+    let candidates: Vec<Word> = all_words
+        .into_par_iter()
+        .filter(|entry| entry.word.len() > 2)
+        .collect();
+
+    let answers = create_answers(regexes, &candidates)?;
+    println!("Answers: {:?}", answers);
+
+    write_to_csv(answers, "processed_answers.csv")?;
+    Ok(())
+}
--- a/checker/src/utils.rs
+++ b/checker/src/utils.rs
@@ -0,0 +1,47 @@
+use std::error::Error;
+use std::fs::File;
+use std::io::{self, BufRead};
+use std::path::Path;
+use csv::{QuoteStyle, ReaderBuilder, WriterBuilder};
+use crate::word::{Answer, Word};
+
+/// Loads every row of the CSV file at `file_path` as a [`Word`].
+///
+/// The first row is treated as a header row. Returns an error if the file
+/// cannot be opened or if any row fails to deserialize; in the latter case
+/// no partial result is returned.
+pub fn read_csv(file_path: &str) -> Result<Vec<Word>, Box<dyn Error>> {
+    let mut reader = ReaderBuilder::new()
+        .has_headers(true)
+        .from_reader(File::open(file_path)?);
+
+    // Collecting into a `Result` stops at the first row that fails.
+    let words = reader
+        .deserialize::<Word>()
+        .collect::<Result<Vec<Word>, _>>()?;
+    Ok(words)
+}
+
+
+/// Serializes `records` to a CSV file at `path` using `QuoteStyle::Always`.
+///
+/// The file is created via `File::create`. Returns an error if the file
+/// cannot be created, a record cannot be serialized, or the final flush fails.
+pub fn write_to_csv(records: Vec<Answer>, path: &str) -> Result<(), Box<dyn Error>> {
+    let mut writer = WriterBuilder::new()
+        .quote_style(QuoteStyle::Always)
+        .from_writer(File::create(path)?);
+
+    // Stop at the first record that fails to serialize.
+    records
+        .into_iter()
+        .try_for_each(|answer| writer.serialize(answer))?;
+
+    // Push any buffered rows out to the file before returning.
+    writer.flush()?;
+    Ok(())
+}
+
+/// Reads the file at `filename` and returns its lines as owned strings.
+///
+/// Returns the first I/O error hit while opening or reading the file.
+pub fn read_lines<P>(filename: P) -> io::Result<Vec<String>>
+where
+    P: AsRef<Path>,
+{
+    let reader = io::BufReader::new(File::open(filename)?);
+
+    let mut lines = Vec::new();
+    for line in reader.lines() {
+        lines.push(line?);
+    }
+    Ok(lines)
+}
--- a/checker/src/word.rs
+++ b/checker/src/word.rs
@@ -0,0 +1,14 @@
+use serde_derive::{Deserialize, Serialize};
+
+/// One deserialized row of the word list: a word and its associated count.
+#[derive(Deserialize, Debug)]
+pub struct Word {
+    /// The word text.
+    pub(crate) word: String,
+    /// The count stored alongside the word.
+    /// NOTE(review): presumably a corpus frequency — confirm against the data file.
+    pub(crate) count: i64,
+}
+
+/// A pattern paired with one word and that word's count; serializable as a
+/// CSV row.
+#[derive(Deserialize, Serialize, Debug)]
+pub struct Answer {
+    /// The regex pattern text.
+    pub(crate) question: String,
+    /// The word associated with `question`.
+    pub(crate) answer: String,
+    /// The count associated with the word.
+    pub(crate) count: i64,
+}
--- a/checker/word_freq.csv
+++ b/checker/word_freq.csv
--- a/docs/data-flow.excalidraw
+++ b/docs/data-flow.excalidraw
--- a/docs/data-flow.png
+++ b/docs/data-flow.png
--- a/test.excalidraw
+++ b/test.excalidraw
@@ -1,82 +0,0 @@
-{
-  "type": "excalidraw",
-  "version": 2,
-  "source": "https://excalidraw-jetbrains-plugin",
-  "elements": [
-    {
-      "id": "oC8LgfrGVs7Ww4Lpe1PEj",
-      "type": "rectangle",
-      "x": 290,
-      "y": 125,
-      "width": 262,
-      "height": 167,
-      "angle": 0,
-      "strokeColor": "#1e1e1e",
-      "backgroundColor": "transparent",
-      "fillStyle": "solid",
-      "strokeWidth": 2,
-      "strokeStyle": "solid",
-      "roughness": 1,
-      "opacity": 100,
-      "groupIds": [],
-      "frameId": null,
-      "roundness": {
-        "type": 3
-      },
-      "seed": 31779651,
-      "version": 19,
-      "versionNonce": 206623949,
-      "isDeleted": false,
-      "boundElements": [
-        {
-          "type": "text",
-          "id": "-e5FUpE-nI0s-KaM54PWT"
-        }
-      ],
-      "updated": 1725243919617,
-      "link": null,
-      "locked": false
-    },
-    {
-      "id": "-e5FUpE-nI0s-KaM54PWT",
-      "type": "text",
-      "x": 336.73009490966797,
-      "y": 196,
-      "width": 168.53981018066406,
-      "height": 25,
-      "angle": 0,
-      "strokeColor": "#1e1e1e",
-      "backgroundColor": "transparent",
-      "fillStyle": "solid",
-      "strokeWidth": 2,
-      "strokeStyle": "solid",
-      "roughness": 1,
-      "opacity": 100,
-      "groupIds": [],
-      "frameId": null,
-      "roundness": null,
-      "seed": 1603354339,
-      "version": 16,
-      "versionNonce": 1210522317,
-      "isDeleted": false,
-      "boundElements": null,
-      "updated": 1725243923457,
-      "link": null,
-      "locked": false,
-      "text": "Test Image Here",
-      "fontSize": 20,
-      "fontFamily": 1,
-      "textAlign": "center",
-      "verticalAlign": "middle",
-      "baseline": 18,
-      "containerId": "oC8LgfrGVs7Ww4Lpe1PEj",
-      "originalText": "Test Image Here",
-      "lineHeight": 1.25
-    }
-  ],
-  "appState": {
-    "gridSize": null,
-    "viewBackgroundColor": "#ffffff"
-  },
-  "files": {}
-}