Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save norandom/0fbde49bff18e356c7abdb05787c60cb to your computer and use it in GitHub Desktop.

Select an option

Save norandom/0fbde49bff18e356c7abdb05787c60cb to your computer and use it in GitHub Desktop.
combine_markdown.sh
#!/bin/bash
# Combine all markdown files in each folder into one file per folder.
#
# Usage: ./combine_markdown.sh [base_directory]
#   base_directory  Tree to scan for *.md files (default: crawled_docs).
#
# For every directory under base_directory that directly contains at least one
# *.md file, one combined file is written to combined_docs/ in the current
# working directory. Its name is "_combined_<path>.md", where <path> is the
# directory's path relative to base_directory with every "/" replaced by "_"
# (for base_directory itself, the base path is used). Files whose names start
# with "_combined_" are never used as input.
#
# Exit status: 1 if base_directory is missing or the output directory cannot be
# created.
#
# Limitation: paths are processed line by line, so directory or file names
# containing a newline are not supported.

BASE_DIR="${1:-crawled_docs}"
OUTPUT_DIR="combined_docs"

if [[ ! -d "$BASE_DIR" ]]; then
  echo "Error: Directory $BASE_DIR does not exist" >&2
  exit 1
fi

# Create output directory if it doesn't exist
mkdir -p -- "$OUTPUT_DIR" || exit 1

echo "Combining markdown files in $BASE_DIR..."
echo "Output files will be saved to: $OUTPUT_DIR"

# Normalise the base path: find never emits a doubled slash, so a trailing "/"
# on the argument would stop the prefix strip below from ever matching.
scan_root="$BASE_DIR"
while [[ "$scan_root" == */ && "$scan_root" != "/" ]]; do
  scan_root="${scan_root%/}"
done
# "${scan_root%/}/" yields "/" for the filesystem root and "<dir>/" otherwise.
strip_prefix="${scan_root%/}/"

# Find all directories containing .md files. Previously generated
# "_combined_*" files are ignored so that an output directory located inside
# the scanned tree is not treated as a source folder.
find "$scan_root" -type f -name "*.md" -not -name "_combined_*" \
    -exec dirname {} \; \
  | sort -u \
  | while IFS= read -r dir; do
  echo "Processing directory: $dir"

  # Create path-based filename to avoid conflicts between equally named
  # folders. The prefix is quoted so glob characters in it are matched
  # literally rather than as a pattern.
  relative_path="${dir#"$strip_prefix"}"
  safe_path="${relative_path//\//_}"
  output_file="$OUTPUT_DIR/_combined_${safe_path}.md"

  # Create header for combined file (truncates any previous output).
  {
    printf '# Combined Documentation for %s\n\n' "$relative_path"
    printf 'Generated on: %s\n' "$(date)"
    printf 'Source directory: %s\n\n' "$dir"
    printf '%s\n\n' "---"
  } > "$output_file"

  # Find all .md entries in the current directory (not subdirectories).
  # Directories that happen to be named *.md are skipped because cat cannot
  # read them.
  find "$dir" -maxdepth 1 -name "*.md" -not -name "_combined_*" -not -type d \
    | sort \
    | while IFS= read -r file; do
    filename="${file##*/}"
    printf ' Adding: %s\n' "$filename"

    # Section header, file content, then a separator.
    {
      printf '## %s\n\n' "$filename"
      cat -- "$file"
      printf '\n%s\n\n' "---"
    } >> "$output_file"
  done

  echo " Created: $output_file"
done

echo "Done! Combined files created in $OUTPUT_DIR/ with '_combined_' prefix."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment